Package com.alimama.quanjingmonitor.kmeans

Source Code of com.alimama.quanjingmonitor.kmeans.KMeansGroupReducer$Clusterlist

package com.alimama.quanjingmonitor.kmeans;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.PriorityQueue;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskID;


public class KMeansGroupReducer extends Reducer<Text, Cluster, Text, Cluster> {

  public static class Clusterlist{
    ArrayList<Cluster> list=new ArrayList<Cluster>();
    @Override
    public String toString() {
      return "Clusterlist [ key=" + key + ",list=" + list.toString() + "]";
    }
    Text key;
    public int count()
      {
        int rtn=0;
        for(Cluster cl:this.list)
        {
          rtn+=cl.getCenter().getNumPoints();
        }
        return rtn;
      }
  }
    PriorityQueue<Clusterlist> res;
    static Comparator<Clusterlist>  cmp=new Comparator<Clusterlist>() {
       
      @Override
      public int compare(Clusterlist o1, Clusterlist o2) {
        int t1=o1.count();
        int t2=o2.count();
        return t1 == t2 ? 0 : t1 < t2 ? 1 : -1;


      }
    };
   
    int Index=0;
  @Override
  protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();
    this.res= new PriorityQueue<Clusterlist>(limit,Collections.reverseOrder(cmp));

  TaskID taskId = context.getTaskAttemptID().getTaskID();
  this.Index = taskId.getId()*10000;
  }
 
  int limit=256;
 
  int outputrecord=32;
 
  protected void cleanup(Context context) throws IOException,
      InterruptedException {

   
    ArrayList<Clusterlist> clusters_list=new ArrayList<Clusterlist>();
    for(Clusterlist list:this.res)
    {
      clusters_list.add(list);
    }
   
    System.out.println(clusters_list.size()+"##################");
    int index=0;
    int writecount=0;

    while(true)
    {
      boolean iswrite=false;
      for(Clusterlist list:clusters_list)
      {
        if(list.list.size()>index)
        {
          Cluster tmp=list.list.get(index);
          if(tmp.getCenter().getNumPoints()<10)
          {
            continue;
          }
          Cluster w=new Cluster(tmp.getCenter(),Index+writecount);
          System.out.println(list.key+"\t"+w.toString());
          context.write(list.key, w);
          if(writecount++>outputrecord)
          {
            return ;
          }
          iswrite=true;
        }
      }
      index++;
      if(!iswrite)
      {
        break;
      }
    }
   
   
   
   
   
 


  }
 

  @Override
  protected void reduce(Text key, Iterable<Cluster> values, Context context)
    throws IOException, InterruptedException {

    Clusterlist list=new Clusterlist();
    list.key=new Text(key.toString());
   
    int eachMaxSize=3;

    int last_size=0;
    for (Cluster value : values) {
     
      int listsize=list.list.size();
      if(listsize>eachMaxSize)
      {
        int index=(int) (Math.random()*100000)%eachMaxSize;
        list.list.get(index).getCenter().merger(value.getCenter());
      }else if(list.list.size()<=0||last_size>40)
      {
        last_size=0;
        list.list.add(new Cluster(value));
      }else{
        list.list.get(listsize-1).getCenter().merger(value.getCenter());
          last_size+=value.getCenter().getNumPoints();
      }
     
      context.progress();
    }
   
   
   System.out.println(">>>>>"+list.toString());
    if (this.res.size() < limit) {
      this.res.add(list);
  } else if (cmp.compare(res.peek(), list) > 0) {
    this.res.add(list);
    this.res.poll();
  }

  
   
   
   
  }
 
}
TOP

Related Classes of com.alimama.quanjingmonitor.kmeans.KMeansGroupReducer$Clusterlist

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.