Source Code of de.jungblut.clustering.mapreduce.KMeansMapper

package de.jungblut.clustering.mapreduce;


import java.io.IOException;
import java.util.ArrayList;
import java.util.List;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapreduce.Mapper;


import de.jungblut.clustering.model.ClusterCenter;
import de.jungblut.clustering.model.VectorWritable;
import de.jungblut.distance.DistanceMeasurer;
import de.jungblut.distance.ManhattanDistance;


// first iteration, k-random centers, in every follow-up iteration we have new calculated centers
@SuppressWarnings("deprecation")
public class KMeansMapper extends
    Mapper<ClusterCenter, VectorWritable, ClusterCenter, VectorWritable> {


  private final List<ClusterCenter> centers = new ArrayList<>();
  private DistanceMeasurer distanceMeasurer;


  @Override
  protected void setup(Context context) throws IOException,
      InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();
    Path centroids = new Path(conf.get("centroid.path"));
    FileSystem fs = FileSystem.get(conf);


    try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, centroids,
        conf)) {
      ClusterCenter key = new ClusterCenter();
      IntWritable value = new IntWritable();
      int index = 0;
      while (reader.next(key, value)) {
        ClusterCenter clusterCenter = new ClusterCenter(key);
        clusterCenter.setClusterIndex(index++);
        centers.add(clusterCenter);
      }
    }
    distanceMeasurer = new ManhattanDistance();
  }


  @Override
  protected void map(ClusterCenter key, VectorWritable value, Context context)
      throws IOException, InterruptedException {


    ClusterCenter nearest = null;
    double nearestDistance = Double.MAX_VALUE;
    for (ClusterCenter c : centers) {
      double dist = distanceMeasurer.measureDistance(c.getCenterVector(),
          value.getVector());
      if (nearest == null) {
        nearest = c;
        nearestDistance = dist;
      } else {
        if (nearestDistance > dist) {
          nearest = c;
          nearestDistance = dist;
        }
      }
    }
    context.write(nearest, value);
  }


}
Source Code of de.jungblut.clustering.mapreduce.KMeansMapper

Related Classes of de.jungblut.clustering.mapreduce.KMeansMapper