Package com.chine.kmeans.mapreduce.canopydata

Source Code of com.chine.kmeans.mapreduce.canopydata.CanopyDataMapper

package com.chine.kmeans.mapreduce.canopydata;

import java.io.IOException;
import java.util.List;
import java.util.ArrayList;

import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.Configuration;

import com.chine.kmeans.models.Movie;
import com.chine.kmeans.mapreduce.ConfiguredKmeans;

public class CanopyDataMapper extends Mapper<Text, Text, Text, Text> {
 
  private boolean hasLoadCanopyCenters = false;
  private List<Movie> canopyMovieCenters = new ArrayList<Movie>();
 
  @Override
  public void setup(Context context)
    throws IOException, InterruptedException{
    if(hasLoadCanopyCenters)
      return;
    else
      hasLoadCanopyCenters = true;
   
    Configuration conf = context.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    String folderPath = conf.get(ConfiguredKmeans.CANOPY_CENTERS_OUTPUT_KEY);
    String filePath = folderPath.endsWith("/") ? folderPath+"part-r-00000"
        : folderPath+"/part-r-00000";
    Path path = new Path(filePath);
   
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    Text key = new Text();
    Text value = new Text();
    try {
      while(reader.next(key, value)) {
        Movie movie = new Movie(Integer.valueOf(key.toString()), value.toString());
       
        this.canopyMovieCenters.add(movie);
      }
    }
    finally {
      reader.close();
    }
  }
 
  @Override
  public void map(Text key, Text value, Context context)
    throws IOException, InterruptedException {
   
    int movieId = Integer.valueOf(key.toString());
    String data = value.toString();
    Movie currentMovie = new Movie(movieId, data);
   
    boolean emit = false;
    StringBuilder sb = new StringBuilder();
    for(Movie canopyMovie: canopyMovieCenters) {
      if(currentMovie.getMatchCount(canopyMovie) >= ConfiguredKmeans.T2) {
        if(!emit) emit = true;
        if(sb.length() > 0)
          sb.append(":");
        sb.append(currentMovie.getMovieId());
      }
    }
   
    if(emit) {
      sb.append(":");
      sb.append(movieId);
      sb.append(":");
      sb.append(data);
     
      context.write(key, new Text(sb.toString()));
    }
   
  }
}
TOP

Related Classes of com.chine.kmeans.mapreduce.canopydata.CanopyDataMapper

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.