Package edu.umd.cloud9.example.hits

Source Code of edu.umd.cloud9.example.hits.InlinkCounter$AFormatReducer

/**
*
*/
package edu.umd.cloud9.example.hits;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;

import edu.umd.cloud9.io.array.ArrayListOfIntsWritable;
import edu.umd.cloud9.util.map.HMapIV;
import edu.umd.cloud9.util.map.MapIV;

/**
* @author michaelmcgrath
*
*/
public class InlinkCounter extends Configured implements Tool {

  private static final Logger sLogger = Logger.getLogger(InlinkCounter.class);

  /**
   * @param args
   */
  private static class AFormatMapper extends MapReduceBase implements
      Mapper<LongWritable, Text, IntWritable, LongWritable> {
    private LongWritable valOut = new LongWritable(1);
    private IntWritable keyOut = new IntWritable();

    public void map(LongWritable key, Text value,
        OutputCollector<IntWritable, LongWritable> output,
        Reporter reporter) throws IOException {

      ArrayListOfIntsWritable links = new ArrayListOfIntsWritable();
      String line = ((Text) value).toString();
      StringTokenizer itr = new StringTokenizer(line);
      if (itr.hasMoreTokens()) {
        itr.nextToken();
      }
      while (itr.hasMoreTokens()) {
        keyOut.set(Integer.parseInt(itr.nextToken()));
        output.collect(keyOut, valOut);
      }
      // emit mentioned mentioner -> mentioned (mentioners) in links
      // emit mentioner mentioned -> mentioner (mentions) outlinks
      // emit mentioned a
      // emit mentioner 1

    }

  }

  private static class AFormatMapperIMC extends MapReduceBase implements
      Mapper<LongWritable, Text, IntWritable, HITSNode> {
    private HITSNode valOut = new HITSNode();
    private IntWritable keyOut = new IntWritable();
    private static OutputCollector<IntWritable, HITSNode> mOutput;
    private static HMapIV<ArrayListOfIntsWritable> adjLists = new HMapIV<ArrayListOfIntsWritable>();

    public void configure(JobConf jc) {
      adjLists.clear();
    }

    public void map(LongWritable key, Text value,
        OutputCollector<IntWritable, HITSNode> output, Reporter reporter)
        throws IOException {

      mOutput = output;

      ArrayListOfIntsWritable links = new ArrayListOfIntsWritable();
      String line = ((Text) value).toString();
      StringTokenizer itr = new StringTokenizer(line);
      if (itr.hasMoreTokens()) {
        links.add(Integer.parseInt(itr.nextToken()));
        // add to HMap here
      }
      while (itr.hasMoreTokens()) {
        int curr = Integer.parseInt(itr.nextToken());
        if (adjLists.containsKey(curr)) {
          ArrayListOfIntsWritable list = adjLists.get(curr);
          list.trimToSize();
          links.trimToSize();
          //FIXME
          //list.addAll(links.getArray());
          adjLists.put(curr, list);
        } else {
          links.trimToSize();
          adjLists.put(curr, links);
        }
      }
    }

    public void close() throws IOException {
      for (MapIV.Entry<ArrayListOfIntsWritable> e : adjLists.entrySet()) {
        keyOut.set(e.getKey());
        valOut.setNodeId(e.getKey());
        valOut.setARank((float) 0.0);
        valOut.setHRank((float) 0.0);
        valOut.setType(HITSNode.TYPE_AUTH_COMPLETE);
        //FIXME
        //valOut.setAdjacencyList(e.getValue());
        mOutput.collect(keyOut, valOut);
      }
    }

  }

  private static class AFormatCombiner extends MapReduceBase implements
      Reducer<IntWritable, LongWritable, IntWritable, LongWritable> {
    private LongWritable valIn;
    private LongWritable valOut = new LongWritable();
    ArrayListOfIntsWritable adjList = new ArrayListOfIntsWritable();

    public void reduce(IntWritable key, Iterator<LongWritable> values,
        OutputCollector<IntWritable, LongWritable> output,
        Reporter reporter) throws IOException {
      // ArrayListOfIntsWritable adjList = new ArrayListOfIntsWritable();
      long sum = 0;
      // System.out.println(key.toString());
      // System.out.println(adjList.toString());
      while (values.hasNext()) {
        sum += values.next().get();
      }
      valOut.set(sum);
      output.collect(key, valOut);
    }
  }

  private static class AFormatReducer extends MapReduceBase implements
      Reducer<IntWritable, LongWritable, IntWritable, LongWritable> {
    private LongWritable valIn;
    private LongWritable valOut = new LongWritable();
    ArrayListOfIntsWritable adjList = new ArrayListOfIntsWritable();

    public void reduce(IntWritable key, Iterator<LongWritable> values,
        OutputCollector<IntWritable, LongWritable> output,
        Reporter reporter) throws IOException {
      // ArrayListOfIntsWritable adjList = new ArrayListOfIntsWritable();
      long sum = 0;
      // System.out.println(key.toString());
      // System.out.println(adjList.toString());
      while (values.hasNext()) {
        sum += values.next().get();
      }

      if (sum > 100000) {
        valOut.set(sum);
        output.collect(key, valOut);
      }

    }
  }

  private static int printUsage() {
    System.out
        .println("usage: [input-path] [output-path] [num-mappers] [num-reducers]");
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }

  public int run(String[] args) throws Exception {

    if (args.length != 4) {
      printUsage();
      return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];

    int mapTasks = Integer.parseInt(args[2]);
    int reduceTasks = Integer.parseInt(args[3]);

    sLogger.info("Tool: Counter");
    sLogger.info(" - input path: " + inputPath);
    sLogger.info(" - output path: " + outputPath);
    sLogger.info(" - number of mappers: " + mapTasks);
    sLogger.info(" - number of reducers: " + reduceTasks);

    JobConf conf = new JobConf(InlinkCounter.class);
    conf.setJobName("InlinkCounter -- Web Graph");

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    // conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(LongWritable.class);
    // conf.setOutputFormat(SequenceFileOutputFormat.class);

    // InputSampler.Sampler<IntWritable, Text> sampler = new
    // InputSampler.RandomSampler<IntWritable, Text>(0.1, 10, 10);
    // InputSampler.writePartitionFile(conf, sampler);
    // conf.setPartitionerClass(TotalOrderPartitioner.class);
    conf.setMapperClass(AFormatMapper.class);
    conf.setCombinerClass(AFormatCombiner.class);
    conf.setReducerClass(AFormatReducer.class);

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    sLogger.info("Starting job");
    JobClient.runJob(conf);
    sLogger.info("Job Finished in "
        + (System.currentTimeMillis() - startTime) / 1000.0
        + " seconds");

    return 0;
  }

  public static void main(String[] args) throws Exception {
    int res = ToolRunner
        .run(new Configuration(), new InlinkCounter(), args);
    System.exit(res);
  }

}
TOP

Related Classes of edu.umd.cloud9.example.hits.InlinkCounter$AFormatReducer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle Inc. Contact coftware#gmail.com.