Source Code of edu.umd.cloud9.example.hits.HubsAndAuthoritiesSchimmy$Norm2Mapper

/**
*
*/
package edu.umd.cloud9.example.hits;

import java.io.IOException;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Partitioner;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.lib.HashPartitioner;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;

import edu.umd.cloud9.io.array.ArrayListOfIntsWritable;
import edu.umd.cloud9.util.map.HMapIF;
import edu.umd.cloud9.util.map.MapIF;

/**
* <p>
* Main driver program for running the Schimmy version of Kleinberg's
* Hubs and Authorities / Hyperlink-Induced Topic Search (HITS) algorithm.
* Command-line arguments are:
* </p>
*
* <ul>
* <li>[basePath]: the base path</li>
* <li>[numNodes]: number of nodes in the graph</li>
* <li>[start]: starting iteration</li>
* <li>[end]: ending iteration</li>
* <li>[useCombiner?]: 1 for using combiner, 0 for not</li>
* <li>[useInMapCombiner?]: 1 for using in-mapper combining, 0 for not</li>
* <li>[useRange?]: 1 for range partitioning, 0 for not</li>
* <li>[numMappers]: number of mappers to use</li>
* <li>[numReducers]: number of reducers to use. This should remain constant between iterations</li>
* </ul>
*
* <p>
* The starting and ending iterations will correspond to paths
* <code>/base/path/iterXXXX</code> and <code>/base/path/iterYYYY</code>. As an
* example, if you specify 0 and 10 as the starting and ending iterations, the
* driver program will start with the graph structure stored at
* <code>/base/path/iter0000</code>; final results will be stored at
* <code>/base/path/iter0010</code>.
* </p>
*
* @see HubsAndAuthorities
* @author Mike McGrath
*
*/

public class HubsAndAuthoritiesSchimmy extends Configured implements Tool {
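
  // Example invocation (the jar name and paths below are illustrative only;
  // adjust them to your build and HDFS layout):
  //
  //   hadoop jar cloud9.jar edu.umd.cloud9.example.hits.HubsAndAuthoritiesSchimmy \
  //       /base/path 1000000 0 10 1 0 0 20 10
  //
  // This starts from the graph stored under /base/path/iter0000 and leaves
  // the final normalized scores in /base/path/iter0010.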

  private static final Logger sLogger = Logger
      .getLogger(HubsAndAuthoritiesSchimmy.class);

  /**
   * Mapper for one HITS iteration: re-emits each node's current scores and
   * distributes its hub score to its outlinks (as authority mass) and its
   * authority score to its inlinks (as hub mass).
   */
  private static class HAMapper extends MapReduceBase implements
      Mapper<IntWritable, HITSNode, IntWritable, HITSNode> {
    // private Tuple valIn = MAP_SCHEMA.instantiate();
    private HITSNode valOut = new HITSNode();
    private ArrayListOfIntsWritable empty = new ArrayListOfIntsWritable();

    public void map(IntWritable key, HITSNode value,
        OutputCollector<IntWritable, HITSNode> output, Reporter reporter)
        throws IOException {

      int typeOut;

      // first, pass the node itself through with its current scores
      valOut.setType(HITSNode.TYPE_NODE_MASS);
      valOut.setNodeId(value.getNodeId());
      valOut.setARank(value.getARank());
      valOut.setHRank(value.getHRank());

      output.collect(key, valOut);

      int curr;
      // the authority score of a node X is the sum of the hub scores of all nodes linking to X,
      // so for each outlink X1...XN, contribute this node's hub score toward that node's
      // authority score (the totals are summed in the reducer)
      typeOut = HITSNode.TYPE_AUTH_MASS;
      ArrayListOfIntsWritable adjList = value.getOutlinks();
     
      for (int i = 0; i < adjList.size(); i++) {
        curr = adjList.get(i);
        valOut.setType(typeOut);
        valOut.setARank(value.getHRank());
        output.collect(new IntWritable(curr), valOut);
      }
     
      // the hub score of a node is the sum of the authority scores of the nodes it links to,
      // so for each inlink X1...XN of this node, contribute this node's authority score
      // toward that node's hub score (the totals are summed in the reducer)
      typeOut = HITSNode.TYPE_HUB_MASS;
      adjList = value.getInlinks();
     
      for (int i = 0; i < adjList.size(); i++) {
        curr = adjList.get(i);
        valOut.setType(typeOut);
        valOut.setHRank(value.getARank());
        output.collect(new IntWritable(curr), valOut);
      }
    }

  }
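
  // Illustrative example of HAMapper's output: for a node 5 with outlinks
  // {7, 9} and inlink {3}, map() emits
  //   (5, TYPE_NODE_MASS carrying node 5's current hub and authority ranks),
  //   (7, TYPE_AUTH_MASS with aRank = node 5's hRank),
  //   (9, TYPE_AUTH_MASS with aRank = node 5's hRank),
  //   (3, TYPE_HUB_MASS with hRank = node 5's aRank);
  // the reducer then sums the mass messages per node id (in log space).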

  // mapper using in-mapper combining
  private static class HAMapperIMC extends MapReduceBase implements
      Mapper<IntWritable, HITSNode, IntWritable, HITSNode> {

    // for buffering rank values
    private static HMapIF rankmapA = new HMapIF();
    private static HMapIF rankmapH = new HMapIF();

    // save a reference to the output collector
    private static OutputCollector<IntWritable, HITSNode> mOutput;

    private static HITSNode valOut = new HITSNode();

    // private static ArrayListOfIntsWritable empty = new
    // ArrayListOfIntsWritable();

    public void configure(JobConf job) {
      rankmapA.clear();
      rankmapH.clear();
    }

    public void map(IntWritable key, HITSNode value,
        OutputCollector<IntWritable, HITSNode> output, Reporter reporter)
        throws IOException {

      mOutput = output;

      ArrayListOfIntsWritable adjList;
      valOut.setNodeId(value.getNodeId());
      valOut.setType(HITSNode.TYPE_NODE_MASS);
      valOut.setARank(value.getARank());
      valOut.setHRank(value.getHRank());
      output.collect(key, valOut);

      int curr;

      // buffer this node's hub score into each outlink's authority total
      // (the buffered totals are emitted once per mapper in close())
      adjList = value.getOutlinks();
      for (int i = 0; i < adjList.size(); i++) {
        curr = adjList.get(i);
        // System.out.println("[key: " + key.toString() + "] [curr: " +
        // curr + "]");
        if (rankmapA.containsKey(curr)) {
          rankmapA.put(curr, sumLogProbs(rankmapA.get(curr),
              value.getHRank()));
        } else {
          rankmapA.put(curr, value.getHRank());
        }
      }
     
      // buffer this node's authority score into each inlink's hub total
      // (the buffered totals are emitted once per mapper in close())
      adjList = value.getInlinks();
      for (int i = 0; i < adjList.size(); i++) {
        curr = adjList.get(i);
        if (rankmapH.containsKey(curr)) {
          rankmapH.put(curr, sumLogProbs(rankmapH.get(curr),
              value.getARank()));
        } else {
          rankmapH.put(curr, value.getARank());
        }
      }
    }

    public void close() throws IOException {
      IntWritable n = new IntWritable();
      HITSNode mass = new HITSNode();
      for (MapIF.Entry e : rankmapH.entrySet()) {
        n.set(e.getKey());
        mass.setType(HITSNode.TYPE_HUB_MASS);
        mass.setHRank(e.getValue());
        mass.setNodeId(e.getKey());
        // System.out.println(e.getKey() + " " + e.getValue());
        mOutput.collect(n, mass);
      }
      for (MapIF.Entry e : rankmapA.entrySet()) {
        n.set(e.getKey());
        mass.setType(HITSNode.TYPE_AUTH_MASS);
        mass.setARank(e.getValue());
        mass.setNodeId(e.getKey());
        // System.out.println(e.getKey() + " " + e.getValue());
        mOutput.collect(n, mass);
      }
    }

  }

  private static class HAReducer extends MapReduceBase implements
      Reducer<IntWritable, HITSNode, IntWritable, HITSNode> {
    private HITSNode valIn;
    private HITSNode valOut = new HITSNode();

    private OutputCollector<IntWritable, HITSNode> mOutput;
    private Reporter mReporter;

    private JobConf mJobConf;
    private String mTaskId;

    private SequenceFile.Reader reader;

    private IntWritable mStateNid = new IntWritable();
    private HITSNode mStateNode = new HITSNode();

    private int jobIter = 0;

    public void configure(JobConf jconf) {
      mJobConf = jconf;
      mTaskId = jconf.get("mapred.task.id");
      jobIter = jconf.getInt("jobIter", 0);

      // we want to reconstruct the mapping from partition file stored on
      // disk and the actual partition...
      String pMappingString = jconf.get("PartitionMapping");
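      // pMappingString has the form "<partition>=<path>\t<partition>=<path>...";
      // an illustrative value: "0=/base/iter0000/part-00000\t1=/base/iter0000/part-00001"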

      Map<Integer, String> m = new HashMap<Integer, String>();
      for (String s : pMappingString.split("\\t")) {
        String[] arr = s.split("=");

        sLogger.info(arr[0] + "\t" + arr[1]);

        m.put(Integer.parseInt(arr[0]), arr[1]);
      }

      int partno = Integer.parseInt(mTaskId.substring(
          mTaskId.length() - 7, mTaskId.length() - 2));
      String f = m.get(partno);

      sLogger.info("task id: " + mTaskId);
      sLogger.info("partno: " + partno);
      sLogger.info("file: " + f);

      try {
        FileSystem fs = FileSystem.get(jconf);
        reader = new SequenceFile.Reader(fs, new Path(f), jconf);
      } catch (IOException e) {
        e.printStackTrace();
        throw new RuntimeException("Couldn't open " + f
            + " for partno: " + partno + " within: " + mTaskId);
      }
    }

    public void reduce(IntWritable key, Iterator<HITSNode> values,
        OutputCollector<IntWritable, HITSNode> output, Reporter reporter)
        throws IOException {
      ArrayListOfIntsWritable adjList = new ArrayListOfIntsWritable();

      float hrank = Float.NEGATIVE_INFINITY;
      float arank = Float.NEGATIVE_INFINITY;
      long pos;

      valOut.setInlinks(adjList);
      valOut.setOutlinks(adjList);

      mOutput = output;
      mReporter = reporter;

      // we're going to read the node structure until we get to the node
      // of the current message we're processing...
      while (reader.next(mStateNid, mStateNode)) {

        /*
         * if (mStateNid.get() == key.get() && (mStateNode.getType() ==
         * HITSNode.TYPE_AUTH_COMPLETE || mStateNode.getType() ==
         * HITSNode.TYPE_AUTH_STRUCTURE)) { afound = true; } if
         * (mStateNid.get() == key.get() && (mStateNode.getType() ==
         * HITSNode.TYPE_HUB_COMPLETE || mStateNode.getType() ==
         * HITSNode.TYPE_HUB_STRUCTURE)) { hfound = true; }
         */
        if (mStateNid.get() == key.get())
          break;

        // nodes are sorted in each partition, so if we come across a
        // larger nid than the current message we're processing, there's
        // something seriously wrong...
        if (mStateNid.get() > key.get()) {
          Partitioner<WritableComparable, Writable> p = new HashPartitioner<WritableComparable, Writable>();

          int sp = p.getPartition(mStateNid, mStateNode, mJobConf
              .getNumReduceTasks());
          int kp = p.getPartition(key, mStateNode, mJobConf
              .getNumReduceTasks());

          throw new RuntimeException(
              "Unexpected Schimmy failure during merge! nids: "
                  + mStateNid.get() + " " + key.get()
                  + " parts: " + sp + " " + kp);
        }

        // mStateNode.setHARank(Float.NEGATIVE_INFINITY);

        // do something smarter here
        // output.collect(mStateNid, mStateNode);
      }

      while (values.hasNext()) {
        valIn = values.next();

        // get type
        int type = valIn.getType();
        float arankIn = valIn.getARank();
        float hrankIn = valIn.getHRank();
        if (type == HITSNode.TYPE_HUB_MASS ) {
          // hrank += rankIn;
          hrank = sumLogProbs(hrank, hrankIn);
        } else if (type == HITSNode.TYPE_AUTH_MASS) {
          // arank += rankIn;
          arank = sumLogProbs(arank, arankIn);
        }
      }
      // System.out.println(key.toString() + " " + "H" + " " +
      // hpayloadOut.toString());

      // if this is the first run, set rank to 0 for nodes with no inlinks
      // or outlinks
      if (jobIter == 0) {
        if (hrank == Float.NEGATIVE_INFINITY) {
          hrank = 0;
        }
        if (arank == Float.NEGATIVE_INFINITY) {
          arank = 0;
        }
      }
      // build output tuple and write to output
      if (mStateNode.getType() == HITSNode.TYPE_NODE_COMPLETE)
      {
        valOut.setInlinks(mStateNode.getInlinks()); //????
        valOut.setOutlinks(mStateNode.getOutlinks());
      }
      /*
      pos = reader.getPosition();
      // read ahead to seek if there is another adjlist
      reader.next(mStateNid, mStateNode);
      if (mStateNid.get() == key.get()) {
        if (mStateNode.getType() == HITSNode.TYPE_AUTH_COMPLETE)
          avalOut.setAdjacencyList(mStateNode.getAdjacencyList());
        else if (mStateNode.getType() == HITSNode.TYPE_HUB_COMPLETE)
          hvalOut.setAdjacencyList(mStateNode.getAdjacencyList());
      }
      // if not, go back
      else {
        reader.seek(pos);
      }*/
      valOut.setHRank(hrank);
      valOut.setARank(arank);
      valOut.setType(HITSNode.TYPE_NODE_COMPLETE);
      valOut.setNodeId(key.get());

      output.collect(key, valOut);
    }

    public void close() throws IOException {

      // we have to write out the rest of the nodes we haven't finished
      // reading yet (i.e., these are the ones who don't have any messages
      // sent to them)
      // while (reader.next(mStateNid, mStateNode)) {
      // mStateNode.setHARank(Float.NEGATIVE_INFINITY);
      // mOutput.collect(mStateNid, mStateNode);
      // }

      reader.close();
    }
  }
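
  // Schimmy merge, in brief: instead of shuffling the graph structure, the
  // reducer streams the previous iteration's partition file directly from HDFS
  // ('reader' above) in lockstep with the sorted reduce keys. Mass messages for
  // each key are summed with sumLogProbs and then merged with the node
  // structure read from the side file before the node is written back out.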

  private static class Norm1Mapper extends MapReduceBase implements
      Mapper<IntWritable, HITSNode, Text, FloatWritable> {

    FloatWritable rank = new FloatWritable();

    public void map(IntWritable key, HITSNode value,
        OutputCollector<Text, FloatWritable> output, Reporter reporter)
        throws IOException {

      int type = value.getType();

      // System.out.println(key.toString() + " " + valOut.toString());
      if (type == HITSNode.TYPE_NODE_COMPLETE) {
        rank.set(value.getARank() * 2);
        output.collect(new Text("A"), rank);
        rank.set(value.getHRank() * 2);
        output.collect(new Text("H"), rank);
      } else {
        System.err.println("Bad Type: " + type);
      }
    }

  }

  private static class Norm1MapperIMC extends MapReduceBase implements
      Mapper<IntWritable, HITSNode, Text, FloatWritable> {

    private static float hsum = Float.NEGATIVE_INFINITY;
    private static float asum = Float.NEGATIVE_INFINITY;
    private static OutputCollector<Text, FloatWritable> mOutput;

    public void configure(JobConf conf) {
      hsum = Float.NEGATIVE_INFINITY;
      asum = Float.NEGATIVE_INFINITY;
    }

    public void map(IntWritable key, HITSNode value,
        OutputCollector<Text, FloatWritable> output, Reporter reporter)
        throws IOException {

      mOutput = output;

      int type = value.getType();
      float arank = value.getARank() * 2;
      float hrank = value.getHRank() * 2;// <===FIXME

      if (type == HITSNode.TYPE_NODE_COMPLETE) {
        asum = sumLogProbs(asum, arank);
        hsum = sumLogProbs(hsum, hrank);
      } else {
        System.err.println("Bad Type: " + type);
      }
    }

    public void close() throws IOException {
      if (hsum != Float.NEGATIVE_INFINITY)
        mOutput.collect(new Text("H"), new FloatWritable(hsum));
      if (asum != Float.NEGATIVE_INFINITY)
        mOutput.collect(new Text("A"), new FloatWritable(asum));
    }

  }

  private static class Norm1Combiner extends MapReduceBase implements
      Reducer<Text, FloatWritable, Text, FloatWritable> {

    public void reduce(Text key, Iterator<FloatWritable> values,
        OutputCollector<Text, FloatWritable> output, Reporter reporter)
        throws IOException {
      float sum = Float.NEGATIVE_INFINITY;
      FloatWritable valIn;

      while (values.hasNext()) {
        valIn = values.next();
        sum = sumLogProbs(sum, valIn.get());
      }

      if (sum != Float.NEGATIVE_INFINITY)
        output.collect(key, new FloatWritable(sum));
    }
  }

  private static class Norm1Reducer extends MapReduceBase implements
      Reducer<Text, FloatWritable, Text, FloatWritable> {

    public void reduce(Text key, Iterator<FloatWritable> values,
        OutputCollector<Text, FloatWritable> output, Reporter reporter)
        throws IOException {
      float sum = Float.NEGATIVE_INFINITY;
      FloatWritable valIn;

      while (values.hasNext()) {
        valIn = values.next();
        sum = sumLogProbs(sum, valIn.get());
      }

      sum = sum / 2; // ranks are log values, so halving the log takes the square root

      output.collect(key, new FloatWritable(sum));
    }
  }

  private static class Norm2Mapper extends MapReduceBase implements
      Mapper<IntWritable, HITSNode, IntWritable, HITSNode> {

    private HITSNode nodeOut = new HITSNode();

    private float rootSumA;
    private float rootSumH;

    public void configure(JobConf jconf) {
      rootSumA = jconf.getFloat("rootSumA", 0);
      rootSumH = jconf.getFloat("rootSumH", 0);
    }

    public void map(IntWritable key, HITSNode value,
        OutputCollector<IntWritable, HITSNode> output, Reporter reporter)
        throws IOException {

      // System.out.println("H: " + rootSumH);
      // System.out.println("A: " + rootSumA);
      float arank = value.getARank();
      float hrank = value.getHRank();

      hrank = hrank - rootSumH;
      arank = arank - rootSumA;

      nodeOut.setNodeId(key.get());
      nodeOut.setType(HITSNode.TYPE_NODE_COMPLETE);
      nodeOut.setARank(arank);
      nodeOut.setHRank(hrank);
      nodeOut.setInlinks(value.getInlinks());
      nodeOut.setOutlinks(value.getOutlinks());
      // System.out.println(tupleOut.toString());

      // System.out.println(key.toString() + " " + valOut.toString());
      output.collect(key, nodeOut);
 
    }

  }
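
  // Log-space normalization, sketched: if l_i = log(x_i) is a stored rank, the
  // Norm1 map/reduce pass computes R = (1/2) * log(sum_i exp(2 * l_i))
  //   = log(sqrt(sum_i x_i^2)), i.e. the log of the L2 norm
  // ("* 2" squares, sumLogProbs adds, "/ 2" takes the square root), and
  // Norm2Mapper then outputs l_i - R = log(x_i / ||x||_2), each score divided
  // by the norm. Hub and authority scores are normalized separately.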

  private ArrayList<Float> readSums(JobConf jconf, String pathIn)
      throws Exception {
    ArrayList<Float> output = new ArrayList<Float>();
    float rootSumA = -1;
    float rootSumH = -1;
    SequenceFile.Reader reader = null;
    try {
      Configuration cfg = new Configuration();
      FileSystem fs = FileSystem.get(cfg);
      Path sumsIn = new Path(pathIn);
      // FSDataInputStream in = fs.open(sumsIn);

      reader = new SequenceFile.Reader(fs, sumsIn, jconf);
      Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(),
          jconf);
      FloatWritable value = (FloatWritable) ReflectionUtils.newInstance(
          reader.getValueClass(), jconf);

      while (reader.next(key, value)) {
        // System.out.printf("%s\t%s\n", key, value);
        if (key.toString().equals("A")) {
          rootSumA = value.get();
        } else if (key.toString().equals("H")) {
          rootSumH = value.get();
        } else {
          System.err.println("readSums: unexpected key in sums file: " + key);
        }
      }
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      IOUtils.closeStream(reader);
    }

    if (rootSumA == -1 || rootSumH == -1) {
      throw new Exception("error: rootSumA or rootSumH not found in sums file");
    }

    output.add(new Float(rootSumA));
    output.add(new Float(rootSumH));

    return output;
  }

  // adds two log-space values: returns log(exp(a) + exp(b)), computed stably (log-sum-exp)
  private static float sumLogProbs(float a, float b) {
    if (a == Float.NEGATIVE_INFINITY)
      return b;

    if (b == Float.NEGATIVE_INFINITY)
      return a;

    if (a < b) {
      return (float) (b + StrictMath.log1p(StrictMath.exp(a - b)));
    }

    return (float) (a + StrictMath.log1p(StrictMath.exp(b - a)));
  }
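
  // Quick sanity check (natural logs): sumLogProbs((float) Math.log(0.5),
  // (float) Math.log(0.25)) is approximately (float) Math.log(0.75),
  // since 0.5 + 0.25 = 0.75.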

  public int run(String[] args) throws Exception {

    if (args.length != 9) {
      printUsage();
      return -1;
    }

    String basePath = args[0];
    int n = Integer.parseInt(args[1]);
    int s = Integer.parseInt(args[2]);
    int e = Integer.parseInt(args[3]);
    boolean useCombiner = Integer.parseInt(args[4]) != 0;
    boolean useInmapCombiner = Integer.parseInt(args[5]) != 0;
    boolean useRange = Integer.parseInt(args[6]) != 0;
    int mapTasks = Integer.parseInt(args[7]);
    int reduceTasks = Integer.parseInt(args[8]);

    sLogger.info("Tool name: HubsAndAuthorities");
    sLogger.info(" - base dir: " + basePath);
    sLogger.info(" - node count: " + n);
    sLogger.info(" - start iteration: " + s);
    sLogger.info(" - end iteration: " + e);
    sLogger.info(" - useCombiner: " + useCombiner);
    sLogger.info(" - useInmapCombiner: " + useInmapCombiner);
    sLogger.info(" - useRange: " + useRange);
    sLogger.info(" - number of mappers: " + mapTasks);
    sLogger.info(" - number of reducers: " + reduceTasks);

    for (int i = s; i < e; i++) {
      iterateHA(basePath, i, i + 1, n, useCombiner, useInmapCombiner,
          useRange, mapTasks, reduceTasks);
    }

    return 0;
  }

  public HubsAndAuthoritiesSchimmy() {
  }

  private NumberFormat sFormat = new DecimalFormat("0000");

  private void iterateHA(String path, int i, int j, int n,
      boolean useCombiner, boolean useInmapCombiner, boolean useRange,
      int mapTasks, int reduceTasks) throws IOException {
    HACalc(path, i, j, n, useCombiner, useInmapCombiner, useRange,
        mapTasks, reduceTasks);
    Norm(path, i, j, n, useCombiner, useInmapCombiner, useRange, mapTasks,
        reduceTasks);
  }

  private static int printUsage() {
    System.out
        .println("usage: [base-path] [num-nodes] [start] [end] [useCombiner?] [useInMapCombiner?] [useRange?] [num-mappers] [num-reducers]");
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }

  public int HACalc(String path, int iter, int jter, int nodeCount,
      boolean useCombiner, boolean useInmapCombiner, boolean useRange,
      int mapTasks, int reduceTasks) throws IOException {

    JobConf conf = new JobConf(HubsAndAuthoritiesSchimmy.class);

    String inputPath = path + "/iter" + sFormat.format(iter);
    String outputPath = path + "/iter" + sFormat.format(jter) + "t";

    FileSystem fs = FileSystem.get(conf);

    // int numPartitions = FileSystem.get(conf).listStatus(new
    // Path(inputPath)).length - 1;
    // we need to actually count the number of part files to get the number
    // of partitions (because the directory might contain _log)
    int numPartitions = 0;
    for (FileStatus s : FileSystem.get(conf)
        .listStatus(new Path(inputPath))) {
      if (s.getPath().getName().contains("part-"))
        numPartitions++;
    }
    conf.setInt("NodeCount", nodeCount);

    Partitioner p = null;

    if (useRange) {
      p = new RangePartitioner<IntWritable, Writable>();
      p.configure(conf);
    } else {
      p = new HashPartitioner<WritableComparable, Writable>();
    }

    // this is really annoying: the mapping between the partition numbers on
    // disk (i.e., part-XXXX) and what partition the file contains (i.e.,
    // key.hash % #reducer) is arbitrary... so this means that we need to
    // open up each partition, peek inside to find out.
    IntWritable key = new IntWritable();
    HITSNode value = new HITSNode();
    FileStatus[] status = fs.listStatus(new Path(inputPath));

    StringBuilder sb = new StringBuilder();

    for (FileStatus f : status) {
      if (f.getPath().getName().contains("_logs"))
        continue;

      SequenceFile.Reader reader = new SequenceFile.Reader(fs, f
          .getPath(), conf);

      reader.next(key, value);
      @SuppressWarnings("unchecked")
      int np = p.getPartition(key, value, numPartitions);
      reader.close();

      sLogger.info(f.getPath() + "\t" + np);
      sb.append(np + "=" + f.getPath() + "\t");
    }

    sLogger.info(sb.toString().trim());

    sLogger.info("Tool: HubsAndAuthorities");
    sLogger.info(" - iteration: " + iter);
    sLogger.info(" - number of mappers: " + mapTasks);
    sLogger.info(" - number of reducers: " + reduceTasks);

    conf.setJobName("Iter" + iter + "HubsAndAuthorities");

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(HITSNode.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    if (useInmapCombiner == true) {
      conf.setMapperClass(HAMapperIMC.class);
    } else {
      conf.setMapperClass(HAMapper.class);
    }

    if (useRange == true) {
      conf.setPartitionerClass(RangePartitioner.class);
    }
    conf.setReducerClass(HAReducer.class);

    conf.setInt("jobIter", iter);
    conf.setInt("NodeCount", nodeCount);
    conf.set("PartitionMapping", sb.toString().trim());

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    JobClient.runJob(conf);
    sLogger.info("Job Finished in "
        + (System.currentTimeMillis() - startTime) / 1000.0
        + " seconds");

    return 0;
  }

  public int Norm(String path, int iter, int jter, int nodeCount,
      boolean useCombiner, boolean useInmapCombiner, boolean useRange,
      int mapTasks, int reduceTasks) throws IOException {

    // FIXME
    String inputPath = path + "/iter" + sFormat.format(jter) + "t";
    String outputPath = path + "/iter" + sFormat.format(jter);
    String tempPath = path + "/sqrt";

    sLogger.info("Tool: Normalizer");
    sLogger.info(" - input path: " + inputPath);
    sLogger.info(" - output path: " + outputPath);
    sLogger.info(" - iteration: " + iter);
    sLogger.info(" - number of mappers: " + mapTasks);
    sLogger.info(" - number of reducers: " + reduceTasks);

    JobConf conf = new JobConf(HubsAndAuthoritiesSchimmy.class);
    conf.setJobName("Iter" + iter + "NormalizerStep1");

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(tempPath));
    FileOutputFormat.setCompressOutput(conf, false);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(FloatWritable.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    if (useInmapCombiner == true) {
      conf.setMapperClass(Norm1MapperIMC.class);
    } else {
      conf.setMapperClass(Norm1Mapper.class);
    }
    if (useCombiner == true) {
      conf.setCombinerClass(Norm1Combiner.class);
    }
    conf.setReducerClass(Norm1Reducer.class);

    JobConf conf2 = new JobConf(HubsAndAuthoritiesSchimmy.class);
    conf2.setJobName("Iter" + iter + "NormalizerStep2");
    conf2.setInt("NodeCount", nodeCount);

    conf2.setNumMapTasks(mapTasks);
    conf2.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(conf2, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf2, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf2, false);

    conf2.setInputFormat(SequenceFileInputFormat.class);
    conf2.setOutputKeyClass(IntWritable.class);
    conf2.setOutputValueClass(HITSNode.class);
    conf2.setOutputFormat(SequenceFileOutputFormat.class);

    conf2.setMapperClass(Norm2Mapper.class);
    if (useRange == true) {
      conf2.setPartitionerClass(RangePartitioner.class);
    }
    conf2.setReducerClass(IdentityReducer.class);

    // Delete the output directory if it exists already
    Path tempDir = new Path(tempPath);
    FileSystem.get(conf).delete(tempDir, true);

    long startTime = System.currentTimeMillis();
    JobClient.runJob(conf);
    sLogger.info("Job Finished in "
        + (System.currentTimeMillis() - startTime) / 1000.0
        + " seconds");

    Path outputDir = new Path(outputPath);

    // read sums
    ArrayList<Float> sums = new ArrayList<Float>();
    try {
      sums = readSums(conf2, tempPath + "/part-00000");
    } catch (Exception e) {
      System.err.println("Failed to read in Sums");
      System.exit(1);
    }

    // conf2.set("rootSumA", sums.get(0).toString());
    conf2.setFloat("rootSumA", sums.get(0));
    // conf2.set("rootSumH", sums.get(1).toString());
    conf2.setFloat("rootSumH", sums.get(1));

    FileSystem.get(conf2).delete(outputDir, true);

    startTime = System.currentTimeMillis();
    JobClient.runJob(conf2);
    sLogger.info("Job Finished in "
        + (System.currentTimeMillis() - startTime) / 1000.0
        + " seconds");

    return 0;
  }

  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new Configuration(),
        new HubsAndAuthoritiesSchimmy(), args);
    System.exit(res);
  }

}