Package com.intel.hadoop.graphbuilder.partition.mapreduce.edge

Source Code of com.intel.hadoop.graphbuilder.partition.mapreduce.edge.EdgeIngressMR

/* Copyright (C) 2012 Intel Corporation.
*     All rights reserved.
*          
*  Licensed under the Apache License, Version 2.0 (the "License");
*  you may not use this file except in compliance with the License.
*  You may obtain a copy of the License at
*
*       http://www.apache.org/licenses/LICENSE-2.0
*
*   Unless required by applicable law or agreed to in writing, software
*   distributed under the License is distributed on an "AS IS" BASIS,
*   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*   See the License for the specific language governing permissions and
*   limitations under the License.
*
* For more about this software visit:
*      http://www.01.org/GraphBuilder
*/
package com.intel.hadoop.graphbuilder.partition.mapreduce.edge;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.log4j.Logger;

import com.intel.hadoop.graphbuilder.graph.GraphOutput;
import com.intel.hadoop.graphbuilder.graph.simplegraph.SimpleGraphOutput;
import com.intel.hadoop.graphbuilder.parser.FieldParser;
import com.intel.hadoop.graphbuilder.parser.GraphParser;
import com.intel.hadoop.graphbuilder.partition.mapreduce.keyvalue.IngressKeyType;
import com.intel.hadoop.graphbuilder.partition.mapreduce.keyvalue.IngressValueType;

/**
* The MapRedue class takes from input directory a list of edges and vertices,
* and output 2 parts: partitioned graphs and a list of distributed vertex
* records.
* <p>
* Input directory: Can take multiple input directories containing list of
* edges. Output directory structure:
* <ul>
* <li>$outputdir/partition{$i}/subpart{$j}/edata for edge data.</li>
* <li>Metafile: $outputdir/partition{$i}/subpart{$j} for meta info.</li>
* <li>Graph structure: $outputdir/partition{$i}/subpart{$j}/edgelist for
* adjacency structure.</li>
* <li>VertexRecords: $outputdir/vrecord list of vertex records.</li>
* </ul>
* </p>
*
*/
public class EdgeIngressMR {

  private static final Logger LOG = Logger.getLogger(EdgeIngressMR.class);

  /** MapReduce Job Counters. */
  public static enum COUNTER {
    NUM_VERTICES, NUM_EDGES
  };

  /**
   * Default constructor, initialize with parsers.
   *
   * @param graphparser
   * @param vidparser
   * @param vdataparser
   * @param edataparser
   */
  public EdgeIngressMR(Class graphparser, Class vidparser, Class vdataparser,
      Class edataparser) {
    gzip = false;
    jobName = "Ingress Mapreduce Driver";
    setParser(graphparser, vidparser, vdataparser, edataparser);
    conf = new JobConf(EdgeIngressMR.class);
  }

  /**
   * Set the parser class.
   *
   * @param parser
   */
  public void setParser(Class graphparser, Class vidparser, Class vdataparser,
      Class edataparser) {
    try {
      this.graphparser = (GraphParser) graphparser.newInstance();
      this.vidparser = (FieldParser) vidparser.newInstance();
      this.vdataparser = (FieldParser) vdataparser.newInstance();
      this.edataparser = (FieldParser) edataparser.newInstance();
    } catch (InstantiationException e) {
      e.printStackTrace();
      LOG.fatal("Parser classes: \n" + graphparser + "\n" + vidparser + "\n"
          + vdataparser + "\n" + edataparser + " do not exist.");
    } catch (IllegalAccessException e) {
      e.printStackTrace();
      LOG.fatal("Parser classes: \n" + graphparser + "\n" + vidparser + "\n"
          + vdataparser + "\n" + edataparser + " do not exist.");
    }
  }

  /**
   * Set the job name.
   *
   * @param name
   */
  public void setJobName(String name) {
    this.jobName = name;
  }

  /**
   * Set option for using gzip compression in output.
   *
   * @param gzip
   */
  public void useGzip(boolean gzip) {
    this.gzip = gzip;
  }

  /**
   * Set the ingress strategy {random, oblivious}.
   *
   * @see {ObliviousIngress}
   * @see {RandomIngress}
   * @param ingress
   */
  public void setIngress(String ingress) {
    if (ingress.equals("random") || ingress.equals("greedy"))
      this.ingress = ingress;
    else {
      LOG.error("Unknown ingress method: " + ingress
          + "\n Supported ingress methods: oblivious, random");
      LOG.error("Use the default oblivious ingress");
      this.ingress = "greedy";
    }
  }

  /**
   * Set the intermediate key value class.
   *
   * @param keyClass
   * @param valClass
   */
  public void setKeyValueClass(Class keyClass, Class valClass) {
    try {
      this.mapkeytype = (IngressKeyType) keyClass.newInstance();
      this.mapvaltype = (IngressValueType) valClass.newInstance();
    } catch (InstantiationException e) {
      e.printStackTrace();
    } catch (IllegalAccessException e) {
      e.printStackTrace();
    }
  }

  /**
   * @return JobConf of the current job.
   */
  public JobConf getConf() {
    return conf;
  }


  /**
   * @param inputpath
   * @param outputpath
   * @param numProcs
   * @param ingress
   * @throws IOException
   */
  public void run(String[] inputpaths, String outputpath, int numProcs,
      String ingress) throws IOException {
    this.setIngress(ingress);
    conf.setJobName(jobName);
    if (this.subpartPerPartition <= 0)
      this.subpartPerPartition = 8;

    LOG.info("===== Job: Partition edges and create vertex records =========");
    LOG.info("input: " + StringUtils.join(inputpaths, ","));
    LOG.info("output: " + outputpath);
    LOG.info("numProc = " + numProcs);
    LOG.info("subpartPerPartition = " + subpartPerPartition);
    LOG.info("keyclass = " + this.mapkeytype.getClass().getName());
    LOG.info("valclass = " + this.mapvaltype.getClass().getName());
    LOG.debug("graphparser = " + this.graphparser.getClass().getName());
    LOG.debug("vidparser = " + this.vidparser.getClass().getName());
    LOG.debug("vdataparser = " + this.vdataparser.getClass().getName());
    LOG.debug("edataparser = " + this.edataparser.getClass().getName());
    LOG.info("ingress = " + this.ingress);
    LOG.info("gzip = " + Boolean.toString(gzip));
    LOG.info("===============================================================");

    conf.set("ingress", this.ingress);
    conf.setInt("numProcs", numProcs);
    conf.set("GraphParser", graphparser.getClass().getName());
    conf.set("VidParser", vidparser.getClass().getName());
    conf.set("VdataParser", vdataparser.getClass().getName());
    conf.set("EdataParser", edataparser.getClass().getName());
    conf.setInt("subpartPerPartition", subpartPerPartition);

    conf.setMapOutputKeyClass(this.mapkeytype.getClass());
    conf.setMapOutputValueClass(this.mapvaltype.getClass());

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(EdgeIngressMapper.class);
    conf.setCombinerClass(EdgeIngressCombiner.class);
    conf.setReducerClass(EdgeIngressReducer.class);

    // GraphOutput output = new GLGraphOutput(numProcs);
    GraphOutput output = new SimpleGraphOutput();
    output.init(conf);

    conf.setInputFormat(TextInputFormat.class);
    // conf.setOutputFormat(PartitionedGraphOutputFormat.class);

    if (gzip) {
      TextOutputFormat.setCompressOutput(conf, true);
      TextOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);
    }

    for (String path : inputpaths)
      FileInputFormat.addInputPath(conf, new Path(path));
    FileOutputFormat.setOutputPath(conf, new Path(outputpath));

    if (!checkTypes()) {
      LOG.fatal("Type check failed."
          + "Please check the parsers are consistent with key/val types.");
      return;
    }

    JobClient.runJob(conf);
    LOG.info("================== Done ====================================\n");
  }

  /**
   * Ensure the keytype, valuetype are consistent with the parser type.
   * @return true if type check.
   */
  private boolean checkTypes() {
    boolean check = true;

    if (!(mapkeytype.createVid().getClass()).equals(mapvaltype
        .getGraphTypeFactory().createVid().getClass())) {
      LOG.fatal("VidType is not consistant between MapKeyType: "
          + mapkeytype.createVid().getClass().getName() + " and MapValueType: "
          + mapvaltype.getGraphTypeFactory().createVid().getClass().getName());
      check = false;
    }

    if (!(vidparser.getType()).equals(mapkeytype.createVid().getClass())) {
      LOG.fatal("VidType is not consistant between MapKeyType: "
          + mapkeytype.createVid().getClass().getName() + " and Parser: "
          + vidparser.getType().getName());
      check = false;
    }

    if (!(vdataparser.getType().equals(mapvaltype.getGraphTypeFactory()
        .createVdata().getClass()))) {
      LOG.fatal("VertexDataType is not consistant between MapValueType: "
          + mapvaltype.getGraphTypeFactory().createVdata().getClass().getName()
          + " and Parser: " + vdataparser.getType().getName());
      check = false;
    }

    if (!(edataparser.getType().equals(mapvaltype.getGraphTypeFactory()
        .createEdata().getClass()))) {
      LOG.fatal("EdgeDataType is not consistant between MapValueType: "
          + mapvaltype.getGraphTypeFactory().createEdata().getClass().getName()
          + " and Parser: " + edataparser.getType().getName());
      check = false;
    }

    return check;
  }

  /**
   * Set the number of subpartitions per real partition.
   * @param n number of subpartitions per real partition.
   */
  public void setTotalSubPartition(int n) {
    this.subpartPerPartition = n;
  }

  private JobConf   conf;
  private GraphParser graphparser;
  private FieldParser vidparser;
  private FieldParser vdataparser;
  private FieldParser edataparser;

  private boolean gzip;
  private String jobName;
  private String ingress;
  private int subpartPerPartition;
  private IngressKeyType mapkeytype;
  private IngressValueType mapvaltype;
}
TOP

Related Classes of com.intel.hadoop.graphbuilder.partition.mapreduce.edge.EdgeIngressMR

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.