Source Code of com.intel.hadoop.graphbuilder.job.AbstractPartitionJob

/* Copyright (C) 2012 Intel Corporation.
 *     All rights reserved.
 *           
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 *
 * For more about this software visit:
 *      http://www.01.org/GraphBuilder 
 */
package com.intel.hadoop.graphbuilder.job;


import javassist.CannotCompileException;
import javassist.NotFoundException;


import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;


import com.intel.hadoop.graphbuilder.parser.BasicGraphParser;
import com.intel.hadoop.graphbuilder.parser.GraphParser;
import com.intel.hadoop.graphbuilder.parser.ParserFactory;
import com.intel.hadoop.graphbuilder.partition.mapreduce.edge.EdgeIngressMR;
import com.intel.hadoop.graphbuilder.partition.mapreduce.keyvalue.IngressJobKeyValueFactory;
import com.intel.hadoop.graphbuilder.partition.mapreduce.vrecord.VrecordIngressMR;
import com.intel.hadoop.graphbuilder.util.FsUtil;


/**
 * An abstract wrapper class for running the Partitioning Job. User needs to
 * override 3 methods: {@code vidClass(), vdataClass(), and edataClass()} to
 * generate the right parsers for parsing the input data.
 * {@code BasicGraphParser} is used, and can be replaced by overriding the
 * {@graphParserClass()} method.
 * <p>
 * See an example in {@code PartitionJobTest}.
 * </p>
 *
 * Input directories contains hybrid of edge and vertex data. Output
 * directories:
 * <ul>
 * <li>$outputdir/edges/partition{$i} edges for partition{$i}</li>
 * <li>$outputdir/vrecords/partition{$i} vertex records for partition{$i}</li>
 * </ul>
 *
 * @see BasicGraphParser
 * @see GraphParser
 * @see PartitionJobTest
 *
 * @param <VidType>
 * @param <VertexData>
 * @param <EdgeData>
 */
public abstract class AbstractPartitionJob<VidType extends WritableComparable<VidType>, VertexData extends Writable, EdgeData extends Writable> {


  /**
   * @return the class of vertex id type
   */
  public abstract Class vidClass();


  /**
   * @return the class of vertex data type
   */
  public abstract Class vdataClass();


  /**
   * @return the class of edge data type
   */
  public abstract Class edataClass();


  /**
   * @return the class of graph parser type
   */
  public Class graphParserClass() {
    return BasicGraphParser.class;
  }


  /**
   * Running the partitioning job with {@code nparts} partitions. Reads input
   * from {@code inputs}, and outputs to {@code output} directory.
   *
   * @param nparts
   * @param inputs
   * @param output
   * @return
   * @throws NotFoundException
   * @throws CannotCompileException
   */
  public boolean run(int nparts, String[] inputs, String output)
      throws NotFoundException, CannotCompileException {
    EdgeIngressMR job1 = new EdgeIngressMR(
        graphParserClass(),
        ParserFactory
        .getParserByClassName(vidClass().getName()).getClass(),
        ParserFactory
        .getParserByClassName(vdataClass().getName()).getClass(),
        ParserFactory
        .getParserByClassName(edataClass().getName()).getClass());
    job1.setKeyValueClass(IngressJobKeyValueFactory
        .getKeyClassByClassName(vidClass().getName()),
        IngressJobKeyValueFactory.getValueClassByClassName(
            vidClass().getName(), vdataClass().getName(), edataClass()
                .getName()));
    // Distribute new class file to cluster.
    FsUtil.distributedTempClassToClassPath(job1.getConf());
    VrecordIngressMR job2 = new VrecordIngressMR();


    try {
      job1.run(inputs, output + "/edges", nparts, "greedy");
      job2.run(nparts, output + "/edges/vrecord", output + "/vrecords");
    } catch (Exception e) {
      e.printStackTrace();
      return false;
    }
    return true;
  }
}
Source Code of com.intel.hadoop.graphbuilder.job.AbstractPartitionJob

Related Classes of com.intel.hadoop.graphbuilder.job.AbstractPartitionJob