Package edu.cmu.graphchi.hadoop

Source Code of edu.cmu.graphchi.hadoop.PigGraphChiBase

package edu.cmu.graphchi.hadoop;

import edu.cmu.graphchi.ChiLogger;
import edu.cmu.graphchi.preprocessing.EdgeProcessor;
import edu.cmu.graphchi.preprocessing.FastSharder;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.pig.*;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigTextInputFormat;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.tools.pigstats.PigStatusReporter;

import java.io.IOException;
import java.util.logging.Logger;

/**
* Special PIG loader that wraps a graphchi application. This
* allows execution of GraphChi programs under Hadoop/PIG.
*
* Generally, if you have an GraphChi application extending PigGraphChibase,
* you can use it by calling in PIG:
*   results = LOAD '$GRAPH' USING  my.app.GraphChiApp() as (...)
*
*  Above, $GRAPH is a path where you have stored a file in edge list format:
*   mydata = FOREACH mygraph GENERATE from_id, to_id, edge_value;
*   STORE mydata INTO '$GRAPH'
*
* For an example, see
* @see edu.cmu.graphchi.apps.pig.PigPagerank
*/
public abstract class PigGraphChiBase  extends LoadFunc implements LoadMetadata {

    private static final Logger logger = ChiLogger.getLogger("pig-graphchi-base");
    private String location;
    private boolean activeNode = false;
    private Job job;
    private boolean ready = false;
    private String status = "initializing";

    protected PigGraphChiBase() {
    }

    // Example: (vertex:int, value:float)"
    protected abstract String getSchemaString();

    @Override
    public ResourceSchema getSchema(String str, Job job) throws IOException {
        return null;
    }

    @Override
    public ResourceStatistics getStatistics(String s, Job job) throws IOException {
        return null;
    }

    @Override
    public String[] getPartitionKeys(String s, Job job) throws IOException {
        return null; // Disable partition
    }

    @Override
    public void setPartitionFilter(Expression expression) throws IOException {
    }

    @Override
    public InputFormat getInputFormat() throws IOException {
        return new PigTextInputFormat();
    }

    protected abstract int getNumShards();

    protected String getGraphName() {
        return "pigudfgraph";
    }

    @Override
    public void setLocation(String location, Job job)
            throws IOException {
        logger.info("Set HDFS location for GraphChi Pig: " + location);
        PigTextInputFormat.setInputPaths(job, location);
        this.location = location;
        this.job = job;
    }


    public void setStatusString(String status) {
         this.status = status;
    }

    protected abstract void runGraphChi() throws Exception;

    protected abstract FastSharder createSharder(String graphName, int numShards) throws IOException;

    @Override
    public void prepareToRead(final RecordReader recordReader, final PigSplit pigSplit) throws IOException {
        try {

            int j = 0;
            for(String s : pigSplit.getLocations()) {
                System.out.println((j++) + "Split : " + s);
            }
            System.out.println("Num paths: " + pigSplit.getNumPaths());
            System.out.println("" + pigSplit.getConf());
            System.out.println("split index " + pigSplit.getSplitIndex());


            Thread progressThread = new Thread(new Runnable() {
                public void run() {
                    int i = 0;
                    while(!ready) {
                        PigStatusReporter.getInstance().progress();
                        PigStatusReporter.getInstance().setStatus("GraphChi running (" + i + "): " + getStatusString());
                        try {
                            Thread.sleep(5000);
                        } catch (InterruptedException ioe) {}
                    }

                }

            });
            progressThread.start();

            if (pigSplit.getSplitIndex() > 0) {
                PigStatusReporter.getInstance().setStatus("Redundant GraphChi-mapper - will die");
                throw new RuntimeException("Split index > 0 -- this mapper will die (expected, not an error).");
            }

            activeNode = true;

            Thread chiThread = new Thread(new Runnable() {
                public void run() {
                    try {
                        setStatusString("Preprocessing: reading data from HDFS: " + location);
                        final FastSharder sharder = createSharder(getGraphName(), getNumShards());


                        HDFSGraphLoader hdfsLoader = new HDFSGraphLoader(location, new EdgeProcessor<Float>() {
                            long counter = 0;

                            public Float receiveEdge(int from, int to, String token) {
                                try {
                                    sharder.addEdge(from, to, token);
                                    counter++;
                                    if (counter % 100000 == 0) {
                                        setStatusString("Preprocessing, read " + counter + " edges");
                                    }
                                } catch (IOException e) {
                                    throw new RuntimeException(e);
                                }
                                return null;
                            }
                        });

                        hdfsLoader.load(pigSplit.getConf());

                        setStatusString("Sharding...");
                        sharder.process();

                        logger.info("Starting to run GraphChi");
                        setStatusString("Start GraphChi engine");
                        runGraphChi();
                        logger.info("Ready.");
                    } catch (Exception err) {
                        err.printStackTrace();
                    }
                    ready = true;
                }});
            chiThread.start();

        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    protected String getStatusString() {
        return this.status;
    }

    protected abstract Tuple getNextResult(TupleFactory tupleFactory) throws ExecException;

    @Override
    public Tuple getNext() throws IOException {
        if (!activeNode) return null;
        while (!ready) {
            logger.info("GraphChi-Java running: waiting for graphchi-engine to finish: " + this.getStatusString());
            PigStatusReporter.getInstance().setStatus(getStatusString());
            PigStatusReporter.getInstance().progress();
            try {
                Thread.sleep(5000);
            } catch (InterruptedException ioe) {
            }
        }
        return getNextResult(TupleFactory.getInstance());
    }

}
TOP

Related Classes of edu.cmu.graphchi.hadoop.PigGraphChiBase

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.