Package edu.cmu.graphchi.apps.pig

Source Code of edu.cmu.graphchi.apps.pig.PigWeightedPagerank

package edu.cmu.graphchi.apps.pig;

import edu.cmu.graphchi.ChiVertex;
import edu.cmu.graphchi.GraphChiContext;
import edu.cmu.graphchi.GraphChiProgram;
import edu.cmu.graphchi.datablocks.FloatConverter;
import edu.cmu.graphchi.datablocks.FloatPair;
import edu.cmu.graphchi.datablocks.FloatPairConverter;
import edu.cmu.graphchi.engine.GraphChiEngine;
import edu.cmu.graphchi.engine.VertexInterval;
import edu.cmu.graphchi.hadoop.PigGraphChiBase;
import edu.cmu.graphchi.preprocessing.EdgeProcessor;
import edu.cmu.graphchi.preprocessing.FastSharder;
import edu.cmu.graphchi.preprocessing.VertexProcessor;
import edu.cmu.graphchi.vertexdata.VertexAggregator;
import edu.cmu.graphchi.vertexdata.VertexIdValue;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;

import java.io.IOException;
import java.util.Iterator;
import java.util.logging.Logger;

/**
* Example application:  WeightedPageRank (http://en.wikipedia.org/wiki/Pagerank)
* Iteratively computes a pagerank for each vertex by averaging the pageranks
* of in-neighbors pageranks. Uses edge weights to implemented weighted version of pagerank.
*
* This version can be used with <a href="http://pig.apache.org">Pig</a> in a Hadoop cluster.
*
* Example PIG script for running this:
*
* <pre>
*     REGISTER graphchi-java-0.2-jar-with-dependencies.jar;
*
*     pagerank = LOAD 'graphs/soc-LiveJournal1.txt' USING edu.cmu.graphchi.demo.pig.PigPagerank as (vertex:int, rank:float);
*
*     STORE pagerank INTO 'pagerank-livejournal';
* </pre>
*
* (To get the livejournal graph, visit: http://snap.stanford.edu/data/soc-LiveJournal1.html)
*
* @see edu.cmu.graphchi.hadoop.PigGraphChiBase
* @author Jerry Ye
*/
public class PigWeightedPagerank extends PigGraphChiBase implements GraphChiProgram<Float, FloatPair> {

    private static Logger logger = Logger.getLogger("weighted_pagerank");

    public void update(ChiVertex<Float, FloatPair> vertex, GraphChiContext context)  {
        if (context.getIteration() == 0) {
            /* Initialize on first iteration */
            vertex.setValue(1.0f);
        } else {
            /* On other iterations, set my value to be the weighted
               average of my in-coming neighbors pageranks.
             */
            float sum = 0.f;
            for(int i=0; i<vertex.numInEdges(); i++) {
                sum += vertex.inEdge(i).getValue().second;
            }
            vertex.setValue(0.15f + 0.85f * sum);
        }

        /* Accumulate edge weights */
        float edgeWeightSum = 0.f;
        for(int i=0; i<vertex.numOutEdges(); i++) {
            edgeWeightSum += vertex.outEdge(i).getValue().first;
        }

        /* Write my value (divided by my out-degree) to my out-edges so neighbors can read it. */
        for(int i=0; i<vertex.numOutEdges(); i++) {
            FloatPair curValue = vertex.outEdge(i).getValue();
            float edgeWeight = vertex.outEdge(i).getValue().first;
            vertex.outEdge(i).setValue(new FloatPair(edgeWeight, vertex.getValue() * edgeWeight/edgeWeightSum));
        }

    }

    /**
     * Callbacks (not needed for Pagerank)
     */
    public void beginIteration(GraphChiContext ctx) {}
    public void endIteration(GraphChiContext ctx) {}
    public void beginInterval(GraphChiContext ctx, VertexInterval interval) {}
    public void endInterval(GraphChiContext ctx, VertexInterval interval) {}
    public void beginSubInterval(GraphChiContext ctx, VertexInterval interval) {}
    public void endSubInterval(GraphChiContext ctx, VertexInterval interval) {}


    /**
     * PIG integration
     */

    // Objects needed for iterating the results
    private Iterator<VertexIdValue<Float>> vertexIterator;


    @Override
    /**
     * Pig column names
     */
    protected String getSchemaString() {
        return "(vertex:int, weight:float)";
    }

    @Override
    protected int getNumShards() {
        return 12// Unfortunately, currently hard-coded.
    }

    @Override
    /**
     * Runs the GraphChi program
     */
    protected void runGraphChi() throws Exception {
        /* Run GraphChi */
        GraphChiEngine<Float, FloatPair> engine = new GraphChiEngine<Float, FloatPair>(getGraphName(), getNumShards());
        engine.setEdataConverter(new FloatPairConverter());
        engine.setVertexDataConverter(new FloatConverter());
        engine.setModifiesInedges(false); // Important optimization

        engine.run(this, 4);

        logger.info("Ready.");

        /* Create iterator for the vertex values */
        this.vertexIterator = VertexAggregator.vertexIterator(engine.numVertices(), getGraphName(), new FloatConverter(),
                engine.getVertexIdTranslate());
    }

    @Override
    /**
     * Constructs "sharder", which takes an edge list and
     * converts it to internal binary representation of GraphChi.
     */
    protected FastSharder createSharder(String graphName, int numShards) throws IOException {
        return new FastSharder<Float, FloatPair>(graphName, numShards, new VertexProcessor<Float>() {
            public Float receiveVertexValue(int vertexId, String token) {
                return (token == null ? 0.0f : Float.parseFloat(token));
            }
        }, new EdgeProcessor<FloatPair>() {
            public FloatPair receiveEdge(int from, int to, String token) {
                return new FloatPair(Float.parseFloat(token), 0.f);
            }
        }, new FloatConverter(), new FloatPairConverter());
    }

    @Override
    /**
     * Generates the output to the Pig script, tuple by tuple
     */
    protected Tuple getNextResult(TupleFactory tupleFactory) throws ExecException {
        if (vertexIterator.hasNext()) {
            Tuple t = tupleFactory.newTuple(2);
            VertexIdValue<Float> val = vertexIterator.next();
            t.set(0, val.getVertexId());
            t.set(1, val.getValue());
            return t;
        } else {
            return null;
        }
    }
}
TOP

Related Classes of edu.cmu.graphchi.apps.pig.PigWeightedPagerank

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.