package edu.cmu.graphchi.apps;
import edu.cmu.graphchi.*;
import edu.cmu.graphchi.datablocks.IntConverter;
import edu.cmu.graphchi.engine.GraphChiEngine;
import edu.cmu.graphchi.engine.VertexInterval;
import edu.cmu.graphchi.preprocessing.EdgeProcessor;
import edu.cmu.graphchi.preprocessing.FastSharder;
import edu.cmu.graphchi.preprocessing.VertexProcessor;
import edu.cmu.graphchi.util.LabelAnalysis;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.logging.Logger;
/**
* Example application for computing the weakly connected components
* of a graph. The algorithm uses label exchange: each vertex first chooses
* a label equaling its id; on the subsequent iterations each vertex sets
* its label to be the minimum of the neighbors' labels and its current label.
* Algorithm finishes when no labels change. Each vertex with same label belongs
* to same component.
* @author akyrola
*/
public class ConnectedComponents implements GraphChiProgram<Integer, Integer> {
private static Logger logger = ChiLogger.getLogger("connectedcomponents");
public void update(ChiVertex<Integer, Integer> vertex, GraphChiContext context) {
final int iteration = context.getIteration();
final int numEdges = vertex.numEdges();
/* On first iteration, each vertex chooses a label equalling its id */
if (iteration == 0) {
vertex.setValue(vertex.getId());
/* Schedule the vertex itself for execution on next iteration */
context.getScheduler().addTask(vertex.getId());
}
/* Choose the smallest id of neighbor vertices. Each vertex
writes its label to its edges, so it can be accessed by neighbors.
*/
int curMin = vertex.getValue();
for(int i=0; i < numEdges; i++) {
int nbLabel = vertex.edge(i).getValue();
if (iteration == 0) nbLabel = vertex.edge(i).getVertexId(); // Note!
if (nbLabel < curMin) {
curMin = nbLabel;
}
}
/**
* Set my new label
*/
vertex.setValue(curMin);
int label = curMin;
/**
* Broadcast my value to neighbors by writing the value to my edges.
*/
if (iteration > 0) {
for(int i=0; i < numEdges; i++) {
if (vertex.edge(i).getValue() > label) {
vertex.edge(i).setValue(label);
context.getScheduler().addTask(vertex.edge(i).getVertexId());
}
}
} else {
// Special case for first iteration to avoid overwriting
for(int i=0; i < vertex.numOutEdges(); i++) {
vertex.outEdge(i).setValue(label);
}
}
}
public void beginIteration(GraphChiContext ctx) {}
public void endIteration(GraphChiContext ctx) {}
public void beginInterval(GraphChiContext ctx, VertexInterval interval) {}
public void endInterval(GraphChiContext ctx, VertexInterval interval) {}
public void beginSubInterval(GraphChiContext ctx, VertexInterval interval) {}
public void endSubInterval(GraphChiContext ctx, VertexInterval interval) {}
/**
* Initialize the sharder-program.
* @param graphName
* @param numShards
* @return
* @throws java.io.IOException
*/
protected static FastSharder createSharder(String graphName, int numShards) throws IOException {
return new FastSharder<Integer, Integer>(graphName, numShards, new VertexProcessor<Integer>() {
public Integer receiveVertexValue(int vertexId, String token) {
return 0;
}
}, new EdgeProcessor<Integer>() {
public Integer receiveEdge(int from, int to, String token) {
return 0;
}
}, new IntConverter(), new IntConverter());
}
/**
* Usage: java edu.cmu.graphchi.demo.ConnectedComponents graph-name num-shards filetype(edgelist|adjlist)
* For specifying the number of shards, 20-50 million edges/shard is often a good configuration.
*/
public static void main(String[] args) throws Exception {
String baseFilename = args[0];
int nShards = Integer.parseInt(args[1]);
String fileType = (args.length >= 3 ? args[2] : null);
/* Create shards */
FastSharder sharder = createSharder(baseFilename, nShards);
if (baseFilename.equals("pipein")) { // Allow piping graph in
sharder.shard(System.in, fileType);
} else {
if (!new File(ChiFilenames.getFilenameIntervals(baseFilename, nShards)).exists()) {
sharder.shard(new FileInputStream(new File(baseFilename)), fileType);
} else {
logger.info("Found shards -- no need to preprocess");
}
}
/* Run GraphChi ... */
GraphChiEngine<Integer, Integer> engine = new GraphChiEngine<Integer, Integer>(baseFilename, nShards);
engine.setEdataConverter(new IntConverter());
engine.setVertexDataConverter(new IntConverter());
engine.setEnableScheduler(true);
engine.run(new ConnectedComponents(), 5);
logger.info("Ready. Going to output...");
/* Process output. The output file has format <vertex-id, component-id> */
LabelAnalysis.computeLabels(baseFilename, engine.numVertices(), engine.getVertexIdTranslate());
logger.info("Finished. See file: " + baseFilename + ".components");
}
}