package edu.cmu.graphchi.queries.demo;
import edu.cmu.graphchi.ChiFilenames;
import edu.cmu.graphchi.ChiLogger;
import edu.cmu.graphchi.datablocks.FloatConverter;
import edu.cmu.graphchi.preprocessing.VertexIdTranslate;
import edu.cmu.graphchi.queries.VertexQuery;
import edu.cmu.graphchi.util.IdCount;
import edu.cmu.graphchi.util.MultinomialSampler;
import edu.cmu.graphchi.vertexdata.VertexAggregator;
import edu.cmu.graphchi.vertexdata.VertexIdValue;
import java.io.*;
import java.util.*;
import java.util.logging.Logger;
/**
 * Demonstration of the queryAndCombine capabilities of GraphChi.
 * With this app, after you have computed PageRank for each vertex,
 * you can make simple friend-recommendation queries: querying for user X
 * will recommend the top friends (out-edges) of user X's friends (out-edges).
 *
 * <p>Each completed query appends one CSV row
 * ({@code friendCount,neighborQueryMs,combineQueryMs}) to the file "fof.log" in the
 * working directory. Call {@link #closeLog()} when done so buffered rows reach the disk.
 *
 * @author Aapo Kyrola
 */
public class FriendsOfFriends {

    private static final Logger logger = ChiLogger.getLogger("fof");

    /** Size in bytes of one fixed-width record in the optional "_names.dat" file. */
    private static final int NAME_RECORD_BYTES = 16;

    /** Maximum number of recommendations returned by a single query. */
    private static final int TOP_K = 20;

    private final VertexQuery queryEngine;
    private final VertexIdTranslate translator;
    private final String baseFilename;
    private final boolean weightByPagerank;
    private final int numShards;
    private float[] ranks;  // Note: ranks are by the *internal vertex id*. Null unless loaded.
    private final BufferedWriter logWriter;

    /**
     * Construct friends-of-friends (or followees of followees in Twitter parlance)
     * engine.
     * @param baseFilename graph name
     * @param numShards number of shards
     * @param weightByPagerank whether to weight sampling by pagerank
     * @throws IOException if the graph files, the ranks or the log file cannot be opened
     */
    public FriendsOfFriends(String baseFilename, int numShards, boolean weightByPagerank) throws IOException {
        this.queryEngine = new VertexQuery(baseFilename, numShards);
        this.baseFilename = baseFilename;
        this.weightByPagerank = weightByPagerank;
        this.numShards = numShards;
        this.translator = VertexIdTranslate.fromFile(
                new File(ChiFilenames.getVertexTranslateDefFile(baseFilename, numShards)));
        if (weightByPagerank) {
            loadRanks();
        }
        // Opened last so that a failure in the steps above does not leave an open
        // (and freshly truncated) log file behind.
        this.logWriter = new BufferedWriter(new FileWriter("fof.log"));
    }

    /**
     * Flushes and closes the timing log ("fof.log"). This does not shut down the
     * query engine. No further queries should be made after the log is closed,
     * because each successful query writes a row to it.
     * @throws IOException if flushing or closing the log fails
     */
    public void closeLog() throws IOException {
        logWriter.close();
    }

    /**
     * Reads the per-vertex float values into {@link #ranks}, indexed by internal
     * vertex id (the iterator is created with the identity translation).
     */
    private void loadRanks() throws IOException {
        logger.info("Loading ranks...");
        long st = System.currentTimeMillis();
        int numVertices = ChiFilenames.numVertices(baseFilename, numShards);
        ranks = new float[numVertices];
        Iterator<VertexIdValue<Float>> iter = VertexAggregator.vertexIterator(numVertices, baseFilename,
                new FloatConverter(), VertexIdTranslate.identity());
        while (iter.hasNext()) {
            VertexIdValue<Float> idVal = iter.next();
            ranks[idVal.getVertexId()] = idVal.getValue();
        }
        logger.info("Loaded ranks to memory in " + (System.currentTimeMillis() - st) + " ms");
    }

    /**
     * Recommend friends. Each recommendation is also printed to standard output.
     * @param vertexId queryAndCombine vertex (original, untranslated id)
     * @param fanOut maximum of queryAndCombine vertex's friends to consider. Selected randomly.
     * @return one line per recommendation in the form "name : count", or the
     *         empty string if the vertex has no friends to expand
     * @throws IOException
     */
    public String recommendFriends(int vertexId, int fanOut) throws IOException {
        int internalId = translator.forward(vertexId);
        logger.info("Querying for " + namify(vertexId) + " --> " + internalId);

        long stTime = System.currentTimeMillis();
        final HashSet<Integer> friends = queryEngine.queryOutNeighbors(internalId);
        long t = System.currentTimeMillis() - stTime;
        logger.info("Found " + friends.size() + " friends in " + t + " ms.");
        int origFriendsSize = friends.size();

        if (friends.size() > fanOut) {
            sampleFriends(friends, fanOut);
        }
        if (friends.size() == 0) {
            return "";
        }

        stTime = System.currentTimeMillis();
        HashMap<Integer, Integer> friendsOfFriends = queryEngine.queryOutNeighborsAndCombine(friends);
        friendsOfFriends.remove(internalId);
        long t2 = (System.currentTimeMillis() - stTime);
        // Drop candidates that are already (sampled) friends of the query vertex.
        for (int friend : friends) {
            friendsOfFriends.remove(friend);
        }
        logger.info("Found " + friendsOfFriends.size() + " friends-of-friends (that are not friends) in " +
                t2 + "ms");

        /* Take only top */
        // Relies on IdCount's natural ordering placing the smallest count last.
        TreeSet<IdCount> counts = new TreeSet<IdCount>();
        for (Map.Entry<Integer, Integer> e : friendsOfFriends.entrySet()) {
            if (counts.size() < TOP_K) {
                counts.add(new IdCount(translator.backward(e.getKey()), e.getValue()));
            } else {
                int smallest = counts.last().count;
                if (e.getValue() > smallest) {
                    counts.pollLast();
                    counts.add(new IdCount(translator.backward(e.getKey()), e.getValue()));
                }
            }
        }

        StringBuilder result = new StringBuilder();
        for (IdCount top : counts) {
            // Resolve the name once per row; namify() opens the names file on every call.
            String line = namify(top.id) + " : " + top.count;
            System.out.println(line);
            result.append(line).append("\n");
        }
        logWriter.write(origFriendsSize + "," + t + "," + t2 + "\n");
        return result.toString();
    }

    /**
     * Replaces the contents of {@code friends} with a random subset of at most
     * {@code fanOut} of its members. When ranks are loaded, members are drawn with
     * probability weighted by rank; otherwise uniformly. Both modes draw with
     * replacement, so the subset may end up smaller than {@code fanOut}.
     */
    private void sampleFriends(Set<Integer> friends, int fanOut) {
        Random r = new Random();
        ArrayList<Integer> friendsAll = new ArrayList<Integer>(friends);
        friends.clear();
        if (ranks != null) {
            // Going to sample by PageRank
            float[] weights = new float[friendsAll.size()];
            for (int i = 0; i < weights.length; i++) {
                weights[i] = ranks[friendsAll.get(i)];
            }
            // Oversample to compensate for duplicate draws.
            int[] samples = MultinomialSampler.generateSamplesAliasMethod(r, weights, fanOut * 2);
            for (int i : samples) {
                friends.add(friendsAll.get(i));
                if (friends.size() == fanOut) break;
            }
        } else {
            for (int i = 0; i < fanOut; i++) {
                // nextInt(bound) is always in [0, bound); Math.abs(nextInt()) % bound is
                // negative when nextInt() returns Integer.MIN_VALUE.
                friends.add(friendsAll.get(r.nextInt(friendsAll.size())));
            }
        }
    }

    /**
     * Formats a vertex id for display. If the file {@code baseFilename + "_names.dat"}
     * exists, the 16-byte record at offset {@code value * 16} is read and returned as
     * "name(id)"; otherwise just the id is returned.
     */
    private String namify(Integer value) throws IOException {
        File f = new File(baseFilename + "_names.dat");
        if (!f.exists()) {
            return value + "";
        }
        // Computed as long: value * 16 overflows int for ids >= 2^27.
        long offset = value.longValue() * NAME_RECORD_BYTES;
        byte[] tmp = new byte[NAME_RECORD_BYTES];
        RandomAccessFile raf = new RandomAccessFile(f.getAbsolutePath(), "r");
        try {
            raf.seek(offset);
            raf.read(tmp);
        } finally {
            raf.close();
        }
        // NOTE(review): decoded with the platform default charset; the encoding of the
        // names file is not visible from here.
        return new String(tmp) + "(" + value + ")";
    }

    /**
     * Interactive console. Arguments: graph base filename, number of shards.
     * Commands: a vertex id runs a query; "q" (or end of input) quits; "t" queries
     * vertices 10..999 and quits; "b" runs a long random benchmark.
     */
    public static void main(String[] args) throws Exception {
        String baseFilename = args[0];
        int numShards = Integer.parseInt(args[1]);
        FriendsOfFriends fof = new FriendsOfFriends(baseFilename, numShards, false);
        try {
            BufferedReader cmdIn = new BufferedReader(new InputStreamReader(System.in));
            while (true) {
                System.out.print("Enter vertex id to get friends-of-friends >> :: ");
                String ln = cmdIn.readLine();
                if (ln == null) break;  // end of input
                if (ln.startsWith("q")) break;
                if (ln.startsWith("t")) {
                    for (int i = 10; i < 1000; i++) {
                        fof.recommendFriends(i, 1000);
                    }
                    break;
                }
                if (ln.startsWith("b")) {
                    // Benchmark: start from a fresh engine. The old log must be closed first
                    // because the new instance reopens (and truncates) the same file.
                    fof.queryEngine.shutdown();
                    fof.closeLog();
                    fof = new FriendsOfFriends(baseFilename, numShards, false);
                    int numVertices = ChiFilenames.numVertices(baseFilename, numShards);
                    Random r = new Random();
                    for (int i = 0; i < 1000000; i++) {
                        int vId = r.nextInt(numVertices);
                        if (vId % 10 <= 4) {  // 50% of time look for lower ids which have higher degree
                            // NOTE(review): the modulus looks like it was meant to cap the
                            // range at 100000 ids — confirm. Kept as is, but guarded because
                            // nextInt(0) throws when numVertices is a multiple of 100000.
                            int lowIdBound = numVertices % 100000;
                            if (lowIdBound > 0) {
                                vId = r.nextInt(lowIdBound);
                            }
                        }
                        fof.recommendFriends(vId, 4000);
                        if (i % 1000 == 0) {
                            logger.info("Benchmark round " + i);
                        }
                        fof.logWriter.flush();
                    }
                    continue;  // "b" is not a vertex id; go back to the prompt
                }
                int queryId;
                try {
                    queryId = Integer.parseInt(ln.trim());
                } catch (NumberFormatException nfe) {
                    System.out.println("Not a vertex id: " + ln);
                    continue;
                }
                fof.recommendFriends(queryId, 500);
            }
        } finally {
            // Closing twice is harmless; this makes sure buffered log rows are written.
            fof.closeLog();
        }
        fof.queryEngine.shutdown();
    }
}