package edu.cmu.graphchi.queries;
import java.io.*;
import java.util.*;
import java.util.concurrent.*;
import java.util.logging.Logger;
import edu.cmu.graphchi.ChiFilenames;
import edu.cmu.graphchi.ChiLogger;
import edu.cmu.graphchi.datablocks.BytesToValueConverter;
import edu.cmu.graphchi.io.CompressedIO;
import edu.cmu.graphchi.shards.ShardIndex;
import edu.cmu.graphchi.vertexdata.VertexIdValue;
import ucar.unidata.io.RandomAccessFile;
/**
* Disk-based queries of out-edges of a vertex.
* <b>Note:</b> all vertex ids are in the <i>internal</i> vertex id space.
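* <p>Example usage (a minimal sketch; the base filename, shard count and
* vertex id below are hypothetical):
* <pre>{@code
*   VertexQuery q = new VertexQuery("/path/to/graph", 4);
*   HashSet<Integer> nbrs = q.queryOutNeighbors(42);
*   q.shutdown();
* }</pre>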
* @author Aapo Kyrola
*/
public class VertexQuery {
private static final int NTHREADS = 4;
private static final Logger logger = ChiLogger.getLogger("vertexquery");
private ArrayList<Shard> shards;
private ExecutorService executor;
public VertexQuery(String baseFilename, int numShards) throws IOException {
shards = new ArrayList<Shard>();
for(int i=0; i<numShards; i++) {
shards.add(new Shard(baseFilename, i, numShards));
}
executor = Executors.newFixedThreadPool(NTHREADS);
}
/**
* Queries all out-neighbors of the given vertices and returns a map of (vertex-id, count),
* where count is the number of query vertices that had the vertex-id as an out-neighbor.
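* <p>A minimal sketch (the vertex ids are hypothetical); a neighbor shared by
* query vertices 1 and 2 maps to the count 2:
* <pre>{@code
*   HashMap<Integer, Integer> counts =
*       q.queryOutNeighborsAndCombine(Arrays.asList(1, 2));
* }</pre>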
* @param queryVertices vertex ids to query, in the internal id space
* @return map from neighbor vertex id to the number of query vertices pointing to it
*/
public HashMap<Integer, Integer> queryOutNeighborsAndCombine(final Collection<Integer> queryVertices) {
HashMap<Integer, Integer> results = new HashMap<Integer, Integer>(1000000);
List<Future<HashMap<Integer, Integer>>> queryFutures = new ArrayList<Future<HashMap<Integer, Integer>>>();
/* Execute queries in parallel */
for(Shard shard : shards) {
final Shard _shard = shard;
queryFutures.add(executor.submit(new Callable<HashMap<Integer, Integer>>() {
@Override
public HashMap<Integer, Integer> call() throws Exception {
HashMap<Integer, Integer> edges = _shard.queryAndCombine(queryVertices);
return edges;
}
}));
}
/* Combine the per-shard results */
try {
for(int i=0; i < queryFutures.size(); i++) {
HashMap<Integer, Integer> shardResults = queryFutures.get(i).get();
for(Map.Entry<Integer, Integer> e : shardResults.entrySet()) {
if (results.containsKey(e.getKey())) {
results.put(e.getKey(), e.getValue() + results.get(e.getKey()));
} else {
results.put(e.getKey(), e.getValue());
}
}
}
} catch (InterruptedException e) {
throw new RuntimeException(e);
} catch (ExecutionException e) {
throw new RuntimeException(e);
}
return results;
}
/**
* Queries the out-neighbors of a given set of vertices.
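* <p>A minimal sketch (the vertex ids are hypothetical):
* <pre>{@code
*   HashMap<Integer, ArrayList<Integer>> nbrs =
*       q.queryOutNeighbors(Arrays.asList(1, 2, 3));
*   ArrayList<Integer> nbrsOfOne = nbrs.get(1);
* }</pre>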
* @param queryVertices vertex ids to query, in the internal id space
* @return map from each query vertex id to the list of its out-neighbors
*/
public HashMap<Integer, ArrayList<Integer>> queryOutNeighbors(final Collection<Integer> queryVertices) {
HashMap<Integer, ArrayList<Integer>> results = new HashMap<Integer, ArrayList<Integer>>(1000);
List<Future<HashMap<Integer, ArrayList<Integer>>>> queryFutures
= new ArrayList<Future<HashMap<Integer, ArrayList<Integer>>>>();
/* Execute queries in parallel */
for(Shard shard : shards) {
final Shard _shard = shard;
queryFutures.add(executor.submit(new Callable<HashMap<Integer, ArrayList<Integer>>>() {
@Override
public HashMap<Integer, ArrayList<Integer>> call() throws Exception {
HashMap<Integer, ArrayList<Integer>> edges = _shard.query(queryVertices);
return edges;
}
}));
}
/* Combine the per-shard results */
try {
for(int i=0; i < queryFutures.size(); i++) {
HashMap<Integer, ArrayList<Integer>> shardResults = queryFutures.get(i).get();
for(Map.Entry<Integer, ArrayList<Integer>> e : shardResults.entrySet()) {
ArrayList<Integer> existing = results.get(e.getKey());
if (existing == null) {
results.put(e.getKey(), e.getValue());
} else {
existing.addAll(e.getValue());
}
}
}
} catch (InterruptedException e) {
throw new RuntimeException(e);
} catch (ExecutionException e) {
throw new RuntimeException(e);
}
return results;
}
/**
* Returns the out-neighbors of the given vertex.
* @param internalId vertex id in the internal id space
* @return set of out-neighbor vertex ids
* @throws IOException
*/
public HashSet<Integer> queryOutNeighbors(final int internalId) throws IOException {
HashSet<Integer> friends;
List<Future<HashSet<Integer>>> queryFutures = new ArrayList<Future<HashSet<Integer>>>();
/* Query from shards in parallel */
for(Shard shard : shards) {
final Shard _shard = shard;
queryFutures.add(executor.submit(new Callable<HashSet<Integer>>() {
@Override
public HashSet<Integer> call() throws Exception {
return _shard.query(internalId);
}
}));
}
try {
friends = queryFutures.get(0).get();
for(int i=1; i < queryFutures.size(); i++) {
HashSet<Integer> shardFriends = queryFutures.get(i).get();
friends.addAll(shardFriends);
}
} catch (Exception err) {
throw new RuntimeException(err);
}
return friends;
}
/**
* Shuts down the executor threads.
*/
public void shutdown() {
executor.shutdown();
}
static class Shard {
RandomAccessFile adjFile;
ShardIndex index;
int shardNum;
int numShards;
String fileName;
private Shard(String fileName, int shardNum, int numShards) throws IOException {
this.shardNum = shardNum;
this.numShards = numShards;
this.fileName = fileName;
File f = new File(ChiFilenames.getFilenameShardsAdj(fileName, shardNum, numShards));
adjFile = new RandomAccessFile(f.getAbsolutePath(), "r", 64 * 1024);
index = new ShardIndex(f);
}
/**
* Queries the out-neighbors of all given vertices from this shard and
* counts how many times each neighbor occurs across the query set.
* @param queryIds vertex ids to query, in the internal id space
* @return map from neighbor vertex id to its occurrence count
* @throws IOException
*/
public HashMap<Integer, Integer> queryAndCombine(Collection<Integer> queryIds) throws IOException {
/* Sort (and deduplicate) the ids so the shard index entries are
visited in ascending order and each id is processed once */
ArrayList<Integer> sortedIds = new ArrayList<Integer>(new TreeSet<Integer>(queryIds));
ArrayList<ShardIndex.IndexEntry> indexEntries = new ArrayList<ShardIndex.IndexEntry>(sortedIds.size());
for(Integer a : sortedIds) {
indexEntries.add(index.lookup(a));
}
HashMap<Integer, Integer> results = new HashMap<Integer, Integer>(5000);
ShardIndex.IndexEntry entry = null, lastEntry = null;
int curvid=0, adjOffset=0;
for(int qIdx=0; qIdx < sortedIds.size(); qIdx++) {
entry = indexEntries.get(qIdx);
int vertexId = sortedIds.get(qIdx);
/* If consecutive query vertices share the same index entry, keep
reading sequentially from the current file position; otherwise
seek to the start of the indexed block. */
if (qIdx == 0 || !entry.equals(lastEntry)) {
curvid = entry.vertex;
adjOffset = entry.fileOffset;
adjFile.seek(adjOffset);
}
lastEntry = entry;
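/* Decode the adjacency records: each record starts with a count byte.
0 means the current vertex has no out-edges and the next byte tells
how many subsequent vertices are also empty; 0xff means the true
count follows as a 4-byte int. */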
while(curvid <= vertexId) {
int n;
int ns = adjFile.readUnsignedByte();
assert(ns >= 0);
adjOffset++;
if (ns == 0) {
curvid++;
int nz = adjFile.readUnsignedByte();
adjOffset++;
assert(nz >= 0);
curvid += nz;
continue;
}
if (ns == 0xff) {
n = adjFile.readInt();
adjOffset += 4;
} else {
n = ns;
}
if (curvid == vertexId) {
while (--n >= 0) {
int target = adjFile.readInt();
Integer curCount = results.get(target);
if (curCount == null) {
results.put(target, 1);
} else {
results.put(target, 1 + curCount);
}
}
} else {
adjFile.skipBytes(n * 4);
}
curvid++;
}
}
return results;
}
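/**
* Queries the out-neighbor lists of the given vertices from this shard.
* Query vertices with no out-edges in this shard map to an empty list.
* @param queryIds vertex ids to query, in the internal id space
* @return map from each query vertex id to its out-neighbors in this shard
* @throws IOException
*/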
public HashMap<Integer, ArrayList<Integer>> query(Collection<Integer> queryIds) throws IOException {
/* Sort (and deduplicate) the ids so the shard index entries are
visited in ascending order and each id is processed once */
ArrayList<Integer> sortedIds = new ArrayList<Integer>(new TreeSet<Integer>(queryIds));
ArrayList<ShardIndex.IndexEntry> indexEntries = new ArrayList<ShardIndex.IndexEntry>(sortedIds.size());
for(Integer a : sortedIds) {
indexEntries.add(index.lookup(a));
}
HashMap<Integer, ArrayList<Integer>> results = new HashMap<Integer, ArrayList<Integer>>(queryIds.size());
ShardIndex.IndexEntry entry = null, lastEntry = null;
int curvid=0, adjOffset=0;
for(int qIdx=0; qIdx < sortedIds.size(); qIdx++) {
entry = indexEntries.get(qIdx);
int vertexId = sortedIds.get(qIdx);
boolean found = false;
/* If consecutive query vertices share the same index entry, keep
reading sequentially from the current file position; otherwise
seek to the start of the indexed block. */
if (qIdx == 0 || !entry.equals(lastEntry)) {
curvid = entry.vertex;
adjOffset = entry.fileOffset;
adjFile.seek(adjOffset);
}
lastEntry = entry;
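/* Decode the adjacency records (same format as in queryAndCombine) */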
while(curvid <= vertexId) {
int n;
int ns = adjFile.readUnsignedByte();
assert(ns >= 0);
adjOffset++;
if (ns == 0) {
curvid++;
int nz = adjFile.readUnsignedByte();
adjOffset++;
assert(nz >= 0);
curvid += nz;
continue;
}
if (ns == 0xff) {
n = adjFile.readInt();
adjOffset += 4;
} else {
n = ns;
}
if (curvid == vertexId) {
ArrayList<Integer> nbrs = new ArrayList<Integer>(n);
found = true;
while (--n >= 0) {
int target = adjFile.readInt();
nbrs.add(target);
}
results.put(vertexId, nbrs);
} else {
adjFile.skipBytes(n * 4);
}
curvid++;
}
if (!found) {
results.put(vertexId, new ArrayList<Integer>(0));
}
}
return results;
}
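/**
* Queries the out-neighbors of a single vertex from this shard.
*/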
public HashSet<Integer> query(int vertexId) throws IOException {
return new HashSet<Integer>(query(Collections.singletonList(vertexId)).get(vertexId));
}
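/**
* Queries the out-neighbors of a vertex together with the associated
* edge values, which are read from the shard's edge data blocks.
* @param vertexId vertex id in the internal id space
* @param conv converter used to decode each edge value from its bytes
* @return list of (neighbor-id, edge-value) pairs
* @throws IOException
*/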
public <VT> List<VertexIdValue<VT>> queryWithValues(int vertexId, BytesToValueConverter<VT> conv)
throws IOException {
List<VertexIdValue<VT>> results = new ArrayList<VertexIdValue<VT>>();
ShardIndex.IndexEntry entry = index.lookup(vertexId);
int curvid = entry.vertex;
int adjOffset = entry.fileOffset;
int edgeOffset = entry.edgePointer;
String edataShardName = ChiFilenames.getFilenameShardEdata(fileName, conv, shardNum, numShards);
int fileSize = ChiFilenames.getShardEdataSize(edataShardName);
adjFile.seek(adjOffset);
/* Edge data is stored in fixed-size blocks */
int blockSize = ChiFilenames.getBlocksize(conv.sizeOf());
byte[] edgeDataBlock = new byte[blockSize];
int curBlockId = -1;
byte[] tmp = new byte[conv.sizeOf()];
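/* Scan the adjacency records (same format as in queryAndCombine),
tracking edgeOffset so the matching edge values can be located
in the edge data blocks. */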
while(curvid <= vertexId) {
int n;
int ns = adjFile.readUnsignedByte();
assert(ns >= 0);
adjOffset++;
if (ns == 0) {
curvid++;
int nz = adjFile.readUnsignedByte();
adjOffset++;
assert(nz >= 0);
curvid += nz;
continue;
}
if (ns == 0xff) {
n = adjFile.readInt();
adjOffset += 4;
} else {
n = ns;
}
if (curvid == vertexId) {
while (--n >= 0) {
int target = adjFile.readInt();
int blockId = edgeOffset * conv.sizeOf() / blockSize;
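/* Load (and decompress) a new edge data block only when the edge
offset crosses a block boundary; the last block read is cached. */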
if (blockId != curBlockId) {
String blockFileName = ChiFilenames.getFilenameShardEdataBlock(
edataShardName,
blockId, blockSize);
curBlockId = blockId;
int len = Math.min(blockSize, fileSize - blockId * blockSize);
CompressedIO.readCompressed(new File(blockFileName), edgeDataBlock, len);
}
System.arraycopy(edgeDataBlock, (edgeOffset * conv.sizeOf()) % blockSize, tmp, 0, conv.sizeOf());
VT value = conv.getValue(tmp);
results.add(new VertexIdValue<VT>(target, value));
edgeOffset++;
}
} else {
adjFile.skipBytes(n * 4);
edgeOffset += n;
}
curvid++;
}
return results;
}
}
}