package edu.cmu.graphchi.shards;
import com.yammer.metrics.Metrics;
import com.yammer.metrics.core.Timer;
import com.yammer.metrics.core.TimerContext;
import edu.cmu.graphchi.ChiFilenames;
import edu.cmu.graphchi.ChiLogger;
import edu.cmu.graphchi.ChiVertex;
import edu.cmu.graphchi.datablocks.BytesToValueConverter;
import edu.cmu.graphchi.datablocks.DataBlockManager;
import edu.cmu.graphchi.io.CompressedIO;
import java.io.*;
import java.util.ArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;
/**
* Copyright [2012] [Aapo Kyrola, Guy Blelloch, Carlos Guestrin / Carnegie Mellon University]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Used only internally - do not modify. To understand Memory shards, see
* http://code.google.com/p/graphchi/wiki/IntroductionToGraphChi
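 *
 * <p>A minimal usage sketch (normally the GraphChi engine drives this class;
 * the filenames, block manager, converter, vertex array and executor are
 * assumed to come from the surrounding engine code):
 * <pre>
 *   MemoryShard&lt;Float&gt; shard = new MemoryShard&lt;Float&gt;(edataFname, adjFname, rangeStart, rangeEnd);
 *   shard.setDataBlockManager(blockManager);
 *   shard.setConverter(new FloatConverter());  // assumed float edge values
 *   shard.loadVertices(windowStart, windowEnd, vertices, false, executor);
 *   shard.commitAndRelease(true, false);
 * </pre>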
 * @param <EdgeDataType> the type of the values stored on the edges
*/
public class MemoryShard<EdgeDataType> {
private String edgeDataFilename;
private String adjDataFilename;
private int rangeStart;
private int rangeEnd;
private byte[] adjData;
private int[] blockIds = new int[0];
    private int[] blockSizes = new int[0];
private int edataFilesize;
private boolean loaded = false;
private boolean onlyAdjacency = false;
private boolean hasSetRangeOffset = false, hasSetOffset = false;
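    // Byte offset and edge pointer where this shard's own vertex interval
    // begins in the adjacency data; discovered lazily while decoding.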
private int rangeStartOffset, rangeStartEdgePtr, rangeContVid;
private int adjDataLength;
private DataBlockManager dataBlockManager;
private BytesToValueConverter<EdgeDataType> converter;
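    // Byte offset, edge pointer and vertex id where out-edges past the end of
    // the interval begin ("streaming offset"); exposed via the getters below.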
private int streamingOffset, streamingOffsetEdgePtr, streamingOffsetVid;
private int blocksize = 0;
    private final Timer loadAdjTimer =
            Metrics.defaultRegistry().newTimer(MemoryShard.class, "load-adj", TimeUnit.SECONDS, TimeUnit.MINUTES);
    private final Timer loadVerticesTimer =
            Metrics.defaultRegistry().newTimer(MemoryShard.class, "load-vertices", TimeUnit.SECONDS, TimeUnit.MINUTES);
private static final Logger logger = ChiLogger.getLogger("memoryshard");
private ArrayList<ShardIndex.IndexEntry> index;
private MemoryShard() {}
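    /**
     * @param edgeDataFilename base filename of the shard's edge-data block files
     * @param adjDataFilename  name of the shard's adjacency (graph structure) file
     * @param rangeStart       first vertex id of the interval this shard covers
     * @param rangeEnd         last vertex id of the interval this shard covers
     */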
public MemoryShard(String edgeDataFilename, String adjDataFilename, int rangeStart, int rangeEnd) {
this.edgeDataFilename = edgeDataFilename;
this.adjDataFilename = adjDataFilename;
this.rangeStart = rangeStart;
this.rangeEnd = rangeEnd;
}
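    /**
     * Writes dirty edge-data blocks back to disk (compressed) and releases all
     * in-memory blocks. If in-edges were modified, every block is rewritten;
     * if only out-edges were modified, only the blocks covering this shard's
     * own vertex interval need to be rewritten.
     */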
public void commitAndRelease(boolean modifiesInedges, boolean modifiesOutedges) throws IOException {
int nblocks = blockIds.length;
if (!onlyAdjacency && loaded) {
if (modifiesInedges) {
if (blocksize == 0) {
blocksize = ChiFilenames.getBlocksize(converter.sizeOf());
}
                int startStreamBlock = rangeStartEdgePtr / blocksize;
                for (int i = 0; i < nblocks; i++) {
                    String blockFilename = ChiFilenames.getFilenameShardEdataBlock(edgeDataFilename, i, blocksize);
                    // Blocks before startStreamBlock could be written asynchronously,
                    // but asynchronous writes are not implemented yet, so every block
                    // is written synchronously.
                    CompressedIO.writeCompressed(new File(blockFilename),
                            dataBlockManager.getRawBlock(blockIds[i]),
                            blockSizes[i]);
                }
} else if (modifiesOutedges) {
int last = streamingOffsetEdgePtr;
if (last == 0) {
last = edataFilesize;
}
                int startblock = rangeStartEdgePtr / blocksize;
                int endblock = last / blocksize;
for(int i=startblock; i <= endblock; i++) {
String blockFilename = ChiFilenames.getFilenameShardEdataBlock(edgeDataFilename, i, blocksize);
CompressedIO.writeCompressed(new File(blockFilename),
dataBlockManager.getRawBlock(blockIds[i]),
blockSizes[i]);
}
}
/* Release all blocks */
            for (int blockId : blockIds) {
dataBlockManager.release(blockId);
}
}
}
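    /**
     * Loads the adjacency data (and, unless onlyAdjacency is set, the edge
     * data) and fills in the in- and out-edges of the vertices that fall into
     * [windowStart, windowEnd]. Uncompressed adjacency data is decoded in
     * parallel, one task per entry of the sparse shard index.
     */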
public void loadVertices(final int windowStart, final int windowEnd, final ChiVertex[] vertices, final boolean disableOutEdges, final ExecutorService parallelExecutor)
throws IOException {
DataInput compressedInput = null;
if (adjData == null) {
compressedInput = loadAdj();
if (!onlyAdjacency) loadEdata();
}
        TimerContext _timer = loadVerticesTimer.time();
        if (compressedInput != null) {
            // Compressed input is a single sequential stream, so it cannot be
            // decoded in parallel: replace the index with one chunk covering
            // the whole shard.
            index = new ArrayList<ShardIndex.IndexEntry>();
            index.add(new ShardIndex.IndexEntry(0, 0, 0));
        }
final int sizeOf = (converter == null ? 0 : converter.sizeOf());
/* Load in parallel */
if (compressedInput == null) {
final AtomicInteger countDown = new AtomicInteger(index.size());
final Object waitLock = new Object();
for(int chunk=0; chunk<index.size(); chunk++) {
final int _chunk = chunk;
parallelExecutor.submit(new Runnable() {
@Override
public void run() {
try {
loadAdjChunk(windowStart, windowEnd, vertices, disableOutEdges, null, sizeOf, _chunk);
                        } catch (IOException ioe) {
                            // Print here as well: the RuntimeException would otherwise
                            // be swallowed, since the task's Future is never inspected.
                            ioe.printStackTrace();
                            throw new RuntimeException(ioe);
} finally {
countDown.decrementAndGet();
synchronized (waitLock) {
waitLock.notifyAll();
}
}
}
} );
}
            /* Wait for all chunk-loading tasks to finish */
            while (countDown.get() > 0) {
                synchronized (waitLock) {
                    try {
                        waitLock.wait(10000);
                    } catch (InterruptedException e) {
                        // Deliberately ignored: keep waiting until all chunks are done.
                    }
                }
            }
} else {
loadAdjChunk(windowStart, windowEnd, vertices, disableOutEdges, compressedInput, sizeOf, 0);
}
_timer.stop();
}
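    /*
     * Decodes one chunk of the adjacency data. As the parser below implies,
     * the on-disk format is a sequence of per-vertex records: a one-byte
     * out-degree, where 0 means the next byte is a run length of consecutive
     * edgeless vertices and 0xff means the true degree follows as a
     * little-endian 32-bit integer, followed by the neighbor ids as
     * little-endian 32-bit integers.
     */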
private void loadAdjChunk(int windowStart, int windowEnd, ChiVertex[] vertices, boolean disableOutEdges, DataInput compressedInput, int sizeOf, int chunk) throws IOException {
ShardIndex.IndexEntry indexEntry = index.get(chunk);
int vid = indexEntry.vertex;
        int vidEnd = (chunk < index.size() - 1 ? index.get(chunk + 1).vertex : Integer.MAX_VALUE);
int edataPtr = indexEntry.edgePointer * sizeOf;
int adjOffset = indexEntry.fileOffset;
int end = adjDataLength;
if (chunk < index.size() - 1) {
end = index.get(chunk + 1).fileOffset;
}
        boolean containsRangeEnd = (vid < rangeEnd && vidEnd > rangeEnd);
        boolean containsRangeSt = (vid <= rangeStart && vidEnd > rangeStart);
DataInput adjInput = (compressedInput != null ? compressedInput : new DataInputStream(new ByteArrayInputStream(adjData)));
adjInput.skipBytes(adjOffset);
try {
while(adjOffset < end) {
if (containsRangeEnd) {
if (!hasSetOffset && vid > rangeEnd) {
streamingOffset = adjOffset;
streamingOffsetEdgePtr = edataPtr;
streamingOffsetVid = vid;
hasSetOffset = true;
}
}
if (containsRangeSt) {
if (!hasSetRangeOffset && vid >= rangeStart) {
rangeStartOffset = adjOffset;
rangeStartEdgePtr = edataPtr;
hasSetRangeOffset = true;
}
}
                int n;
                int ns = adjInput.readUnsignedByte();
                adjOffset += 1;
                if (ns == 0) {
                    // A zero count means the next byte holds the number of
                    // consecutive vertices (after this one) with no out-edges.
                    vid++;
                    int nz = adjInput.readUnsignedByte();
                    adjOffset += 1;
                    vid += nz;
                    continue;
                }
                if (ns == 0xff) {
                    // The one-byte count saturates at 0xff: the true out-degree
                    // follows as a little-endian 32-bit integer.
                    n = Integer.reverseBytes(adjInput.readInt());
                    adjOffset += 4;
                } else {
                    n = ns;
                }
ChiVertex vertex = null;
if (vid >= windowStart && vid <= windowEnd) {
vertex = vertices[vid - windowStart];
}
while (--n >= 0) {
int target = Integer.reverseBytes(adjInput.readInt());
adjOffset += 4;
if (!(target >= rangeStart && target <= rangeEnd))
throw new IllegalStateException("Target " + target + " not in range!");
if (vertex != null && !disableOutEdges) {
vertex.addOutEdge((onlyAdjacency ? -1 : blockIds[edataPtr / blocksize]), (onlyAdjacency ? -1 : edataPtr % blocksize), target);
}
                    if (target >= windowStart && target <= windowEnd) {
                        ChiVertex dstVertex = vertices[target - windowStart];
                        if (dstVertex != null) {
                            dstVertex.addInEdge((onlyAdjacency ? -1 : blockIds[edataPtr / blocksize]),
                                    (onlyAdjacency ? -1 : edataPtr % blocksize),
                                    vid);
                        }
                        if (vertex != null && dstVertex != null) {
                            // Both endpoints are in the current window, so their
                            // updates share this edge and must not run in parallel.
                            dstVertex.parallelSafe = false;
                            vertex.parallelSafe = false;
                        }
                    }
edataPtr += sizeOf;
// TODO: skip
}
vid++;
}
        } catch (EOFException eof) {
            // A truncated stream simply ends this chunk.
        } finally {
            if (adjInput instanceof InputStream) {
                ((InputStream) adjInput).close();
            }
        }
}
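    /*
     * Reads the entire adjacency file into the in-memory adjData buffer,
     * transparently decompressing a ".gz" version if one exists, and loads a
     * sparse shard index for chunked parallel decoding. Always returns null,
     * since the data is fully buffered; a non-null return would indicate a
     * sequential compressed stream that must be decoded in a single chunk.
     */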
    private DataInput loadAdj() throws IOException {
File compressedFile = new File(adjDataFilename + ".gz");
InputStream adjStreamRaw;
long fileSizeEstimate = 0;
if (compressedFile.exists()) {
logger.info("Note: using compressed: " + compressedFile.getAbsolutePath());
adjStreamRaw = new GZIPInputStream(new FileInputStream(compressedFile));
fileSizeEstimate = compressedFile.length() * 3 / 2;
} else {
adjStreamRaw = new FileInputStream(adjDataFilename);
fileSizeEstimate = new File(adjDataFilename).length();
}
/* Load index */
        index = new ShardIndex(new File(adjDataFilename)).sparserIndex(1024 * 1024);
        BufferedInputStream adjStream = new BufferedInputStream(adjStreamRaw, (int) fileSizeEstimate / 4);
        TimerContext _timer = loadAdjTimer.time();
        // Buffer the whole adjacency file in memory (a hack for cases when the
        // load is not divided into subwindows).
        ByteArrayOutputStream adjDataStream = new ByteArrayOutputStream((int) fileSizeEstimate);
try {
            byte[] buf = new byte[(int) fileSizeEstimate / 4]; // Read in roughly four chunks
while (true) {
int read = adjStream.read(buf);
if (read > 0) {
adjDataStream.write(buf, 0, read);
} else break;
}
} catch (EOFException err) {
// Done
}
adjData = adjDataStream.toByteArray();
adjDataLength = adjData.length;
adjStream.close();
adjDataStream.close();
_timer.stop();
return null;
}
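    /*
     * Splits the edge-data file into fixed-size blocks, allocates an in-memory
     * buffer for each block via the DataBlockManager, and fills the buffers
     * from the compressed per-block files on disk.
     */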
    private void loadEdata() throws IOException {
/* Load the edge data from file. Should be done asynchronously. */
blocksize = ChiFilenames.getBlocksize(converter.sizeOf());
if (!loaded) {
edataFilesize = ChiFilenames.getShardEdataSize(edgeDataFilename);
int nblocks = edataFilesize / blocksize + (edataFilesize % blocksize == 0 ? 0 : 1);
blockIds = new int[nblocks];
blockSizes = new int[nblocks];
for(int fileBlockId=0; fileBlockId < nblocks; fileBlockId++) {
int fsize = Math.min(edataFilesize - blocksize * fileBlockId, blocksize);
blockIds[fileBlockId] = dataBlockManager.allocateBlock(fsize);
blockSizes[fileBlockId] = fsize;
String blockfilename = ChiFilenames.getFilenameShardEdataBlock(edgeDataFilename, fileBlockId, blocksize);
CompressedIO.readCompressed(new File(blockfilename), dataBlockManager.getRawBlock(blockIds[fileBlockId]), fsize);
}
loaded = true;
}
}
public DataBlockManager getDataBlockManager() {
return dataBlockManager;
}
public void setDataBlockManager(DataBlockManager dataBlockManager) {
this.dataBlockManager = dataBlockManager;
}
public void setConverter(BytesToValueConverter<EdgeDataType> converter) {
this.converter = converter;
}
public int getStreamingOffset() {
return streamingOffset;
}
public int getStreamingOffsetEdgePtr() {
return streamingOffsetEdgePtr;
}
public int getStreamingOffsetVid() {
return streamingOffsetVid;
}
public void setOnlyAdjacency(boolean onlyAdjacency) {
this.onlyAdjacency = onlyAdjacency;
}
}