package edu.cmu.graphchi.shards;
import edu.cmu.graphchi.ChiFilenames;
import edu.cmu.graphchi.ChiLogger;
import edu.cmu.graphchi.ChiVertex;
import edu.cmu.graphchi.datablocks.BytesToValueConverter;
import edu.cmu.graphchi.datablocks.ChiPointer;
import edu.cmu.graphchi.datablocks.DataBlockManager;
import edu.cmu.graphchi.io.CompressedIO;
import nom.tam.util.BufferedDataInputStream;
import java.io.*;
import java.util.ArrayList;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;
/**
* Copyright [2012] [Aapo Kyrola, Guy Blelloch, Carlos Guestrin / Carnegie Mellon University]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Sequential reader over one shard's adjacency file and, unless the shard is
 * opened in adjacency-only mode, the matching edge-data blocks. Out-edges of
 * consecutive vertices are attached to {@link ChiVertex} objects by
 * {@link #readNextVertices(ChiVertex[], int, boolean)}.
 *
 * Used only internally - do not modify. To understand Sliding shards, see
 * http://code.google.com/p/graphchi/wiki/IntroductionToGraphChi
 *
 * @param <EdgeDataType> type of the value stored on each edge
 */
public class SlidingShard <EdgeDataType> {

    private final String edgeDataFilename;
    private final String adjDataFilename;

    /** Inclusive bounds that every edge target read from this shard must satisfy. */
    private final int rangeStart;
    private final int rangeEnd;

    private DataBlockManager blockManager;

    /**
     * Edge-data blocks that have been created and not yet committed/released.
     * Always allocated (also in adjacency-only mode) so that {@link #finalize()}
     * and the size assertion in {@code readNextVertices} never see {@code null}.
     */
    private final ArrayList<Block> activeBlocks = new ArrayList<Block>();

    public long edataFilesize, adjFilesize;

    private Block curBlock = null;
    /** Byte position in the edge-data file of the next edge value. */
    private int edataOffset = 0;
    /** Size of one edge-data block in bytes; -1 until a converter has been set. */
    private int blockSize = -1;
    /** Size of one edge value in bytes; -1 until {@link #setConverter} has been called. */
    private int sizeOf = -1;
    /** Byte position in the (uncompressed) adjacency stream. */
    private int adjOffset = 0;
    /** Id of the vertex whose adjacency record is read next. */
    private int curvid = 0;
    private boolean onlyAdjacency = false;
    private boolean asyncEdataLoading = true;

    private BytesToValueConverter<EdgeDataType> converter;
    /** Lazily opened adjacency stream; reset to null by {@link #setOffset}. */
    private BufferedDataInputStream adjFile;
    private boolean modifiesOutedges = true;

    private static final Logger logger = ChiLogger.getLogger("slidingshard");

    /**
     * Creates a sliding shard over the given files.
     *
     * @param edgeDataFilename edge-data file name, or {@code null} to read only the
     *                         adjacency (no edge values are loaded in that case)
     * @param adjDataFilename  adjacency file name
     * @param rangeStart       smallest allowed edge target (inclusive)
     * @param rangeEnd         largest allowed edge target (inclusive)
     * @throws IOException declared for callers; propagated from the edge-data size lookup
     */
    public SlidingShard(String edgeDataFilename, String adjDataFilename,
                        int rangeStart, int rangeEnd) throws IOException {
        this.edgeDataFilename = edgeDataFilename;
        this.adjDataFilename = adjDataFilename;
        this.rangeStart = rangeStart;
        this.rangeEnd = rangeEnd;

        adjFilesize = new File(adjDataFilename).length();
        if (edgeDataFilename != null) {
            edataFilesize = ChiFilenames.getShardEdataSize(edgeDataFilename);
        } else {
            onlyAdjacency = true;
        }
    }

    /**
     * Releases every block that is still registered as active.
     * Safe in adjacency-only mode, where the list of active blocks is simply empty.
     */
    @Override
    public void finalize() {
        for (Block b : activeBlocks) {
            b.release();
        }
    }

    /**
     * Makes sure {@link #curBlock} covers the next {@code toread} bytes starting at
     * {@link #edataOffset}; otherwise allocates the block-aligned block containing
     * {@code edataOffset} and registers it in {@link #activeBlocks}.
     *
     * @param toread number of edge-data bytes about to be consumed
     */
    private void checkCurblock(int toread) {
        if (curBlock == null || curBlock.end < edataOffset + toread) {
            if (curBlock != null && !curBlock.active) {
                // A block whose data was never loaded has nothing to commit.
                curBlock.release();
            }
            // Load next: align the start down to a block boundary.
            int fileBlockId = edataOffset / blockSize;
            int start = fileBlockId * blockSize;
            // Add in long so that start + blockSize cannot wrap around before
            // being clamped to the file size.
            long end = Math.min((long) start + blockSize, edataFilesize);
            curBlock = new Block(edgeDataFilename, start, (int) end, fileBlockId, blockSize);
            curBlock.ptr = edataOffset - start; // Correction due to alignment.
            activeBlocks.add(curBlock);
        }
    }

    /**
     * Returns a pointer to the next edge value and advances both the block
     * cursor and {@link #edataOffset} by one value.
     *
     * @return pointer (block id, offset in block), or {@code null} in adjacency-only mode
     */
    private ChiPointer readEdgePtr() {
        assert(sizeOf >= 0);
        if (onlyAdjacency) return null;
        checkCurblock(sizeOf);
        ChiPointer ptr = new ChiPointer(curBlock.blockId, curBlock.ptr);
        curBlock.ptr += sizeOf;
        edataOffset += sizeOf;
        return ptr;
    }

    /**
     * Skips {@code n} edges: {@code 4 * n} bytes of the adjacency stream (each
     * target is read as a 4-byte int elsewhere in this class) and
     * {@code sizeOf * n} bytes of edge data.
     *
     * @param n number of edges to skip
     * @throws IOException if the adjacency stream cannot be advanced
     */
    public void skip(int n) throws IOException {
        int tot = n * 4;
        adjOffset += tot;
        adjFile.skipBytes(tot);
        edataOffset += sizeOf * n;
        if (curBlock != null) {
            curBlock.ptr += sizeOf * n;
        }
    }

    /**
     * Opens the adjacency stream, preferring {@code <adjDataFilename>.gz} when it exists.
     * The underlying file stream is closed again if the gzip wrapper cannot be created,
     * so a corrupt compressed file does not leak a file handle.
     */
    private BufferedDataInputStream openAdjacencyStream() throws IOException {
        File compressedFile = new File(adjDataFilename + ".gz");
        if (!compressedFile.exists()) {
            return new BufferedDataInputStream(new FileInputStream(adjDataFilename), 1024 * 1024);
        }
        logger.info("Note: using compressed: " + compressedFile.getName());
        FileInputStream raw = new FileInputStream(compressedFile);
        try {
            return new BufferedDataInputStream(new GZIPInputStream(raw), 1024 * 1024);
        } catch (IOException e) {
            try {
                raw.close();
            } catch (IOException ignored) {
                // The failure to open the gzip stream is the error worth reporting.
            }
            throw e;
        }
    }

    /**
     * Reads adjacency records for the vertices {@code start .. start + vertices.length - 1}
     * and adds the out-edges found to the corresponding non-null entries of {@code vertices}.
     * Records of vertices before {@code start}, and of {@code null} entries, are skipped.
     * Blocks that lie entirely before the current edge-data offset are committed first.
     *
     * Record layout as consumed here: one unsigned byte {@code ns}; {@code ns == 0} is
     * followed by one unsigned byte {@code nz} and stands for {@code nz + 1} vertices
     * without edges; {@code ns == 0xff} is followed by the edge count as an int;
     * any other value is the edge count itself. Each edge is one int (the target id).
     *
     * @param vertices      vertex objects of the interval, indexed by {@code id - start}
     * @param start         id of the vertex stored at {@code vertices[0]}
     * @param disableWrites if true, blocks released on entry are not written back
     * @throws IOException           on read failure
     * @throws IllegalStateException if an edge target lies outside the shard's range
     */
    public void readNextVertices(ChiVertex[] vertices, int start, boolean disableWrites) throws IOException {
        int nvecs = vertices.length;
        curBlock = null;
        releasePriorToOffset(false, disableWrites);
        assert(activeBlocks.size() <= 1);

        /* Read next */
        if (!onlyAdjacency && !activeBlocks.isEmpty()) {
            curBlock = activeBlocks.get(0);
        }

        if (adjFile == null) {
            adjFile = openAdjacencyStream();
            adjFile.skipBytes(adjOffset);
        }

        try {
            // i is negative while curvid is still before the requested interval.
            for (int i = (curvid - start); i < nvecs; i++) {
                int n;
                int ns = adjFile.readUnsignedByte();
                assert(ns >= 0);
                adjOffset++;

                if (ns == 0) {
                    // Run of (nz + 1) vertices without out-edges.
                    curvid++;
                    int nz = adjFile.readUnsignedByte();
                    adjOffset++;
                    assert(nz >= 0);
                    curvid += nz;
                    i += nz;
                    continue;
                }

                if (ns == 0xff) {
                    // Escape value: the real edge count follows as an int.
                    n = adjFile.readIntReversed();
                    adjOffset += 4;
                } else {
                    n = ns;
                }

                if (i < 0) {
                    skip(n);
                } else {
                    ChiVertex vertex = vertices[i];
                    assert(vertex == null || vertex.getId() == curvid);

                    if (vertex != null) {
                        while (--n >= 0) {
                            int target = adjFile.readIntReversed();
                            adjOffset += 4;
                            ChiPointer eptr = readEdgePtr();

                            if (!onlyAdjacency) {
                                // Load the block's data the first time an edge points into it.
                                if (!curBlock.active) {
                                    if (asyncEdataLoading) {
                                        curBlock.readAsync();
                                    } else {
                                        curBlock.readNow();
                                    }
                                }
                                curBlock.active = true;
                            }
                            vertex.addOutEdge(eptr == null ? -1 : eptr.blockId, eptr == null ? -1 : eptr.offset, target);

                            if (!(target >= rangeStart && target <= rangeEnd)) {
                                throw new IllegalStateException("Target " + target + " not in range!");
                            }
                        }
                    } else {
                        skip(n);
                    }
                }
                curvid++;
            }
        } catch (EOFException endOfAdjacency) {
            // Expected ("kosher" in the original code): running off the end of the
            // adjacency file simply ends the scan.
        }
    }

    /**
     * Synchronously commits and releases all active blocks.
     *
     * @throws IOException if a block cannot be written
     */
    public void flush() throws IOException {
        releasePriorToOffset(true, false);
    }

    /**
     * Repositions the shard. The adjacency stream is closed and will be reopened
     * (and advanced to {@code newoff}) by the next {@code readNextVertices} call.
     *
     * @param newoff  new byte offset in the adjacency stream
     * @param _curvid id of the vertex whose record starts at {@code newoff}
     * @param edgeptr new byte offset in the edge data
     */
    public void setOffset(int newoff, int _curvid, int edgeptr) {
        try {
            if (adjFile != null) adjFile.close();
        } catch (IOException ioe) {
            // Best effort: the stream is dropped either way, but do not hide the failure.
            logger.warning("Could not close adjacency file " + adjDataFilename + ": " + ioe);
        }
        adjFile = null;
        adjOffset = newoff;
        curvid = _curvid;
        edataOffset = edgeptr;
    }

    /**
     * Commits and removes active blocks that end at or before the current
     * edge-data offset, or every active block when {@code all} is set.
     * Does nothing in adjacency-only mode.
     *
     * @param all           commit every active block, synchronously
     * @param disableWrites release blocks without writing them back
     * @throws IOException if a block cannot be written
     */
    public void releasePriorToOffset(boolean all, boolean disableWrites)
            throws IOException {
        if (onlyAdjacency) return;
        // Iterate backwards so that removal does not shift unvisited entries.
        for (int i = activeBlocks.size() - 1; i >= 0; i--) {
            Block b = activeBlocks.get(i);
            if (b.end <= edataOffset || all) {
                commit(b, all, disableWrites);
                activeBlocks.remove(i);
            }
        }
    }

    /** @return size of the edge-data file as determined in the constructor */
    public long getEdataFilesize() {
        return edataFilesize;
    }

    /** @return length of the adjacency file as determined in the constructor */
    public long getAdjFilesize() {
        return adjFilesize;
    }

    public DataBlockManager getDataBlockManager() {
        return blockManager;
    }

    /**
     * Sets the manager used to allocate, access and release edge-data blocks.
     * Must be set before any block is created.
     */
    public void setDataBlockManager(DataBlockManager dataBlockManager) {
        this.blockManager = dataBlockManager;
    }

    public BytesToValueConverter<EdgeDataType> getConverter() {
        return converter;
    }

    /**
     * Sets the edge value converter and derives the value size and block size from it.
     * With a {@code null} converter the value size becomes 0 and the block size is
     * left unchanged.
     */
    public void setConverter(BytesToValueConverter<EdgeDataType> converter) {
        this.converter = converter;
        if (converter == null) {
            sizeOf = 0;
            return;
        }
        sizeOf = converter.sizeOf();
        blockSize = ChiFilenames.getBlocksize(sizeOf);
    }

    /**
     * Writes a block back (unless writes are disabled or out-edges are declared
     * unmodified) and releases its memory.
     *
     * @param b             block to commit
     * @param synchronously use {@code commitNow} followed by an explicit release,
     *                      instead of {@code commitAsync} (which releases by itself)
     * @param disableWrites skip the write and only release
     * @throws IOException if the block cannot be written
     */
    void commit(Block b, boolean synchronously, boolean disableWrites) throws IOException {
        disableWrites = disableWrites || !modifiesOutedges;
        if (synchronously) {
            if (!disableWrites) b.commitNow();
            b.release();
        } else {
            if (!disableWrites) b.commitAsync();
            else b.release();
        }
    }

    /** When set to false, blocks are released without ever being written back. */
    public void setModifiesOutedges(boolean modifiesOutedges) {
        this.modifiesOutedges = modifiesOutedges;
    }

    public void setOnlyAdjacency(boolean onlyAdjacency) {
        this.onlyAdjacency = onlyAdjacency;
    }

    /**
     * @return edge-data file size divided by the value size; 4 bytes per value
     *         are assumed when no converter is set
     */
    public long getNumEdges() {
        if (converter == null) return edataFilesize / 4; // TODO: fix.
        return edataFilesize / converter.sizeOf();
    }

    /**
     * One block of the edge-data file, backed by memory allocated from the
     * enclosing shard's {@link DataBlockManager}.
     */
    class Block {
        String blockFileName;
        /** Byte range [offset, end) of the edge data covered by this block. */
        int offset;
        int end;
        /** Id handed out by the block manager for the backing memory. */
        int blockId;
        /** Read cursor, in bytes, relative to {@code offset}. */
        int ptr;
        /** True once the block's data has been loaded by {@code readNextVertices}. */
        boolean active = false;

        Block(String edataFileName, int offset, int end, int fileBlockId, int blockSize) {
            this.end = end;
            this.offset = offset;
            blockId = blockManager.allocateBlock(end - offset);
            ptr = 0;
            blockFileName = ChiFilenames.getFilenameShardEdataBlock(edataFileName, fileBlockId, blockSize);
        }

        void readAsync() throws IOException {
            // TODO: actually async
            readNow();
        }

        /** Reads the block file into the memory allocated for this block. */
        void readNow() throws IOException {
            byte[] data = blockManager.getRawBlock(blockId);
            CompressedIO.readCompressed(new File(blockFileName), data, end - offset);
        }

        /** Writes the block's memory back to the block file; does not release it. */
        void commitNow() throws IOException {
            byte[] data = blockManager.getRawBlock(blockId);
            CompressedIO.writeCompressed(new File(blockFileName), data, end - offset);
        }

        /** Currently synchronous: writes the block and then releases it. */
        void commitAsync() throws IOException {
            commitNow();
            // TODO asynchronous implementation
            release();
        }

        void release() {
            blockManager.release(blockId);
        }
    }
}