Package edu.cmu.graphchi.shards

Source Code of edu.cmu.graphchi.shards.MemoryShard

package edu.cmu.graphchi.shards;

import com.yammer.metrics.Metrics;
import com.yammer.metrics.core.Timer;
import com.yammer.metrics.core.TimerContext;
import edu.cmu.graphchi.ChiFilenames;
import edu.cmu.graphchi.ChiLogger;
import edu.cmu.graphchi.ChiVertex;
import edu.cmu.graphchi.datablocks.BytesToValueConverter;
import edu.cmu.graphchi.datablocks.DataBlockManager;
import edu.cmu.graphchi.io.CompressedIO;
import nom.tam.util.BufferedDataInputStream;

import java.io.*;
import java.util.ArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;

/**
* Copyright [2012] [Aapo Kyrola, Guy Blelloch, Carlos Guestrin / Carnegie Mellon University]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* Used only internally - do not modify. To understand Memory shards, see
* http://code.google.com/p/graphchi/wiki/IntroductionToGraphChi
* @param <EdgeDataType>
*/
public class MemoryShard <EdgeDataType> {

    private String edgeDataFilename;
    private String adjDataFilename;
    private int rangeStart;
    private int rangeEnd;

    private byte[] adjData;
    private int[] blockIds = new int[0];
    private int[] blockSizes = new int[0];;

    private int edataFilesize;
    private boolean loaded = false;
    private boolean onlyAdjacency = false;
    private boolean hasSetRangeOffset = false, hasSetOffset = false;

    private int rangeStartOffset, rangeStartEdgePtr, rangeContVid;
    private int adjDataLength;

    private DataBlockManager dataBlockManager;
    private BytesToValueConverter<EdgeDataType> converter;
    private int streamingOffset, streamingOffsetEdgePtr, streamingOffsetVid;
    private int blocksize = 0;

    private final Timer loadAdjTimer = Metrics.defaultRegistry().newTimer(MemoryShard.class, "load-adj", TimeUnit.SECONDS, TimeUnit.MINUTES);
    private final Timer loadVerticesTimers = Metrics.defaultRegistry().newTimer(MemoryShard.class, "load-vertices", TimeUnit.SECONDS, TimeUnit.MINUTES);

    private static final Logger logger = ChiLogger.getLogger("memoryshard");

    private ArrayList<ShardIndex.IndexEntry> index;


    private MemoryShard() {}

    public MemoryShard(String edgeDataFilename, String adjDataFilename, int rangeStart, int rangeEnd) {
        this.edgeDataFilename = edgeDataFilename;
        this.adjDataFilename = adjDataFilename;
        this.rangeStart = rangeStart;
        this.rangeEnd = rangeEnd;

    }

    public void commitAndRelease(boolean modifiesInedges, boolean modifiesOutedges) throws IOException {
        int nblocks = blockIds.length;

        if (!onlyAdjacency && loaded) {
            if (modifiesInedges) {
                if (blocksize == 0) {
                    blocksize = ChiFilenames.getBlocksize(converter.sizeOf());
                }
                int startStreamBlock = rangeStartEdgePtr / blocksize;
                for(int i=0; i < nblocks; i++) {
                    String blockFilename = ChiFilenames.getFilenameShardEdataBlock(edgeDataFilename, i, blocksize);
                    if (i >= startStreamBlock) {
                        // Synchronous write
                        CompressedIO.writeCompressed(new File(blockFilename),
                                dataBlockManager.getRawBlock(blockIds[i]),
                                blockSizes[i]);
                    } else {
                        // Asynchronous write (not implemented yet, so is same as synchronous)
                        CompressedIO.writeCompressed(new File(blockFilename),
                                dataBlockManager.getRawBlock(blockIds[i]),
                                blockSizes[i]);
                    }
                }

            } else if (modifiesOutedges) {
                int last = streamingOffsetEdgePtr;
                if (last == 0) {
                    last = edataFilesize;
                }
                int startblock = (int) (rangeStartEdgePtr / blocksize);
                int endblock = (int) (last / blocksize);
                for(int i=startblock; i <= endblock; i++) {
                    String blockFilename = ChiFilenames.getFilenameShardEdataBlock(edgeDataFilename, i, blocksize);
                    CompressedIO.writeCompressed(new File(blockFilename),
                            dataBlockManager.getRawBlock(blockIds[i]),
                            blockSizes[i]);
                }
            }
            /* Release all blocks */
            for(Integer blockId : blockIds) {
                dataBlockManager.release(blockId);
            }
        }
    }

    public void loadVertices(final int windowStart, final int windowEnd, final ChiVertex[] vertices, final boolean disableOutEdges, final ExecutorService parallelExecutor)
            throws IOException {
        DataInput compressedInput = null;
        if (adjData == null) {
            compressedInput = loadAdj();

            if (!onlyAdjacency) loadEdata();
        }

        TimerContext _timer = loadVerticesTimers.time();

        if (compressedInput != null) {
            // This means we are using compressed data and cannot read in parallel (or could we?)
            // A bit ugly.
            index = new ArrayList<ShardIndex.IndexEntry>();
            index.add(new ShardIndex.IndexEntry(0, 0, 0));
        }
        final int sizeOf = (converter == null ? 0 : converter.sizeOf());

        /* Load in parallel */
        if (compressedInput == null) {
            final AtomicInteger countDown = new AtomicInteger(index.size());
            final Object waitLock = new Object();
            for(int chunk=0; chunk<index.size(); chunk++) {
                final int _chunk = chunk;
                parallelExecutor.submit(new Runnable() {
                    @Override
                    public void run() {
                        try {
                            loadAdjChunk(windowStart, windowEnd, vertices, disableOutEdges, null, sizeOf, _chunk);
                        } catch (IOException ioe) {
                            ioe.printStackTrace();
                            throw new RuntimeException(ioe);
                        } finally {
                            countDown.decrementAndGet();
                            synchronized (waitLock) {
                                waitLock.notifyAll();
                            }

                        }
                    }
                } );
            }
            /* Wait for finishing */
            while (countDown.get() > 0) {
                synchronized (waitLock) {
                    try {
                        waitLock.wait(10000);
                    } catch (InterruptedException e) {}
                }
            }
        } else {
            loadAdjChunk(windowStart, windowEnd, vertices, disableOutEdges, compressedInput, sizeOf, 0);
        }

        _timer.stop();
    }

    private void loadAdjChunk(int windowStart, int windowEnd, ChiVertex[] vertices, boolean disableOutEdges, DataInput compressedInput, int sizeOf, int chunk) throws IOException {
        ShardIndex.IndexEntry indexEntry = index.get(chunk);

        int vid = indexEntry.vertex;
        int viden = (chunk < index.size() - 1 ?  index.get(chunk + 1).vertex : Integer.MAX_VALUE);
        int edataPtr = indexEntry.edgePointer * sizeOf;
        int adjOffset = indexEntry.fileOffset;
        int end = adjDataLength;
        if (chunk < index.size() - 1) {
            end = index.get(chunk + 1).fileOffset;
        }

        boolean containsRangeEnd = (vid < rangeEnd && viden > rangeEnd);
        boolean containsRangeSt = (vid <= rangeStart && viden > rangeStart);

        DataInput adjInput = (compressedInput != null ? compressedInput : new DataInputStream(new ByteArrayInputStream(adjData)));

        adjInput.skipBytes(adjOffset);

        try {
            while(adjOffset < end) {

                if (containsRangeEnd) {
                    if (!hasSetOffset && vid > rangeEnd) {
                        streamingOffset = adjOffset;
                        streamingOffsetEdgePtr = edataPtr;
                        streamingOffsetVid = vid;
                        hasSetOffset = true;
                    }
                }
                if (containsRangeSt)  {
                    if (!hasSetRangeOffset && vid >= rangeStart) {
                        rangeStartOffset = adjOffset;
                        rangeStartEdgePtr = edataPtr;
                        hasSetRangeOffset = true;
                    }
                }

                int n = 0;
                int ns = adjInput.readUnsignedByte();
                adjOffset += 1;
                assert(ns >= 0);
                if (ns == 0) {
                    // next value tells the number of vertices with zeros
                    vid++;
                    int nz = adjInput.readUnsignedByte();
                    adjOffset += 1;
                    vid += nz;
                    continue;
                }
                if (ns == 0xff) {   // If 255 is not enough, then stores a 32-bit integer after.
                    n = Integer.reverseBytes(adjInput.readInt());
                    adjOffset += 4;
                } else {
                    n = ns;
                }

                ChiVertex vertex = null;
                if (vid >= windowStart && vid <= windowEnd) {
                    vertex = vertices[vid - windowStart];
                }

                while (--n >= 0) {
                    int target = Integer.reverseBytes(adjInput.readInt());
                    adjOffset += 4;
                    if (!(target >= rangeStart && target <= rangeEnd))
                        throw new IllegalStateException("Target " + target + " not in range!");
                    if (vertex != null && !disableOutEdges) {
                        vertex.addOutEdge((onlyAdjacency ? -1 : blockIds[edataPtr / blocksize]), (onlyAdjacency ? -1 : edataPtr % blocksize), target);
                    }

                    if (target >= windowStart) {
                        if (target <= windowEnd) {
                            ChiVertex dstVertex = vertices[target - windowStart];
                            if (dstVertex != null) {
                                dstVertex.addInEdge((onlyAdjacency ? -1 : blockIds[edataPtr / blocksize]),
                                        (onlyAdjacency ? -1 : edataPtr % blocksize),
                                        vid);
                            }
                            if (vertex != null && dstVertex != null) {
                                dstVertex.parallelSafe = false;
                                vertex.parallelSafe = false;
                            }
                        }
                    }
                    edataPtr += sizeOf;

                    // TODO: skip
                }
                vid++;
            }
        } catch (EOFException eof) {
            return;
        }
        if (adjInput instanceof InputStream) {
            ((InputStream) adjInput).close();
        }
    }


    private DataInput loadAdj() throws FileNotFoundException, IOException {
        File compressedFile = new File(adjDataFilename + ".gz");
        InputStream adjStreamRaw;
        long fileSizeEstimate = 0;
        if (compressedFile.exists()) {
            logger.info("Note: using compressed: " + compressedFile.getAbsolutePath());
            adjStreamRaw = new GZIPInputStream(new FileInputStream(compressedFile));
            fileSizeEstimate = compressedFile.length() * 3 / 2;
        } else {
            adjStreamRaw = new FileInputStream(adjDataFilename);
            fileSizeEstimate = new File(adjDataFilename).length();
        }

        /* Load index */
        index = new ShardIndex(new File(adjDataFilename)).sparserIndex(1204 * 1024);
        BufferedInputStream adjStream =  new BufferedInputStream(adjStreamRaw, (int) fileSizeEstimate /
                4);

        // Hack for cases when the load is not divided into subwindows
        TimerContext _timer = loadAdjTimer.time();

        ByteArrayOutputStream adjDataStream = new ByteArrayOutputStream((int) fileSizeEstimate);
        try {
            byte[] buf = new byte[(int) fileSizeEstimate / 4];   // Read in 16 chunks
            while (true) {
                int read =  adjStream.read(buf);
                if (read > 0) {
                    adjDataStream.write(buf, 0, read);
                } else break;
            }
        } catch (EOFException err) {
            // Done
        }

        adjData = adjDataStream.toByteArray();
        adjDataLength = adjData.length;

        adjStream.close();
        adjDataStream.close();

        _timer.stop();
        return null;
    }

    private void loadEdata() throws FileNotFoundException, IOException {
        /* Load the edge data from file. Should be done asynchronously. */
        blocksize = ChiFilenames.getBlocksize(converter.sizeOf());

        if (!loaded) {
            edataFilesize = ChiFilenames.getShardEdataSize(edgeDataFilename);
            int nblocks = edataFilesize / blocksize + (edataFilesize % blocksize == 0 ? 0 : 1);
            blockIds = new int[nblocks];
            blockSizes = new int[nblocks];
            for(int fileBlockId=0; fileBlockId < nblocks; fileBlockId++) {
                int fsize = Math.min(edataFilesize - blocksize * fileBlockId, blocksize);
                blockIds[fileBlockId] = dataBlockManager.allocateBlock(fsize);
                blockSizes[fileBlockId] = fsize;
                String blockfilename = ChiFilenames.getFilenameShardEdataBlock(edgeDataFilename, fileBlockId, blocksize);
                CompressedIO.readCompressed(new File(blockfilename), dataBlockManager.getRawBlock(blockIds[fileBlockId]), fsize);
            }

            loaded = true;
        }
    }

    public DataBlockManager getDataBlockManager() {
        return dataBlockManager;
    }

    public void setDataBlockManager(DataBlockManager dataBlockManager) {
        this.dataBlockManager = dataBlockManager;
    }

    public void setConverter(BytesToValueConverter<EdgeDataType> converter) {
        this.converter = converter;
    }

    public int getStreamingOffset() {
        return streamingOffset;
    }

    public int getStreamingOffsetEdgePtr() {
        return streamingOffsetEdgePtr;
    }

    public int getStreamingOffsetVid() {
        return streamingOffsetVid;
    }

    public void setOnlyAdjacency(boolean onlyAdjacency) {
        this.onlyAdjacency = onlyAdjacency;
    }
}
TOP

Related Classes of edu.cmu.graphchi.shards.MemoryShard

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.