Package com.tinkerpop.gremlin.giraph.structure.io.kryo

Source Code of com.tinkerpop.gremlin.giraph.structure.io.kryo.KryoRecordReader

package com.tinkerpop.gremlin.giraph.structure.io.kryo;

import com.tinkerpop.gremlin.giraph.process.computer.GiraphComputeVertex;
import com.tinkerpop.gremlin.structure.io.kryo.GremlinKryo;
import com.tinkerpop.gremlin.structure.io.kryo.KryoReader;
import com.tinkerpop.gremlin.tinkergraph.structure.TinkerVertex;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;

/**
* @author Joshua Shinavier (http://fortytwo.net)
* @author Marko A. Rodriguez (http://markorodriguez.com)
*/
public class KryoRecordReader extends RecordReader<NullWritable, GiraphComputeVertex> {

    private VertexStreamIterator vertexStreamIterator;
    private FSDataInputStream inputStream;

    private static final byte[] PATTERN = GremlinKryo.build().create().getVersionedHeader();

    public KryoRecordReader() {
    }

    @Override
    public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
        final FileSplit split = (FileSplit) genericSplit;
        final Configuration job = context.getConfiguration();
        long start = split.getStart();
        final Path file = split.getPath();
        if (null != new CompressionCodecFactory(job).getCodec(file)) {
            throw new IllegalStateException("Compression is not supported for the (binary) Gremlin Kryo format");
        }
        // open the file and seek to the start of the split
        this.inputStream = file.getFileSystem(job).open(split.getPath());
        this.inputStream.seek(start);
        final long newStart = seekToHeader(this.inputStream, start);
        this.vertexStreamIterator = new VertexStreamIterator(this.inputStream, split.getLength() - (newStart - start), KryoReader.build().create());
    }

    private static long seekToHeader(final FSDataInputStream inputStream, final long start) throws IOException {
        long nextStart = start;
        final byte[] buffer = new byte[32];
        while (true) {
            if ((buffer[0] = PATTERN[0]) == inputStream.readByte()) {
                inputStream.read(nextStart + 1, buffer, 1, 31);
                if (patternMatch(buffer)) {
                    inputStream.seek(nextStart);
                    return nextStart;
                }
            } else {
                nextStart = nextStart + 1;
                inputStream.seek(nextStart);
            }
        }
    }

    private static boolean patternMatch(final byte[] bytes) {
        for (int i = 0; i < 31; i++) {
            if (bytes[i] != PATTERN[i])
                return false;
        }
        return true;
    }

    @Override
    public boolean nextKeyValue() throws IOException {
        return this.vertexStreamIterator.hasNext();
    }

    @Override
    public NullWritable getCurrentKey() {
        return NullWritable.get();
    }

    @Override
    public GiraphComputeVertex getCurrentValue() {
        return new GiraphComputeVertex((TinkerVertex) this.vertexStreamIterator.next());
    }

    @Override
    public float getProgress() throws IOException {
        return this.vertexStreamIterator.getProgress();
    }

    @Override
    public synchronized void close() throws IOException {
        this.inputStream.close();
    }
}
TOP

Related Classes of com.tinkerpop.gremlin.giraph.structure.io.kryo.KryoRecordReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.