Package com.squareup.cascading2.scheme

Source Code of com.squareup.cascading2.scheme.ProtobufScheme

package com.squareup.cascading2.scheme;

import cascading.flow.FlowProcess;
import cascading.scheme.SinkCall;
import cascading.scheme.SourceCall;
import cascading.scheme.hadoop.SequenceFile;
import cascading.tap.Tap;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import com.google.protobuf.ExtensionRegistryLite;
import com.google.protobuf.Message;
import com.squareup.cascading2.util.Util;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;

/**
* A Scheme that allows reading from and writing to Hadoop SequenceFiles that use NullWritable keys
* and Protocol Buffers serialized objects wrapped in BytesWritable values.
*/
public class ProtobufScheme extends SequenceFile {
  private transient Message.Builder prototype;
  private final String fieldName;
  private final String messageClassName;
  private final ExtensionRegistryLite registry;

  public ProtobufScheme(String fieldName, Class<? extends Message> messageClass) {
    this(fieldName, messageClass, null);
  }

  public ProtobufScheme(String fieldName, Class<? extends Message> messageClass, ExtensionRegistryLite registry) {
    super(new Fields(fieldName));
    this.fieldName = fieldName;
    messageClassName = messageClass.getName();
    this.registry = registry;
  }

  @Override public void sourcePrepare(FlowProcess<JobConf> flowProcess,
      SourceCall<Object[], RecordReader> sourceCall) {
  }

  @Override
  public void sinkConfInit(FlowProcess<JobConf> flowProcess,
      Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
    conf.setOutputKeyClass(NullWritable.class);
    conf.setOutputValueClass(BytesWritable.class);

    conf.setOutputFormat(SequenceFileOutputFormat.class);
  }

  @Override
  public boolean source(FlowProcess<JobConf> flowProcess,
      SourceCall<Object[], RecordReader> sourceCall) throws IOException {
    // TODO: cache this BytesWritable in the context
    BytesWritable value = new BytesWritable();
    boolean result = sourceCall.getInput().next(NullWritable.get(), value);

    if (!result) return false;

    Tuple tuple = sourceCall.getIncomingEntry().getTuple();
    tuple.clear();

    Message.Builder builder = getPrototype();
    builder.clear();
    if (registry != null) {
      tuple.add(builder.mergeFrom(value.getBytes(), 0, value.getLength(), registry).build());
    } else {
      tuple.add(builder.mergeFrom(value.getBytes(), 0, value.getLength()).build());
    }

    return true;
  }

  private Message.Builder getPrototype() {
    if (prototype == null) {
      prototype = Util.builderFromMessageClass(messageClassName);
    }
    return prototype;
  }

  @Override
  public void sink(FlowProcess<JobConf> flowProcess, SinkCall<Void, OutputCollector> sinkCall)
      throws IOException {
    TupleEntry tupleEntry = sinkCall.getOutgoingEntry();

    Message message = (Message)tupleEntry.getObject(fieldName);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    message.writeTo(baos);
    // TODO: cache this BytesWritable
    BytesWritable outputWritable = new BytesWritable(baos.toByteArray());

    sinkCall.getOutput().collect(NullWritable.get(), outputWritable);
  }

  @Override
  public boolean equals(Object object) {
    if (this == object) return true;
    if (!(object instanceof ProtobufScheme)) return false;
    if (!super.equals(object)) return false;

    // TODO: reimplement this

    return true;
  }
}
TOP

Related Classes of com.squareup.cascading2.scheme.ProtobufScheme

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.