Package avrobase

Source Code of avrobase.AvroBaseImpl

package avrobase;

import org.apache.avro.AvroTypeException;
import org.apache.avro.Schema;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.avro.specific.SpecificRecord;
import org.apache.commons.codec.binary.Hex;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/**
* Base class with built in serialization and deserialization and schema management.
* <p/>
* User: sam
* Date: Jun 23, 2010
* Time: 12:21:33 PM
*/
public abstract class AvroBaseImpl<T extends SpecificRecord, K> implements AvroBase<T, K> {

  protected Map<String, Schema> schemaCache = new ConcurrentHashMap<String, Schema>();
  protected Map<Schema, String> hashCache = new ConcurrentHashMap<Schema, String>();
  protected Schema actualSchema;
  protected AvroFormat format;

  protected static final Charset UTF8 = Charset.forName("utf-8");

  /**
   * The serializer needs to know what format you would like new records
   * to be stored in. Old records are not affected when read as it stores their
   * format alongside them.
   *
   * @param format
   */
  public AvroBaseImpl(Schema actualSchema, AvroFormat format) {
    this.actualSchema = actualSchema;
    this.format = format;
  }

  @Override
  public Row<T, K> mutate(K row, Mutator<T> tMutator, Creator<T> tCreator) throws AvroBaseException {
    Row<T, K> tRow;
    do {
      // Grab the current version
      tRow = get(row);

      // If it doesn't exist, create a new one
      if (tRow == null && tCreator != null) {
        final T newValue = tCreator.create();
        if (newValue != null) {
          tRow = new Row<T,K>(newValue, row, 0);
        }
      }

      if (tRow != null) {
        T value = tMutator.mutate(tRow.value);
        // Mutator can abort the mutation
        if (value == null) return tRow;
        // Optimistically set the row
        if (put(row, value, tRow.version)) {
          return new Row<T, K>(value, row, tRow.version + 1);
        }
      } else {
        return null;
      }
      // On failure to set, try again
    } while (true);
  }

  @Override
  public Row<T, K> mutate(K row, Mutator<T> tMutator) throws AvroBaseException {
    return mutate(row, tMutator, null);
  }

  /**
   * Load a schema from the schema table
   */
  protected Schema loadSchema(byte[] value, String row) throws AvroBaseException {
    Schema schema;
    try {
      schema = Schema.parse(new ByteArrayInputStream(value));
    } catch (IOException e) {
      throw new AvroBaseException("Failed to deserialize schema: " + row, e);
    }
    schemaCache.put(row, schema);
    hashCache.put(schema, row);
    return schema;
  }

  private static EncoderFactory encoderFactory = new EncoderFactory();

  /**
   * Serialize the Avro instance using its schema and the
   * format set for this avrobase
   * @param value value to serialize
   * @return bytes
   * @throws AvroBaseException if we couldn't serialize
   */
  protected byte[] serialize(T value) throws AvroBaseException {
    try {
      Schema schema = value.getSchema();
      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      Encoder be;
      switch (format) {
        case JSON:
          be = encoderFactory.jsonEncoder(schema, baos);
          break;
        case BINARY:
        default:
          // TODO: cache binary encoders?
          be = encoderFactory.binaryEncoder(baos, null);
          break;
      }
      SpecificDatumWriter<T> sdw = new SpecificDatumWriter<T>(schema);
      sdw.write(value, be);
      be.flush();
      return baos.toByteArray();
    } catch (IOException e) {
      throw new AvroBaseException("Failed to serialize", e);
    }
  }

  /**
   * TODO: I really need to use a lookup table to reduce the per row overhead of schema references.
   *
   * @param schema
   * @param doc
   * @return
   */
  protected String createSchemaKey(Schema schema, String doc) {
    String schemaKey;
    MessageDigest md;
    try {
      md = MessageDigest.getInstance("SHA-256");
    } catch (NoSuchAlgorithmException e) {
      md = null;
    }
    if (md == null) {
      schemaKey = doc;
    } else {
      schemaKey = new String(Hex.encodeHex(md.digest(doc.getBytes())));
    }
    schemaCache.put(schemaKey, schema);
    hashCache.put(schema, schemaKey);
    return schemaKey;
  }

  /**
   * Read the avro serialized data using the specified schema and format
   * in the hbase row
   */
  protected T
  readValue(byte[] data, Schema schema, AvroFormat format) throws AvroBaseException {
    return readValue(data, schema, format, 0, data.length);
  }

  private static DecoderFactory decoderFactory = new DecoderFactory();

  /**
   * Read the avro serialized data using the specified schema and format
   * in the hbase row
   */
  protected T
  readValue(byte[] data, Schema schema, AvroFormat format, int offset, int length) throws AvroBaseException {
    try {
      Decoder d;
      switch (format) {
        case JSON:
          d = decoderFactory.jsonDecoder(schema, new String(data, UTF8));
          break;
        case BINARY:
        default:
          d = decoderFactory.binaryDecoder(data, offset, length, null);
          break;
      }
      SpecificDatumReader<T> sdr = new SpecificDatumReader<T>(schema, actualSchema);
      return sdr.read(null, d);
    } catch (IOException e) {
      throw new AvroBaseException("Failed to read value: " + schema, e);
    } catch (AvroTypeException e) {
      throw new AvroBaseException("Failed to read value: " + schema, e);
    }
  }
}
TOP

Related Classes of avrobase.AvroBaseImpl

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.