Source Code of com.linkedin.databus.client.DbusEventAvroDecoder

package com.linkedin.databus.client;
/*
*
* Copyright 2013 LinkedIn Corp. All rights reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/


import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.WritableByteChannel;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.JsonEncoder;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificRecord;
import org.apache.log4j.Logger;
import org.codehaus.jackson.JsonEncoding;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonGenerationException;
import org.codehaus.jackson.JsonGenerator;

import com.linkedin.databus.client.pub.DbusEventDecoder;
import com.linkedin.databus.core.DatabusRuntimeException;
import com.linkedin.databus.core.DbusEvent;
import com.linkedin.databus.core.DbusEventPart;
import com.linkedin.databus2.schemas.SchemaId;
import com.linkedin.databus2.schemas.SchemaRegistryService;
import com.linkedin.databus2.schemas.VersionedSchema;
import com.linkedin.databus2.schemas.VersionedSchemaSet;

public class DbusEventAvroDecoder implements DbusEventDecoder
{
  public static final String MODULE = DbusEventAvroDecoder.class.getName();
  public static final Logger LOG = Logger.getLogger(MODULE);
  public static final String SRC_ID_FIELD_NAME = "srcId";
  public static final String VALUE_FIELD_NAME = "value";
  public static final String OPCODE_FIELD_NAME = "opCode";

  // BinaryDecoder is not thread-safe, so each thread keeps its own instance in a
  // ThreadLocal; passing the old instance back to createBinaryDecoder() lets Avro reuse it.
  private static final ThreadLocal<BinaryDecoder> binDecoder = new ThreadLocal<BinaryDecoder>();

  private final VersionedSchemaSet _schemaSet;
  private final VersionedSchemaSet _metadataSchemaSet;

  public DbusEventAvroDecoder(VersionedSchemaSet schemaSet)
  {
    this(schemaSet,null);
  }

  public DbusEventAvroDecoder(VersionedSchemaSet schemaSet, VersionedSchemaSet metadataSchemaSet)
  {
    super();
    _schemaSet = schemaSet;
    _metadataSchemaSet = metadataSchemaSet;
  }

  @Override
  public GenericRecord getGenericRecord(DbusEvent e, GenericRecord reuse)
  {
    byte[] md5 = new byte[16];
    e.schemaId(md5);
    SchemaId schemaId = new SchemaId(md5);
    VersionedSchema writerSchema = _schemaSet.getById(schemaId);

    if (null == writerSchema)
    {
      LOG.error("Unable to find schema for id " + schemaId + "; event = " + e);
      throw new DatabusRuntimeException("No schema available to decode event " + e);
    }

    ByteBuffer valueBuffer = e.value();

    byte[] valueBytes = null;
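    // Fast path: hand Avro the buffer's backing array directly when one is
    // available (note this assumes the value starts at offset 0 of that array);
    // otherwise copy the remaining bytes into a temp array.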
    if (valueBuffer.hasArray())
    {
      valueBytes = valueBuffer.array();
    }
    else
    {
      valueBytes = new byte[valueBuffer.remaining()];
      valueBuffer.get(valueBytes);
    }

    return getGenericRecord(valueBytes, writerSchema.getSchema(), reuse);
  }

  /**
   * Creates a generic record from the DbusEvent.
   *
   * @param e   DbusEvent to be converted to generic record
   * @return the GenericRecord for the DbusEvent's payload
   */
  public GenericRecord getGenericRecord(DbusEvent e)
  {
    return getGenericRecord(e, null);
  }

  /**
   * Creates a generic record from a byte array.
   *
   * @param valueBytes  byte[] to be converted to generic record
   * @param schema      schema of the input record
   * @return GenericRecord for the given byte array + schema combo
   *
   * TODO:  Add a getGenericRecord(InputStream data, Schema schema, GenericRecord reuse)
   *        variant; it can use DecoderFactory.createBinaryDecoder(InputStream, BinaryDecoder)
   *        and will allow us to use something like org.apache.avro.ipc.ByteBufferInputStream
   *        to avoid the data copy to a temp array.  (https://rb.corp.linkedin.com/r/172879/)
   */
  public GenericRecord getGenericRecord(byte[] valueBytes, Schema schema, GenericRecord reuse)
  {
    GenericRecord result = null;
    try
    {
      binDecoder.set(DecoderFactory.defaultFactory().createBinaryDecoder(valueBytes, binDecoder.get()));
      GenericDatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
      result = reader.read(reuse, binDecoder.get());
      return result;
    }
    catch (Exception ex)  // IOException, ArrayIndexOutOfBoundsException, ...
    {
      LOG.error("getGenericRecord Avro error: " + ex.getMessage(), ex);
    }
    return result;
  }
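
  // A sketch of the TODO'd InputStream variant above -- not part of the original
  // class, just one shape it could take using the DecoderFactory overload named
  // in the TODO, which skips the temp-array copy when paired with a
  // ByteBuffer-backed stream such as org.apache.avro.ipc.ByteBufferInputStream:
  //
  //   public GenericRecord getGenericRecord(InputStream data, Schema schema, GenericRecord reuse)
  //   {
  //     try
  //     {
  //       binDecoder.set(DecoderFactory.defaultFactory().createBinaryDecoder(data, binDecoder.get()));
  //       GenericDatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
  //       return reader.read(reuse, binDecoder.get());
  //     }
  //     catch (IOException ex)
  //     {
  //       LOG.error("getGenericRecord Avro error: " + ex.getMessage(), ex);
  //       return null;
  //     }
  //   }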

  /**
   * Deserializes the metadata (if any) of a Databus event to an Avro GenericRecord.  This method
   * is for INTERNAL USE ONLY (by Espresso and Databus).  It is NOT a stable API and may change
   * without warning!
   *
   * @param e       the Databus event whose metadata is to be decoded
   * @param reuse   an existing {@link org.apache.avro.generic.GenericRecord} object where the
   *                deserialized values will be written to. The object can be <b>null</b>, in
   *                which case a new object will be allocated.
   * @return {@link org.apache.avro.generic.GenericRecord} object with the deserialized data, or
   *         null if no metadata exists.  Returned in <b>reuse</b> if provided, else in a newly
   *         allocated object.
   * @throws DatabusRuntimeException if event contains metadata but schema to decode it is missing
   */
  public GenericRecord getMetadata(DbusEvent e, GenericRecord reuse)
  {
    DbusEventPart metadataPart = e.getPayloadMetadataPart();
    ByteBuffer dataBuffer = null;
    if (null == metadataPart || null == (dataBuffer = metadataPart.getData()) || dataBuffer.remaining() <= 0)
    {
      LOG.debug("No metadata for event " + e);
      return null;
    }

    VersionedSchema schema = getMetadataSchema(metadataPart);
    if (null == schema)
    {
      throw new DatabusRuntimeException("No schema available to decode metadata for event " + e);
    }

    byte[] dataBytes = null;
    if (dataBuffer.hasArray())
    {
      dataBytes = dataBuffer.array();
    }
    else
    {
      dataBytes = new byte[dataBuffer.remaining()];
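      // Defensive: dataBytes.length equals remaining(), so an underflow should
      // occur only if the buffer is modified concurrently.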
      try
      {
        dataBuffer.get(dataBytes);
      }
      catch (BufferUnderflowException ex)
      {
        LOG.error("metadata buffer error (remaining = " + dataBuffer.remaining() + ") for event " + e, ex);
        return null;
      }
    }

    return getGenericRecord(dataBytes, schema.getSchema(), reuse);
  }

  @Override
  public <T extends SpecificRecord> T getTypedValue(DbusEvent e, T reuse, Class<T> targetClass)
  {
    if (null == reuse)
    {
      try
      {
        reuse = targetClass.newInstance();
      }
      catch (InstantiationException e1)
      {
        LOG.error("getTypedValue class instantiation error (" + e1.getMessage() + ") for event " + e, e1);
        return null;
      }
      catch (IllegalAccessException e1)
      {
        LOG.error("getTypedValue access error (" + e1.getMessage() + ") for event " + e, e1);
        return null;
      }
    }
    byte[] md5 = new byte[16];
    e.schemaId(md5);
    SchemaId schemaId = new SchemaId(md5);
    VersionedSchema writerSchema = _schemaSet.getById(schemaId);

    if (null == writerSchema)
    {
      LOG.error("Unable to find schema for id " + schemaId + "; event = " + e);
      throw new DatabusRuntimeException("No schema available to decode event " + e);
    }

    ByteBuffer valueBuffer = e.value();
    byte[] valueBytes = new byte[valueBuffer.remaining()];
    valueBuffer.get(valueBytes);
    try
    {
      //JsonDecoder jsonDec = new JsonDecoder(sourceSchema.getSchema(),new ByteArrayInputStream(valueBytes));
      binDecoder.set(DecoderFactory.defaultFactory().createBinaryDecoder(valueBytes, binDecoder.get()));
      SpecificDatumReader<SpecificRecord> reader =
          new SpecificDatumReader<SpecificRecord>(writerSchema.getSchema(), reuse.getSchema());
      return targetClass.cast(reader.read(reuse, binDecoder.get()));
    }
    catch (IOException e1)
    {
      LOG.error("getTypedValue IO error (" + e1.getMessage() + ") for event " + e, e1);
    }
    return reuse;
  }

  public void dumpEventValueInJSON(DbusEvent e, OutputStream out)
  {
    byte[] md5 = new byte[16];
    e.schemaId(md5);
    SchemaId schemaId = new SchemaId(md5);
    VersionedSchema sourceSchema = _schemaSet.getById(schemaId);
    ByteBuffer valueBuffer = e.value();
    byte[] valueBytes = new byte[valueBuffer.remaining()];
    valueBuffer.get(valueBytes);

    try {
      Schema schema = sourceSchema.getSchema();
      DatumReader<Object> reader = new GenericDatumReader<Object>(schema);
      binDecoder.set(DecoderFactory.defaultFactory().createBinaryDecoder(valueBytes, binDecoder.get()));

      Object datum = reader.read(null, binDecoder.get());
      DatumWriter<Object> writer = new GenericDatumWriter<Object>(schema);
      JsonGenerator g = new JsonFactory().createJsonGenerator(out, JsonEncoding.UTF8);
      // write the src ID
      g.writeStartObject();
      g.writeFieldName(SRC_ID_FIELD_NAME);
      g.writeNumber(e.getSourceId());
      g.writeFieldName(OPCODE_FIELD_NAME);
      g.writeString(e.getOpcode().toString());
      g.writeFieldName("partId");
      g.writeNumber(Integer.valueOf(e.getPartitionId()));

      g.writeFieldName(VALUE_FIELD_NAME);
      writer.write(datum, new JsonEncoder(schema, g));
      g.writeEndObject();
      // The Avro JsonEncoder sometimes fails to emit two closing '}' characters,
      // so issue up to two extra writeEndObject() calls to compensate.
      g.writeEndObject();
      try {
        g.writeEndObject();
      }
      catch (JsonGenerationException e_json) {
        // ignore: the output was already balanced and the extra call overflowed
      }
      g.flush();
    } catch (IOException e1) {
      LOG.error("event value serialization error; event = " + e, e1);
    }
  }

  @Override
  public void dumpEventValueInJSON(DbusEvent e, WritableByteChannel writeChannel)
  {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();

    try {
      dumpEventValueInJSON(e, baos);
      baos.write("\n".getBytes("UTF-8"));

      ByteBuffer writeBuffer = ByteBuffer.wrap(baos.toByteArray());
      writeChannel.write(writeBuffer);
    } catch (IOException e1) {
      LOG.error("event value serialization error; event = " + e, e1);
    }
  }

  /**
   * @param e DbusEvent
   * @return {Avro schema, source name, version} tuple describing the payload data in 'e'
   */
  @Override
  public VersionedSchema getPayloadSchema(DbusEvent e)
  {
    byte[] md5 = new byte[16];
    e.schemaId(md5);
    SchemaId schemaId = new SchemaId(md5);
    VersionedSchema writerSchema = _schemaSet.getById(schemaId);
    return writerSchema;
  }

  protected VersionedSchemaSet getSchemaSet()
  {
    return _schemaSet;
  }

  /**
   * Returns the single version of the metadata schema specified in the given event's header.
   * For INTERNAL USE ONLY (by Espresso and Databus).  This is not a stable API and may change
   * without warning!
   *
   * @param e DbusEvent
   * @return {AvroSchema, "metadata-source", version} tuple for given event 'e' with
   *         metadata-schema-id; null if event contains no metadata
   * @throws DatabusRuntimeException if event contains metadata but schema to decode it is missing
   */
  public VersionedSchema getMetadataSchema(DbusEvent e)
  {
    DbusEventPart metadataPart = e.getPayloadMetadataPart();
    if (null == metadataPart)
    {
      LOG.debug("No metadata for event " + e);
      return null;
    }

    VersionedSchema schema = getMetadataSchema(metadataPart);
    if (null == schema)
    {
      throw new DatabusRuntimeException("No schema available to decode metadata for event " + e);
    }

    return schema;
  }

  /**
   * Returns the single version of the metadata schema specified in the metadata portion of an
   * event's header.  For INTERNAL USE ONLY (by Espresso and Databus).  This is not a stable
   * API and may change without warning!
   *
   * @param metadataPart  metadata portion of a DbusEvent
   * @return {AvroSchema, "metadata-source", version} tuple for metadataPart, or null if
   *         is not available
   */
  public VersionedSchema getMetadataSchema(DbusEventPart metadataPart)
  {
    if (null == _metadataSchemaSet)
    {
      return null;
    }
    SchemaId id = new SchemaId(metadataPart.getSchemaDigest());
    return _metadataSchemaSet.getById(id);
  }

  /**
   * Returns the specified version of the metadata schema.  For INTERNAL USE ONLY (by Espresso
   * and Databus).  This is not a stable API and may change without warning!
   *
   * @param version  version number of the desired metadata schema
   * @return {AvroSchema, "metadata-source", version} tuple for given metadata schema version;
   *         null if none exists
   */
  public VersionedSchema getMetadataSchema(short version)
  {
    if (_metadataSchemaSet != null)
    {
      return _metadataSchemaSet.getSchemaByNameVersion(SchemaRegistryService.DEFAULT_METADATA_SCHEMA_SOURCE, version);
    }
    return null;
  }

  /**
   * Returns the latest version of the metadata schema.  For INTERNAL USE ONLY (by Espresso
   * and Databus).  This is not a stable API and may change without warning!
   *
   * @return {AvroSchema, "metadata-source", version} tuple of highest-numbered version of
   *         metadata schema; null if none exists
   */
  VersionedSchema getLatestMetadataSchema()
  {
    if (_metadataSchemaSet != null)
    {
      return _metadataSchemaSet.getLatestVersionByName(SchemaRegistryService.DEFAULT_METADATA_SCHEMA_SOURCE);
    }
    return null;
  }

  public void dumpMetadata(DbusEvent e, FileChannel writeChannel)
  {
    GenericRecord genericRecord = this.getMetadata(e, null);
    if (genericRecord == null)  // no metadata
    {
      return;
    }

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try
    {
      String metadataInfo = genericRecord.toString() + "\n";
      baos.write(metadataInfo.getBytes("UTF-8"));
      ByteBuffer writeBuffer = ByteBuffer.wrap(baos.toByteArray());
      writeChannel.write(writeBuffer);
    }
    catch (UnsupportedEncodingException e1)
    {
      LOG.error("event metadata serialization error; event = " + e + "; metadata = " + genericRecord, e1);
    }
    catch (IOException e1)
    {
      LOG.error("event metadata serialization error; event = " + e + "; metadata = " + genericRecord, e1);
    }
  }

}
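
Usage example

A minimal sketch of how a consumer might decode an event with this class. The callback shape, the schema-set parameters, and the class name DecoderUsageSketch are assumptions for illustration; in practice the DbusEvent arrives via a Databus consumer callback and the schema sets are populated from the schema registry.

package com.linkedin.databus.client;  // same package as the decoder, for illustration

import org.apache.avro.generic.GenericRecord;

import com.linkedin.databus.core.DbusEvent;
import com.linkedin.databus2.schemas.VersionedSchemaSet;

public class DecoderUsageSketch
{
  public void onEvent(DbusEvent event,
                      VersionedSchemaSet payloadSchemas,
                      VersionedSchemaSet metadataSchemas)
  {
    // Build the decoder with both payload and metadata schema sets.  The
    // single-argument constructor also works, but then getMetadata() throws
    // DatabusRuntimeException for events that actually carry metadata.
    DbusEventAvroDecoder decoder = new DbusEventAvroDecoder(payloadSchemas, metadataSchemas);

    // Decode the payload; a non-null second argument reuses an existing
    // GenericRecord to cut down on allocations.
    GenericRecord payload = decoder.getGenericRecord(event, null);

    // Decode the metadata, if any; returns null when the event carries none.
    GenericRecord metadata = decoder.getMetadata(event, null);

    System.out.println("payload = " + payload + "; metadata = " + metadata);
  }
}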