Package org.kiji.schema.impl

Source Code of org.kiji.schema.impl.AvroCellDecoder$FinalSchemaDecoder

/**
* (c) Copyright 2012 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.kiji.schema.impl;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.List;

import com.google.common.base.Preconditions;
import org.apache.avro.Schema;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DecoderFactory;

import org.kiji.annotations.ApiAudience;
import org.kiji.schema.DecodedCell;
import org.kiji.schema.InternalKijiError;
import org.kiji.schema.KijiCellDecoder;
import org.kiji.schema.KijiColumnName;
import org.kiji.schema.KijiSchemaTable;
import org.kiji.schema.NoSuchColumnException;
import org.kiji.schema.avro.AvroSchema;
import org.kiji.schema.avro.CellSchema;
import org.kiji.schema.layout.CellSpec;
import org.kiji.schema.layout.KijiTableLayout;
import org.kiji.schema.layout.SchemaTableAvroResolver;
import org.kiji.schema.util.ByteStreamArray;
import org.kiji.schema.util.BytesKey;
import org.kiji.schema.util.Hasher;

/**
* Base class for decoders that read Kiji cells encoded using Avro.
*
* Kiji cells are encoded as Extension records, although for efficiency reasons, this class
* provides a custom decoder that avoids copying bytes unnecessarily.
*
* @param <T> The type of the decoded cell data.
*/
@ApiAudience.Private
public abstract class AvroCellDecoder<T> implements KijiCellDecoder<T> {

  /**
   * Schema decoder.
   *
   * <p>
   *   Recovers the writer schema of every encoded cell. The concrete strategy is selected
   *   when this decoder is constructed, based on the schema storage of the column
   *   (HASH, UID or FINAL).
   * </p>
   */
  private final SchemaDecoder mSchemaDecoder;

  /**
   * Reader Avro schema. Null means use the writer schema.
   *
   * <p>
   *   May currently be null if the specific class for an Avro record is not available.
   *   This behavior is not intended and will be removed in the future.
   * </p>
   *
   * <p>
   *   With SCHEMA-295, users will be able to override the reader schema.
   *   Null allows a user to request the actual writer schema.
   * </p>
   *
   * <p>
   *   When this field is null, decoding substitutes the writer schema recovered from the
   *   cell, so the reader schema reported on the decoded cell equals its writer schema.
   * </p>
   */
  private final Schema mReaderSchema;

  // -----------------------------------------------------------------------------------------------

  /**
   * Interface for schema decoders.
   *
   * <p>
   *   A schema decoder recovers the writer schema of an encoded cell. The implementations
   *   in this class either read a schema reference (hash or UID) from the head of the cell
   *   bytes, or return a fixed schema without reading anything.
   * </p>
   */
  private interface SchemaDecoder {
    /**
     * Decodes a schema from a given byte stream encoded using a SchemaEncoder.
     *
     * <p>
     *   Callers in this class use the offset of the stream after this call to locate
     *   the start of the Avro payload.
     * </p>
     *
     * @param bsa Byte stream to decode from.
     * @return the decoded schema.
     * @throws IOException on I/O error.
     */
    Schema decode(ByteStreamArray bsa) throws IOException;
  }

  /** Schema decoder for schema hashes. */
  private static class SchemaHashDecoder implements SchemaDecoder {
    private final KijiSchemaTable mSchemaTable;

    /**
     * Creates a decoder for schemas encoded as hashes.
     *
     * @param schemaTable SchemaTable to resolve schema hashes.
     */
    public SchemaHashDecoder(KijiSchemaTable schemaTable) {
      mSchemaTable = Preconditions.checkNotNull(schemaTable);
    }

    /** {@inheritDoc} */
    @Override
    public Schema decode(ByteStreamArray bstream) throws IOException {
      final BytesKey schemaHash = new BytesKey(bstream.readBytes(Hasher.HASH_SIZE_BYTES));
      final Schema schema = mSchemaTable.getSchema(schemaHash);
      if (null == schema) {
        throw new IOException(
            String.format("Schema with hash %s not found in schema table.", schemaHash));
      }
      return schema;
    }
  }

  /** Schema decoder for schema UIDs. */
  private static class SchemaIdDecoder implements SchemaDecoder {
    private final KijiSchemaTable mSchemaTable;

    /**
     * Creates a decoder for schemas encoded as UIDs.
     *
     * @param schemaTable SchemaTable to resolve UIDs.
     */
    public SchemaIdDecoder(KijiSchemaTable schemaTable) {
      mSchemaTable = Preconditions.checkNotNull(schemaTable);
    }

    /** {@inheritDoc} */
    @Override
    public Schema decode(ByteStreamArray bstream) throws IOException {
      final long schemaId = bstream.readVarInt64();
      final Schema schema = mSchemaTable.getSchema(schemaId);
      if (null == schema) {
        throw new IOException(
            String.format("Schema with ID %d not found in schema table.", schemaId));
      }
      return schema;
    }
  }

  /** Schema decoder for cells from final columns (schema is not encoded as part of the cell). */
  private static class FinalSchemaDecoder implements SchemaDecoder {
    private final Schema mSchema;

    /**
     * Creates a schema decoder for final columns.
     *
     * @param schema Schema to decode.
     */
    public FinalSchemaDecoder(Schema schema) {
      mSchema = Preconditions.checkNotNull(schema);
    }

    /** {@inheritDoc} */
    @Override
    public Schema decode(ByteStreamArray bsa) throws IOException {
      return mSchema;
    }
  }

  /**
   * Creates a schema decoder.
   *
   * @param cellSpec Specification of the cell encoding.
   * @return a new schema decoder.
   * @throws IOException on I/O error.
   */
  private static SchemaDecoder createSchemaDecoder(CellSpec cellSpec) throws IOException {
    switch (cellSpec.getCellSchema().getStorage()) {
      case HASH: return new SchemaHashDecoder(cellSpec.getSchemaTable());
      case UID: return new SchemaIdDecoder(cellSpec.getSchemaTable());
      case FINAL: return new FinalSchemaDecoder(cellSpec.getAvroSchema());
      default: throw new InternalKijiError(
            "Unexpected cell storage in cell schema: " + cellSpec.getCellSchema());
    }
  }

  /**
   * Get the Avro Schema for a given column marked FINAL in a given layout. This will either be
   * defined INLINE as a Schema JSON or it will be the only writer schema in the layout 1.3+
   * Avro writers field.
   *
   * @param layout table layout from which to get the Schema for the given column.
   * @param column column for which to get the Schema.
   * @return Avro Schema of the given column.
   * @throws NoSuchColumnException in case the column does not exist.
   */
  private static Schema getSchemaForFinalColumn(KijiTableLayout layout, KijiColumnName column)
      throws NoSuchColumnException {
    final List<AvroSchema> writers =
        layout.getCellSchema(column).getWriters();
    if (null == writers) {
      return new Schema.Parser().parse(layout.getCellSchema(column).getValue());
    } else {
      Preconditions.checkState(writers.size() == 1,
          "FINAL columns must have exactly one writer schema, found: " + writers);
      return new SchemaTableAvroResolver(layout.getSchemaTable()).apply(writers.get(0));
    }
  }

  /**
   * Creates a schema decoder.
   *
   * @param layout KijiTableLayout from which to get storage information.
   * @param spec Specification of the cell encoding.
   * @return a new schema decoder.
   * @throws IOException on I/O error.
   */
  private static SchemaDecoder createSchemaDecoder(KijiTableLayout layout,
      BoundColumnReaderSpec spec) throws IOException {
    final CellSchema cellSchema = layout.getCellSchema(spec.getColumn());

    switch (cellSchema.getStorage()) {
      case HASH: {
        return new SchemaHashDecoder(layout.getSchemaTable());
      }
      case UID: {
        return new SchemaIdDecoder(layout.getSchemaTable());
      }
      case FINAL: {
        switch (spec.getColumnReaderSpec().getAvroReaderSchemaType()) {
          case DEFAULT:
          case WRITER: {
            return new FinalSchemaDecoder(getSchemaForFinalColumn(layout, spec.getColumn()));
          }
          case EXPLICIT: {
            return new FinalSchemaDecoder(spec.getColumnReaderSpec().getAvroReaderSchema());
          }
          default: {
            throw new InternalKijiError(
                "Unknown Avro reader schema type from reader spec: " + spec.getColumnReaderSpec());
          }
        }
      }
      default: {
        throw new InternalKijiError(
            "Unknown cell schema storage from CellSchema: " + cellSchema);
      }
    }
  }

  /**
   * Get the Avro Schema to use to decode cells.
   *
   * @param layout KijiTableLayout from which to get storage information.
   * @param spec Specification of the cell encoding.
   * @return the Avro Schema to use to decode cells.
   * @throws IOException in case the specified column does not exist.
   */
  private static Schema getReaderSchema(KijiTableLayout layout, BoundColumnReaderSpec spec)
      throws IOException {
    switch (spec.getColumnReaderSpec().getAvroReaderSchemaType()) {
      case DEFAULT: {
        final CellSchema cellSchema = layout.getCellSchema(spec.getColumn());
        final SchemaTableAvroResolver resolver =
            new SchemaTableAvroResolver(layout.getSchemaTable());
        return resolver.apply(cellSchema.getDefaultReader());
      }
      case EXPLICIT: return spec.getColumnReaderSpec().getAvroReaderSchema();
      case WRITER: return null;
      default: throw new InternalKijiError(
          "Unknown AvroReaderSchemaType: " + spec.getColumnReaderSpec().getAvroReaderSchemaType());
    }
  }

  // -----------------------------------------------------------------------------------------------

  /**
   * Initializes an abstract KijiAvroCellDecoder.
   *
   * @param cellSpec Specification of the cell encoding.
   * @throws IOException on I/O error.
   */
  protected AvroCellDecoder(CellSpec cellSpec) throws IOException {
    Preconditions.checkNotNull(cellSpec);
    Preconditions.checkArgument(cellSpec.isAvro());
    mSchemaDecoder = createSchemaDecoder(cellSpec);
    mReaderSchema = cellSpec.getAvroSchema();
  }

  /**
   * Initializes an abstract KijiAvroCellDecoder.
   *
   * @param layout KijiTableLayout from which to get storage information.
   * @param spec Specification of the cell encoding.
   * @throws IOException on I/O error.
   */
  protected AvroCellDecoder(KijiTableLayout layout, BoundColumnReaderSpec spec) throws IOException {
    mSchemaDecoder = createSchemaDecoder(layout, spec);
    mReaderSchema = getReaderSchema(layout, spec);
  }

  /**
   * Factory for DatumReader instances.
   *
   * Sub-classes must create DatumReader implementations for specific or generic records.
   *
   * <p>
   *   This factory is invoked on every call to
   *   {@link #decodeAvro(ByteBuffer, Schema, Schema, Object)}, that is once per decoded cell.
   *   When reached through {@link #decodeCell(byte[])}, the reader schema passed here is the
   *   writer schema whenever no reader schema has been configured.
   * </p>
   *
   * @param writer Writer schema.
   * @param reader Reader schema.
   * @return a new DatumReader instance for the specified writer/reader schema combination.
   */
  protected abstract DatumReader<T> createDatumReader(Schema writer, Schema reader);

  /** {@inheritDoc} */
  @Override
  public DecodedCell<T> decodeCell(byte[] encodedBytes) throws IOException {
    return decode(encodedBytes, null);
  }

  /** {@inheritDoc} */
  @Override
  public T decodeValue(byte[] bytes) throws IOException {
    return decodeCell(bytes).getData();
  }

  /**
   * Decodes the serialized bytes into a KijiCell. If reuse is non-null, the implementation may fill
   * it and return it as the KijiCell data payload.
   *
   * @param bytes The bytes from an HBase table cell.
   * @param reuse If non-null, may be filled with the decoded data and used as the data payload in
   *          the return value.
   * @return The decoded KijiCell.
   * @throws IOException If there is an error.
   */
  private DecodedCell<T> decode(byte[] bytes, T reuse) throws IOException {
    final ByteStreamArray byteStream = new ByteStreamArray(bytes);
    final Schema writerSchema = mSchemaDecoder.decode(byteStream);
    final Schema readerSchema = (mReaderSchema != null) ? mReaderSchema : writerSchema;
    final ByteBuffer binaryData =
        ByteBuffer.wrap(bytes, byteStream.getOffset(), bytes.length - byteStream.getOffset());
    final T data = decodeAvro(binaryData, writerSchema, readerSchema, reuse);
    return new DecodedCell<T>(writerSchema, readerSchema, data);
  }

  /**
   * Gets the portion of the encoded byte array from an HBase table cell that has the avro-encoded
   * data payload.
   *
   * @param bytes The bytes from an HBase table cell.
   * @return the portion of the encoded byte array that contains the binary-encoded avro message.
   * @throws IOException on I/O error (eg. decoding error).
   */
  public ByteBuffer getPayload(byte[] bytes) throws IOException {
    final ByteStreamArray byteStream = new ByteStreamArray(bytes);
    // Decode the writer schema and throw it away:
    mSchemaDecoder.decode(byteStream);
    return ByteBuffer.wrap(bytes, byteStream.getOffset(), bytes.length - byteStream.getOffset());
  }

  /**
   * Decodes the data payload given the reader and writer schema. If reuse is non-null, the
   * implementation may fill it and return that object.
   *
   * @param encodedData The avro-encoded bytes of the data payload.
   * @param writerSchema The schema that was used to encode the data.
   * @param readerSchema The schema that is expected by the reader.
   * @param reuse An optional object to be filled and returned to save on object construction
   *     (may be null).
   * @return The decoded avro object.
   * @throws IOException If there is an error.
   */
  protected T decodeAvro(
      ByteBuffer encodedData,
      Schema writerSchema,
      Schema readerSchema,
      T reuse)
      throws IOException {
    final DatumReader<T> reader = createDatumReader(writerSchema, readerSchema);
    return reader.read(reuse,
        DecoderFactory.get().binaryDecoder(
            encodedData.array(),
            encodedData.position(),
            encodedData.limit() - encodedData.position(),
            null));
  }
}
TOP

Related Classes of org.kiji.schema.impl.AvroCellDecoder$FinalSchemaDecoder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.