Package org.kiji.schema

Source Code of org.kiji.schema.KijiSchemaTable

/**
* (c) Copyright 2012 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.kiji.schema;

import java.io.Closeable;
import java.io.Flushable;
import java.io.IOException;
import java.util.Collections;
import java.util.Map;

import com.google.common.base.Objects;
import org.apache.avro.Schema;
import org.apache.avro.util.WeakIdentityHashMap;
import org.apache.commons.lang.builder.HashCodeBuilder;

import org.kiji.annotations.ApiAudience;
import org.kiji.annotations.ApiStability;
import org.kiji.annotations.Inheritance;
import org.kiji.schema.avro.SchemaTableBackup;
import org.kiji.schema.util.BytesKey;
import org.kiji.schema.util.Hasher;

/**
* The Kiji schema table, which contains the lookup table between schema IDs, hashes, and full
* schemas.
*
* @see KijiMetaTable
* @see KijiSystemTable
*/
@ApiAudience.Framework
@ApiStability.Stable
@Inheritance.Sealed
public interface KijiSchemaTable extends Flushable, Closeable {
  /**
   * Looks up a schema ID given an Avro schema object.
   *
   * If the schema is unknown, allocates a new ID and stores the new schema mapping.
   *
   * @param schema The full schema to store in the table.
   * @return The schema ID.
   * @throws IOException on I/O error.
   */
  long getOrCreateSchemaId(Schema schema) throws IOException;

  /**
   * Looks up a schema hash given an Avro schema object.
   *
   * If the schema is unknown, allocates a new ID and stores the new schema mapping.
   *
   * @param schema Avro schema to look up.
   * @return The schema hash.
   * @throws IOException on I/O error.
   */
  BytesKey getOrCreateSchemaHash(Schema schema) throws IOException;

  /**
   * Computes a schema hash.
   *
   * @param schema The Avro schema to hash.
   * @return The hash of the schema
   */
  BytesKey getSchemaHash(Schema schema);

  /**
   * Looks up a schema given an ID.
   *
   * @param schemaId Schema ID to look up.
   * @return Avro schema, or null if the schema ID is unknown.
   * @throws IOException on I/O error.
   */
  Schema getSchema(long schemaId) throws IOException;

  /**
   * Looks up a schema given a hash.
   *
   * @param schemaHash Schema hash to look up.
   * @return Avro schema, or null if the schema hash is unknown.
   * @throws IOException on I/O error.
   */
  Schema getSchema(BytesKey schemaHash) throws IOException;

  /**
   * Looks up a schema entry from a given ID.
   *
   * @param schemaId Schema ID to look up.
   * @return SchemaEntry matching the given ID or null if the ID is unknown.
   * @throws IOException on I/O error.
   */
  SchemaEntry getSchemaEntry(long schemaId) throws IOException;

  /**
   * Looks up a schema entry from a given hash.
   *
   * @param schemaHash Schema hash to look up.
   * @return SchemaEntry matching the given hash or null if the hash is unknown.
   * @throws IOException on I/O error.
   */
  SchemaEntry getSchemaEntry(BytesKey schemaHash) throws IOException;

  /**
   * Looks up a schema entry from a given Schema definition.
   *
   * @param schema Schema definition to look up.
   * @return SchemaEntry matching the given Schema or null if the schema is unknown.
   * @throws IOException on I/O error.
   */
  SchemaEntry getSchemaEntry(Schema schema) throws IOException;

  /** Association between a schema and its ID. */
  public static class SchemaEntry {
    private final long mId;
    private final BytesKey mHash;
    private final Schema mSchema;

    /**
     * Creates a new schema entry.
     *
     * @param id the schema ID
     * @param hash the schema hash
     * @param schema the Avro schema object
     */
    @ApiAudience.Private
    public SchemaEntry(long id, BytesKey hash, Schema schema) {
      this.mId = id;
      this.mHash = hash;
      this.mSchema = schema;
    }

    /** @return the schema ID */
    public long getId() {
      return this.mId;
    }

    /** @return the schema hash */
    public BytesKey getHash() {
      return this.mHash;
    }

    /** @return the Avro schema object */
    public Schema getSchema() {
      return this.mSchema;
    }

    /** {@inheritDoc} */
    @Override
    public String toString() {
      return Objects.toStringHelper(SchemaEntry.class)
                 .add("id", this.mId)
                 .add("hash", this.mHash)
                 .add("schema", this.mSchema)
                 .toString();
    }

    /** {@inheritDoc} */
    @Override
    public boolean equals(Object other) {
      if (!(other instanceof SchemaEntry)) {
        return false;
      }
      final SchemaEntry entry = (SchemaEntry) other;
      return (this.mId == entry.mId)
                 && (this.mHash.equals(entry.mHash))
                 && (this.mSchema.equals(entry.mSchema));
    }

    /** {@inheritDoc} */
    @Override
    public int hashCode() {
      return new HashCodeBuilder()
                 .append(mId)
                 .append(mHash)
                 .append(mSchema)
                 .toHashCode();
    }
  }

  /**
   * Cache providing an efficient mapping from Avro schema object to the schema hash.
   *
   * Computing the hash code of a schema is expensive as it serializes the Avro schema object
   * into JSON.
   */
  @ApiAudience.Private
  static final class SchemaHashCache {
    /**
     * Underlying cache is a weak identity hash map:
     * <li> We must use object IDs since Schema.hashCode() and Schema.equals() implement a
     *      comparison that ignores doc fields or default values.
     * <li> We must use a weak map to ensure the cache gets garbage collected properly.
     */
    private final Map<Schema, BytesKey> mCache =
        Collections.synchronizedMap(new WeakIdentityHashMap<Schema, BytesKey>());

    /**
     * Computes a hash of the specified Avro schema.
     *
     * Kiji currently uses MD5 sums (128 bits) of the schema JSON representation.
     *
     * @param schema Avro schema to compute a hash of.
     * @return Hash code as an array of bytes (16 bytes).
     */
    public static byte[] hashSchema(Schema schema) {
      return Hasher.hash(schema.toString());
    }

    /**
     * Hashes an Avro schema.
     *
     * @param schema Avro schema to hash.
     * @return the schema hash.
     */
    public BytesKey getHash(Schema schema) {
      final BytesKey hash = mCache.get(schema);
      if (null != hash) {
        return hash;
      }
      final BytesKey newHash = new BytesKey(hashSchema(schema));
      mCache.put(schema, newHash);
      return newHash;
    }
  }

  /**
   * Returns schema backup information in a form that can be directly written to a MetadataBackup
   * record. To read more about the avro type that has been specified to store this info, see
   * Layout.avdl
   *
   * @throws IOException on I/O error.
   * @return A list of schema table entries.
   */
  SchemaTableBackup toBackup() throws IOException;

  /**
   * Restores the schema entries from the specified backup record.
   *
   * @param backup The schema entries from a MetadataBackup record. This consist of the schema
   *     definition, schema id, and schema hash.
   * @throws IOException on I/O error.
   */
  void fromBackup(SchemaTableBackup backup) throws IOException;
}
TOP

Related Classes of org.kiji.schema.KijiSchemaTable

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.