Source Code of org.kiji.schema.util.AvroUtils$SchemaPairCompatibility

/**
* (c) Copyright 2013 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.kiji.schema.util;

import java.io.IOException;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import com.google.common.base.Joiner;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.Schema.Type;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.kiji.annotations.ApiAudience;
import org.kiji.schema.InternalKijiError;
import org.kiji.schema.KijiSchemaTable;
import org.kiji.schema.avro.AvroSchema;
import org.kiji.schema.layout.AvroSchemaResolver;
import org.kiji.schema.layout.SchemaTableAvroResolver;

/**
* General purpose Avro utilities.
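 *
 * <p> Illustrative usage sketch (the schemas below are hypothetical examples, not taken from
 * this library): </p>
 * <pre>{@code
 *   final Schema intSchema = Schema.create(Schema.Type.INT);
 *   final Schema longSchema = Schema.create(Schema.Type.LONG);
 *   // A long reader can decode data written as an int (Avro promotes int to long):
 *   final AvroUtils.SchemaPairCompatibility result =
 *       AvroUtils.checkReaderWriterCompatibility(longSchema, intSchema);
 *   // result.getType() == AvroUtils.SchemaCompatibilityType.COMPATIBLE
 * }</pre>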
*/
@ApiAudience.Private
public final class AvroUtils {
  private static final Logger LOG = LoggerFactory.getLogger(AvroUtils.class);

  /** Utility class cannot be instantiated. */
  private AvroUtils() {
  }

  /**
   * Compares two AvroSchema objects for equality within the context of the given SchemaTable.
   *
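   * <p> Illustrative sketch (assumes {@code schemaTable}, {@code avroSchemaA} and
   * {@code avroSchemaB} are hypothetical values obtained elsewhere): </p>
   * <pre>{@code
   *   if (AvroUtils.avroSchemaEquals(schemaTable, avroSchemaA, avroSchemaB)) {
   *     // Both AvroSchema descriptors resolve to the same Avro Schema.
   *   }
   * }</pre>
   *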
   * @param schemaTable SchemaTable with which to resolve schema UIDs.
   * @param first one AvroSchema object to compare for equality.
   * @param second another AvroSchema object to compare for equality.
   * @return whether the two objects represent the same Schema.
   * @throws IOException in case of an error reading from the SchemaTable.
   */
  public static boolean avroSchemaEquals(
      final KijiSchemaTable schemaTable,
      final AvroSchema first,
      final AvroSchema second
  ) throws IOException {
    final AvroSchemaResolver resolver = new SchemaTableAvroResolver(schemaTable);

    return Objects.equal(resolver.apply(first), resolver.apply(second));
  }

  /**
   * Checks whether a collection of AvroSchema objects contains a given AvroSchema element, resolving
   * UIDs using the given KijiSchemaTable.
   *
   * @param schemaTable KijiSchemaTable with which to resolve schema UIDs.
   * @param schemaCollection collection of AvroSchemas to check for the presence of the given
   *     element.
   * @param element AvroSchema for whose presence to check in schemaCollection.
   * @return whether schemaCollection contains element after resolving UIDs using schemaTable.
   * @throws IOException in case of an error reading from the schema table.
   */
  public static boolean avroSchemaCollectionContains(
      final KijiSchemaTable schemaTable,
      final Collection<AvroSchema> schemaCollection,
      final AvroSchema element
  ) throws IOException {
    for (AvroSchema schema : schemaCollection) {
      if (avroSchemaEquals(schemaTable, schema, element)) {
        return true;
      }
    }
    // If none match, return false.
    return false;
  }

  /**
   * Returns the non-null branch of an optional schema (i.e. a union of the form { null, Type }).
   *
   * <p> The given schema must be a union. </p>
   *
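   * <p> Illustrative sketch (hypothetical schema): </p>
   * <pre>{@code
   *   final Schema optionalString = Schema.createUnion(Lists.newArrayList(
   *       Schema.create(Schema.Type.NULL),
   *       Schema.create(Schema.Type.STRING)));
   *   // getOptionalType(optionalString) returns the string schema.
   *   // For a two-branch union with no null branch, getOptionalType() returns null.
   * }</pre>
   *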
   * @param schema The schema to test.
   * @return the non-null branch if the specified schema describes an optional type, null otherwise.
   */
  public static Schema getOptionalType(Schema schema) {
    Preconditions.checkArgument(schema.getType() == Schema.Type.UNION);
    final List<Schema> types = schema.getTypes();
    if (types.size() != 2) {
      return null;
    }
    if (types.get(0).getType() == Schema.Type.NULL) {
      return types.get(1);
    } else if (types.get(1).getType() == Schema.Type.NULL) {
      return types.get(0);
    } else {
      return null;
    }
  }

  /**
   * Validates that the provided reader schemas can be used to decode data written with the provided
   * writer schema.
   *
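   * <p> Illustrative sketch (the reader schemas below are hypothetical): </p>
   * <pre>{@code
   *   final List<Schema> readers = Lists.newArrayList(
   *       Schema.create(Schema.Type.LONG),
   *       Schema.create(Schema.Type.DOUBLE));
   *   final AvroUtils.SchemaSetCompatibility compat =
   *       AvroUtils.checkWriterCompatibility(readers.iterator(), Schema.create(Schema.Type.INT));
   *   // Both readers can decode int data, so compat.getType() is COMPATIBLE.
   * }</pre>
   *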
   * @param readers schemas that must be able to decode data encoded with the provided writer
   *     schema.
   * @param writer schema to check.
   * @return an aggregated compatibility result covering each reader/writer pair.
   */
  public static SchemaSetCompatibility checkWriterCompatibility(
      Iterator<Schema> readers,
      Schema writer) {
    final List<SchemaPairCompatibility> results = Lists.newArrayList();
    while (readers.hasNext()) {
      // Check compatibility between each reader/writer pair.
      results.add(checkReaderWriterCompatibility(readers.next(), writer));
    }
    return new SchemaSetCompatibility(results);
  }

  /**
   * Validates that the provided reader schema can read data written with the provided writer
   * schemas.
   *
   * @param reader schema to check.
   * @param writers schemas that must be compatible with the provided reader schema.
   * @return an aggregated compatibility result covering each reader/writer pair.
   */
  public static SchemaSetCompatibility checkReaderCompatibility(
      Schema reader,
      Iterator<Schema> writers) {
    final List<SchemaPairCompatibility> results = Lists.newArrayList();
    while (writers.hasNext()) {
      // Check compatibility between each reader/writer pair.
      results.add(checkReaderWriterCompatibility(reader, writers.next()));
    }
    return new SchemaSetCompatibility(results);
  }

  /** Message to annotate reader/writer schema pairs that are compatible. */
  public static final String READER_WRITER_COMPATIBLE_MESSAGE =
      "Reader schema can always successfully decode data written using the writer schema.";

  /**
   * Validates that the provided reader schema can be used to decode Avro data written with the
   * provided writer schema.
   *
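   * <p> Illustrative sketch (hypothetical schemas): </p>
   * <pre>{@code
   *   final AvroUtils.SchemaPairCompatibility compat = AvroUtils.checkReaderWriterCompatibility(
   *       Schema.create(Schema.Type.STRING),   // reader
   *       Schema.create(Schema.Type.INT));     // writer
   *   // A string reader cannot decode int data:
   *   // compat.getType() == AvroUtils.SchemaCompatibilityType.INCOMPATIBLE
   * }</pre>
   *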
   * @param reader schema to check.
   * @param writer schema to check.
   * @return a result object identifying any compatibility errors.
   */
  public static SchemaPairCompatibility checkReaderWriterCompatibility(
      final Schema reader,
      final Schema writer
  ) {
    final SchemaCompatibilityType compatibility =
        new ReaderWriterCompatibilityChecker()
            .getCompatibility(reader, writer);

    final String message;
    switch (compatibility) {
      case INCOMPATIBLE: {
        message = String.format(
            "Data encoded using writer schema:%n%s%n"
            + "will or may fail to decode using reader schema:%n%s%n",
            writer.toString(true),
            reader.toString(true));
        break;
      }
      case COMPATIBLE: {
        message = READER_WRITER_COMPATIBLE_MESSAGE;
        break;
      }
      default: throw new InternalKijiError("Unknown compatibility: " + compatibility);
    }

    return new SchemaPairCompatibility(
        compatibility,
        reader,
        writer,
        message);
  }

  // -----------------------------------------------------------------------------------------------

  /**
   * Tests whether the names of two Avro named schemas match.
   *
   * <p> Matching includes reader name aliases. </p>
   *
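   * <p> For instance (hypothetical schemas), a reader record {@code ns.NewName} that declares
   * the alias {@code OldName} matches a writer record whose full name is {@code ns.OldName}: </p>
   * <pre>{@code
   *   final Schema writer = new Schema.Parser().parse(
   *       "{\"type\": \"record\", \"name\": \"OldName\", \"namespace\": \"ns\", \"fields\": []}");
   *   final Schema reader = new Schema.Parser().parse(
   *       "{\"type\": \"record\", \"name\": \"NewName\", \"namespace\": \"ns\","
   *       + " \"aliases\": [\"OldName\"], \"fields\": []}");
   *   // schemaNameEquals(reader, writer) returns true.
   * }</pre>
   *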
   * @param reader Named reader schema.
   * @param writer Named writer schema.
   * @return whether the names of the named schemas match or not.
   */
  public static boolean schemaNameEquals(final Schema reader, final Schema writer) {
    final String writerFullName = writer.getFullName();
    if (Objects.equal(reader.getFullName(), writerFullName)) {
      return true;
    }
    // Apply reader aliases:
    if (reader.getAliases().contains(writerFullName)) {
      return true;
    }
    return false;
  }

  /**
   * Identifies the writer field that corresponds to the specified reader field.
   *
   * <p> Matching includes reader name aliases. </p>
   *
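   * <p> For instance (hypothetical fields), a reader field named {@code count} that declares the
   * alias {@code num} matches a writer field named {@code num} when the writer record contains no
   * field named {@code count}. </p>
   *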
   * @param writerSchema Schema of the record where to look for the writer field.
   * @param readerField Reader field to identify the corresponding writer field of.
   * @return the corresponding writer field, or null if there is none.
   */
  public static Field lookupWriterField(final Schema writerSchema, final Field readerField) {
    Preconditions.checkArgument(writerSchema.getType() == Type.RECORD);
    final List<Field> writerFields = Lists.newArrayList();
    final Field direct = writerSchema.getField(readerField.name());
    if (direct != null) {
      writerFields.add(direct);
    }
    for (final String readerFieldAliasName : readerField.aliases()) {
      final Field writerField = writerSchema.getField(readerFieldAliasName);
      if (writerField != null) {
        writerFields.add(writerField);
      }
    }
    switch (writerFields.size()) {
      case 0: return null;
      case 1: return writerFields.get(0);
      default: {
        // TODO Report this properly to the user.
        LOG.error("Reader record field {} matches multiple fields in the writer record: {}",
            readerField, Joiner.on(",").join(writerFields));
        // Pick the first writer field that matches:
        return writerFields.get(0);
      }
    }
  }

  /**
   * Reader/writer schema pair that can be used as a key in a hash map.
   *
   * Schema objects are distinguished by reference identity (identity hash code and pointer
   * equality) rather than by structural equality.
   */
  private static final class ReaderWriter {
    private final Schema mReader;
    private final Schema mWriter;

    /**
     * Initializes a new reader/writer pair.
     *
     * @param reader Reader schema.
     * @param writer Writer schema.
     */
    public ReaderWriter(final Schema reader, final Schema writer) {
      mReader = reader;
      mWriter = writer;
    }

    /**
     * Returns the reader schema in this pair.
     * @return the reader schema in this pair.
     */
    public Schema getReader() {
      return mReader;
    }

    /**
     * Returns the writer schema in this pair.
     * @return the writer schema in this pair.
     */
    public Schema getWriter() {
      return mWriter;
    }

    /** {@inheritDoc} */
    @Override
    public int hashCode() {
      return System.identityHashCode(mReader) ^ System.identityHashCode(mWriter);
    }

    /** {@inheritDoc} */
    @Override
    public boolean equals(Object obj) {
      if (!(obj instanceof ReaderWriter)) {
        return false;
      }
      final ReaderWriter that = (ReaderWriter) obj;
      // Use pointer comparison here:
      return (this.mReader == that.mReader)
          && (this.mWriter == that.mWriter);
    }

    /** {@inheritDoc} */
    @Override
    public String toString() {
      return Objects.toStringHelper(ReaderWriter.class)
          .add("reader", mReader)
          .add("writer", mWriter)
          .toString();
    }
  }

  /**
   * Determines the compatibility of a reader/writer schema pair.
   *
   * <p> Provides memoization to handle recursive schemas. </p>
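   *
   * <p> Without memoization, a recursive schema such as the following (hypothetical) linked-list
   * record would cause unbounded recursion when checked for compatibility with itself: </p>
   * <pre>{@code
   *   final Schema listNode = new Schema.Parser().parse(
   *       "{\"type\": \"record\", \"name\": \"ListNode\", \"fields\": ["
   *       + "{\"name\": \"value\", \"type\": \"long\"},"
   *       + "{\"name\": \"next\", \"type\": [\"null\", \"ListNode\"]}]}");
   * }</pre>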
   */
  private static final class ReaderWriterCompatibilityChecker {
    private final Map<ReaderWriter, SchemaCompatibilityType> mMemoizeMap = Maps.newHashMap();

    /**
     * Reports the compatibility of a reader/writer schema pair.
     *
     * <p> Memoizes the compatibility results. </p>
     *
     * @param reader Reader schema to test.
     * @param writer Writer schema to test.
     * @return the compatibility of the reader/writer schema pair.
     */
    public SchemaCompatibilityType getCompatibility(
        final Schema reader,
        final Schema writer
    ) {
      LOG.debug("Checking compatibility of reader {} with writer {}", reader, writer);
      final ReaderWriter pair = new ReaderWriter(reader, writer);
      final SchemaCompatibilityType existing = mMemoizeMap.get(pair);
      if (existing != null) {
        if (existing == SchemaCompatibilityType.RECURSION_IN_PROGRESS) {
          // Break the recursion here.
          // schemas are compatible unless proven incompatible:
          return SchemaCompatibilityType.COMPATIBLE;
        }
        return existing;
      }
      // Mark this reader/writer pair as "in progress":
      mMemoizeMap.put(pair, SchemaCompatibilityType.RECURSION_IN_PROGRESS);
      final SchemaCompatibilityType calculated = calculateCompatibility(reader, writer);
      mMemoizeMap.put(pair, calculated);
      return calculated;
    }

    /**
     * Calculates the compatibility of a reader/writer schema pair.
     *
     * <p>
     * Relies on external memoization performed by {@link #getCompatibility(Schema, Schema)}.
     * </p>
     *
     * @param reader Reader schema to test.
     * @param writer Writer schema to test.
     * @return the compatibility of the reader/writer schema pair.
     */
    private SchemaCompatibilityType calculateCompatibility(
        final Schema reader,
        final Schema writer
    ) {
      Preconditions.checkNotNull(reader);
      Preconditions.checkNotNull(writer);

      if (reader.getType() == writer.getType()) {
        switch (reader.getType()) {
          case NULL:
          case BOOLEAN:
          case INT:
          case LONG:
          case FLOAT:
          case DOUBLE:
          case BYTES:
          case STRING: {
            return SchemaCompatibilityType.COMPATIBLE;
          }
          case ARRAY: {
            return getCompatibility(reader.getElementType(), writer.getElementType());
          }
          case MAP: {
            return getCompatibility(reader.getValueType(), writer.getValueType());
          }
          case FIXED: {
            // fixed size and name must match:
            if (!schemaNameEquals(reader, writer)) {
              return SchemaCompatibilityType.INCOMPATIBLE;
            }
            if (reader.getFixedSize() != writer.getFixedSize()) {
              return SchemaCompatibilityType.INCOMPATIBLE;
            }
            return SchemaCompatibilityType.COMPATIBLE;
          }
          case ENUM: {
            // enum names must match:
            if (!schemaNameEquals(reader, writer)) {
              return SchemaCompatibilityType.INCOMPATIBLE;
            }
            // reader symbols must contain all writer symbols:
            final Set<String> symbols = Sets.newHashSet(writer.getEnumSymbols());
            symbols.removeAll(reader.getEnumSymbols());
            // TODO: Report a human-readable error.
            // if (!symbols.isEmpty()) {
            // }
            return symbols.isEmpty()
                ? SchemaCompatibilityType.COMPATIBLE
                : SchemaCompatibilityType.INCOMPATIBLE;
          }
          case RECORD: {
            // record names must match:
            if (!schemaNameEquals(reader, writer)) {
              return SchemaCompatibilityType.INCOMPATIBLE;
            }

            // Check that each field in the reader record can be populated from the writer record:
            for (final Field readerField : reader.getFields()) {
              final Field writerField = lookupWriterField(writer, readerField);
              if (writerField == null) {
                // Reader field does not correspond to any field in the writer record schema,
                // reader field must have a default value.
                if (readerField.defaultValue() == null) {
                  // reader field has no default value
                  return SchemaCompatibilityType.INCOMPATIBLE;
                }
              } else {
                if (getCompatibility(readerField.schema(), writerField.schema())
                    == SchemaCompatibilityType.INCOMPATIBLE) {
                  return SchemaCompatibilityType.INCOMPATIBLE;
                }
              }
            }

            // All fields in the reader record can be populated from the writer record:
            return SchemaCompatibilityType.COMPATIBLE;
          }
          case UNION: {
            // Check that each individual branch of the writer union can be decoded:
            for (final Schema writerBranch : writer.getTypes()) {
              if (getCompatibility(reader, writerBranch) == SchemaCompatibilityType.INCOMPATIBLE) {
                return SchemaCompatibilityType.INCOMPATIBLE;
              }
            }
            // Each schema in the writer union can be decoded with the reader:
            return SchemaCompatibilityType.COMPATIBLE;
          }

          default: {
            throw new InternalKijiError("Unknown schema type: " + reader.getType());
          }
        }

      } else {
        // Reader and writer have different schema types:

        // Handle the corner case where writer is a union of a singleton branch: { X } === X
        if ((writer.getType() == Schema.Type.UNION)
            && writer.getTypes().size() == 1) {
          return getCompatibility(reader, writer.getTypes().get(0));
        }

        switch (reader.getType()) {
          case NULL: return SchemaCompatibilityType.INCOMPATIBLE;
          case BOOLEAN: return SchemaCompatibilityType.INCOMPATIBLE;
          case INT: return SchemaCompatibilityType.INCOMPATIBLE;
          case LONG: {
            return (writer.getType() == Type.INT)
                ? SchemaCompatibilityType.COMPATIBLE
                : SchemaCompatibilityType.INCOMPATIBLE;
          }
          case FLOAT: {
            return ((writer.getType() == Type.INT)
                || (writer.getType() == Type.LONG))
                ? SchemaCompatibilityType.COMPATIBLE
                : SchemaCompatibilityType.INCOMPATIBLE;

          }
          case DOUBLE: {
            return ((writer.getType() == Type.INT)
                || (writer.getType() == Type.LONG)
                || (writer.getType() == Type.FLOAT))
                ? SchemaCompatibilityType.COMPATIBLE
                : SchemaCompatibilityType.INCOMPATIBLE;
          }
          case BYTES: return SchemaCompatibilityType.INCOMPATIBLE;
          case STRING: return SchemaCompatibilityType.INCOMPATIBLE;
          case ARRAY: return SchemaCompatibilityType.INCOMPATIBLE;
          case MAP: return SchemaCompatibilityType.INCOMPATIBLE;
          case FIXED: return SchemaCompatibilityType.INCOMPATIBLE;
          case ENUM: return SchemaCompatibilityType.INCOMPATIBLE;
          case RECORD: return SchemaCompatibilityType.INCOMPATIBLE;
          case UNION: {
            for (final Schema readerBranch : reader.getTypes()) {
              if (getCompatibility(readerBranch, writer) == SchemaCompatibilityType.COMPATIBLE) {
                return SchemaCompatibilityType.COMPATIBLE;
              }
            }
            // No branch in the reader union has been found compatible with the writer schema:
            return SchemaCompatibilityType.INCOMPATIBLE;
          }

          default: {
            throw new InternalKijiError("Unknown schema type: " + reader.getType());
          }
        }
      }
    }
  }

  /**
   * Identifies the type of a schema compatibility result.
   */
  public static enum SchemaCompatibilityType {
    COMPATIBLE,
    INCOMPATIBLE,

    /** Used internally to tag a reader/writer schema pair and prevent recursion. */
    RECURSION_IN_PROGRESS;
  }

  // -----------------------------------------------------------------------------------------------

  /**
   * Provides information about the compatibility of a single schema with a set of schemas.
   */
  public static final class SchemaSetCompatibility {
    /** The aggregated compatibility of the schema set. */
    private final SchemaCompatibilityType mType;

    /** The compatibilities of each schema pair. */
    private final List<SchemaPairCompatibility> mCauses;

    /**
     * Constructs a new instance.
     *
     * @param causes compatibility results of the individual schema pairs.
     */
    public SchemaSetCompatibility(List<SchemaPairCompatibility> causes) {
      SchemaCompatibilityType isCompatible = SchemaCompatibilityType.COMPATIBLE;
      for (SchemaPairCompatibility compatibility : causes) {
        if (compatibility.getType() == SchemaCompatibilityType.INCOMPATIBLE) {
          isCompatible = SchemaCompatibilityType.INCOMPATIBLE;
          break;
        }
      }

      mType = isCompatible;
      mCauses = causes;
    }

    /**
     * Returns the aggregated compatibility of the schema set.
     *
     * @return the aggregated compatibility of the schema set.
     */
    public SchemaCompatibilityType getType() {
      return mType;
    }

    /**
     * Returns the compatibility of each schema pair.
     *
     * @return the compatibility of each schema pair.
     */
    public List<SchemaPairCompatibility> getCauses() {
      return mCauses;
    }

    /** {@inheritDoc} */
    @Override
    public String toString() {
      return Objects.toStringHelper(SchemaSetCompatibility.class)
          .add("type", mType)
          .add("cause", mCauses)
          .toString();
    }
  }

  // -----------------------------------------------------------------------------------------------

  /**
   * Provides information about the compatibility of a single reader and writer schema pair.
   *
   * Note: This class represents a one-way relationship from the reader to the writer schema.
   */
  public static final class SchemaPairCompatibility {
    /** The type of this result. */
    private final SchemaCompatibilityType mType;

    /** Validated reader schema. */
    private final Schema mReader;

    /** Validated writer schema. */
    private final Schema mWriter;

    /** Human readable description of this result. */
    private final String mDescription;

    /**
     * Constructs a new instance.
     *
     * @param type of the schema compatibility.
     * @param reader schema that was validated.
     * @param writer schema that was validated.
     * @param description of this compatibility result.
     */
    public SchemaPairCompatibility(
        SchemaCompatibilityType type,
        Schema reader,
        Schema writer,
        String description) {
      mType = type;
      mReader = reader;
      mWriter = writer;
      mDescription = description;
    }

    /**
     * Gets the type of this result.
     *
     * @return the type of this result.
     */
    public SchemaCompatibilityType getType() {
      return mType;
    }

    /**
     * Gets the reader schema that was validated.
     *
     * @return reader schema that was validated.
     */
    public Schema getReader() {
      return mReader;
    }

    /**
     * Gets the writer schema that was validated.
     *
     * @return writer schema that was validated.
     */
    public Schema getWriter() {
      return mWriter;
    }

    /**
     * Gets a human readable description of this validation result.
     *
     * @return a human readable description of this validation result.
     */
    public String getDescription() {
      return mDescription;
    }

    /** {@inheritDoc} */
    @Override
    public String toString() {
      return Objects.toStringHelper(this.getClass())
          .add("type", mType)
          .add("readerSchema", mReader)
          .add("writerSchema", mWriter)
          .add("description", mDescription)
          .toString();
    }

    /** {@inheritDoc} */
    @Override
    public boolean equals(Object other) {
      if (other instanceof SchemaPairCompatibility) {
        final SchemaPairCompatibility result = (SchemaPairCompatibility) other;
        return Objects.equal(result.mType, mType)
            && Objects.equal(result.mReader, mReader)
            && Objects.equal(result.mWriter, mWriter)
            && Objects.equal(result.mDescription, mDescription);
      } else {
        return false;
      }
    }

    /** {@inheritDoc} */
    @Override
    public int hashCode() {
      return Objects.hashCode(mType, mReader, mWriter, mDescription);
    }
  }
}