Package com.odiago.flumebase.io

Source Code of com.odiago.flumebase.io.AvroEventParser

/**
* Licensed to Odiago, Inc. under one or more contributor license
* agreements.  See the NOTICE.txt file distributed with this work for
* additional information regarding copyright ownership.  Odiago, Inc.
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the
* License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
* License for the specific language governing permissions and limitations
* under the License.
*/

package com.odiago.flumebase.io;

import java.io.IOException;

import java.util.List;
import java.util.Map;

import org.apache.avro.AvroRuntimeException;
import org.apache.avro.Schema;

import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;

import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DecoderFactory;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.flume.core.Event;

import com.odiago.flumebase.exec.StreamSymbol;

import com.odiago.flumebase.lang.PreciseType;
import com.odiago.flumebase.lang.Type;

import com.odiago.flumebase.parser.TypedField;

public class AvroEventParser extends EventParser {
  private static final Logger LOG = LoggerFactory.getLogger(
      AvroEventParser.class.getName());

  public static final String SCHEMA_PARAM = "schema";

  /** Configuration parameters. */
  private Map<String, String> mParams;

  /** Current event we're parsing. */
  private Event mEvent;

  /** Schema for input events */
  private Schema mSchema;

  /** Current event deserialized into a generic data record */
  private GenericData.Record mRecord;

  private boolean mIsDecoded; // true if the mEvent is deserialized into mRecord.

  // Avro parsing utility objects below.

  private DecoderFactory mDecoderFactory;
  private BinaryDecoder mDecoder;
  private GenericDatumReader<GenericData.Record> mDatumReader;

  /**
   * Creates a new AvroEventParser, with a string--string parameter map specified
   * by the user who created the stream we are parsing.
   */
  public AvroEventParser(Map<String, String> params) {
    mParams = params;

    // Initialize avro decoder.
    String schemaStr = mParams.get(SCHEMA_PARAM);
    if (null != schemaStr) {
      // If schemaStr is null, validate() will fail, so we won't
      // need these things that we can't initialize.
      try {
        mSchema = Schema.parse(schemaStr);
        mDecoderFactory = new DecoderFactory();
        mRecord = new GenericData.Record(mSchema);
        mDatumReader = new GenericDatumReader<GenericData.Record>(mSchema);
      } catch (RuntimeException re) {
        // Couldn't parse schema. Ok, we'll get this in the validate() method.
      }
    }
  }

  /** {@inheritDoc} */
  @Override
  public void reset(Event e) {
    mEvent = e;
    mIsDecoded = false;
  }

  /** {@inheritDoc} */
  @Override
  public Object getColumn(int colIdx, Type expectedType)
      throws IOException {

    if (!mIsDecoded) {
      // Now that we actually want a record value, decode the input bytes.
      mDecoder = mDecoderFactory.createBinaryDecoder(mEvent.getBody(), mDecoder);
      mRecord = mDatumReader.read(mRecord, mDecoder);
      mIsDecoded = true;
    }

    return avroToNative(mRecord.get(colIdx), expectedType);
  }

  @Override
  public boolean validate(StreamSymbol streamSym) {
    // Check that we have an incoming schema.
    String schemaStr = mParams.get(SCHEMA_PARAM);
    if (null == schemaStr) {
      LOG.error("The EventParser for this stream requires the '"
          + SCHEMA_PARAM + "' property to be set. Try recreating the stream as: "
          + "CREATE STREAM .. EVENT FORMAT 'avro' PROPERTIES ('" + SCHEMA_PARAM
          + "' = ...)");
      return false;
    } else {
      try {
        Schema.parse(schemaStr);
      } catch (RuntimeException re) {
        LOG.error("Couldn't parse specified schema for the stream: " + re);
        return false;
      }

      // Given a schema -- does it match the expected column types?
      // TODO -- can we induce the field defs from the schema?
      // we should be able to say something like: CREATE STREAM foo (auto) FROM SCHEMA '....'
      // (Currently no, since PRECISE values are held as strings in avro.)
      List<Schema.Field> schemaFields = null;
      try {
        schemaFields = mSchema.getFields();
      } catch (AvroRuntimeException are) {
        // This wasn't a record schema, it was a single field or something.
        LOG.error("Schemas for events must be of record type. Each column must "
            + "represent a field of the same name.");
        return false;
      }
      List<TypedField> columnFields = streamSym.getFields();

      for (int i = 0; i < columnFields.size(); i++) {
        TypedField col = columnFields.get(i);
        Type colType = col.getType();
       
        // Get the schema field that matches this column name.
        Schema.Field schemaField = null;
        for (Schema.Field testSchemaField : schemaFields) {
          if (testSchemaField.name().equals(col.getUserAlias())) {
            schemaField = testSchemaField;
            break;
          }
        }
       
        if (null == schemaField) {
          // Can't find a field in the schema to match the current column.
          LOG.error("The Avro schema does not contain a field with the same name "
              + "as column '" + col.getUserAlias() + "'.");
          return false;
        }
       
        // Are the schemas compatible?
        if (!schemaField.schema().equals(colType.getAvroSchema())) {
          boolean warned = false;

          if (colType.isNullable() && colType.isPrimitive()) {
            // A common error is that the rtsql type is nullable and the schema
            // type isn't. Give a specific suggestion in this case.
            Type nonNullVersion = Type.getPrimitive(colType.getPrimitiveTypeName());
            if (schemaField.schema().equals(nonNullVersion.getAvroSchema())) {
              LOG.error("Column " + col.getUserAlias() + " has type " + colType
                  + ", but requires type " + nonNullVersion + " to match the Avro schema.");
              warned = true;
            }
          }

          if (!warned) {
            // Give a generic error message.
            LOG.error("Column " + col.getUserAlias() + " has type " + colType
                + " with avro schema " + colType.getAvroSchema()
                + " but the stream schema field " + schemaField.name() + " has "
                + " an incompatible Avro schema: " + schemaField.schema());
          }
          return false;
        }
      }
    }

    // Looks good!
    return true;
  }

  /**
   * @return a Java object in the native Java type that FlumeBase expects to
   * work with.
   * @param in the object returned from the Avro generic record.
   * @param expectedType the expected FlumeBase type of the returned object.
   * @see AvroOutputElementImpl.nativeToAvro()
   */
  private Object avroToNative(Object in, Type expectedType) {
    if (null == in) {
      return null;
    } else if (expectedType.getPrimitiveTypeName().equals(Type.TypeName.PRECISE)) {
      // Convert string input to precise type.
      PreciseType preciseType = PreciseType.toPreciseType(expectedType);
      return preciseType.parseStringInput((String) in);
    } else {
      return in; // All other types map to themselves.
    }
  }
}
TOP

Related Classes of com.odiago.flumebase.io.AvroEventParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.