/**
* Licensed to Odiago, Inc. under one or more contributor license
* agreements. See the NOTICE.txt file distributed with this work for
* additional information regarding copyright ownership. Odiago, Inc.
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package com.odiago.flumebase.io;

import java.io.IOException;
import java.util.List;
import java.util.Map;

import org.apache.avro.AvroRuntimeException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DecoderFactory;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.flume.core.Event;

import com.odiago.flumebase.exec.StreamSymbol;
import com.odiago.flumebase.lang.PreciseType;
import com.odiago.flumebase.lang.Type;
import com.odiago.flumebase.parser.TypedField;
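
/**
 * EventParser that interprets each event body as a single Avro-encoded record.
 *
 * <p>The stream must be created with a 'schema' property (see {@link #SCHEMA_PARAM})
 * whose value is the JSON text of an Avro record schema; each column of the stream
 * is matched by name against a field of that record. The declaration follows the
 * pattern shown in validate()'s error message (illustrative only; the elided parts
 * depend on the stream being defined):</p>
 *
 * <pre>
 *   CREATE STREAM .. EVENT FORMAT 'avro' PROPERTIES ('schema' = ...)
 * </pre>
 *
 * <p>Decoding is lazy: the event body is deserialized only when getColumn() is
 * first called after reset().</p>
 */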
public class AvroEventParser extends EventParser {
  private static final Logger LOG = LoggerFactory.getLogger(
      AvroEventParser.class.getName());

  public static final String SCHEMA_PARAM = "schema";

  /** Configuration parameters. */
  private Map<String, String> mParams;

  /** Current event we're parsing. */
  private Event mEvent;

  /** Schema for input events. */
  private Schema mSchema;

  /** Current event deserialized into a generic data record. */
  private GenericData.Record mRecord;

  private boolean mIsDecoded; // true if mEvent has been deserialized into mRecord.

  // Avro parsing utility objects below.
  private DecoderFactory mDecoderFactory;
  private BinaryDecoder mDecoder;
  private GenericDatumReader<GenericData.Record> mDatumReader;

  /**
   * Creates a new AvroEventParser with the string-to-string parameter map specified
   * by the user who created the stream we are parsing.
   */
  public AvroEventParser(Map<String, String> params) {
    mParams = params;

    // Initialize the Avro decoder.
    String schemaStr = mParams.get(SCHEMA_PARAM);
    if (null != schemaStr) {
      // If schemaStr is null, validate() will fail, so we will never need the
      // objects we skip initializing here.
      try {
        mSchema = Schema.parse(schemaStr);
        mDecoderFactory = new DecoderFactory();
        mRecord = new GenericData.Record(mSchema);
        mDatumReader = new GenericDatumReader<GenericData.Record>(mSchema);
      } catch (RuntimeException re) {
        // Couldn't parse the schema; validate() will catch and report this.
      }
    }
  }
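
  // Illustrative construction (the schema text below is hypothetical): the stream
  // runtime builds this parser from the PROPERTIES map of the CREATE STREAM
  // statement, roughly equivalent to:
  //
  //   Map<String, String> params = new HashMap<String, String>();
  //   params.put(SCHEMA_PARAM, "{\"type\": \"record\", \"name\": \"E\", \"fields\": [ ... ]}");
  //   EventParser parser = new AvroEventParser(params);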

  /** {@inheritDoc} */
  @Override
  public void reset(Event e) {
    mEvent = e;
    mIsDecoded = false;
  }

  /** {@inheritDoc} */
  @Override
  public Object getColumn(int colIdx, Type expectedType) throws IOException {
    if (!mIsDecoded) {
      // Now that we actually want a record value, decode the input bytes.
      mDecoder = mDecoderFactory.createBinaryDecoder(mEvent.getBody(), mDecoder);
      mRecord = mDatumReader.read(mRecord, mDecoder);
      mIsDecoded = true;
    }
    return avroToNative(mRecord.get(colIdx), expectedType);
  }
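
  // Illustrative usage (caller side; the variable names are hypothetical): for each
  // incoming event, the stream runtime resets the parser and then pulls columns:
  //
  //   parser.reset(event);
  //   Object value = parser.getColumn(colIdx, colType); // colType: the column's declared Type
  //
  // The body is decoded at most once per reset(), on the first getColumn() call.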

  @Override
  public boolean validate(StreamSymbol streamSym) {
    // Check that we have an incoming schema.
    String schemaStr = mParams.get(SCHEMA_PARAM);
    if (null == schemaStr) {
      LOG.error("The EventParser for this stream requires the '"
          + SCHEMA_PARAM + "' property to be set. Try recreating the stream as: "
          + "CREATE STREAM .. EVENT FORMAT 'avro' PROPERTIES ('" + SCHEMA_PARAM
          + "' = ...)");
      return false;
    } else {
      // The constructor already parsed this same schema text into mSchema;
      // re-parse it here only to verify that it is well-formed.
      try {
        Schema.parse(schemaStr);
      } catch (RuntimeException re) {
        LOG.error("Couldn't parse the specified schema for the stream: " + re);
        return false;
      }

      // Given a schema -- does it match the expected column types?
      // TODO -- can we induce the field defs from the schema? We should be able
      // to say something like: CREATE STREAM foo (auto) FROM SCHEMA '....'
      // (Currently no, since PRECISE values are held as strings in Avro.)
      List<Schema.Field> schemaFields = null;
      try {
        schemaFields = mSchema.getFields();
      } catch (AvroRuntimeException are) {
        // This wasn't a record schema; it was a single field or something similar.
        LOG.error("Schemas for events must be of record type. Each column must "
            + "represent a field of the same name.");
        return false;
      }

      List<TypedField> columnFields = streamSym.getFields();
      for (int i = 0; i < columnFields.size(); i++) {
        TypedField col = columnFields.get(i);
        Type colType = col.getType();

        // Get the schema field that matches this column name.
        Schema.Field schemaField = null;
        for (Schema.Field testSchemaField : schemaFields) {
          if (testSchemaField.name().equals(col.getUserAlias())) {
            schemaField = testSchemaField;
            break;
          }
        }

        if (null == schemaField) {
          // Can't find a field in the schema to match the current column.
          LOG.error("The Avro schema does not contain a field with the same name "
              + "as column '" + col.getUserAlias() + "'.");
          return false;
        }

        // Are the schemas compatible?
        if (!schemaField.schema().equals(colType.getAvroSchema())) {
          boolean warned = false;
          if (colType.isNullable() && colType.isPrimitive()) {
            // A common error is that the rtsql type is nullable and the schema
            // type isn't. Give a specific suggestion in this case.
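            // Illustrative example (the exact Avro mapping is an assumption): a
            // nullable INT column would typically correspond to the Avro union
            // ["int", "null"], while the event schema may declare the field as a
            // plain "int"; in that case, suggest the non-nullable column type.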
            Type nonNullVersion = Type.getPrimitive(colType.getPrimitiveTypeName());
            if (schemaField.schema().equals(nonNullVersion.getAvroSchema())) {
              LOG.error("Column " + col.getUserAlias() + " has type " + colType
                  + ", but requires type " + nonNullVersion + " to match the Avro schema.");
              warned = true;
            }
          }

          if (!warned) {
            // Give a generic error message.
            LOG.error("Column " + col.getUserAlias() + " has type " + colType
                + " with Avro schema " + colType.getAvroSchema()
                + ", but the stream schema field " + schemaField.name()
                + " has an incompatible Avro schema: " + schemaField.schema());
          }
          return false;
        }
      }
    }

    // Looks good!
    return true;
  }
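
  // Note (illustrative values): PRECISE data arrives from Avro as a string, e.g.
  // the generic record holds "3.14" for a PRECISE column; avroToNative() converts
  // it with PreciseType.parseStringInput().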

  /**
   * Converts a value read from the Avro generic record into the native Java type
   * that FlumeBase expects to work with.
   *
   * @param in the object returned from the Avro generic record.
   * @param expectedType the expected FlumeBase type of the returned object.
   * @return a Java object in the native Java type that FlumeBase expects to
   *     work with.
   * @see AvroOutputElementImpl#nativeToAvro
   */
  private Object avroToNative(Object in, Type expectedType) {
    if (null == in) {
      return null;
    } else if (expectedType.getPrimitiveTypeName().equals(Type.TypeName.PRECISE)) {
      // Convert the string input to the precise type.
      PreciseType preciseType = PreciseType.toPreciseType(expectedType);
      return preciseType.parseStringInput((String) in);
    } else {
      return in; // All other types map to themselves.
    }
  }
}