package com.linkedin.databus.util;
/*
*
* Copyright 2013 LinkedIn Corp. All rights reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
import java.io.StringWriter;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonGenerator;
import org.codehaus.jackson.map.ObjectMapper;
/**
 * Generates an Avro schema, serialized as a JSON string, that describes the fields of a
 * database table.
 *
 * <p>Every database field is translated into a map carrying a camel-cased "name", a "type",
 * and a "meta" string (see {@link #buildMetaString}) that records the original database field
 * name, position and type. Fields that are not the top-level record are made nullable by
 * wrapping their type in a {@code ["null", <type>]} union with a {@code null} default.
 */
public class FieldToAvro
{
  /**
   * Builds the Avro schema for a table and returns it as pretty-printed JSON.
   *
   * @param namespace             Avro namespace stored on the top-level record; must not be null
   * @param topRecordAvroName     name given to the top-level Avro record; must not be null
   * @param topRecordDatabaseName database name of the table, used for the "doc" and "meta"
   *                              entries; must not be null
   * @param headers               not read by this method; kept so the signature stays unchanged
   * @param topRecordTypeInfo     type description of the table; must not be null
   * @return the schema as a JSON string
   * @throws IllegalArgumentException if any of the required arguments is null
   * @throws RuntimeException         wrapping any exception raised while serializing to JSON
   */
  public String buildAvroSchema(String namespace,
                                String topRecordAvroName,
                                String topRecordDatabaseName,
                                String[][] headers,
                                TableTypeInfo topRecordTypeInfo)
  {
    if (namespace == null)
    {
      throw new IllegalArgumentException("namespace should not be null.");
    }
    if (topRecordAvroName == null)
    {
      throw new IllegalArgumentException("topRecordAvroName should not be null.");
    }
    if (topRecordDatabaseName == null)
    {
      throw new IllegalArgumentException("topRecordDatabaseName should not be null.");
    }
    if (topRecordTypeInfo == null)
    {
      throw new IllegalArgumentException("topRecordTypeInfo should not be null.");
    }

    // Position -1 marks the top-level record: buildMetaString() omits the position for -1.
    FieldInfo fieldInfo = new FieldInfo(topRecordDatabaseName, topRecordTypeInfo, -1);
    Map<String, Object> field = fieldToAvro(fieldInfo, true);

    // Overwrite the (camel-cased database) name with the requested Avro record name
    field.put("name", topRecordAvroName);

    // Add namespace
    field.put("namespace", namespace);

    // Add doc and serialize to JSON
    try
    {
      SimpleDateFormat df = new SimpleDateFormat("MMM dd, yyyy hh:mm:ss a zzz");
      field.put("doc", "Auto-generated Avro schema for " + topRecordDatabaseName +
                       ". Generated at " + df.format(new Date()));

      ObjectMapper mapper = new ObjectMapper();
      JsonFactory factory = new JsonFactory();
      StringWriter writer = new StringWriter();
      JsonGenerator jgen = factory.createJsonGenerator(writer);
      jgen.useDefaultPrettyPrinter();
      mapper.writeValue(jgen, field);
      return writer.getBuffer().toString();
    }
    catch (Exception ex)
    {
      throw new RuntimeException(ex);
    }
  }

  /**
   * Dispatches to the converter matching the runtime type of the field's type info.
   *
   * @param fieldInfo the field to convert
   * @param asSchema  forwarded to {@link #tableOrUserTypeToAvro}; ignored for other types
   * @return the Avro description of the field, or {@code null} if the type info is not a
   *         {@code SimpleTypeInfo}, {@code UserTypeInfo} or {@code CollectionTypeInfo}
   */
  private Map<String, Object> fieldToAvro(FieldInfo fieldInfo, boolean asSchema)
  {
    TypeInfo typeInfo = fieldInfo.getFieldTypeInfo();

    if (typeInfo instanceof SimpleTypeInfo)
    {
      return simpleTypeToAvro(fieldInfo, (SimpleTypeInfo) typeInfo);
    }
    else if (typeInfo instanceof UserTypeInfo) // TableTypeInfo is now a subclass of this
    {
      return tableOrUserTypeToAvro(fieldInfo, (UserTypeInfo) typeInfo, asSchema);
    }
    else if (typeInfo instanceof CollectionTypeInfo)
    {
      return collectionTypeToAvro(fieldInfo, (CollectionTypeInfo) typeInfo);
    }

    return null;
  }

  /**
   * Converts a collection-typed field into a nullable Avro array field.
   *
   * <p>The resulting type is {@code ["null", {"type":"array", "items": <element schema>}]}
   * with a {@code null} default. The field's database metadata is attached to the field
   * itself, like the other converters do, so the element record keeps its own "meta".
   *
   * @param fieldInfo the collection field
   * @param typeInfo  the collection's type info, supplying the element type
   * @return the Avro description of the field
   * @throws IllegalArgumentException if the element type cannot be converted
   */
  private Map<String, Object> collectionTypeToAvro(FieldInfo fieldInfo, CollectionTypeInfo typeInfo)
  {
    Map<String, Object> field = new HashMap<String, Object>();

    // Field name
    String name = SchemaUtils.toCamelCase(fieldInfo.getFieldName());
    field.put("name", name);

    // Field type: the element is described as a stand-alone schema (asSchema == true)
    FieldInfo elementFieldInfo =
        new FieldInfo(typeInfo.getElementTypeInfo().getName(), typeInfo.getElementTypeInfo(), 0);
    Map<String, Object> itemsRecordType = fieldToAvro(elementFieldInfo, true);
    if (itemsRecordType == null)
    {
      // fieldToAvro() returns null for unrecognised types; fail here rather than emit "items": null
      throw new IllegalArgumentException("Unsupported element type for collection field "
                                         + fieldInfo.getFieldName());
    }

    Map<String, Object> arrayType = new HashMap<String, Object>();
    arrayType.put("name", name + "Array");
    arrayType.put("type", "array");
    arrayType.put("items", itemsRecordType);

    field.put("type", nullableUnion(arrayType)); // ["null", { .. arrayType .. }]
    field.put("default", null);

    // Field metadata belongs to the collection field, not to the element record
    field.put("meta", buildFieldMeta(fieldInfo, null));

    return field;
  }

  /**
   * Converts a table or user-defined (record) type into an Avro record description.
   *
   * <p>When {@code asSchema} is true the returned map is itself the record
   * ({@code "type":"record"} plus "fields"). That is the case for the topmost level of the
   * schema and for the "items" descriptor built by {@link #collectionTypeToAvro}; neither gets
   * a "default". Otherwise the returned map is a field whose type is the nullable union
   * {@code ["null", {record}]} with a {@code null} default, and the child fields live on the
   * inner record.
   *
   * @param fieldInfo the field being converted
   * @param typeInfo  the record type, supplying child fields and (for tables) the primary key
   * @param asSchema  whether to emit the record itself rather than a nullable field wrapping it
   * @return the Avro description of the field
   */
  private Map<String, Object> tableOrUserTypeToAvro(FieldInfo fieldInfo,
                                                    UserTypeInfo typeInfo,
                                                    boolean asSchema)
  {
    Map<String, Object> field = new HashMap<String, Object>();

    // Field name
    field.put("name", SchemaUtils.toCamelCase(fieldInfo.getFieldName()));

    // Field type; fieldsDest is the map that will receive the "fields" list
    Map<String, Object> fieldsDest;
    if (asSchema)
    {
      field.put("type", "record");
      fieldsDest = field;
    }
    else
    {
      Map<String, Object> realType = new HashMap<String, Object>();
      realType.put("type", "record"); // inner, curly-brace level ("real" structure)
      realType.put("name", typeInfo.getName());

      field.put("type", nullableUnion(realType)); // outer, square-brackets level (nullability)
      field.put("default", null);
      fieldsDest = realType;
    }

    // Child fields
    List<Map<String, Object>> fields = new ArrayList<Map<String, Object>>();
    for (FieldInfo childField : typeInfo.getFields())
    {
      fields.add(fieldToAvro(childField, false));
    }
    fieldsDest.put("fields", fields);

    // Field metadata; per the original note the primary key is null unless this is a TableTypeInfo
    field.put("meta", buildFieldMeta(fieldInfo, typeInfo.getPrimaryKey()));

    return field;
  }

  /**
   * Converts a primitive-typed field into a nullable Avro field of type
   * {@code ["null", <avro primitive>]} with a {@code null} default.
   *
   * @param fieldInfo the field being converted
   * @param typeInfo  the field's simple type, supplying the Avro primitive type name
   * @return the Avro description of the field
   */
  private Map<String, Object> simpleTypeToAvro(FieldInfo fieldInfo, SimpleTypeInfo typeInfo)
  {
    Map<String, Object> field = new HashMap<String, Object>();

    // Field name
    field.put("name", SchemaUtils.toCamelCase(fieldInfo.getFieldName()));

    // Field default value (for Avro unions, corresponds to _first_ field type in list)
    field.put("default", null);

    // Field type
    String[] type = new String[] { "null", typeInfo.getPrimitiveType().getAvroType() };
    field.put("type", type);

    // Field metadata
    field.put("meta", buildFieldMeta(fieldInfo, null));

    return field;
  }

  /**
   * Builds the two-element union {@code ["null", type]} used to make a field nullable.
   *
   * @param type the non-null branch of the union
   * @return a new list holding "null" followed by {@code type}
   */
  private static List<Object> nullableUnion(Object type)
  {
    List<Object> union = new ArrayList<Object>();
    union.add("null");
    union.add(type);
    return union;
  }

  /**
   * Builds the "meta" string for a field from its database name, position and type name.
   *
   * @param fieldInfo the field whose metadata is recorded
   * @param pk        primary key to record, or null/empty for none
   * @return the metadata string produced by {@link #buildMetaString}
   */
  private String buildFieldMeta(FieldInfo fieldInfo, String pk)
  {
    return buildMetaString(fieldInfo.getFieldName(),
                           fieldInfo.getFieldPosition(),
                           fieldInfo.getFieldTypeInfo().getName(),
                           pk);
  }

  /**
   * Serializes database field metadata into a flat string of {@code key=value;} pairs, e.g.
   * {@code dbFieldName=FOO;dbFieldPosition=3;dbFieldType=NUMBER;}.
   *
   * <p>A plain string is used because, per the original author's note, Avro's
   * {@code getProp()} does not return complex objects; consumers must parse the string
   * themselves.
   *
   * @param dbFieldName     database field name; omitted when null
   * @param dbFieldPosition database field position; omitted when -1
   * @param dbFieldType     database type name; omitted when null
   * @param pk              primary key; omitted when null or empty
   * @return the concatenated pairs, possibly empty
   */
  private String buildMetaString(String dbFieldName, int dbFieldPosition, String dbFieldType, String pk)
  {
    StringBuilder meta = new StringBuilder();

    if (dbFieldName != null)
    {
      meta.append("dbFieldName=").append(dbFieldName).append(';');
    }
    if (dbFieldPosition != -1)
    {
      meta.append("dbFieldPosition=").append(dbFieldPosition).append(';');
    }
    if (dbFieldType != null)
    {
      meta.append("dbFieldType=").append(dbFieldType).append(';');
    }
    if ((null != pk) && (!pk.isEmpty()))
    {
      meta.append("pk=").append(pk).append(';');
    }

    return meta.toString();
  }
}