// Package com.linkedin.databus.util

// Source code of com.linkedin.databus.util.FieldToAvro

package com.linkedin.databus.util;
/*
*
* Copyright 2013 LinkedIn Corp. All rights reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/


import java.io.StringWriter;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonGenerator;
import org.codehaus.jackson.map.ObjectMapper;

/**
* Generate an Avro schema to describe the fields of a database table.
*/
public class FieldToAvro
{

  public String buildAvroSchema(String namespace,
                                String topRecordAvroName,
                                String topRecordDatabaseName,
                                String[][] headers,
                                TableTypeInfo topRecordTypeInfo)
  {
    if (namespace == null)
      throw new IllegalArgumentException("namespace should not be null.");
    if (topRecordAvroName == null)
      throw new IllegalArgumentException("topRecordAvroName should not be null.");
    if (topRecordDatabaseName == null)
      throw new IllegalArgumentException("topRecordDatabaseName should not be null.");
    if (topRecordTypeInfo == null)
      throw new IllegalArgumentException("topRecordTypeInfo should not be null.");

    FieldInfo fieldInfo = new FieldInfo(topRecordDatabaseName, topRecordTypeInfo, -1);
    Map<String, Object> field = fieldToAvro(fieldInfo, true);

    // Overwrite the name with the nice Java record name
    field.put("name", topRecordAvroName);

    // Add namespace
    field.put("namespace", namespace);

    // Add doc and serialize to JSON
    try
    {
      SimpleDateFormat df = new SimpleDateFormat("MMM dd, yyyy hh:mm:ss a zzz");
      field.put("doc", "Auto-generated Avro schema for " + topRecordDatabaseName +
                       ". Generated at " + df.format(new Date(System.currentTimeMillis())));

      ObjectMapper mapper = new ObjectMapper();
      JsonFactory factory = new JsonFactory();
      StringWriter writer = new StringWriter();
      JsonGenerator jgen = factory.createJsonGenerator(writer);
      jgen.useDefaultPrettyPrinter();
      mapper.writeValue(jgen, field);
      return writer.getBuffer().toString();
    }
    catch(Exception ex)
    {
      throw new RuntimeException(ex);
    }
  }

  private Map<String,Object> fieldToAvro(FieldInfo fieldInfo, boolean asSchema)
  {
    TypeInfo typeInfo = fieldInfo.getFieldTypeInfo();
    //System.out.println(fieldInfo.getFieldName() + ":" + typeInfo.getClass().getSimpleName() + " --> " + asSchema);

    if (typeInfo instanceof SimpleTypeInfo)
    {
      return simpleTypeToAvro(fieldInfo, (SimpleTypeInfo) typeInfo);
    }
    else if (typeInfo instanceof UserTypeInfo// TableTypeInfo is now a subclass of this
    {
      return tableOrUserTypeToAvro(fieldInfo, (UserTypeInfo) typeInfo, asSchema);
    }
    else if (typeInfo instanceof CollectionTypeInfo)
    {
      return collectionTypeToAvro(fieldInfo, (CollectionTypeInfo)typeInfo);
    }
    return null;
  }

  private Map<String,Object> collectionTypeToAvro(FieldInfo fieldInfo, CollectionTypeInfo typeInfo)
  {
    Map<String,Object> field = new HashMap<String,Object>();

    // Field name
    String name = SchemaUtils.toCamelCase(fieldInfo.getFieldName());
    field.put("name", name);

    // Field type
    FieldInfo elementFieldInfo = new FieldInfo(typeInfo.getElementTypeInfo().getName(), typeInfo.getElementTypeInfo(), 0);
    Map<String, Object> itemsRecordType = fieldToAvro(elementFieldInfo, true);

    Map<String, Object> arrayType = new HashMap<String, Object>();
    arrayType.put("name", name + "Array");
    arrayType.put("type", "array");
    arrayType.put("items", itemsRecordType);

    List<Object> nullableType = new ArrayList<Object>()// ["null", { .. arrayType .. }]
    nullableType.add("null");
    nullableType.add(arrayType);

    field.put("type", nullableType);
    field.put("default", null);

    // Field metadata
    String dbFieldName = fieldInfo.getFieldName();
    int dbFieldPosition = fieldInfo.getFieldPosition();
    String dbFieldType  = fieldInfo.getFieldTypeInfo().getName();
    String meta = buildMetaString(dbFieldName, dbFieldPosition, dbFieldType, null);
    itemsRecordType.put("meta", meta);

    return field;
  }

  private Map<String,Object> tableOrUserTypeToAvro(FieldInfo fieldInfo,
                                                   UserTypeInfo typeInfo,
                                                   boolean asSchema)
  {
    Map<String,Object> field = new HashMap<String,Object>();

    // Field name
    String name = SchemaUtils.toCamelCase(fieldInfo.getFieldName());
    field.put("name", name);

    // Field type
    Map<String,Object> realType = new HashMap<String, Object>();
    // check if we are a "top-level" record or not
    Map<String,Object> fieldsDest = asSchema ? field : realType;
    if (asSchema)
    {
      // asSchema is true only for the very topmost level of the schema (type = record; should never be null)
      // and for the "items" descriptor in collectionTypeToAvro() (aggregate descriptor of sub-fields; latter
      // may be null individually, but descriptor presumably never can be).  Ergo, "default":null makes sense
      // only in the other half of this conditional.
      field.put("type", "record");
    }
    else
    {
      realType.put("type", "record");                       // inner, curly-brace level ("real" structure)
      realType.put("name", typeInfo.getName());
      List<Object> nullableType = new ArrayList<Object>()// outer, square-brackets level (solely for nullability)
      nullableType.add("null");
      nullableType.add(realType);
      field.put("type", nullableType);
      field.put("default", null); // field default value:  only for this level?
    }

    // Child fields
    List<Map<String, Object>> fields = new ArrayList<Map<String, Object>>();
    for (FieldInfo childField : typeInfo.getFields())
    {
      Map<String, Object> childFieldMap = fieldToAvro(childField, false);
      fields.add(childFieldMap);
    }
    fieldsDest.put("fields", fields);

    // Field metadata
    String dbFieldName = fieldInfo.getFieldName();
    int dbFieldPosition = fieldInfo.getFieldPosition();
    String dbFieldType = fieldInfo.getFieldTypeInfo().getName();
    String pk = typeInfo.getPrimaryKey()// null unless TableTypeInfo (== top-level table)
    String meta = buildMetaString(dbFieldName, dbFieldPosition, dbFieldType, pk);
    field.put("meta", meta);

    // Return the Map for this field
    return field;
  }

  private Map<String, Object> simpleTypeToAvro(FieldInfo fieldInfo, SimpleTypeInfo typeInfo)
  {
    Map<String,Object> field = new HashMap<String,Object>();

    // Field name
    String name = SchemaUtils.toCamelCase(fieldInfo.getFieldName());
    field.put("name", name);

    // Field default value (for Avro unions, corresponds to _first_ field type in list)
    field.put("default", null);

    // Field type
    String[] type = new String[] { "null", typeInfo.getPrimitiveType().getAvroType() };
    field.put("type", type);

    // Field metadata
    String dbFieldName = fieldInfo.getFieldName();
    int dbFieldPosition = fieldInfo.getFieldPosition();
    String dbFieldType = fieldInfo.getFieldTypeInfo().getName();

    String meta = buildMetaString(dbFieldName, dbFieldPosition, dbFieldType, null);
    field.put("meta", meta);

    // Return the Map for this field
    return field;
  }

  private String buildMetaString(String dbFieldName, int dbFieldPosition, String dbFieldType, String pk)
  {
    // Metadata for database field name and position.
    // Have to store this as a serialized String, since Avro's "getProp()" method will not return
    // a complex object. We still write it in JSON, but it will be escaped and put in a String that
    // we have to deserialize later.
    StringBuilder meta = new StringBuilder();
    if(dbFieldName != null)
    {
      meta.append("dbFieldName=" + dbFieldName + ";");
    }

    if(dbFieldPosition != -1)
    {
      meta.append("dbFieldPosition=" + dbFieldPosition + ";");
    }

    if (dbFieldType != null)
    {
      meta.append("dbFieldType=" + dbFieldType + ";");
    }

    if ((null != pk) && (!pk.isEmpty()))
    {
      meta.append("pk=" + pk + ";");
    }

    return meta.toString();
  }

}
// TOP
//
// Related Classes of com.linkedin.databus.util.FieldToAvro
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.