Source Code of org.kitesdk.data.spi.ColumnMappingParser

/**
* Copyright 2014 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.data.spi;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.fasterxml.jackson.databind.node.TextNode;
import com.google.common.base.Splitter;
import com.google.common.collect.Maps;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.util.Iterator;
import java.util.Map;
import org.apache.avro.Schema;
import org.kitesdk.data.ColumnMapping;
import org.kitesdk.data.DatasetIOException;
import org.kitesdk.data.FieldMapping;
import org.kitesdk.data.ValidationException;

/**
* Parser for {@link ColumnMapping}. Parses the mapping annotation embedded in
* Avro schemas as well as the standalone ColumnMapping JSON format. An example
* of the JSON format:
*
* <pre>
* [
*   { "source": "field1", "type": "column", "value": "cf:field1" },
*   { "source": "field2", "type": "keyAsColumn", "value": "kac:" },
*   { "source": "field3", "type": "occVersion" }
* ]
* </pre>
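*
* A minimal usage sketch, assuming the JSON above is held in a String named
* {@code json} (the variable name is illustrative):
*
* <pre>
* ColumnMapping mapping = ColumnMappingParser.parse(json);
* String asJson = ColumnMappingParser.toString(mapping, true);
* </pre>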
*
*/
public class ColumnMappingParser {

  // name of the json node when embedded in a schema
  private static final String MAPPING = "mapping";

  // property constants
  private static final String TYPE = "type";
  private static final String SOURCE = "source";
  private static final String FAMILY = "family";
  private static final String QUALIFIER = "qualifier";
  private static final String PREFIX = "prefix";
  private static final String VALUE = "value";

  private static final Splitter VALUE_SPLITTER = Splitter.on(":").limit(2);

  /**
   * Parses the Mapping Descriptor as a JSON string.
   *
   * @param mappingDescriptor
   *          The mapping descriptor as a JSON string
   * @return ColumnMapping
   */
  public static ColumnMapping parse(String mappingDescriptor) {
    return buildColumnMapping(JsonUtil.parse(mappingDescriptor));
  }

  /**
   * Parses the Mapping Descriptor from a File.
   *
   * @param file
   *          The File that contains the Mapping Descriptor in JSON format.
   * @return ColumnMapping.
   */
  public static ColumnMapping parse(File file) {
    return buildColumnMapping(JsonUtil.parse(file));
  }

  /**
   * Parses the Mapping Descriptor from an input stream.
   *
   * @param in
   *          The input stream that contains the Mapping Descriptor in JSON
   *          format.
   * @return ColumnMapping.
   */
  public static ColumnMapping parse(InputStream in) {
    return buildColumnMapping(JsonUtil.parse(in));
  }

  public static boolean hasEmbeddedColumnMapping(Schema schema) {
    return schema.getJsonProp(MAPPING) != null;
  }

  public static Schema removeEmbeddedMapping(Schema schema) {
    // TODO: avoid embedding mappings in the schema
    // Avro considers Props read-only and uses an older Jackson version
    // Parse the Schema as a String because Avro uses com.codehaus.jackson
    ObjectNode schemaJson = JsonUtil.parse(schema.toString(), ObjectNode.class);
    schemaJson.remove(MAPPING);
    return new Schema.Parser().parse(schemaJson.toString());
  }

  public static ColumnMapping parseFromSchema(Schema schema) {
    // parse the String because Avro uses com.codehaus.jackson
    return parse(schema.getJsonProp(MAPPING).toString());
  }

  public static boolean hasEmbeddedFieldMappings(Schema schema) {
    if (Schema.Type.RECORD == schema.getType()) {
      for (Schema.Field field : schema.getFields()) {
        if (field.getJsonProp(MAPPING) != null) {
          return true;
        }
      }
    }
    return false;
  }

  public static ColumnMapping parseFromSchemaFields(Schema schema) {
    if (Schema.Type.RECORD == schema.getType()) {
      ColumnMapping.Builder builder = new ColumnMapping.Builder();
      for (Schema.Field field : schema.getFields()) {
        if (field.getJsonProp(MAPPING) != null) {
          // parse the String because Avro uses com.codehaus.jackson
          builder.fieldMapping(parseFieldMapping(field.name(),
              JsonUtil.parse(field.getJsonProp(MAPPING).toString())));
        }
      }
      return builder.build();
    }
    throw new IllegalArgumentException(
        "Cannot parse field-level mappings from non-Record");
  }

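  /**
   * Embeds a {@link ColumnMapping} in a copy of the schema under the
   * "mapping" property. A minimal round-trip sketch; the field names "id"
   * and "payload" are illustrative and assumed to exist in {@code schema}:
   *
   * <pre>
   * ColumnMapping mapping = new ColumnMapping.Builder()
   *     .fieldMapping(FieldMapping.key("id"))
   *     .fieldMapping(FieldMapping.column("payload", "cf", "payload"))
   *     .build();
   * Schema annotated = ColumnMappingParser.embedColumnMapping(schema, mapping);
   * ColumnMapping recovered = ColumnMappingParser.parseFromSchema(annotated);
   * </pre>
   */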
  public static Schema embedColumnMapping(Schema schema, ColumnMapping mapping) {
    // TODO: avoid embedding mappings in the schema
    // Avro considers Props read-only and uses an older Jackson version
    // Parse the Schema as a String because Avro uses com.codehaus.jackson
    ObjectNode schemaJson = JsonUtil.parse(schema.toString(), ObjectNode.class);
    schemaJson.set(MAPPING, toJson(mapping));
    return new Schema.Parser().parse(schemaJson.toString());
  }

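  /**
   * Collects field-level key mappings from a record schema, indexed by the
   * integer stored in each mapping's "value" property. For example, a field
   * annotated as below (the index is illustrative) is returned under key 0:
   *
   * <pre>
   * "mapping": { "type": "key", "value": "0" }
   * </pre>
   */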
  public static Map<Integer, FieldMapping> parseKeyMappingsFromSchemaFields(
      Schema schema) {
    Map<Integer, FieldMapping> keyMappings = Maps.newHashMap();
    if (Schema.Type.RECORD == schema.getType()) {
      for (Schema.Field field : schema.getFields()) {
        if (field.getJsonProp(MAPPING) != null) {
          // parse the String because Avro uses com.codehaus.jackson
          JsonNode mappingNode = JsonUtil.parse(
              field.getJsonProp(MAPPING).toString());
          FieldMapping fm = parseFieldMapping(field.name(), mappingNode);
          if (FieldMapping.MappingType.KEY == fm.getMappingType() &&
              mappingNode.has(VALUE)) {
            Integer index = mappingNode.get(VALUE).asInt();
            keyMappings.put(index, fm);
          }
        }
      }
      return keyMappings;
    }
    throw new IllegalArgumentException(
        "Cannot parse field-level mappings from non-Record");
  }

  /**
   * Parses the FieldMapping from an annotated schema field.
   *
   * @param mappingNode
   *          The value of the "mapping" node
   * @return FieldMapping
   */
  public static FieldMapping parseFieldMapping(JsonNode mappingNode) {
    ValidationException.check(mappingNode.isObject(),
        "A column mapping must be a JSON record");

    ValidationException.check(mappingNode.has(SOURCE),
        "Column mappings must have a %s.", SOURCE);
    String source = mappingNode.get(SOURCE).asText();

    return parseFieldMapping(source, mappingNode);
  }

  /**
   * Parses the FieldMapping from an annotated schema field.
   *
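   * A minimal sketch of parsing a single field-level annotation; the field
   * name and column here are illustrative:
   *
   * <pre>
   * JsonNode node = JsonUtil.parse(
   *     "{ \"type\": \"column\", \"value\": \"cf:field1\" }");
   * FieldMapping fm = ColumnMappingParser.parseFieldMapping("field1", node);
   * // equivalent to FieldMapping.column("field1", "cf", "field1")
   * </pre>
   *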
   * @param source
   *          The source field name for this mapping
   * @param mappingNode
   *          The value of the "mapping" node
   * @return FieldMapping
   */
  public static FieldMapping parseFieldMapping(String source, JsonNode mappingNode) {
    ValidationException.check(mappingNode.isObject(),
        "A column mapping must be a JSON record");

    ValidationException.check(mappingNode.has(TYPE),
        "Column mappings must have a %s.", TYPE);
    String type = mappingNode.get(TYPE).asText();

    // return easy cases
    if ("occVersion".equals(type)) {
      return FieldMapping.version(source);
    } else if ("key".equals(type)) {
      return FieldMapping.key(source);
    }

    String family = null;
    String qualifier = null;
    String prefix = null;

    // for backward-compatibility, check for "value": "fam:qual"
    if (mappingNode.has(VALUE)) {
      // avoids String#split because of odd cases, like ":".split(":")
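      // e.g. VALUE_SPLITTER.split(":") yields ["", ""], while ":".split(":")
      // yields a zero-length array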
      String value = mappingNode.get(VALUE).asText();
      Iterator<String> values = VALUE_SPLITTER.split(value).iterator();
      if (values.hasNext()) {
        family = values.next();
      }
      if (values.hasNext()) {
        if ("keyAsColumn".equals(type)) {
          prefix = values.next();
          if (prefix.isEmpty()) {
            prefix = null;
          }
        } else {
          qualifier = values.next();
        }
      }
    }

    // replace any existing values with explicit family and qualifier
    if (mappingNode.has(FAMILY)) {
      family = mappingNode.get(FAMILY).textValue();
    }
    if (mappingNode.has(QUALIFIER)) {
      qualifier = mappingNode.get(QUALIFIER).textValue();
    }

    if ("column".equals(type)) {
      ValidationException.check(family != null && !family.isEmpty(),
          "Column mapping %s must have a %s", source, FAMILY);
      ValidationException.check(qualifier != null && !qualifier.isEmpty(),
          "Column mapping %s must have a %s", source, QUALIFIER);
      return FieldMapping.column(source, family, qualifier);

    } else if ("keyAsColumn".equals(type)) {
      ValidationException.check(family != null && !family.isEmpty(),
          "Column mapping %s must have a %s", source, FAMILY);
      ValidationException.check(qualifier == null,
          "Key-as-column mapping %s cannot have a %s", source, QUALIFIER);
      if (mappingNode.has(PREFIX)) {
        prefix = mappingNode.get(PREFIX).asText();
        if (prefix.isEmpty()) {
          prefix = null;
        }
      }
      return FieldMapping.keyAsColumn(source, family, prefix);

    } else if ("counter".equals(type)) {
      ValidationException.check(family != null && !family.isEmpty(),
          "Counter mapping %s must have a %s", source, FAMILY);
      ValidationException.check(qualifier != null && !qualifier.isEmpty(),
          "Counter mapping %s must have a %s", source, QUALIFIER);
      return FieldMapping.counter(source, family, qualifier);

    } else {
      throw new ValidationException("Invalid mapping type: " + type);
    }
  }

  private static ColumnMapping buildColumnMapping(JsonNode node) {
    ValidationException.check(node.isArray(),
        "Must be a JSON array of column mappings");

    ColumnMapping.Builder builder = new ColumnMapping.Builder();
    for (Iterator<JsonNode> it = node.elements(); it.hasNext();) {
      builder.fieldMapping(parseFieldMapping(it.next()));
    }
    return builder.build();
  }

  private static JsonNode toJson(FieldMapping fm) {
    ObjectNode fieldMapping = JsonNodeFactory.instance.objectNode();
    fieldMapping.set(SOURCE, TextNode.valueOf(fm.getFieldName()));
    switch (fm.getMappingType()) {
      case KEY:
        fieldMapping.set(TYPE, TextNode.valueOf("key"));
        break;
      case KEY_AS_COLUMN:
        fieldMapping.set(TYPE, TextNode.valueOf("keyAsColumn"));
        fieldMapping.set(FAMILY, TextNode.valueOf(fm.getFamilyAsString()));
        if (fm.getPrefix() != null) {
          fieldMapping.set(PREFIX, TextNode.valueOf(fm.getPrefix()));
        }
        break;
      case COLUMN:
        fieldMapping.set(TYPE, TextNode.valueOf("column"));
        fieldMapping.set(FAMILY, TextNode.valueOf(fm.getFamilyAsString()));
        fieldMapping.set(QUALIFIER, TextNode.valueOf(fm.getQualifierAsString()));
        break;
      case COUNTER:
        fieldMapping.set(TYPE, TextNode.valueOf("counter"));
        fieldMapping.set(FAMILY, TextNode.valueOf(fm.getFamilyAsString()));
        fieldMapping.set(QUALIFIER, TextNode.valueOf(fm.getQualifierAsString()));
        break;
      case OCC_VERSION:
        fieldMapping.set(TYPE, TextNode.valueOf("occVersion"));
        break;
      default:
        throw new ValidationException(
            "Unknown mapping type: " + fm.getMappingType());
    }
    return fieldMapping;
  }

  public static String toString(FieldMapping mapping) {
    StringWriter writer = new StringWriter();
    JsonGenerator gen;
    try {
      gen = new JsonFactory().createGenerator(writer);
      gen.setCodec(new ObjectMapper());
      gen.writeTree(toJson(mapping));
      gen.close();
    } catch (IOException e) {
      throw new DatasetIOException("Cannot write to JSON generator", e);
    }
    return writer.toString();
  }

  private static JsonNode toJson(ColumnMapping mapping) {
    ArrayNode mappingJson = JsonNodeFactory.instance.arrayNode();
    for (FieldMapping fm : mapping.getFieldMappings()) {
      mappingJson.add(toJson(fm));
    }
    return mappingJson;
  }

  public static String toString(ColumnMapping mapping, boolean pretty) {
    StringWriter writer = new StringWriter();
    JsonGenerator gen;
    try {
      gen = new JsonFactory().createGenerator(writer);
      if (pretty) {
        gen.useDefaultPrettyPrinter();
      }
      gen.setCodec(new ObjectMapper());
      gen.writeTree(toJson(mapping));
      gen.close();
    } catch (IOException e) {
      throw new DatasetIOException("Cannot write to JSON generator", e);
    }
    return writer.toString();
  }
}