Package org.kitesdk.morphline.protobuf

Source Code of org.kitesdk.morphline.protobuf.ExtractProtobufPathsBuilder

/*
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.morphline.protobuf;

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.kitesdk.morphline.api.Command;
import org.kitesdk.morphline.api.CommandBuilder;
import org.kitesdk.morphline.api.MorphlineCompilationException;
import org.kitesdk.morphline.api.MorphlineContext;
import org.kitesdk.morphline.api.MorphlineRuntimeException;
import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.AbstractCommand;
import org.kitesdk.morphline.base.Configs;
import org.kitesdk.morphline.base.Fields;
import org.kitesdk.morphline.base.Validator;

import com.google.common.base.Preconditions;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ListMultimap;
import com.typesafe.config.Config;

/**
* Command that uses zero or more protocol buffer path expressions to extract
* values from a protocol buffer object.
*
* The protocol buffer input object is expected to be contained in the
* {@link Fields#ATTACHMENT_BODY}
*
* Each expression consists of a record output field name (on the left side of
* the colon ':') as well as zero or more path steps (on the right hand side),
* each path step separated by a '/' slash. Protocol buffer arrays are traversed
* with the '[]' notation.
*
* The result of a path expression is a list of objects, each of which is added
* to the given record output field.
*
* ExtractMethod option says what will results path to object. <li>toByteArray -
* pass protocol buffer bytes to next command <li>toString - pass toString value
* to next command <li>none - pass protocol buffer object to next command
*/
public final class ExtractProtobufPathsBuilder implements CommandBuilder {

  @Override
  public Command build(Config config, Command parent, Command child, MorphlineContext context) {
    return new ExtractProtobufPaths(this, config, parent, child, context);
  }

  @Override
  public Collection<String> getNames() {
    return Collections.singletonList("extractProtobufPaths");
  }

  // /////////////////////////////////////////////////////////////////////////////
  // Nested classes:
  // /////////////////////////////////////////////////////////////////////////////
  private static final class ExtractProtobufPaths extends AbstractCommand {

    private static final String OBJECT_EXTRACT_METHOD = "objectExtractMethod";

    private static final String ENUM_EXTRACT_METHOD = "enumExtractMethod";

    private static final String LIST = "List";
    private static final String GET = "get";
    private static final String HAS = "has";

    private static final String ARRAY_TOKEN = "[]";
    private static final Set<Class<?>> WRAPPERS;
    static {
      Set<Class<?>> ret = new HashSet<Class<?>>();
      ret.add(Boolean.class);
      ret.add(Character.class);
      ret.add(Byte.class);
      ret.add(Short.class);
      ret.add(Integer.class);
      ret.add(Long.class);
      ret.add(Float.class);
      ret.add(Double.class);
      ret.add(String.class);
      ret.add(byte[].class);
      WRAPPERS = Collections.<Class<?>> unmodifiableSet(ret);
    }
    private final ObjectExtractMethods objectExtractMethod;
    private final EnumExtractMethods enumExtractMethod;

    private final Map<String, Collection<String>> stepMap;
    private final Map<Class<?>, Method> objectExtractMethods = new HashMap<Class<?>, Method>();
    private final Map<Class<?>, Method> enumExtractMethods = new HashMap<Class<?>, Method>();
    private final Map<Class<?>, Map<String, Method>> propertyGetters = new HashMap<Class<?>, Map<String, Method>>();
    private final Map<Class<?>, Map<String, Method>> propertyCheckers = new HashMap<Class<?>, Map<String, Method>>();

    public ExtractProtobufPaths(CommandBuilder builder, Config config, Command parent, Command child,
        MorphlineContext context) {

      super(builder, config, parent, child, context);
      ListMultimap<String, String> stepMultiMap = ArrayListMultimap.create();

      this.objectExtractMethod = new Validator<ObjectExtractMethods>().validateEnum(config,
          getConfigs().getString(config, OBJECT_EXTRACT_METHOD, ObjectExtractMethods.toByteArray.name()),
          ObjectExtractMethods.class);
      this.enumExtractMethod = new Validator<EnumExtractMethods>()
          .validateEnum(config, getConfigs().getString(config, ENUM_EXTRACT_METHOD, EnumExtractMethods.name.name()),
              EnumExtractMethods.class);

      Config paths = getConfigs().getConfig(config, "paths");
      for (Map.Entry<String, Object> entry : new Configs().getEntrySet(paths)) {
        String fieldName = entry.getKey();
        String path = entry.getValue().toString().trim();
        if (path.contains("//")) {
          throw new MorphlineCompilationException("No support for descendant axis available yet", config);
        }
        if (path.startsWith("/")) {
          path = path.substring(1);
        }
        if (path.endsWith("/")) {
          path = path.substring(0, path.length() - 1);
        }
        path = path.trim();
        for (String step : path.split("/")) {
          step = step.trim();
          if (step.length() > ARRAY_TOKEN.length() && step.endsWith(ARRAY_TOKEN)) {
            step = step.substring(0, step.length() - ARRAY_TOKEN.length());
            stepMultiMap.put(fieldName, normalize(step));
            stepMultiMap.put(fieldName, ARRAY_TOKEN);
          } else {
            stepMultiMap.put(fieldName, normalize(step));
          }
        }
      }
      this.stepMap = stepMultiMap.asMap();
      LOG.debug("stepMap: {}", stepMap);
      validateArguments();
    }

    @Override
    protected boolean doProcess(Record inputRecord) {
      Object datum = inputRecord.getFirstValue(Fields.ATTACHMENT_BODY);
      Preconditions.checkNotNull(datum);
      Record outputRecord = inputRecord.copy();

      for (Map.Entry<String, Collection<String>> entry : stepMap.entrySet()) {
        String fieldName = entry.getKey();
        List<String> steps = (List<String>) entry.getValue();
        try {
          extractPath(datum, fieldName, steps, outputRecord, 0);
        } catch (Exception e) {
          LOG.error(e.getMessage(), e);
          return false;
        }
      }

      // pass record to next command in chain:
      return getChild().process(outputRecord);
    }

    private void extractPath(Object datum, String fieldName, List<String> steps, Record record, int level)
        throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException,
        InvocationTargetException {
      if (level >= steps.size()) {
        return;
      }
      boolean isLeaf = (level + 1 == steps.size());
      String step = steps.get(level);
      if (ARRAY_TOKEN == step) {
        if (!List.class.isAssignableFrom(datum.getClass())) {
          throw new MorphlineRuntimeException("Datum " + datum + " is not a list. Steps: " + steps + " Level: " + level);
        }
        if (isLeaf) {
          resolve(datum, record, fieldName);
        } else {
          Iterator<?> iter = ((List<?>) datum).iterator();
          while (iter.hasNext()) {
            extractPath(iter.next(), fieldName, steps, record, level + 1);
          }
        }
      } else if (hasProperty(datum, step)) {
        Object value = readProperty(datum, step);
        if (value != null) {
          if (isLeaf) {
            resolve(value, record, fieldName);
          } else {
            extractPath(value, fieldName, steps, record, level + 1);
          }
        }
      }
    }

    private Object extractValue(Object datum, Class<?> clazz) throws IllegalAccessException, IllegalArgumentException,
        InvocationTargetException, NoSuchMethodException, SecurityException {

      boolean isEnum = clazz.isEnum();

      if (isEnum && enumExtractMethod == EnumExtractMethods.none) {
        return datum;
      } else if (objectExtractMethod == ObjectExtractMethods.none) {
        return datum;
      }

      Map<Class<?>, Method> extractMethods;
      String methodName;
      if (isEnum) {
        extractMethods = enumExtractMethods;
        methodName = enumExtractMethod.name();
      } else {
        extractMethods = objectExtractMethods;
        methodName = objectExtractMethod.name();
      }

      Method extractMethod = extractMethods.get(clazz);
      if (extractMethod == null) {
        extractMethod = clazz.getMethod(methodName);
        extractMethods.put(clazz, extractMethod);
      }
      return extractMethod.invoke(datum);
    }

    private void findGetAndCheckMethods(String propertyName, Class<?> clazz) {
      Map<String, Method> getters = propertyGetters.get(clazz);
      Map<String, Method> checkers = propertyCheckers.get(clazz);
      if (getters == null) {
        getters = new HashMap<String, Method>();
        checkers = new HashMap<String, Method>();
        propertyGetters.put(clazz, getters);
        propertyCheckers.put(clazz, checkers);
      }
      if (!getters.containsKey(propertyName)) {
        String uppercaseProperty = new StringBuilder(propertyName.substring(0, 1).toUpperCase(Locale.ROOT)).append(
            propertyName.substring(1)).toString();

        String checkerName = new StringBuilder(HAS).append(uppercaseProperty).toString();
        try {
          checkers.put(propertyName, clazz.getMethod(checkerName));
        } catch (Exception e) {
          checkers.put(propertyName, null);
        }

        StringBuilder getterName = new StringBuilder(GET).append(uppercaseProperty);
        try {
          getters.put(propertyName, clazz.getMethod(getterName.toString()));
        } catch (NoSuchMethodException e) {
          getterName.append(LIST);
          try {
            getters.put(propertyName, clazz.getMethod(getterName.toString()));
          } catch (Exception e1) {
            throw new MorphlineRuntimeException("Property '" + propertyName + "' does not exist in class '"
                + clazz.getName() + "'.");
          }
        }
      }
    }

    private Method getCheckMethod(String propertyName, Class<?> clazz) {
      if (!propertyCheckers.containsKey(clazz) || !propertyCheckers.get(clazz).containsKey(propertyName)) {
        findGetAndCheckMethods(propertyName, clazz);
      }
      return propertyCheckers.get(clazz).get(propertyName);
    }

    private Method getReadMethod(String propertyName, Class<?> clazz) {
      if (!propertyGetters.containsKey(clazz) || !propertyGetters.get(clazz).containsKey(propertyName)) {
        findGetAndCheckMethods(propertyName, clazz);
      }
      return propertyGetters.get(clazz).get(propertyName);
    }

    private boolean hasProperty(Object datum, String propertyName) throws IllegalAccessException,
        IllegalArgumentException, InvocationTargetException {
      Method checker = getCheckMethod(propertyName, datum.getClass());
      return checker == null || Boolean.TRUE.equals(checker.invoke(datum));
    }

    private boolean isCommonType(Class<?> clazz) {
      return clazz.isPrimitive() || isWrapper(clazz);
    }

    private boolean isWrapper(Class<?> clazz) {
      return WRAPPERS.contains(clazz);
    }

    private String normalize(String step) { // for faster subsequent query
                                            // performance
      return ARRAY_TOKEN.equals(step) ? ARRAY_TOKEN : step;
    }

    private Object readProperty(Object datum, String propertyName) throws IllegalAccessException,
        IllegalArgumentException, InvocationTargetException {
      Method getter = getReadMethod(propertyName, datum.getClass());
      return getter.invoke(datum);
    }

    private void resolve(Object datum, Record record, String fieldName) throws NoSuchMethodException,
        SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException {
      if (datum == null) {
        return;
      }

      Class<?> clazz = datum.getClass();
      if (isCommonType(clazz)) {
        record.put(fieldName, datum);
      } else if (List.class.isAssignableFrom(clazz)) {
        for (Object o : (List<?>) datum) {
          resolve(o, record, fieldName);
        }
      } else {
        Object extracted = extractValue(datum, clazz);
        record.put(fieldName, extracted);
      }
    }

    private static enum EnumExtractMethods {
      name, toString, none
    }

    private static enum ObjectExtractMethods {
      toByteArray, toString, none
    }
  }

}
TOP

Related Classes of org.kitesdk.morphline.protobuf.ExtractProtobufPathsBuilder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.