Package org.apache.hadoop.hive.ql.exec

Source Code of org.apache.hadoop.hive.ql.exec.UnionOperator

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hive.ql.exec;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.UnionDesc;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ReturnObjectInspectorResolver;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

/**
* Union Operator Just forwards. Doesn't do anything itself.
**/
public class UnionOperator extends Operator<UnionDesc> implements Serializable {
  private static final long serialVersionUID = 1L;

  StructObjectInspector[] parentObjInspectors;
  List<? extends StructField>[] parentFields;

  ReturnObjectInspectorResolver[] columnTypeResolvers;
  boolean[] needsTransform;

  ArrayList<Object> outputRow;

  /**
   * UnionOperator will transform the input rows if the inputObjInspectors from
   * different parents are different. If one parent has exactly the same
   * ObjectInspector as the output ObjectInspector, then we don't need to do
   * transformation for that parent. This information is recorded in
   * needsTransform[].
   */
  @Override
  protected void initializeOp(Configuration hconf) throws HiveException {

    int parents = parentOperators.size();
    parentObjInspectors = new StructObjectInspector[parents];
    parentFields = new List[parents];
    for (int p = 0; p < parents; p++) {
      parentObjInspectors[p] = (StructObjectInspector) inputObjInspectors[p];
      parentFields[p] = parentObjInspectors[p].getAllStructFieldRefs();
    }

    // Get columnNames from the first parent
    int columns = parentFields[0].size();
    ArrayList<String> columnNames = new ArrayList<String>(columns);
    for (int c = 0; c < columns; c++) {
      columnNames.add(parentFields[0].get(c).getFieldName());
    }

    // Get outputFieldOIs
    columnTypeResolvers = new ReturnObjectInspectorResolver[columns];
    for (int c = 0; c < columns; c++) {
      columnTypeResolvers[c] = new ReturnObjectInspectorResolver(true);
    }

    for (int p = 0; p < parents; p++) {
      assert (parentFields[p].size() == columns);
      for (int c = 0; c < columns; c++) {
        if (!columnTypeResolvers[c].updateForUnionAll(parentFields[p].get(c)
            .getFieldObjectInspector())) {
          // checked in SemanticAnalyzer. Should not happen
          throw new HiveException("Incompatible types for union operator");
        }
      }
    }

    ArrayList<ObjectInspector> outputFieldOIs = new ArrayList<ObjectInspector>(
        columns);
    for (int c = 0; c < columns; c++) {
      // can be null for void type
      ObjectInspector oi = columnTypeResolvers[c].get();
      outputFieldOIs.add(oi == null ? parentFields[0].get(c).getFieldObjectInspector() : oi);
    }

    // create output row ObjectInspector
    outputObjInspector = ObjectInspectorFactory
        .getStandardStructObjectInspector(columnNames, outputFieldOIs);
    outputRow = new ArrayList<Object>(columns);
    for (int c = 0; c < columns; c++) {
      outputRow.add(null);
    }

    // whether we need to do transformation for each parent
    needsTransform = new boolean[parents];
    for (int p = 0; p < parents; p++) {
      // Testing using != is good enough, because we use ObjectInspectorFactory
      // to
      // create ObjectInspectors.
      needsTransform[p] = (inputObjInspectors[p] != outputObjInspector);
      if (needsTransform[p]) {
        LOG.info("Union Operator needs to transform row from parent[" + p
            + "] from " + inputObjInspectors[p] + " to " + outputObjInspector);
      }
    }
    initializeChildren(hconf);
  }

  @Override
  public synchronized void processOp(Object row, int tag) throws HiveException {

    StructObjectInspector soi = parentObjInspectors[tag];
    List<? extends StructField> fields = parentFields[tag];

    if (needsTransform[tag]) {
      for (int c = 0; c < fields.size(); c++) {
        outputRow.set(c, columnTypeResolvers[c].convertIfNecessary(soi
            .getStructFieldData(row, fields.get(c)), fields.get(c)
            .getFieldObjectInspector()));
      }
      forward(outputRow, outputObjInspector);
    } else {
      forward(row, inputObjInspectors[tag]);
    }
  }

  /**
   * @return the name of the operator
   */
  @Override
  public String getName() {
    return getOperatorName();
  }

  static public String getOperatorName() {
    return "UNION";
  }

  @Override
  public OperatorType getType() {
    return OperatorType.UNION;
  }

  /**
   * Union operators are not allowed either before or after a explicit mapjoin hint.
   * Note that, the same query would just work without the mapjoin hint (by setting
   * hive.auto.convert.join to true).
   **/
  @Override
  public boolean opAllowedBeforeMapJoin() {
    return false;
  }

  @Override
  public boolean opAllowedAfterMapJoin() {
    return false;
  }

  @Override
  public boolean opAllowedBeforeSortMergeJoin() {
    // If a union occurs before the sort-merge join, it is not useful to convert the the
    // sort-merge join to a mapjoin. The number of inputs for the union is more than 1 so
    // it would be difficult to figure out the big table for the mapjoin.
    return false;
  }
}
TOP

Related Classes of org.apache.hadoop.hive.ql.exec.UnionOperator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.