Source Code of org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hive.ql.exec.persistence;

import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapper;
import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapperBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

import org.apache.hadoop.io.Writable;

/**
 * The base class for map join keys; also acts as a factory for creating and
 * reading keys, choosing whether the size-optimized, byte-array-backed
 * MapJoinKeyBytes representation can be used.
 */
public abstract class MapJoinKey {
  private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];

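  /**
   * Reads a key from a serialized Writable. If the previous key was byte-based,
   * or there is no previous key, the size-optimized MapJoinKeyBytes representation
   * is tried first; unsupported key shapes fall back to MapJoinKeyObject.
   */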
  @SuppressWarnings("deprecation")
  public static MapJoinKey read(Output output, MapJoinKey key, MapJoinObjectSerDeContext context,
      Writable writable, boolean mayReuseKey) throws SerDeException {
    SerDe serde = context.getSerDe();
    Object obj = serde.deserialize(writable);
    boolean useOptimized = useOptimizedKeyBasedOnPrev(key);
    if (useOptimized || key == null) {
      byte[] structBytes = serializeKey(output, obj, serde.getObjectInspector(), !useOptimized);
      if (structBytes != null) {
        return MapJoinKeyBytes.fromBytes(key, mayReuseKey, structBytes);
      } else if (useOptimized) {
        throw new SerDeException(
            "Failed to serialize " + obj + " even though optimized keys are used");
      }
    }
    MapJoinKeyObject result = mayReuseKey ? (MapJoinKeyObject)key : new MapJoinKeyObject();
    result.read(serde.getObjectInspector(), obj);
    return result;
  }

  private static final HashSet<PrimitiveCategory> SUPPORTED_PRIMITIVES
      = new HashSet<PrimitiveCategory>();
  static {
    // All but decimal.
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.VOID);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.BOOLEAN);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.BYTE);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.SHORT);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.INT);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.LONG);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.FLOAT);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.DOUBLE);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.STRING);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.DATE);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.TIMESTAMP);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.BINARY);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.VARCHAR);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.CHAR);
  }

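  /**
   * Serializes a struct key into LazyBinary bytes for the optimized representation.
   * Returns null if the optimized path cannot be used, e.g. for a non-struct key
   * or, when checkTypes is set, for non-primitive or unsupported field types.
   */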
  private static byte[] serializeKey(Output byteStream,
      Object obj, ObjectInspector oi, boolean checkTypes) throws SerDeException {
    if (null == obj || !(oi instanceof StructObjectInspector)) {
      return null; // not supported
    }
    StructObjectInspector soi = (StructObjectInspector)oi;
    List<? extends StructField> fields = soi.getAllStructFieldRefs();
    int size = fields.size();
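    // The optimized byte-based representation only supports keys with at most
    // 8 fields; larger keys fall back to MapJoinKeyObject.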
    if (size > 8) {
      return null; // not supported
    } else if (size == 0) {
      return EMPTY_BYTE_ARRAY; // shortcut for null keys
    }
    Object[] fieldData = new Object[size];
    List<ObjectInspector> fieldOis = new ArrayList<ObjectInspector>(size);
    for (int i = 0; i < size; ++i) {
      StructField field = fields.get(i);
      ObjectInspector foi = field.getFieldObjectInspector();
      if (checkTypes) {
        if (foi.getCategory() != Category.PRIMITIVE) return null; // not supported
        PrimitiveCategory pc = ((PrimitiveObjectInspector)foi).getPrimitiveCategory();
        if (!SUPPORTED_PRIMITIVES.contains(pc)) return null; // not supported
      }
      fieldData[i] = soi.getStructFieldData(obj, field);
      fieldOis.add(foi);
    }

    return serializeRowCommon(byteStream, fieldData, fieldOis);
  }

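  /**
   * Reads a key from a vectorized hash key wrapper, mirroring read(): the
   * optimized byte-based representation is used when possible, otherwise a
   * MapJoinKeyObject is populated.
   */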
  public static MapJoinKey readFromVector(Output output, MapJoinKey key, VectorHashKeyWrapper kw,
      VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch,
      boolean mayReuseKey) throws HiveException {
    boolean useOptimized = useOptimizedKeyBasedOnPrev(key);
    if (useOptimized || key == null) {
      byte[] structBytes = null;
      try {
        structBytes = serializeVector(output, kw, keyOutputWriters, keyWrapperBatch);
      } catch (SerDeException e) {
        throw new HiveException(e);
      }
      if (structBytes != null) {
        return MapJoinKeyBytes.fromBytes(key, mayReuseKey, structBytes);
      } else if (useOptimized) {
        throw new HiveException(
            "Failed to serialize " + kw + " even though optimized keys are used");
      }
    }
    MapJoinKeyObject result = mayReuseKey ? (MapJoinKeyObject)key : new MapJoinKeyObject();
    result.readFromVector(kw, keyOutputWriters, keyWrapperBatch);
    return result;
  }

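  /**
   * Serializes a vectorized key into LazyBinary bytes, using each writer's
   * object inspector to interpret the corresponding key value.
   */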
  private static byte[] serializeVector(Output byteStream, VectorHashKeyWrapper kw,
      VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch)
          throws HiveException, SerDeException {
    Object[] fieldData = new Object[keyOutputWriters.length];
    List<ObjectInspector> fieldOis = new ArrayList<ObjectInspector>();
    for (int i = 0; i < keyOutputWriters.length; ++i) {
      VectorExpressionWriter writer = keyOutputWriters[i];
      fieldOis.add(writer.getObjectInspector());
      // This is rather convoluted... to simplify for perf, we could call getRawKeyValue
      // instead of getWritableKeyValue, and serialize based on the Java type as opposed to OI.
      fieldData[i] = keyWrapperBatch.getWritableKeyValue(kw, i, writer);
    }
    return serializeRowCommon(byteStream, fieldData, fieldOis);
  }

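  /**
   * Reads a key by evaluating the key expressions against a row. As with read(),
   * the optimized byte-based representation is preferred when the key shape
   * supports it; otherwise a MapJoinKeyObject is populated.
   */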
  public static MapJoinKey readFromRow(Output output, MapJoinKey key, Object row,
      List<ExprNodeEvaluator> fields, List<ObjectInspector> keyFieldsOI, boolean mayReuseKey)
          throws HiveException {
    Object[] fieldObjs = new Object[fields.size()];
    for (int keyIndex = 0; keyIndex < fields.size(); ++keyIndex) {
      fieldObjs[keyIndex] = fields.get(keyIndex).evaluate(row);
    }
    boolean useOptimized = useOptimizedKeyBasedOnPrev(key);
    if (useOptimized || key == null) {
      try {
        byte[] structBytes = serializeRow(output, fieldObjs, keyFieldsOI);
        if (structBytes != null) {
          return MapJoinKeyBytes.fromBytes(key, mayReuseKey, structBytes);
        } else if (useOptimized) {
          throw new HiveException(
              "Failed to serialize " + row + " even though optimized keys are used");
        }
      } catch (SerDeException ex) {
        throw new HiveException("Serialization error", ex);
      }
    }
    MapJoinKeyObject result = mayReuseKey ? (MapJoinKeyObject)key : new MapJoinKeyObject();
    result.readFromRow(fieldObjs, keyFieldsOI);
    return result;
  }

  private static byte[] serializeRow(Output byteStream,
      Object[] fieldData, List<ObjectInspector> fieldOis) throws SerDeException {
    if (fieldData.length > 8) {
      return null; // not supported
    } else if (fieldData.length == 0) {
      return EMPTY_BYTE_ARRAY; // shortcut for null keys
    }
    assert fieldData.length == fieldOis.size();
    return serializeRowCommon(byteStream, fieldData, fieldOis);
  }

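  /**
   * Serializes the key fields as a LazyBinary struct into the output buffer
   * (allocating or resetting it as needed) and returns a copy of the bytes written.
   */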
  private static byte[] serializeRowCommon(Output byteStream,
      Object[] fieldData, List<ObjectInspector> fieldOis) throws SerDeException {
    if (byteStream == null) {
      byteStream = new Output();
    } else {
      byteStream.reset();
    }
    LazyBinarySerDe.serializeStruct(byteStream, fieldData, fieldOis);
    return Arrays.copyOf(byteStream.getData(), byteStream.getCount());
  }

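  // Keys in a single hashtable must be consistent: once an optimized (byte-based)
  // key has been produced, subsequent keys must be byte-based as well, which is
  // why the read methods throw instead of falling back when serialization fails.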
  private static boolean useOptimizedKeyBasedOnPrev(MapJoinKey key) {
    return key instanceof MapJoinKeyBytes;
  }

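  /** Writes this key to the stream using the SerDe from the given context. */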
  public abstract void write(MapJoinObjectSerDeContext context, ObjectOutputStream out)
      throws IOException, SerDeException;

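  /**
   * Returns whether any of the key fields are null, ignoring fields whose join
   * condition is null-safe.
   */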
  public abstract boolean hasAnyNulls(int fieldCount, boolean[] nullsafes);
}