/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hive.ql.exec;

import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader;
import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.hive.ql.io.HiveRecordReader;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.parse.SplitSample;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.objectinspector.DelegatedObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * FetchOperator reads rows directly from a table or its partitions on behalf of FetchTask:
 * it creates splits and record readers for each input path, deserializes every record,
 * converts it to the table-level object inspector, optionally fills in virtual columns,
 * and either returns rows via {@link #getNextRow()} or pushes them into an operator tree
 * via {@link #pushRow()}.
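 *
 * A minimal, illustrative driver loop (it assumes the {@code work} descriptor and the
 * Hadoop {@code conf} have already been built by the caller, for example by FetchTask):
 * <pre>{@code
 *   FetchOperator fetch = new FetchOperator(work, new JobConf(conf));
 *   try {
 *     InspectableObject io;
 *     while ((io = fetch.getNextRow()) != null) {
 *       // io.o is the row object, io.oi the ObjectInspector that describes it
 *     }
 *   } finally {
 *     fetch.clearFetchContext();
 *   }
 * }</pre>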
**/
public class FetchOperator implements Serializable {

  static Log LOG = LogFactory.getLog(FetchOperator.class.getName());
  static LogHelper console = new LogHelper(LOG);

  public static final String FETCH_OPERATOR_DIRECTORY_LIST =
      "hive.complete.dir.list";

  private boolean isNativeTable;
  private FetchWork work;
  protected Operator<?> operator;    // operator tree for further processing of rows (optional)
  private int splitNum;
  private PartitionDesc currPart;
  private TableDesc currTbl;
  private boolean tblDataDone;
  private FooterBuffer footerBuffer = null;
  private int headerCount = 0;
  private int footerCount = 0;

  private boolean hasVC;
  private boolean isPartitioned;
  private StructObjectInspector vcsOI;
  private List<VirtualColumn> vcCols;
  private ExecMapperContext context;

  private transient RecordReader<WritableComparable, Writable> currRecReader;
  private transient FetchInputFormatSplit[] inputSplits;
  private transient InputFormat inputFormat;
  private transient JobConf job;
  private transient WritableComparable key;
  private transient Writable value;
  private transient Object[] vcValues;
  private transient Deserializer serde;
  private transient Deserializer tblSerde;
  private transient Converter partTblObjectInspectorConverter;

  private transient Iterator<Path> iterPath;
  private transient Iterator<PartitionDesc> iterPartDesc;
  private transient Path currPath;
  private transient StructObjectInspector objectInspector;
  private transient StructObjectInspector rowObjectInspector;
  private transient ObjectInspector partitionedTableOI;
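  // Row layout: row[0] = the deserialized record, row[1] = partition key values (if
  // partitioned), and the last slot = virtual column values (if any); see initialize().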
  private transient Object[] row;

  public FetchOperator() {
  }

  public FetchOperator(FetchWork work, JobConf job) {
    this.job = job;
    this.work = work;
    initialize();
  }

  public FetchOperator(FetchWork work, JobConf job, Operator<?> operator,
      List<VirtualColumn> vcCols) {
    this.job = job;
    this.work = work;
    this.operator = operator;
    this.vcCols = vcCols;
    initialize();
  }

  private void initialize() {
    if (hasVC = (vcCols != null && !vcCols.isEmpty())) {
      List<String> names = new ArrayList<String>(vcCols.size());
      List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(vcCols.size());
      for (VirtualColumn vc : vcCols) {
        inspectors.add(vc.getObjectInspector());
        names.add(vc.getName());
      }
      vcsOI = ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
      vcValues = new Object[vcCols.size()];
    }
    isPartitioned = work.isPartitioned();
    tblDataDone = false;
    if (hasVC && isPartitioned) {
      row = new Object[3];
    } else if (hasVC || isPartitioned) {
      row = new Object[2];
    } else {
      row = new Object[1];
    }
    if (work.getTblDesc() != null) {
      isNativeTable = !work.getTblDesc().isNonNative();
    } else {
      isNativeTable = true;
    }
    setupExecContext();
  }

  private void setupExecContext() {
    if (hasVC || work.getSplitSample() != null) {
      context = new ExecMapperContext(job);
      if (operator != null) {
        operator.setExecContext(context);
      }
    }
  }

  public FetchWork getWork() {
    return work;
  }

  public void setWork(FetchWork work) {
    this.work = work;
  }

  public int getSplitNum() {
    return splitNum;
  }

  public void setSplitNum(int splitNum) {
    this.splitNum = splitNum;
  }

  public PartitionDesc getCurrPart() {
    return currPart;
  }

  public void setCurrPart(PartitionDesc currPart) {
    this.currPart = currPart;
  }

  public TableDesc getCurrTbl() {
    return currTbl;
  }

  public void setCurrTbl(TableDesc currTbl) {
    this.currTbl = currTbl;
  }

  public boolean isTblDataDone() {
    return tblDataDone;
  }

  public void setTblDataDone(boolean tblDataDone) {
    this.tblDataDone = tblDataDone;
  }

  public boolean isEmptyTable() {
    return work.getTblDir() == null && (work.getPartDir() == null || work.getPartDir().isEmpty());
  }

  /**
   * A cache of InputFormat instances.
   */
  private static final Map<String, InputFormat> inputFormats = new HashMap<String, InputFormat>();

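  /**
   * Returns an InputFormat instance for the given class. Formats that take configuration
   * (Configurable or JobConfigurable) are instantiated fresh on every call; all others are
   * created once and cached by class name.
   */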
  @SuppressWarnings("unchecked")
  static InputFormat getInputFormatFromCache(Class<? extends InputFormat> inputFormatClass,
       JobConf conf) throws IOException {
    if (Configurable.class.isAssignableFrom(inputFormatClass) ||
        JobConfigurable.class.isAssignableFrom(inputFormatClass)) {
      return (InputFormat<WritableComparable, Writable>) ReflectionUtils
          .newInstance(inputFormatClass, conf);
    }
    InputFormat format = inputFormats.get(inputFormatClass.getName());
    if (format == null) {
      try {
        format = ReflectionUtils.newInstance(inputFormatClass, conf);
        inputFormats.put(inputFormatClass.getName(), format);
      } catch (Exception e) {
        throw new IOException("Cannot create an instance of InputFormat class "
            + inputFormatClass.getName() + " as specified in mapredWork!", e);
      }
    }
    return format;
  }

  private StructObjectInspector getRowInspectorFromTable(TableDesc table) throws Exception {
    Deserializer serde = table.getDeserializerClass().newInstance();
    SerDeUtils.initializeSerDeWithoutErrorCheck(serde, job, table.getProperties(), null);
    return createRowInspector(getStructOIFrom(serde.getObjectInspector()));
  }

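  /**
   * Builds the row inspector for reading a partition: fills row[1] with the typed partition
   * key values for the current partition and returns a struct inspector that unions the
   * record columns, the partition columns and, when present, the virtual columns.
   */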
  private StructObjectInspector getRowInspectorFromPartition(PartitionDesc partition,
      ObjectInspector partitionOI) throws Exception {

    String pcols = partition.getTableDesc().getProperties().getProperty(
        org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
    String[] partKeys = pcols.trim().split("/");
    String pcolTypes = partition.getTableDesc().getProperties().getProperty(
        org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
    String[] partKeyTypes = pcolTypes.trim().split(":");
    row[1] = createPartValue(partKeys, partition.getPartSpec(), partKeyTypes);

    return createRowInspector(getStructOIFrom(partitionOI), partKeys, partKeyTypes);
  }

  private StructObjectInspector getRowInspectorFromPartitionedTable(TableDesc table)
      throws Exception {
    Deserializer serde = table.getDeserializerClass().newInstance();
    SerDeUtils.initializeSerDe(serde, job, table.getProperties(), null);
    String pcols = table.getProperties().getProperty(
        org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
    String[] partKeys = pcols.trim().split("/");
    String pcolTypes = table.getProperties().getProperty(
        org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
    String[] partKeyTypes = pcolTypes.trim().split(":");   
    row[1] = null;
    return createRowInspector(getStructOIFrom(serde.getObjectInspector()), partKeys, partKeyTypes);
  }

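  /**
   * Wraps the deserializer's ObjectInspector in a delegated inspector (or re-points the
   * existing delegated one at it), so a stable top-level inspector instance can be kept
   * while the underlying inspector changes between tables and partitions.
   */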
  private StructObjectInspector getStructOIFrom(ObjectInspector current) throws SerDeException {
    if (objectInspector != null) {
      current = DelegatedObjectInspectorFactory.reset(objectInspector, current);
    } else {
      current = DelegatedObjectInspectorFactory.wrap(current);
    }
    return objectInspector = (StructObjectInspector) current;
  }

  private StructObjectInspector createRowInspector(StructObjectInspector current)
      throws SerDeException {
    return hasVC ? ObjectInspectorFactory.getUnionStructObjectInspector(
        Arrays.asList(current, vcsOI)) : current;
  }

  private StructObjectInspector createRowInspector(StructObjectInspector current, String[] partKeys, String[] partKeyTypes)
      throws SerDeException {
    List<String> partNames = new ArrayList<String>();
    List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>();
    for (int i = 0; i < partKeys.length; i++) {
      String key = partKeys[i];
      partNames.add(key);   
      ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(
          TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]));
      partObjectInspectors.add(oi);
    }
    StructObjectInspector partObjectInspector = ObjectInspectorFactory
        .getStandardStructObjectInspector(partNames, partObjectInspectors);

    return ObjectInspectorFactory.getUnionStructObjectInspector(
        hasVC ? Arrays.asList(current, partObjectInspector, vcsOI) :
            Arrays.asList(current, partObjectInspector));
  }

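  /**
   * Converts the string values in the partition spec to typed Java objects, one per
   * partition key, using the declared partition column types.
   */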
  private Object[] createPartValue(String[] partKeys, Map<String, String> partSpec, String[] partKeyTypes) {
    Object[] partValues = new Object[partKeys.length];
    for (int i = 0; i < partKeys.length; i++) {
      String key = partKeys[i];
      ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(
          TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]));
      partValues[i] =
          ObjectInspectorConverters.
          getConverter(PrimitiveObjectInspectorFactory.
              javaStringObjectInspector, oi).convert(partSpec.get(key));  
    }
    return partValues;
  }

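  /**
   * Advances currPath (plus currTbl/currPart) to the next table or partition directory
   * that exists and contains at least one non-empty file; currPath is left null when
   * there is nothing more to read.
   */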
  private void getNextPath() throws Exception {
    // first time
    if (iterPath == null) {
      if (work.isNotPartitioned()) {
        if (!tblDataDone) {
          currPath = work.getTblDir();
          currTbl = work.getTblDesc();
          if (isNativeTable) {
            FileSystem fs = currPath.getFileSystem(job);
            if (fs.exists(currPath)) {
              FileStatus[] fStats = listStatusUnderPath(fs, currPath);
              for (FileStatus fStat : fStats) {
                if (fStat.getLen() > 0) {
                  tblDataDone = true;
                  break;
                }
              }
            }
          } else {
            tblDataDone = true;
          }

          if (!tblDataDone) {
            currPath = null;
          }
          return;
        } else {
          currTbl = null;
          currPath = null;
        }
        return;
      } else {
        setFetchOperatorContext(job, work.getPartDir());
        iterPath = work.getPartDir().iterator();
        iterPartDesc = work.getPartDesc().iterator();
      }
    }

    while (iterPath.hasNext()) {
      Path nxt = iterPath.next();
      PartitionDesc prt = null;
      if (iterPartDesc != null) {
        prt = iterPartDesc.next();
      }
      FileSystem fs = nxt.getFileSystem(job);
      if (fs.exists(nxt)) {
        FileStatus[] fStats = listStatusUnderPath(fs, nxt);
        for (FileStatus fStat : fStats) {
          if (fStat.getLen() > 0) {
            currPath = nxt;
            if (iterPartDesc != null) {
              currPart = prt;
            }
            return;
          }
        }
      }
    }
  }

  /**
   * Set context for this fetch operator into the jobconf.
   * This helps InputFormats make decisions based on the scope of the complete
   * operation.
   * @param conf the configuration to modify
   * @param partDirs the list of partition directories
   */
  static void setFetchOperatorContext(JobConf conf,
                                      ArrayList<Path> partDirs) {
    if (partDirs != null) {
      StringBuilder buff = new StringBuilder();
      boolean first = true;
      for(Path p: partDirs) {
        if (first) {
          first = false;
        } else {
          buff.append('\t');
        }
        buff.append(StringEscapeUtils.escapeJava(p.toString()));
      }
      conf.set(FETCH_OPERATOR_DIRECTORY_LIST, buff.toString());
    }
  }

  /**
   * A cache of whether each ObjectInspector has settable fields, shared with
   * ObjectInspectorConverters.getConvertedOI across record readers and partitions.
   */
  private static Map<ObjectInspector, Boolean> oiSettableProperties = new HashMap<ObjectInspector, Boolean>();

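  /**
   * Returns a record reader for the next split of the current path, lazily computing the
   * splits, the deserializers and the partition-to-table ObjectInspector converter the
   * first time a path is opened. When the splits of the current path are exhausted it
   * moves on to the next path, and it returns null once all input has been consumed.
   */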
  private RecordReader<WritableComparable, Writable> getRecordReader() throws Exception {
    if (currPath == null) {
      getNextPath();
      if (currPath == null) {
        return null;
      }

      // Not using FileInputFormat.setInputPaths() here because it forces a connection
      // to the default file system, which may or may not be online during pure
      // metadata operations.
      job.set("mapred.input.dir", org.apache.hadoop.util.StringUtils.escapeString(currPath
          .toString()));

      // The fetch operator is not vectorized, so turn the vectorization flag off here to
      // ensure a non-vectorized record reader is created below.
      if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
        HiveConf.setBoolVar(job, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false);
      }

      PartitionDesc partDesc;
      if (currTbl == null) {
        partDesc = currPart;
      } else {
        partDesc = new PartitionDesc(currTbl, null);
      }

      Class<? extends InputFormat> formatter = partDesc.getInputFileFormatClass();
      inputFormat = getInputFormatFromCache(formatter, job);
      Utilities.copyTableJobPropertiesToConf(partDesc.getTableDesc(), job);
      InputSplit[] splits = inputFormat.getSplits(job, 1);
      FetchInputFormatSplit[] inputSplits = new FetchInputFormatSplit[splits.length];
      for (int i = 0; i < splits.length; i++) {
        inputSplits[i] = new FetchInputFormatSplit(splits[i], formatter.getName());
      }
      if (work.getSplitSample() != null) {
        inputSplits = splitSampling(work.getSplitSample(), inputSplits);
      }
      this.inputSplits = inputSplits;

      splitNum = 0;
      serde = partDesc.getDeserializer(job);
      SerDeUtils.initializeSerDe(serde, job, partDesc.getTableDesc().getProperties(),
                                 partDesc.getProperties());

      if (currTbl != null) {
        tblSerde = serde;
      }
      else {
        tblSerde = currPart.getTableDesc().getDeserializerClass().newInstance();
        SerDeUtils.initializeSerDe(tblSerde, job, currPart.getTableDesc().getProperties(), null);
      }

      ObjectInspector outputOI = ObjectInspectorConverters.getConvertedOI(
          serde.getObjectInspector(),
          partitionedTableOI == null ? tblSerde.getObjectInspector() : partitionedTableOI,
          oiSettableProperties);

      partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(
          serde.getObjectInspector(), outputOI);

      if (LOG.isDebugEnabled()) {
        LOG.debug("Creating fetchTask with deserializer typeinfo: "
            + serde.getObjectInspector().getTypeName());
        LOG.debug("deserializer properties:\ntable properties: " +
            partDesc.getTableDesc().getProperties() + "\npartition properties: " +
            partDesc.getProperties());
      }

      if (currPart != null) {
        getRowInspectorFromPartition(currPart, outputOI);
      }
    }

    if (splitNum >= inputSplits.length) {
      if (currRecReader != null) {
        currRecReader.close();
        currRecReader = null;
      }
      currPath = null;
      return getRecordReader();
    }

    final FetchInputFormatSplit target = inputSplits[splitNum];

    @SuppressWarnings("unchecked")
    final RecordReader<WritableComparable, Writable> reader =
        inputFormat.getRecordReader(target.getInputSplit(), job, Reporter.NULL);
    if (hasVC || work.getSplitSample() != null) {
      currRecReader = new HiveRecordReader<WritableComparable, Writable>(reader, job) {
        @Override
        public boolean doNext(WritableComparable key, Writable value) throws IOException {
          // If the current position is past the shrinkedLength calculated for this split
          // by table sampling, stop fetching any more rows (early exit).
          if (target.shrinkedLength > 0 &&
              context.getIoCxt().getCurrentBlockStart() > target.shrinkedLength) {
            return false;
          }
          return super.doNext(key, value);
        }
      };
      ((HiveContextAwareRecordReader)currRecReader).
          initIOContext(target, job, inputFormat.getClass(), reader);
    } else {
      currRecReader = reader;
    }
    splitNum++;
    key = currRecReader.createKey();
    value = currRecReader.createValue();
    return currRecReader;
  }

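  /**
   * Applies split sampling: starting at an index derived from the sample seed, splits are
   * taken until their cumulative size reaches the requested target size; the split that
   * crosses the target is marked with shrinkedLength so its reader stops early.
   */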
  private FetchInputFormatSplit[] splitSampling(SplitSample splitSample,
      FetchInputFormatSplit[] splits) {
    long totalSize = 0;
    for (FetchInputFormatSplit split : splits) {
      totalSize += split.getLength();
    }
    List<FetchInputFormatSplit> result = new ArrayList<FetchInputFormatSplit>();
    long targetSize = splitSample.getTargetSize(totalSize);
    int startIndex = splitSample.getSeedNum() % splits.length;
    long size = 0;
    for (int i = 0; i < splits.length; i++) {
      FetchInputFormatSplit split = splits[(startIndex + i) % splits.length];
      result.add(split);
      long splitLength = split.getLength();
      if (size + splitLength >= targetSize) {
        if (size + splitLength > targetSize) {
          split.shrinkedLength = targetSize - size;
        }
        break;
      }
      size += splitLength;
    }
    return result.toArray(new FetchInputFormatSplit[result.size()]);
  }

  /**
   * Get the next row and push it down to the operator tree.
   * Currently only used by FetchTask.
   **/
  public boolean pushRow() throws IOException, HiveException {
    if(work.getRowsComputedUsingStats() != null) {
      for (List<Object> row : work.getRowsComputedUsingStats()) {
        operator.processOp(row, 0);
      }
      operator.flush();
      return true;
    }
    InspectableObject row = getNextRow();
    if (row != null) {
      pushRow(row);
    } else {
      operator.flush();
    }
    return row != null;
  }

  protected void pushRow(InspectableObject row) throws HiveException {
    operator.processOp(row.o, 0);
  }

  private transient final InspectableObject inspectable = new InspectableObject();

  /**
   * Get the next row. The fetch context is modified appropriately.
   *
   **/
  public InspectableObject getNextRow() throws IOException {
    try {
      while (true) {
        boolean opNotEOF = true;
        if (context != null) {
          context.resetRow();
        }
        if (currRecReader == null) {
          currRecReader = getRecordReader();
          if (currRecReader == null) {
            return null;
          }

          /**
           * Start reading a new file.
           * If the file has a header, skip the header lines before reading the records.
           * If the file has a footer, use FooterBuffer to cache and drop the footer
           * records at the end of the file.
           */
          headerCount = 0;
          footerCount = 0;
          TableDesc table = null;
          if (currTbl != null) {
            table = currTbl;
          } else if (currPart != null) {
            table = currPart.getTableDesc();
          }
          if (table != null) {
            headerCount = Utilities.getHeaderCount(table);
            footerCount = Utilities.getFooterCount(table, job);
          }

          // Skip header lines.
          opNotEOF = Utilities.skipHeader(currRecReader, headerCount, key, value);

          // Initialize footer buffer.
          if (opNotEOF) {
            if (footerCount > 0) {
              footerBuffer = new FooterBuffer();
              opNotEOF = footerBuffer.initializeBuffer(job, currRecReader, footerCount, key, value);
            }
          }
        }

        if (opNotEOF && footerBuffer == null) {
          /**
           * The file did not end while skipping the header and there are
           * no footer lines, so read normally.
           */
          opNotEOF = currRecReader.next(key, value);
        }
        if (opNotEOF && footerBuffer != null) {
          opNotEOF = footerBuffer.updateBuffer(job, currRecReader, key, value);
        }
        if (opNotEOF) {
          if (operator != null && context != null && context.inputFileChanged()) {
            // Let the child operators clean up when the input file changes.
            try {
              operator.cleanUpInputFileChanged();
            } catch (HiveException e) {
              throw new IOException(e);
            }
          }
          if (hasVC) {
            vcValues = MapOperator.populateVirtualColumnValues(context, vcCols, vcValues, serde);
            row[isPartitioned ? 2 : 1] = vcValues;
          }
          row[0] = partTblObjectInspectorConverter.convert(serde.deserialize(value));

          if (hasVC || isPartitioned) {
            inspectable.o = row;
            inspectable.oi = rowObjectInspector;
            return inspectable;
          }
          inspectable.o = row[0];
          inspectable.oi = tblSerde.getObjectInspector();
          return inspectable;
        } else {
          currRecReader.close();
          currRecReader = null;
        }
      }
    } catch (Exception e) {
      throw new IOException(e);
    }
  }

  /**
   * Clear the context, if anything needs to be done.
   *
   **/
  public void clearFetchContext() throws HiveException {
    try {
      if (currRecReader != null) {
        currRecReader.close();
        currRecReader = null;
      }
      if (operator != null) {
        operator.close(false);
        operator = null;
      }
      if (context != null) {
        context.clear();
        context = null;
      }
      this.currTbl = null;
      this.currPath = null;
      this.iterPath = null;
      this.iterPartDesc = null;
    } catch (Exception e) {
      throw new HiveException("Failed with exception " + e.getMessage()
          + org.apache.hadoop.util.StringUtils.stringifyException(e));
    }
  }

  /**
   * Used for bucket map join: explicitly sets the paths (and matching partition
   * descriptors) this operator should fetch from.
   */
  public void setupContext(List<Path> paths) {
    this.iterPath = paths.iterator();
    if (work.isNotPartitioned()) {
      this.currTbl = work.getTblDesc();
    } else {
      this.iterPartDesc = work.getPartDescs(paths).iterator();
    }
    setupExecContext();
  }

  /**
   * returns output ObjectInspector, never null
   */
  public ObjectInspector getOutputObjectInspector() throws HiveException {
    if(null != work.getStatRowOI()) {
      return work.getStatRowOI();
    }
    try {
      if (work.isNotPartitioned()) {
        return getRowInspectorFromTable(work.getTblDesc());
      }
      List<PartitionDesc> listParts = work.getPartDesc();
      // Choose the table descriptor if none of the partitions is present.
      // For example, consider the query:
      //   select /*+mapjoin(T1)*/ count(*) from T1 join T2 on T1.key=T2.key
      // Both T1 and T2 are partitioned tables, but T1 does not have any partitions.
      // FetchOperator is invoked for T1, and listParts is empty. In that case,
      // use T1's schema to get the ObjectInspector.
      if (listParts == null || listParts.isEmpty()) {
        return getRowInspectorFromPartitionedTable(work.getTblDesc());
      }

      // Choose any partition; its OI needs to be converted to the table OI.
      // Whenever a new partition is read, a new converter is created.
      PartitionDesc partition = listParts.get(0);
      Deserializer tblSerde = partition.getTableDesc().getDeserializerClass().newInstance();
      SerDeUtils.initializeSerDe(tblSerde, job, partition.getTableDesc().getProperties(), null);

      partitionedTableOI = null;
      ObjectInspector tableOI = tblSerde.getObjectInspector();

      // Get the OI corresponding to all the partitions
      for (PartitionDesc listPart : listParts) {
        partition = listPart;
        Deserializer partSerde = listPart.getDeserializer(job);
        SerDeUtils.initializeSerDe(partSerde, job, partition.getTableDesc().getProperties(),
                                   listPart.getProperties());

        partitionedTableOI = ObjectInspectorConverters.getConvertedOI(
            partSerde.getObjectInspector(), tableOI, oiSettableProperties);
        if (!partitionedTableOI.equals(tableOI)) {
          break;
        }
      }
      return getRowInspectorFromPartition(partition, partitionedTableOI);
    } catch (Exception e) {
      throw new HiveException("Failed with exception " + e.getMessage()
          + org.apache.hadoop.util.StringUtils.stringifyException(e));
    } finally {
      currPart = null;
    }
  }

  /**
   * Lists status for all files under a given path. Whether or not this is recursive depends on the
   * setting of job configuration parameter mapred.input.dir.recursive.
   *
   * @param fs
   *          file system
   *
   * @param p
   *          path in file system
   *
   * @return list of file status entries
   */
  private FileStatus[] listStatusUnderPath(FileSystem fs, Path p) throws IOException {
    boolean recursive = HiveConf.getBoolVar(job, HiveConf.ConfVars.HADOOPMAPREDINPUTDIRRECURSIVE);
    // If this is in acid format always read it recursively regardless of what the jobconf says.
    if (!recursive && !AcidUtils.isAcid(p, job)) {
      return fs.listStatus(p);
    }
    List<FileStatus> results = new ArrayList<FileStatus>();
    for (FileStatus stat : fs.listStatus(p)) {
      FileUtils.listStatusRecursively(fs, stat, results);
    }
    return results.toArray(new FileStatus[results.size()]);
  }

  // For split sampling: shrinkedLength is checked against IOContext.getCurrentBlockStart(),
  // which comes from RecordReader.getPos(). Input formats that do not support getPos(),
  // such as HiveHBaseTableInputFormat, cannot be used with this (TODO).
  private static class FetchInputFormatSplit extends HiveInputFormat.HiveInputSplit {

    // Shrunken length for this split; its counterpart in normal mode is
    // InputSplitShim.shrinkedLength. The difference is that this one is evaluated per row
    // using RecordReader.getPos(), whereas that one is evaluated per split using
    // InputSplit.getLength().
    private long shrinkedLength = -1;

    public FetchInputFormatSplit(InputSplit split, String name) {
      super(split, name);
    }
  }
}