//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.facebook.hive.orc;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.TreeMap;

import com.facebook.hive.orc.compression.CompressionCodec;
import com.facebook.hive.orc.compression.CompressionKind;
import com.facebook.hive.orc.compression.SnappyCodec;
import com.facebook.hive.orc.compression.ZlibCodec;
import com.facebook.hive.orc.statistics.ColumnStatisticsImpl;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.io.RawDatasizeConst;
import org.apache.hadoop.hive.serde2.ReaderWriterProfiler;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;

import com.facebook.hive.orc.OrcConf.ConfVars;
import com.facebook.hive.orc.OrcProto.Stream.Kind;
import com.google.protobuf.ByteString;
import com.google.protobuf.CodedOutputStream;

/**
* An ORC file writer. The file is divided into stripes, which is the natural
* unit of work when reading. Each stripe is buffered in memory until the
* memory reaches the stripe size and then it is written out broken down by
* columns. Each column is written by a TreeWriter that is specific to that
* type of column. TreeWriters may have children TreeWriters that handle the
* sub-types. Each of the TreeWriters writes the column's data as a set of
* streams.
*
* This class is synchronized so that multi-threaded access is ok. In
* particular, because the MemoryManager is shared between writers, this class
* assumes that checkMemory may be called from a separate thread.
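 *
 * A minimal usage sketch; WriterImpl is package-private, so callers normally
 * obtain a Writer through a factory (the OrcFile.createWriter entry point
 * shown here is an assumption, not defined in this file):
 * <pre>
 *   Writer writer = OrcFile.createWriter(fs, path, conf, inspector,
 *       stripeSize, CompressionKind.ZLIB, bufferSize, rowIndexStride);
 *   for (Object row : rows) {
 *     writer.addRow(row);   // buffers the row; stripes flush automatically
 *   }
 *   writer.close();         // flushes the last stripe and writes the footer
 * </pre>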
*/
public class WriterImpl implements Writer, MemoryManager.Callback {

  private static final Log LOG = LogFactory.getLog(WriterImpl.class);

  private static final int HDFS_BUFFER_SIZE = 256 * 1024;
  private static final int MIN_ROW_INDEX_STRIDE = 1000;

  public static final int SHORT_BYTE_SIZE = 2;
  public static final int INT_BYTE_SIZE = 4;
  public static final int LONG_BYTE_SIZE = 8;

  // Number of positions recorded per row index entry for a present stream
  public static final int UNCOMPRESSED_PRESENT_STREAM_INDEX_ENTRIES = 3;
  public static final int COMPRESSED_PRESENT_STREAM_INDEX_ENTRIES = 4;
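  // A present stream is a bit stream, so each recorded position consists of the
  // underlying byte stream position (one value when uncompressed; two values,
  // the compressed-block start and the offset within the decompressed block,
  // when compressed) plus the byte-RLE run offset and the bit offset within
  // the current byte: 3 positions uncompressed, 4 compressed.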

  private final FileSystem fs;
  private final Path path;
  private final long stripeSize;
  private final int rowIndexStride;
  private final CompressionKind compress;
  private final CompressionCodec codec;
  private final int bufferSize;
  // the streams that make up the current stripe
  private final Map<StreamName, BufferedStream> streams =
    new TreeMap<StreamName, BufferedStream>();

  private FSDataOutputStream rawWriter = null;
  // the compressed metadata information outStream
  private OutStream writer = null;
  // a protobuf CodedOutputStream that wraps the metadata outStream (writer)
  private CodedOutputStream protobufWriter = null;
  private long headerLength;
  private int columnCount;
  private long rowCount = 0;
  private long rowsInStripe = 0;
  private int rowsInIndex = 0;
  private long rawDataSize = 0;
  private final List<OrcProto.StripeInformation> stripes =
    new ArrayList<OrcProto.StripeInformation>();
  private final Map<String, ByteString> userMetadata =
    new TreeMap<String, ByteString>();
  private final StreamFactory streamFactory = new StreamFactory();
  private final TreeWriter treeWriter;
  private final OrcProto.RowIndex.Builder rowIndex =
      OrcProto.RowIndex.newBuilder();
  private final boolean buildIndex;
  private final MemoryManager memoryManager;
  private final boolean useVInts;
  private final int dfsBytesPerChecksum;
  private final long maxDictSize;
  private final MemoryEstimate memoryEstimate = new MemoryEstimate();

  private final Configuration conf;

  WriterImpl(FileSystem fs,
             Path path,
             Configuration conf,
             ObjectInspector inspector,
             long stripeSize,
             CompressionKind compress,
             int bufferSize,
             int rowIndexStride,
             MemoryManager memoryManager) throws IOException {
    this.fs = fs;
    this.path = path;
    this.conf = conf;
    this.stripeSize = stripeSize;
    this.compress = compress;
    this.bufferSize = bufferSize;
    this.rowIndexStride = rowIndexStride;
    this.memoryManager = memoryManager;
    buildIndex = rowIndexStride > 0;
    codec = createCodec(compress, conf);
    useVInts = OrcConf.getBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_USE_VINTS);
    treeWriter = createTreeWriter(inspector, streamFactory, false, conf, useVInts,
        memoryManager.isLowMemoryMode(), memoryEstimate);
    dfsBytesPerChecksum = conf.getInt("io.bytes.per.checksum", 512);
    if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) {
      throw new IllegalArgumentException("Row stride must be at least " +
          MIN_ROW_INDEX_STRIDE);
    }
    maxDictSize = OrcConf.getLongVar(conf, OrcConf.ConfVars.HIVE_ORC_MAX_DICTIONARY_SIZE);
    // ensure that we are able to handle callbacks before we register ourselves
    memoryManager.addWriter(path, stripeSize, this, memoryEstimate.getTotalMemory());
  }

  static CompressionCodec createCodec(CompressionKind kind) {
    // To be used for cases where we don't care about configuring the codec,
    // e.g. reads
    return createCodec(kind, null);
  }
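
  // Example (a sketch): createCodec(CompressionKind.ZLIB, conf) returns a
  // ZlibCodec configured from conf, while createCodec(CompressionKind.NONE, conf)
  // returns null, which this class treats as "no compression" (see
  // StreamFactory.isCompressed()).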

  static CompressionCodec createCodec(CompressionKind kind, Configuration conf) {
    switch (kind) {
      case NONE:
        return null;
      case ZLIB:
        return new ZlibCodec(conf);
      case SNAPPY:
        return new SnappyCodec();
      case LZO:
        try {
          Class<? extends CompressionCodec> lzo =
              (Class<? extends CompressionCodec>)
                  Class.forName("com.facebook.hive.orc.LzoCodec");
          return lzo.newInstance();
        } catch (ClassNotFoundException e) {
          throw new IllegalArgumentException("LZO is not available.", e);
        } catch (InstantiationException e) {
          throw new IllegalArgumentException("Problem initializing LZO", e);
        } catch (IllegalAccessException e) {
          throw new IllegalArgumentException("Insufficient access to LZO", e);
        }
      default:
        throw new IllegalArgumentException("Unknown compression codec: " +
            kind);
    }
  }

  @Override
  public synchronized void enterLowMemoryMode() throws IOException {
    // Don't use dictionaries
    treeWriter.abandonDictionaries();
    // If the Zlib compression level is less than 6, raise it to 6 to compensate for the fact
    // we aren't using dictionaries
    if (codec != null && OrcConf.getIntVar(conf, ConfVars.HIVE_ORC_ZLIB_COMPRESSION_LEVEL) < 6) {
      OrcConf.setIntVar(conf, ConfVars.HIVE_ORC_ZLIB_COMPRESSION_LEVEL, 6);
      codec.reloadConfigurations(conf);
    }
  }

  /**
   * This class is used to hold the contents of streams as they are buffered.
   * The TreeWriters write to the outStream; as buffers fill up, the codec
   * compresses them and stores the results in the output list. When the
   * stripe is written, the whole stream is written to the file.
   */
  private class BufferedStream implements OutStream.OutputReceiver {
    private final OutStream outStream;
    private final List<ByteBuffer> output = new ArrayList<ByteBuffer>();

    BufferedStream(String name, int bufferSize,
                   CompressionCodec codec) throws IOException {
      outStream = new OutStream(name, bufferSize, codec, this, memoryEstimate);
    }

    /**
     * Receive a buffer from the compression codec.
     * @param buffer the buffer to save
     * @throws IOException
     */
    @Override
    public void output(ByteBuffer buffer) {
      output.add(buffer);
    }

    /**
     * Get the number of bytes in buffers that are allocated to this stream.
     * @return number of bytes in buffers
     */
    public long getBufferSize() {
      long result = 0;
      for(ByteBuffer buf: output) {
        result += buf.capacity();
      }
      return outStream.getBufferSize() + result;
    }

    /**
     * Flush the stream to the codec.
     * @throws IOException
     */
    public void flush(boolean reuseBuffer) throws IOException {
      outStream.flush(reuseBuffer);
    }

    /**
     * Clear all of the buffers.
     * @throws IOException
     */
    public void clear() throws IOException {
      outStream.clear();
      for(ByteBuffer buf: output) {
        memoryEstimate.decrementTotalMemory(buf.capacity());
      }
      output.clear();
    }

    /**
     * Check the state of the suppress flag in the output stream.
     * @return the value of the suppress flag
     */
    public boolean isSuppressed() {
      return outStream.isSuppressed();
    }

    /**
     * Write the saved compressed buffers to the OutputStream.
     * @param out the stream to write to
     * @throws IOException
     */
    void spillTo(OutputStream out) throws IOException {
      for(ByteBuffer buffer: output) {
        out.write(buffer.array(), buffer.arrayOffset() + buffer.position(),
          buffer.remaining());
      }
    }

    @Override
    public String toString() {
      return outStream.toString();
    }
  }

  /**
   * An output receiver that writes the ByteBuffers to the output stream
   * as they are received.
   */
  private class DirectStream implements OutStream.OutputReceiver {
    private final FSDataOutputStream output;

    DirectStream(FSDataOutputStream output) {
      this.output = output;
    }

    @Override
    public void output(ByteBuffer buffer) throws IOException {
      output.write(buffer.array(), buffer.arrayOffset() + buffer.position(),
        buffer.remaining());
      memoryEstimate.decrementTotalMemory(buffer.capacity());
    }
  }

  static class RowIndexPositionRecorder implements PositionRecorder {
    private final OrcProto.RowIndexEntry.Builder builder;

    RowIndexPositionRecorder(OrcProto.RowIndexEntry.Builder builder) {
      this.builder = builder;
    }

    @Override
    public void addPosition(long position) {
      builder.addPositions(position);
    }
  }

  /**
   * Interface from the Writer to the TreeWriters. This limits the visibility
   * that the TreeWriters have into the Writer.
   */
  private class StreamFactory {
    /**
     * Create a stream to store part of a column.
     * @param column the column id for the stream
     * @param kind the kind of stream
     * @return the outStream that the section should be written to.
     * @throws IOException
     */
    public OutStream createStream(int column,
                                  OrcProto.Stream.Kind kind) throws IOException {
      StreamName name = new StreamName(column, kind);
      BufferedStream result = streams.get(name);
      if (result == null) {
        result = new BufferedStream(name.toString(), bufferSize, codec);
        streams.put(name, result);
      }
      return result.outStream;
    }

    /**
     * Get the next column id.
     * @return a number from 0 to the number of columns - 1
     */
    public int getNextColumnId() {
      return columnCount++;
    }

    /**
     * Get the stride rate of the row index.
     */
    public int getRowIndexStride() {
      return rowIndexStride;
    }

    /**
     * Should we build the row index?
     * @return true if we are building the index
     */
    public boolean buildIndex() {
      return buildIndex;
    }

    /**
     * Is the ORC file compressed?
     * @return are the streams compressed
     */
    public boolean isCompressed() {
      return codec != null;
    }
  }

  /**
   * The parent class of all of the writers for each column. Each column
   * is written by an instance of this class. The compound types (struct,
   * list, map, and union) have children tree writers that write the children
   * types.
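 *
 * For example, a row type of struct&lt;a:int,b:string&gt; produces a root
 * struct writer whose children include an IntegerTreeWriter and a
 * StringTreeWriter (both defined below), one writer per column id handed
 * out by the StreamFactory.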
   */
  private abstract static class TreeWriter {
    protected final int id;
    protected final ObjectInspector inspector;
    private final BitFieldWriter isPresent;
    private final boolean isCompressed;
    protected final ColumnStatisticsImpl indexStatistics;
    private final ColumnStatisticsImpl fileStatistics;
    protected TreeWriter[] childrenWriters;
    protected final RowIndexPositionRecorder rowIndexPosition;
    private final OrcProto.RowIndex.Builder rowIndex;
    private final OrcProto.RowIndexEntry.Builder rowIndexEntry;
    private final PositionedOutputStream rowIndexStream;
    private final Configuration conf;
    protected long stripeRawDataSize = 0;
    protected long rowRawDataSize = 0;
    protected final boolean useVInts;
    private int numStripes = 0;
    private boolean foundNulls;
    private OutStream isPresentOutStream;
    protected final MemoryEstimate memoryEstimate;

    /**
     * Create a tree writer
     * @param columnId the column id of the column to write
     * @param inspector the object inspector to use
     * @param streamFactory limited access to the Writer's data.
     * @param nullable can the value be null?
     * @throws IOException
     */
    TreeWriter(int columnId, ObjectInspector inspector,
               StreamFactory streamFactory,
               boolean nullable, Configuration conf,
               boolean useVInts, MemoryEstimate memoryEstimate) throws IOException {
      this.id = columnId;
      this.inspector = inspector;
      this.conf = conf;
      this.useVInts = useVInts;
      this.memoryEstimate = memoryEstimate;
      this.isCompressed = streamFactory.isCompressed();
      if (nullable) {
        isPresentOutStream = streamFactory.createStream(id, OrcProto.Stream.Kind.PRESENT);
        isPresent = new BitFieldWriter(isPresentOutStream, 1);
      } else {
        isPresent = null;
      }
      this.foundNulls = false;
      indexStatistics = ColumnStatisticsImpl.create(inspector);
      fileStatistics = ColumnStatisticsImpl.create(inspector);
      childrenWriters = new TreeWriter[0];
      rowIndex = OrcProto.RowIndex.newBuilder();
      rowIndexEntry = OrcProto.RowIndexEntry.newBuilder();
      rowIndexPosition = new RowIndexPositionRecorder(rowIndexEntry);
      if (streamFactory.buildIndex()) {
        rowIndexStream = streamFactory.createStream(id,
            OrcProto.Stream.Kind.ROW_INDEX);
      } else {
        rowIndexStream = null;
      }
    }

    protected int getNumStripes() {
      return numStripes;
    }

    protected OrcProto.RowIndex.Builder getRowIndex() {
      return rowIndex;
    }

    protected ColumnStatisticsImpl getFileStatistics() {
      return fileStatistics;
    }

    protected OrcProto.RowIndexEntry.Builder getRowIndexEntry() {
      return rowIndexEntry;
    }

    /**
     * Add a new value to the column.
     * @param obj
     * @throws IOException
     */
    abstract void write(Object obj) throws IOException;

    void write(Object obj, long rawDataSize) throws IOException {
      if (obj != null) {
        setRawDataSize(rawDataSize);
      } else {
        // Estimate the raw size of null as 1 byte
        setRawDataSize(RawDatasizeConst.NULL_SIZE);
      }

      flushRow(obj);
    }

    /**
     * Update the row count and mark the isPresent bit
     */
    void flushRow(Object obj) throws IOException {
      if (obj != null) {
        indexStatistics.increment();
      }

      if (isPresent != null) {
        isPresent.write(obj == null ? 0 : 1);
        if(obj == null) {
          foundNulls = true;
        }
      }
    }

    private void removeIsPresentPositions() {
      for(int i=0; i < rowIndex.getEntryCount(); ++i) {
        OrcProto.RowIndexEntry.Builder entry = rowIndex.getEntryBuilder(i);
        List<Long> positions = entry.getPositionsList();
        // bit streams use 3 positions if uncompressed, 4 if compressed
        positions = positions.subList(isCompressed ? COMPRESSED_PRESENT_STREAM_INDEX_ENTRIES :
          UNCOMPRESSED_PRESENT_STREAM_INDEX_ENTRIES, positions.size());
        entry.clearPositions();
        entry.addAllPositions(positions);
      }
    }
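
    // Example: with compression, an entry's position list might look like
    // [p0, p1, p2, p3, d0, d1, ...], where the first four values locate the
    // PRESENT stream; after the trim above only the data-stream positions
    // [d0, d1, ...] remain.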

    /**
     * Sets the row raw data size and updates the stripe raw data size
     *
     * @param rawDataSize
     */
    protected void setRawDataSize(long rawDataSize) {
      rowRawDataSize = rawDataSize;
      stripeRawDataSize += rawDataSize;
    }

    /**
     * Write the stripe out to the file.
     * @param builder the stripe footer that contains the information about the
     *                layout of the stripe. The TreeWriter is required to update
     *                the footer with its information.
     * @param requiredIndexEntries the number of index entries that are
     *                             required. This is to check that the
     *                             row index is well formed.
     * @throws IOException
     */
    void writeStripe(OrcProto.StripeFooter.Builder builder,
                     int requiredIndexEntries) throws IOException {
      numStripes++;
      if (isPresent != null) {
        isPresent.flush();

        // if no nulls are found in a stream, then suppress the stream
        if(!foundNulls) {
          isPresentOutStream.suppress();
          // since isPresent bitstream is suppressed, update the index to
          // remove the positions of the isPresent stream
          if (rowIndexStream != null) {
            removeIsPresentPositions();
          }
        }
      }

      // reset the flag for next stripe
      foundNulls = false;

      builder.addColumns(getEncoding());
      if (rowIndexStream != null) {
        if (rowIndex.getEntryCount() != requiredIndexEntries) {
          throw new IllegalArgumentException("Column has wrong number of " +
               "index entries found: " + rowIndex.getEntryCount() + " expected: " +
               requiredIndexEntries);
        }
        rowIndex.build().writeTo(rowIndexStream);
        rowIndexStream.flush();
      }
      rowIndex.clear();
      rowIndexEntry.clear();
      stripeRawDataSize = 0;
    }

    TreeWriter[] getChildrenWriters() {
      return childrenWriters;
    }

    /**
     * For all the TreeWriters that buffer rows, process
     * all the buffered rows.
     */
    void flush() throws IOException {
      for (TreeWriter writer : childrenWriters) {
        writer.flush();
      }
    }

    /**
     * Get the encoding for this column.
     * @return the information about the encoding of this column
     */
    OrcProto.ColumnEncoding getEncoding() {
      return OrcProto.ColumnEncoding.newBuilder().setKind(
          OrcProto.ColumnEncoding.Kind.DIRECT).build();
    }

    /**
     * Create a row index entry with the previous location and the current
     * index statistics. Also merges the index statistics into the file
     * statistics before they are cleared. Finally, it records the start of the
     * next index and ensures all of the children columns also create an entry.
     * @throws IOException
     */
    void createRowIndexEntry() throws IOException {
      fileStatistics.merge(indexStatistics);
      rowIndexEntry.setStatistics(indexStatistics.serialize());
      indexStatistics.reset();
      rowIndex.addEntry(rowIndexEntry);
      rowIndexEntry.clear();
      recordPosition(rowIndexPosition);
      for(TreeWriter child: childrenWriters) {
        child.createRowIndexEntry();
      }
    }

    /**
     * Record the current position in each of this column's streams.
     * @param recorder where should the locations be recorded
     * @throws IOException
     */
    void recordPosition(PositionRecorder recorder) throws IOException {
      if (isPresent != null) {
        isPresent.getPosition(recorder);
      }
    }

    public void abandonDictionaries() throws IOException {
      for (TreeWriter child: childrenWriters) {
        child.abandonDictionaries();
      }
    }

    long getStripeRawDataSize() {
      return stripeRawDataSize;
    }

    long getRowRawDataSize() {
      return rowRawDataSize;
    }
  }

  private static class BooleanTreeWriter extends TreeWriter {
    private final BitFieldWriter writer;

    BooleanTreeWriter(int columnId,
                      ObjectInspector inspector,
                      StreamFactory writer,
                      boolean nullable, Configuration conf,
                      boolean useVInts, boolean lowMemoryMode,
                      MemoryEstimate memoryEstimate) throws IOException {
      super(columnId, inspector, writer, nullable, conf, useVInts, memoryEstimate);
      PositionedOutputStream out = writer.createStream(id,
          OrcProto.Stream.Kind.DATA);
      this.writer = new BitFieldWriter(out, 1);
      recordPosition(rowIndexPosition);
    }

    @Override
    void write(Object obj) throws IOException {
      super.write(obj, RawDatasizeConst.BOOLEAN_SIZE);
      if (obj != null) {
        boolean val = ((BooleanObjectInspector) inspector).get(obj);
        indexStatistics.updateBoolean(val);
        writer.write(val ? 1 : 0);
      }
    }

    @Override
    void writeStripe(OrcProto.StripeFooter.Builder builder,
                     int requiredIndexEntries) throws IOException {
      super.writeStripe(builder, requiredIndexEntries);
      writer.flush();
      recordPosition(rowIndexPosition);
    }

    @Override
    void recordPosition(PositionRecorder recorder) throws IOException {
      super.recordPosition(recorder);
      writer.getPosition(recorder);
    }
  }

  private static class ByteTreeWriter extends TreeWriter {
    private final RunLengthByteWriter writer;

    ByteTreeWriter(int columnId,
                      ObjectInspector inspector,
                      StreamFactory writer,
                      boolean nullable, Configuration conf,
                      boolean useVInts, boolean lowMemoryMode,
                      MemoryEstimate memoryEstimate) throws IOException {
      super(columnId, inspector, writer, nullable, conf, useVInts, memoryEstimate);
      this.writer = new RunLengthByteWriter(writer.createStream(id,
          OrcProto.Stream.Kind.DATA));
      recordPosition(rowIndexPosition);
    }

    @Override
    void write(Object obj) throws IOException {
      super.write(obj, RawDatasizeConst.BYTE_SIZE);
      if (obj != null) {
        byte val = ((ByteObjectInspector) inspector).get(obj);
        indexStatistics.updateInteger(val);
        writer.write(val);
      }
    }

    @Override
    void writeStripe(OrcProto.StripeFooter.Builder builder,
                     int requiredIndexEntries) throws IOException {
      super.writeStripe(builder, requiredIndexEntries);
      writer.flush();
      recordPosition(rowIndexPosition);
    }

    @Override
    void recordPosition(PositionRecorder recorder) throws IOException {
      super.recordPosition(recorder);
      writer.getPosition(recorder);
    }
  }

  private static class IntegerTreeWriter extends TreeWriter {
    private final PositionedOutputStream output;
    private DynamicIntArray rows;
    private final PositionedOutputStream inDictionaryStream;
    private final BitFieldWriter inDictionary;
    private final List<OrcProto.RowIndexEntry> savedRowIndex =
        new ArrayList<OrcProto.RowIndexEntry>();
    private final boolean buildIndex;
    private final List<Long> rowIndexValueCount = new ArrayList<Long>();
    private final float dictionaryKeySizeThreshold;

    private IntDictionaryEncoder dictionary;
    private boolean useDictionaryEncoding = true;
    private final StreamFactory writer;
    private final int numBytes;
    private final Long[] buffer;
    private int bufferIndex = 0;
    private long bufferedBytes = 0;
    private final int recomputeStripeEncodingInterval;
    PositionedOutputStream rowOutput;
    private boolean abandonDictionaries = false;
    private final boolean sortKeys;
    private int dictionarySize;
    private final boolean useStrideDictionaries;

    IntegerTreeWriter(int columnId, ObjectInspector inspector, StreamFactory writerFactory,
        boolean nullable, Configuration conf, boolean useVInts, int numBytes,
        boolean lowMemoryMode, MemoryEstimate memoryEstimate) throws IOException {
      super(columnId, inspector, writerFactory, nullable, conf, useVInts, memoryEstimate);
      writer = writerFactory;
      sortKeys = OrcConf.getBoolVar(conf,
          OrcConf.ConfVars.HIVE_ORC_DICTIONARY_SORT_KEYS);
      useStrideDictionaries = OrcConf.getBoolVar(conf,
          OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY);


      this.numBytes = numBytes;
      recomputeStripeEncodingInterval = OrcConf.getIntVar(conf,
          OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL);

      if (!lowMemoryMode) {
        dictionary = new IntDictionaryEncoder(sortKeys, numBytes, useVInts, memoryEstimate);
        rows = new DynamicIntArray(memoryEstimate);
      } else {
        abandonDictionaries = true;
        rowOutput = writer.createStream(id,
            OrcProto.Stream.Kind.DATA);
        useDictionaryEncoding = false;
      }
      output = writer.createStream(id,
          OrcProto.Stream.Kind.DICTIONARY_DATA);
      inDictionaryStream = writer.createStream(id,
          OrcProto.Stream.Kind.IN_DICTIONARY);
      inDictionary = new BitFieldWriter(inDictionaryStream, 1);

      dictionaryKeySizeThreshold = OrcConf.getFloatVar(conf,
          OrcConf.ConfVars.HIVE_ORC_DICTIONARY_NUMERIC_KEY_SIZE_THRESHOLD);


      int bufferLength = OrcConf.getIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ROW_BUFFER_SIZE);

      buffer = new Long[bufferLength];

      recordPosition(rowIndexPosition);
      rowIndexValueCount.add(0L);
      buildIndex = writer.buildIndex();
      if (buildIndex && lowMemoryMode) {
        rowOutput.getPosition(rowIndexPosition);
      }
    }

    boolean determineEncodingStripe() {
      return (getNumStripes() % recomputeStripeEncodingInterval) == 0 && !abandonDictionaries;
    }
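
    // With recomputeStripeEncodingInterval == N, stripes 0, N, 2N, ... re-evaluate
    // the encoding choice; other stripes carry the previous decision forward (see
    // useCarriedOverDirectEncoding), and abandonDictionaries forces direct encoding.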

    @Override
    void write(Object obj) throws IOException {
      if (obj != null) {
        switch (inspector.getCategory()) {
          case PRIMITIVE:
            switch (((PrimitiveObjectInspector) inspector).getPrimitiveCategory()) {
              case SHORT:
                buffer[bufferIndex++] = Long.valueOf(((ShortObjectInspector) inspector).get(obj));
                setRawDataSize(RawDatasizeConst.SHORT_SIZE);
                break;
              case INT:
                buffer[bufferIndex++] = Long.valueOf(((IntObjectInspector) inspector).get(obj));
                setRawDataSize(RawDatasizeConst.INT_SIZE);
                break;
              case LONG:
                buffer[bufferIndex++] = Long.valueOf(((LongObjectInspector) inspector).get(obj));
                setRawDataSize(RawDatasizeConst.LONG_SIZE);
                break;
              default:
                throw new IllegalArgumentException("Bad Category: Dictionary Encoding not available for " +
                    ((PrimitiveObjectInspector) inspector).getPrimitiveCategory());
            }
            break;
          default:
            throw new IllegalArgumentException("Bad Category: Dictionary Encoding not available for "
                + inspector.getCategory());
        }
        // Increment the total memory for the buffered long
        memoryEstimate.incrementTotalMemory(RawDatasizeConst.LONG_SIZE);
        bufferedBytes += RawDatasizeConst.LONG_SIZE;
      } else {
        buffer[bufferIndex++] = null;
        setRawDataSize(RawDatasizeConst.NULL_SIZE);
      }
      if (bufferIndex == buffer.length) {
        flush();
      }
    }

    @Override
    void flush() throws IOException {
      for (int i = 0; i < bufferIndex; i++) {
        Long val = buffer[i];
        buffer[i] = null;
        if (val != null) {
          if (useCarriedOverDirectEncoding()) {
            SerializationUtils.writeIntegerType(rowOutput,
                val, numBytes, true, useVInts);
          } else {
            rows.add(dictionary.add(val));
          }
          indexStatistics.updateInteger(val);
        }

        super.flushRow(val);
      }
      bufferIndex = 0;
      memoryEstimate.decrementTotalMemory(bufferedBytes);
      bufferedBytes = 0;
    }

    boolean getUseDictionaryEncoding() {
      return (rows.size() > 0 &&
          (float)(dictionary.size()) / (float)rows.size() <= dictionaryKeySizeThreshold);
    }
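
    // Example: 1000 buffered rows containing 200 distinct values gives a ratio of
    // 0.2, so dictionary encoding is kept as long as
    // dictionaryKeySizeThreshold >= 0.2.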

    /**
     * Returns true iff the encoding is not being determined using this stripe, and
     * the previously determined encoding was direct.
     */
    private boolean useCarriedOverDirectEncoding() {
      return !determineEncodingStripe() && !useDictionaryEncoding;
    }

    @Override
    void writeStripe(OrcProto.StripeFooter.Builder builder,
                     int requiredIndexEntries) throws IOException {
      if (determineEncodingStripe()) {
        useDictionaryEncoding = getUseDictionaryEncoding();
      }
      if (useDictionaryEncoding) {
        rowOutput =
          new RunLengthIntegerWriter(writer.createStream(id,
          OrcProto.Stream.Kind.DATA), false, INT_BYTE_SIZE, useVInts);
      } else if (determineEncodingStripe()) {
        rowOutput = writer.createStream(id,
          OrcProto.Stream.Kind.DATA);
      }

      final long[] dumpOrder;
      final int[] counts;
      if (useDictionaryEncoding) {
        // Traverse the dictionary keys writing out the bytes and lengths; and
        // creating the map from the original order to the final sorted order.
        dumpOrder = new long[dictionary.size()];
        counts = new int[dictionary.size()];
        dictionary.visit(new IntDictionaryEncoder.Visitor<Long>() {
          int currentId = 0;
          public void visit(IntDictionaryEncoder.VisitorContext<Long> context) throws IOException {
            int count = context.getCount();
            counts[context.getOriginalPosition()] = count;
            if (!useStrideDictionaries || count > 1) {
              dictionarySize++;
              context.writeBytes(output);
              dumpOrder[context.getOriginalPosition()] = currentId++;
            } else {
              dumpOrder[context.getOriginalPosition()] = dictionary.getValue(
                  context.getOriginalPosition());
            }
          }
        });
      } else {
        dumpOrder = null;
        counts = null;
      }

      if (!useCarriedOverDirectEncoding()) {
        writeData(useDictionaryEncoding, dumpOrder, counts);
      }
      // we need to build the row index before calling super, since super
      // writes it out.
      super.writeStripe(builder, requiredIndexEntries);
      if (useDictionaryEncoding) {
        output.unsuppress();
        inDictionary.flush();
        if (dictionarySize == dictionary.size()) {
          inDictionaryStream.suppress();
        } else {
          inDictionaryStream.unsuppress();
        }
        output.flush();
      } else {
        output.suppress();
        inDictionaryStream.suppress();
      }
      rowOutput.flush();
      savedRowIndex.clear();
      rowIndexValueCount.clear();
      recordPosition(rowIndexPosition);
      dictionarySize = 0;
      if (useCarriedOverDirectEncoding()) {
        rowOutput = writer.createStream(id, OrcProto.Stream.Kind.DATA);
        rowOutput.getPosition(rowIndexPosition);
        if (dictionary != null) {
          dictionary.cleanup();
          dictionary = null;
          rows.cleanup();
          rows = null;
        }
      } else {
        if (dictionary == null) {
          dictionary = new IntDictionaryEncoder(sortKeys, numBytes, useVInts, memoryEstimate);
        } else {
          dictionary.clear();
        }
        if (rows == null) {
          rows = new DynamicIntArray(memoryEstimate);
        } else {
          rows.clear();
        }
      }
      rowIndexValueCount.add(0L);
    }

    private void convertDictionaryToDirect() throws IOException {
      writeData(false, null, null);
    }

    private void writeData(boolean useDictionaryEncoding, long[] dumpOrder, int[] counts)
        throws IOException {
      int length = rows.size();
      int rowIndexEntry = 0;
      OrcProto.RowIndex.Builder rowIndex = getRowIndex();
      // write the values translated into the dump order.
      for(int i = 0; i <= length; ++i) {
        // now that we are writing out the row values, we can finalize the
        // row index
        if (buildIndex) {
          // If we are not using dictionary encoding and savedRowIndex is not empty, this means
          // we are converting from dictionary encoding to direct encoding, so allow rowIndexEntry
          // to exceed the number of saved row indices, to create the index entry for the current
          // stride
          while (rowIndexEntry < rowIndexValueCount.size() &&
              i == rowIndexValueCount.get(rowIndexEntry) &&
              (rowIndexEntry < savedRowIndex.size() ||
                  (!useDictionaryEncoding && rowIndexEntry == savedRowIndex.size()))) {
            OrcProto.RowIndexEntry.Builder base = null;
            RowIndexPositionRecorder recorder;
            if (rowIndexEntry < savedRowIndex.size()) {
              base = savedRowIndex.get(rowIndexEntry).toBuilder();
              recorder = new RowIndexPositionRecorder(base);
            } else {
              recorder = rowIndexPosition;
            }
            if (useDictionaryEncoding && dumpOrder != null &&
                dictionarySize != dictionary.size()) {
              inDictionary.getPosition(recorder);
            }
            rowOutput.getPosition(recorder);
            if (rowIndexEntry < savedRowIndex.size()) {
              // If we are constructing an index entry from a saved row index, add it
              rowIndex.addEntry(base.build());
            }
            rowIndexEntry++;
          }
        }

        if (i < length) {
          if (useDictionaryEncoding && dumpOrder != null) {
            ((RunLengthIntegerWriter) rowOutput).write(dumpOrder[rows.get(i)]);
            if (!useStrideDictionaries || counts[rows.get(i)] > 1) {
              inDictionary.write(1);
            } else {
              inDictionary.write(0);
            }
          } else {
            SerializationUtils.writeIntegerType(rowOutput,
                dictionary.getValue(rows.get(i)), numBytes, true, useVInts);
          }
        }
      }
    }

    @Override
    OrcProto.ColumnEncoding getEncoding() {
      if (useDictionaryEncoding) {
        return OrcProto.ColumnEncoding.newBuilder().setKind(
            OrcProto.ColumnEncoding.Kind.DICTIONARY).
            setDictionarySize(dictionarySize).build();
      } else {
        return OrcProto.ColumnEncoding.newBuilder().setKind(
            OrcProto.ColumnEncoding.Kind.DIRECT).build();
      }
    }

    /**
     * This method doesn't call the super method, because unlike most of the
     * other TreeWriters, this one can't record the position in the streams
     * until the stripe is being flushed. Therefore it saves all of the entries
     * and augments them with the final information as the stripe is written.
     * @throws IOException
     */
    @Override
    void createRowIndexEntry() throws IOException {
      getFileStatistics().merge(indexStatistics);
      OrcProto.RowIndexEntry.Builder rowIndexEntry = getRowIndexEntry();
      rowIndexEntry.setStatistics(indexStatistics.serialize());
      indexStatistics.reset();
      if (useCarriedOverDirectEncoding()) {
        getRowIndex().addEntry(rowIndexEntry);
      } else {
        savedRowIndex.add(rowIndexEntry.build());
      }
      rowIndexEntry.clear();
      recordPosition(rowIndexPosition);
      if (useCarriedOverDirectEncoding()) {
        rowOutput.getPosition(rowIndexPosition);
      } else {
        rowIndexValueCount.add(Long.valueOf(rows.size()));
      }
    }

    @Override
    public void abandonDictionaries() throws IOException {
      boolean useCarriedOverDirectEncoding = useCarriedOverDirectEncoding();
      abandonDictionaries = true;
      if (!useCarriedOverDirectEncoding) {
        rowOutput = writer.createStream(id, OrcProto.Stream.Kind.DATA);
        useDictionaryEncoding = false;
        convertDictionaryToDirect();
        if (rows.size() == 0) {
          rowOutput.getPosition(rowIndexPosition);
        }
      }
      if (dictionary != null) {
        dictionary.cleanup();
        dictionary = null;
        rows.cleanup();
        rows = null;
      }
      savedRowIndex.clear();
    }
  }

  private static class FloatTreeWriter extends TreeWriter {
    private final PositionedOutputStream stream;

    FloatTreeWriter(int columnId,
                      ObjectInspector inspector,
                      StreamFactory writer,
                      boolean nullable, Configuration conf,
                      boolean useVInts, boolean lowMemoryMode,
                      MemoryEstimate memoryEstimate) throws IOException {
      super(columnId, inspector, writer, nullable, conf, useVInts, memoryEstimate);
      this.stream = writer.createStream(id,
          OrcProto.Stream.Kind.DATA);
      recordPosition(rowIndexPosition);
    }

    @Override
    void write(Object obj) throws IOException {
      super.write(obj, RawDatasizeConst.FLOAT_SIZE);
      if (obj != null) {
        float val = ((FloatObjectInspector) inspector).get(obj);
        indexStatistics.updateDouble(val);
        SerializationUtils.writeFloat(stream, val);
      }
    }

    @Override
    void writeStripe(OrcProto.StripeFooter.Builder builder,
                     int requiredIndexEntries) throws IOException {
      super.writeStripe(builder, requiredIndexEntries);
      stream.flush();
      recordPosition(rowIndexPosition);
    }

    @Override
    void recordPosition(PositionRecorder recorder) throws IOException {
      super.recordPosition(recorder);
      stream.getPosition(recorder);
    }
  }

  private static class DoubleTreeWriter extends TreeWriter {
    private final PositionedOutputStream stream;

    DoubleTreeWriter(int columnId,
                    ObjectInspector inspector,
                    StreamFactory writer,
                    boolean nullable, Configuration conf,
                    boolean useVInts, boolean lowMemoryMode,
                    MemoryEstimate memoryEstimate) throws IOException {
      super(columnId, inspector, writer, nullable, conf, useVInts, memoryEstimate);
      this.stream = writer.createStream(id,
          OrcProto.Stream.Kind.DATA);
      recordPosition(rowIndexPosition);
    }

    @Override
    void write(Object obj) throws IOException {
      super.write(obj, RawDatasizeConst.DOUBLE_SIZE);
      if (obj != null) {
        double val = ((DoubleObjectInspector) inspector).get(obj);
        indexStatistics.updateDouble(val);
        SerializationUtils.writeDouble(stream, val);
      }
    }

    @Override
    void writeStripe(OrcProto.StripeFooter.Builder builder,
                     int requiredIndexEntries) throws IOException {
      super.writeStripe(builder, requiredIndexEntries);
      stream.flush();
      recordPosition(rowIndexPosition);
    }

    @Override
    void recordPosition(PositionRecorder recorder) throws IOException {
      super.recordPosition(recorder);
      stream.getPosition(recorder);
    }
  }

  private static class StringTreeWriter extends TreeWriter {
    private final PositionedOutputStream stringOutput;
    private final RunLengthIntegerWriter lengthOutput;
    private final PositionedOutputStream inDictionaryStream;
    private final BitFieldWriter inDictionary;
    private StringDictionaryEncoder dictionary;
    private DynamicIntArray rows;
    private final RunLengthIntegerWriter directLengthOutput;
    private final RunLengthIntegerWriter strideDictionaryLengthOutput;
    private final List<OrcProto.RowIndexEntry> savedRowIndex =
        new ArrayList<OrcProto.RowIndexEntry>();
    private final boolean buildIndex;
    private final List<Long> rowIndexValueCount = new ArrayList<Long>();
    private final StreamFactory writer;
    // If the number of keys in a dictionary is greater than this fraction of the total number of
    // non-null rows, turn off dictionary encoding
    private final float dictionaryKeySizeThreshold;
    // If the number of keys in a dictionary is greater than this fraction of the total number of
    // non-null rows, don't use the estimated entropy heuristic to turn off dictionary encoding
    private final float entropyKeySizeThreshold;
    private final int entropyMinSamples;
    private final float entropyDictSampleFraction;
    private final int entropyThreshold;

    private boolean useDictionaryEncoding = true;
    private final boolean useStrideDictionaries;
    private final boolean sortKeys;

    private final Text[] buffer;
    private int bufferIndex = 0;
    private long bufferedBytes = 0;
    private final int recomputeStripeEncodingInterval;
    private PositionedOutputStream rowOutput;
    private final PositionedOutputStream strideDictionaryOutput;
    private boolean abandonDictionaries = false;
    private int dictionarySize;

    StringTreeWriter(int columnId,
                     ObjectInspector inspector,
                     StreamFactory writerFactory,
                     boolean nullable, Configuration conf,
                     boolean useVInts, boolean lowMemoryMode,
                     MemoryEstimate memoryEstimate) throws IOException {
      super(columnId, inspector, writerFactory, nullable, conf, useVInts, memoryEstimate);
      writer = writerFactory;
      sortKeys = OrcConf.getBoolVar(conf,
          OrcConf.ConfVars.HIVE_ORC_DICTIONARY_SORT_KEYS);
      useStrideDictionaries = OrcConf.getBoolVar(conf,
          OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY);
      recomputeStripeEncodingInterval = OrcConf.getIntVar(conf,
          OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL);

      if (!lowMemoryMode) {
        dictionary = new StringDictionaryEncoder(sortKeys, useStrideDictionaries, memoryEstimate);
        rows = new DynamicIntArray(memoryEstimate);
      } else {
        abandonDictionaries = true;
        rowOutput = writer.createStream(id,
            OrcProto.Stream.Kind.DATA);
        useDictionaryEncoding = false;
      }
      stringOutput = writer.createStream(id,
          OrcProto.Stream.Kind.DICTIONARY_DATA);
      lengthOutput = new RunLengthIntegerWriter(writer.createStream(id,
          OrcProto.Stream.Kind.LENGTH), false, INT_BYTE_SIZE, useVInts);
      inDictionaryStream = writer.createStream(id, OrcProto.Stream.Kind.IN_DICTIONARY);
      inDictionary = new BitFieldWriter(inDictionaryStream, 1);
      strideDictionaryLengthOutput = new RunLengthIntegerWriter(writer.createStream(id,
          OrcProto.Stream.Kind.STRIDE_DICTIONARY_LENGTH), false, INT_BYTE_SIZE, useVInts);
      strideDictionaryOutput = writer.createStream(id,
          OrcProto.Stream.Kind.STRIDE_DICTIONARY);
      directLengthOutput = new RunLengthIntegerWriter(writer.createStream(id,
          OrcProto.Stream.Kind.LENGTH), false, INT_BYTE_SIZE, useVInts);
      dictionaryKeySizeThreshold = OrcConf.getFloatVar(conf,
          OrcConf.ConfVars.HIVE_ORC_DICTIONARY_STRING_KEY_SIZE_THRESHOLD);
      entropyKeySizeThreshold = OrcConf.getFloatVar(conf,
          OrcConf.ConfVars.HIVE_ORC_ENTROPY_KEY_STRING_SIZE_THRESHOLD);
      entropyMinSamples = OrcConf.getIntVar(conf,
          OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_MIN_SAMPLES);
      entropyDictSampleFraction = OrcConf.getFloatVar(conf,
          OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_DICT_SAMPLE_FRACTION);
      entropyThreshold = OrcConf.getIntVar(conf,
          OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD);

      int bufferLength = OrcConf.getIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ROW_BUFFER_SIZE);
      buffer = new Text[bufferLength];

      recordPosition(rowIndexPosition);
      rowIndexValueCount.add(0L);
      buildIndex = writer.buildIndex();
      if (buildIndex && lowMemoryMode) {
        rowOutput.getPosition(rowIndexPosition);
        directLengthOutput.getPosition(rowIndexPosition);
      }
    }

    boolean determineEncodingStripe() {
      return (getNumStripes() % recomputeStripeEncodingInterval) == 0 && !abandonDictionaries;
    }

    @Override
    void write(Object obj) throws IOException {
      if (obj != null) {
        Text val = ((StringObjectInspector) inspector).getPrimitiveWritableObject(obj);
        buffer[bufferIndex++] = new Text(val);
        setRawDataSize(val.getLength());
        // Increment the memory estimate by the buffered bytes
        memoryEstimate.incrementTotalMemory(val.getLength());
        bufferedBytes += val.getLength();
      } else {
        buffer[bufferIndex++] = null;
        setRawDataSize(RawDatasizeConst.NULL_SIZE);
      }
      if (bufferIndex == buffer.length) {
        flush();
      }
    }

    @Override
    void flush() throws IOException {
      for (int i = 0; i < bufferIndex; i++) {
        Text val = buffer[i];
        // Make sure we don't end up storing it twice
        buffer[i] = null;
        if (val != null) {
          indexStatistics.updateString(val.toString());
          if (useCarriedOverDirectEncoding()) {
            rowOutput.write(val.getBytes());
            directLengthOutput.write(val.getLength());
          } else {
            rows.add(dictionary.add(val, savedRowIndex.size()));
          }
        }
        super.flushRow(val);
      }
      bufferIndex = 0;
      memoryEstimate.decrementTotalMemory(bufferedBytes);
      bufferedBytes = 0;
    }

    private boolean isEntropyThresholdExceeded(Set<Character> chars, Text text, int index) {
      dictionary.getText(text, index);
      for (char character : text.toString().toCharArray()) {
        chars.add(character);
      }
      return chars.size() > entropyThreshold;
    }

    private int[] getSampleIndicesForEntropy() {
      int numSamples = Math.max(entropyMinSamples,
          (int)(entropyDictSampleFraction * dictionary.size()));
      int[] indices = new int[dictionary.size()];
      int[] samples = new int[numSamples];
      Random rand = new Random();

      // The goal of this loop is to select numSamples distinct indices of the
      // dictionary.
      //
      // It works as follows: start with an array of zeros. On each iteration,
      // pick a random number in the range 0 to (dictionary size - 1 - the
      // number of previous iterations); the effective size of the array thus
      // shrinks by one per iteration (logically, not physically). Look at the
      // value of the array at that random index: if it is 0, the sample index
      // is the random index itself, since this position has never been
      // visited; if it is nonzero, that stored value is the sample index.
      // Then take the value at the end of the logical array (dictionary size
      // - 1 - number of iterations): if it is 0, store that index at the
      // random position, otherwise store its nonzero value. Shrinking the
      // logical array and moving its last value into the slot just used
      // removes indices that have been visited without losing any others.
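      //
      // Worked example (dictionary.size() == 4, numSamples == 2):
      //   start: indices == [0, 0, 0, 0]
      //   i == 0, rand picks 1: indices[1] == 0, so samples[0] = 1;
      //     the last logical slot (3) is moved in: indices[1] = 3
      //   i == 1, rand picks 1 again: indices[1] == 3, so samples[1] = 3;
      //     indices[1] = 2
      //   result: samples == {1, 3}, distinct despite drawing slot 1 twice.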
      for (int i = 0; i < numSamples; i++) {
        int index = rand.nextInt(dictionary.size() - i);
        if (indices[index] == 0) {
          samples[i] = index;
        } else {
          samples[i] = indices[index];
        }
        indices[index] = indices[dictionary.size() - i - 1] == 0 ?
            dictionary.size() - i - 1 : indices[dictionary.size() - i - 1];
      }

      return samples;
    }

    private boolean useDictionaryEncodingEntropyHeuristic() {
      Set<Character> chars = new HashSet<Character>();
      Text text = new Text();
      if (dictionary.size() > entropyMinSamples) {

        int[] samples = getSampleIndicesForEntropy();

        for (int sampleIndex : samples) {
          if (isEntropyThresholdExceeded(chars, text, sampleIndex)) {
            return true;
          }
        }
      } else {
        for (int i = 0; i < dictionary.size(); i++) {
          if (isEntropyThresholdExceeded(chars, text, i)) {
            return true;
          }
        }
      }

      return false;
    }

    /**
     * Returns true iff the encoding is not being determined using this stripe, and
     * the previously determined encoding was direct.
     */
    private boolean useCarriedOverDirectEncoding() {
      return !determineEncodingStripe() && !useDictionaryEncoding;
    }

    @Override
    void writeStripe(OrcProto.StripeFooter.Builder builder,
                     int requiredIndexEntries) throws IOException {

      if (determineEncodingStripe()) {
        // Set the flag indicating whether or not to use dictionary encoding based on whether
        // or not the fraction of distinct keys over the number of non-null rows is less than
        // the configured threshold, and whether or not the number of distinct characters in a
        // sample of dictionary entries (the estimated entropy) exceeds the configured threshold
        if (rows.size() > 0) {
          useDictionaryEncoding = true;
          // The fraction of non-null values in this column that are repeats of values in the
          // dictionary
          float repeatedValuesFraction =
            (float)(rows.size() - dictionary.size()) / (float)rows.size();

          // If the number of repeated values is small enough, consider using the entropy heuristic
          // If the number of repeated values is high, even in the presence of low entropy,
          // dictionary encoding can provide benefits beyond just zlib
          if (repeatedValuesFraction <= entropyKeySizeThreshold) {
            useDictionaryEncoding = useDictionaryEncodingEntropyHeuristic();
          }

          // dictionaryKeySizeThreshold is the fraction of keys that are distinct beyond
          // which dictionary encoding is turned off,
          // so 1 - dictionaryKeySizeThreshold is the fraction of repeated values below which
          // dictionary encoding should be turned off
          useDictionaryEncoding = useDictionaryEncoding &&
              (repeatedValuesFraction > 1.0 - dictionaryKeySizeThreshold);
        }
      }
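
      // Example: 1000 non-null rows with 400 distinct keys gives
      // repeatedValuesFraction == 0.6. With entropyKeySizeThreshold == 0.5 the
      // entropy heuristic is skipped, and dictionary encoding survives only if
      // 0.6 > 1.0 - dictionaryKeySizeThreshold.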

      if (useDictionaryEncoding) {
        rowOutput = new RunLengthIntegerWriter(writer.createStream(id,
            OrcProto.Stream.Kind.DATA), false, INT_BYTE_SIZE, useVInts);
      } else if (determineEncodingStripe()) {
        rowOutput = writer.createStream(id,
            OrcProto.Stream.Kind.DATA);
      }

      final int[] dumpOrder;
      final int[] counts;
      final int[] strideDictionarySizes;
      final boolean[] strideDictionaryIndexPopulated;

      if (useDictionaryEncoding) {
        dumpOrder = new int[dictionary.size()];
        counts = new int[dictionary.size()];
        strideDictionarySizes = new int[savedRowIndex.size()];
        strideDictionaryIndexPopulated = new boolean[savedRowIndex.size()];
        OrcProto.RowIndexEntry.Builder base = savedRowIndex.get(0).toBuilder();
        PositionRecorder recorder = new RowIndexPositionRecorder(base);
        strideDictionaryOutput.getPosition(recorder);
        strideDictionaryLengthOutput.getPosition(recorder);
        savedRowIndex.set(0, base.build());
        strideDictionaryIndexPopulated[0] = true;

        // Traverse the red-black tree writing out the bytes and lengths; and
        // creating the map from the original order to the final sorted order.
        dictionary.visit(new StringDictionaryEncoder.Visitor<Text>() {
          private int currentId = 0;
          private int directId = 0;
          private int previousIndex = 0;
          @Override
          public void visit(StringDictionaryEncoder.VisitorContext<Text> context
                           ) throws IOException {
            counts[context.getOriginalPosition()] = context.getCount();
            if (!useStrideDictionaries || context.getCount() > 1) {
              dictionarySize++;
              context.writeBytes(stringOutput);
              lengthOutput.write(context.getLength());
              dumpOrder[context.getOriginalPosition()] = currentId++;
            } else {
              int nextIndex = context.getIndexStride();
              if (nextIndex != previousIndex) {
                for (int i = previousIndex; i < nextIndex; i++) {
                  OrcProto.RowIndexEntry.Builder base = savedRowIndex.get(i + 1).toBuilder();
                  PositionRecorder recorder = new RowIndexPositionRecorder(base);
                  strideDictionaryOutput.getPosition(recorder);
                  strideDictionaryLengthOutput.getPosition(recorder);
                  savedRowIndex.set(i + 1, base.build());
                  strideDictionaryIndexPopulated[i + 1] = true;
                }

                previousIndex = context.getIndexStride();
                directId = 0;
              }
              context.writeBytes(strideDictionaryOutput);
              strideDictionarySizes[previousIndex]++;
              strideDictionaryLengthOutput.write(context.getLength());
              dumpOrder[context.getOriginalPosition()] = directId++;
            }
          }
        });
      } else {
        dumpOrder = null;
        counts = null;
        strideDictionarySizes = null;
        strideDictionaryIndexPopulated = null;
      }

      if (!useCarriedOverDirectEncoding()) {
        writeData(useDictionaryEncoding, dumpOrder, counts, strideDictionarySizes,
            strideDictionaryIndexPopulated);
      }
      // we need to build the row index before calling super, since super
      // writes it out.
      super.writeStripe(builder, requiredIndexEntries);
      rowOutput.flush();
      if (useDictionaryEncoding) {
        stringOutput.unsuppress();
        lengthOutput.unsuppress();
        inDictionary.flush();
        strideDictionaryOutput.flush();
        strideDictionaryLengthOutput.flush();
        if (dictionarySize == dictionary.size()) {
          inDictionaryStream.suppress();
          strideDictionaryOutput.suppress();
          strideDictionaryLengthOutput.suppress();
        } else {
          inDictionaryStream.unsuppress();
          strideDictionaryOutput.unsuppress();
          strideDictionaryLengthOutput.unsuppress();
        }
        directLengthOutput.suppress();
        stringOutput.flush();
        lengthOutput.flush();
      } else {
        directLengthOutput.unsuppress();
        stringOutput.suppress();
        lengthOutput.suppress();
        inDictionaryStream.suppress();
        strideDictionaryOutput.suppress();
        strideDictionaryLengthOutput.suppress();
        directLengthOutput.flush();
      }
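
      // Editorial note: if every dictionary entry was used more than once,
      // dictionarySize equals dictionary.size(), the stride dictionaries are
      // empty, and the IN_DICTIONARY and stride dictionary streams carry no
      // information, which is why they are suppressed above in that case.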

      // reset all of the fields to be ready for the next stripe.
      savedRowIndex.clear();
      rowIndexValueCount.clear();
      recordPosition(rowIndexPosition);
      dictionarySize = 0;
      if (useCarriedOverDirectEncoding()) {
        rowOutput = writer.createStream(id, OrcProto.Stream.Kind.DATA);
        rowOutput.getPosition(rowIndexPosition);
        directLengthOutput.getPosition(rowIndexPosition);
        if (dictionary != null) {
          dictionary.cleanup();
          dictionary = null;
          rows.cleanup();
          rows = null;
        }
      } else {
        if (dictionary == null) {
          dictionary = new StringDictionaryEncoder(sortKeys, useStrideDictionaries, memoryEstimate);
        } else {
          dictionary.clear();
        }
        if (rows == null) {
          rows = new DynamicIntArray(memoryEstimate);
        } else {
          rows.clear();
        }
      }
      rowIndexValueCount.add(0L);

    }

    private void convertDictionaryToDirect() throws IOException {
      // If this is still in the first index stride, savedRowIndex is empty, so we need to
      // explicitly record the positions for the first index stride
      writeData(false, null, null, null, null);
    }
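
    // Editorial note: passing useDictionaryEncoding = false above makes
    // writeData replay every buffered row id through dictionary.getText(),
    // re-emitting the original string bytes and lengths on the direct
    // streams instead of writing dictionary ids.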

    private void writeData(boolean useDictionaryEncoding, int[] dumpOrder, int[] counts,
        int[] strideDictionarySizes, boolean[] strideDictionaryIndexPopulated) throws IOException {
      int rowIndexEntry = 0;
      OrcProto.RowIndex.Builder rowIndex = getRowIndex();
      int length = rows.size();
      Text text = new Text();
      for(int i = 0; i <= length; ++i) {
        // now that we are writing out the row values, we can finalize the
        // row index
        if (buildIndex) {
          // If we are not using dictionary encoding and savedRowIndex is not
          // empty, we are converting from dictionary encoding to direct
          // encoding, so allow rowIndexEntry to exceed the number of saved
          // row indices in order to create the index entry for the current
          // stride.
          while (rowIndexEntry < rowIndexValueCount.size() &&
              i == rowIndexValueCount.get(rowIndexEntry) &&
              (rowIndexEntry < savedRowIndex.size() ||
                  (!useDictionaryEncoding && rowIndexEntry == savedRowIndex.size()))) {
            OrcProto.RowIndexEntry.Builder base = null;
            RowIndexPositionRecorder recorder;
            if (rowIndexEntry < savedRowIndex.size()) {
              base = savedRowIndex.get(rowIndexEntry).toBuilder();
              recorder = new RowIndexPositionRecorder(base);
            } else {
              recorder = rowIndexPosition;
            }

            if (useStrideDictionaries && useDictionaryEncoding &&
                !strideDictionaryIndexPopulated[rowIndexEntry] &&
                dictionary.size() != dictionarySize) {
              strideDictionaryOutput.getPosition(recorder);
              strideDictionaryLengthOutput.getPosition(recorder);
            }
            if (useStrideDictionaries && strideDictionarySizes != null &&
                dictionary.size() != dictionarySize) {
              base.addPositions(strideDictionarySizes[rowIndexEntry]);
            }
            recordOutputPosition(rowOutput, recorder);
            if (rowIndexEntry < savedRowIndex.size()) {
              // If we are constructing an index entry from a saved row index, add it
              rowIndex.addEntry(base.build());
            }
            rowIndexEntry++;
          }
        }
        if (i != length) {
          if (useDictionaryEncoding) {
            rowOutput.write(dumpOrder[rows.get(i)]);
            if (!useStrideDictionaries || counts[rows.get(i)] > 1) {
              inDictionary.write(1);
            } else {
              inDictionary.write(0);
            }
          } else {
            dictionary.getText(text, rows.get(i));
            rowOutput.write(text.getBytes(), 0, text.getLength());
            directLengthOutput.write(text.getLength());
          }
        }
      }
    }
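
    // Illustrative example (editorial, assuming a 10,000-row index stride):
    // after buffering 25,000 rows, rowIndexValueCount holds [0, 10000, 20000]
    // and each saved index entry is finalized with real stream positions when
    // the loop reaches the matching row. The loop deliberately runs to
    // i == length so an entry recorded exactly at the end of the buffered
    // rows is finalized as well.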

    // Records the position of the row output stream, plus the direct length
    // stream when direct encoding is used, or the in-dictionary stream when
    // dictionary encoding is used and some values fell out of the dictionary
    private void recordOutputPosition(PositionedOutputStream rowOutput,
        RowIndexPositionRecorder recorder) throws IOException {
      rowOutput.getPosition(recorder);
      if (!useDictionaryEncoding) {
        directLengthOutput.getPosition(recorder);
      } else if (dictionary.size() != dictionarySize) {
        inDictionary.getPosition(recorder);
      }
    }

    @Override
    OrcProto.ColumnEncoding getEncoding() {
      // Returns the encoding used for the last call to writeStripe
      if (useDictionaryEncoding) {
        return OrcProto.ColumnEncoding.newBuilder().setKind(
            OrcProto.ColumnEncoding.Kind.DICTIONARY).
            setDictionarySize(dictionarySize).build();
      } else {
        return OrcProto.ColumnEncoding.newBuilder().setKind(
            OrcProto.ColumnEncoding.Kind.DIRECT).build();
      }
    }

    /**
     * This method doesn't call the super method, because unlike most of the
     * other TreeWriters, this one can't record the position in the streams
     * until the stripe is being flushed. Therefore it saves all of the entries
     * and augments them with the final information as the stripe is written.
     * @throws IOException
     */
    @Override
    void createRowIndexEntry() throws IOException {
      getFileStatistics().merge(indexStatistics);
      OrcProto.RowIndexEntry.Builder rowIndexEntry = getRowIndexEntry();
      rowIndexEntry.setStatistics(indexStatistics.serialize());
      indexStatistics.reset();
      if (useCarriedOverDirectEncoding()) {
        getRowIndex().addEntry(rowIndexEntry);
      } else {
        savedRowIndex.add(rowIndexEntry.build());
      }
      rowIndexEntry.clear();
      recordPosition(rowIndexPosition);
      if (useCarriedOverDirectEncoding()) {
        rowOutput.getPosition(rowIndexPosition);
        directLengthOutput.getPosition(rowIndexPosition);
      } else {
        rowIndexValueCount.add(Long.valueOf(rows.size()));
      }
    }

    @Override
    public void abandonDictionaries() throws IOException {
      boolean useCarriedOverDirectEncoding = useCarriedOverDirectEncoding();
      abandonDictionaries = true;
      if (!useCarriedOverDirectEncoding) {
        rowOutput = writer.createStream(id,
            OrcProto.Stream.Kind.DATA);
        useDictionaryEncoding = false;
        convertDictionaryToDirect();
        if (rows.size() == 0) {
          rowOutput.getPosition(rowIndexPosition);
          directLengthOutput.getPosition(rowIndexPosition);
        }
      }
      if (dictionary != null) {
        dictionary.cleanup();
        dictionary = null;
        rows.cleanup();
        rows = null;
      }
      savedRowIndex.clear();
    }
  }

  private static class BinaryTreeWriter extends TreeWriter {
    private final PositionedOutputStream stream;
    private final RunLengthIntegerWriter length;

    BinaryTreeWriter(int columnId,
                     ObjectInspector inspector,
                     StreamFactory writer,
                     boolean nullable, Configuration conf,
                     boolean useVInts, boolean lowMemoryMode,
                     MemoryEstimate memoryEstimate) throws IOException {
      super(columnId, inspector, writer, nullable, conf, useVInts, memoryEstimate);
      this.stream = writer.createStream(id,
          OrcProto.Stream.Kind.DATA);
      this.length = new RunLengthIntegerWriter(writer.createStream(id,
          OrcProto.Stream.Kind.LENGTH), false, INT_BYTE_SIZE, useVInts);
      recordPosition(rowIndexPosition);
    }

    @Override
    void write(Object obj) throws IOException {
      long rawDataSize = 0;
      if (obj != null) {
        BytesWritable val =
            ((BinaryObjectInspector) inspector).getPrimitiveWritableObject(obj);
        stream.write(val.getBytes(), 0, val.getLength());
        length.write(val.getLength());

        // Raw data size is the length of the BytesWritable, i.e. the number of bytes
        rawDataSize = val.getLength();
      }
      super.write(obj, rawDataSize);
    }

    @Override
    void writeStripe(OrcProto.StripeFooter.Builder builder,
                     int requiredIndexEntries) throws IOException {
      super.writeStripe(builder, requiredIndexEntries);
      stream.flush();
      length.flush();
      recordPosition(rowIndexPosition);
    }

    @Override
    void recordPosition(PositionRecorder recorder) throws IOException {
      super.recordPosition(recorder);
      stream.getPosition(recorder);
      length.getPosition(recorder);
    }
  }

  public static final int MILLIS_PER_SECOND = 1000;
  public static final long BASE_TIMESTAMP =
      Timestamp.valueOf("2015-01-01 00:00:00").getTime() / MILLIS_PER_SECOND;
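
  // Editorial note: seconds are written as deltas from BASE_TIMESTAMP so
  // that timestamps near the base encode as small, cheap vints. For example,
  // "2015-01-02 00:00:00" (in the writer's local time zone) is stored as
  // 86400, one day of seconds past the base.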

  private static class TimestampTreeWriter extends TreeWriter {
    private final RunLengthIntegerWriter seconds;
    private final RunLengthIntegerWriter nanos;

    TimestampTreeWriter(int columnId,
                     ObjectInspector inspector,
                     StreamFactory writer,
                     boolean nullable, Configuration conf,
                     boolean useVInts, boolean lowMemoryMode,
                     MemoryEstimate memoryEstimate) throws IOException {
      super(columnId, inspector, writer, nullable, conf, useVInts, memoryEstimate);
      this.seconds = new RunLengthIntegerWriter(writer.createStream(id,
          OrcProto.Stream.Kind.DATA), true, LONG_BYTE_SIZE, useVInts);
      this.nanos = new RunLengthIntegerWriter(writer.createStream(id,
          OrcProto.Stream.Kind.NANO_DATA), false, LONG_BYTE_SIZE, useVInts);
      recordPosition(rowIndexPosition);
    }

    @Override
    void write(Object obj) throws IOException {
      // Raw data size is:
      //   the number of bytes needed to store the milliseconds since the epoch
      //   (8 since it's a long)
      //   +
      //   the number of bytes needed to store the nanos field (4 since it's an int)
      super.write(obj, RawDatasizeConst.TIMESTAMP_SIZE);
      if (obj != null) {
        Timestamp val =
            ((TimestampObjectInspector) inspector).
                getPrimitiveJavaObject(obj);
        seconds.write((val.getTime() / MILLIS_PER_SECOND) - BASE_TIMESTAMP);
        nanos.write(formatNanos(val.getNanos()));
      }
    }

    @Override
    void writeStripe(OrcProto.StripeFooter.Builder builder,
                     int requiredIndexEntries) throws IOException {
      super.writeStripe(builder, requiredIndexEntries);
      seconds.flush();
      nanos.flush();
      recordPosition(rowIndexPosition);
    }

    private static long formatNanos(int nanos) {
      if (nanos == 0) {
        return 0;
      } else if (nanos % 100 != 0) {
        return ((long) nanos) << 3;
      } else {
        nanos /= 100;
        int trailingZeros = 1;
        while (nanos % 10 == 0 && trailingZeros < 7) {
          nanos /= 10;
          trailingZeros += 1;
        }
        return ((long) nanos) << 3 | trailingZeros;
      }
    }
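
    // Worked example (editorial): formatNanos packs a count of stripped
    // trailing zeros into the low 3 bits. nanos = 1000 is divided by 100 and
    // then by 10, leaving 1 with trailingZeros = 2, so (1 << 3) | 2 = 10 is
    // written; nanos = 123 is not divisible by 100 and is written unchanged
    // as 123 << 3 = 984. The matching reader reverses this transformation.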

    @Override
    void recordPosition(PositionRecorder recorder) throws IOException {
      super.recordPosition(recorder);
      seconds.getPosition(recorder);
      nanos.getPosition(recorder);
    }
  }

  private static class StructTreeWriter extends TreeWriter {
    private final List<? extends StructField> fields;
    StructTreeWriter(int columnId,
                     ObjectInspector inspector,
                     StreamFactory writer,
                     boolean nullable, Configuration conf,
                     boolean useVInts, boolean lowMemoryMode,
                     MemoryEstimate memoryEstimate) throws IOException {
      super(columnId, inspector, writer, nullable, conf, useVInts, memoryEstimate);
      StructObjectInspector structObjectInspector =
        (StructObjectInspector) inspector;
      fields = structObjectInspector.getAllStructFieldRefs();
      childrenWriters = new TreeWriter[fields.size()];
      for(int i=0; i < childrenWriters.length; ++i) {
        childrenWriters[i] = createTreeWriter(
          fields.get(i).getFieldObjectInspector(), writer, true, conf, useVInts,
          lowMemoryMode, memoryEstimate);
      }
      recordPosition(rowIndexPosition);
    }

    @Override
    void write(Object obj) throws IOException {
      long rawDataSize = 0;
      if (obj != null) {
        StructObjectInspector insp = (StructObjectInspector) inspector;
        List<Object> fieldDataList = insp.getStructFieldsDataAsList(obj);

        for(int i = 0; i < fields.size(); ++i) {
          TreeWriter writer = childrenWriters[i];
          writer.write(fieldDataList.get(i));
          rawDataSize += writer.getRowRawDataSize();
        }
      }
      super.write(obj, rawDataSize);
    }

    @Override
    void writeStripe(OrcProto.StripeFooter.Builder builder,
                     int requiredIndexEntries) throws IOException {
      super.writeStripe(builder, requiredIndexEntries);
      for(TreeWriter child: childrenWriters) {
        child.writeStripe(builder, requiredIndexEntries);
      }
      recordPosition(rowIndexPosition);
    }
  }

  private static class ListTreeWriter extends TreeWriter {
    private final RunLengthIntegerWriter lengths;

    ListTreeWriter(int columnId,
                   ObjectInspector inspector,
                   StreamFactory writer,
                   boolean nullable, Configuration conf,
                   boolean useVInts, boolean lowMemoryMode,
                   MemoryEstimate memoryEstimate) throws IOException {
      super(columnId, inspector, writer, nullable, conf, useVInts, memoryEstimate);
      ListObjectInspector listObjectInspector = (ListObjectInspector) inspector;
      childrenWriters = new TreeWriter[1];
      childrenWriters[0] =
        createTreeWriter(listObjectInspector.getListElementObjectInspector(),
          writer, true, conf, useVInts, lowMemoryMode, memoryEstimate);
      lengths =
        new RunLengthIntegerWriter(writer.createStream(columnId,
            OrcProto.Stream.Kind.LENGTH), false, INT_BYTE_SIZE, useVInts);
      recordPosition(rowIndexPosition);
    }

    @Override
    void write(Object obj) throws IOException {
      long rawDataSize = 0;
      if (obj != null) {
        ListObjectInspector insp = (ListObjectInspector) inspector;
        int len = insp.getListLength(obj);
        lengths.write(len);
        for(int i=0; i < len; ++i) {
          childrenWriters[0].write(insp.getListElement(obj, i));
          rawDataSize += childrenWriters[0].getRowRawDataSize();
        }
      }
      super.write(obj, rawDataSize);
    }

    @Override
    void writeStripe(OrcProto.StripeFooter.Builder builder,
                     int requiredIndexEntries) throws IOException {
      super.writeStripe(builder, requiredIndexEntries);
      lengths.flush();
      for(TreeWriter child: childrenWriters) {
        child.writeStripe(builder, requiredIndexEntries);
      }
      recordPosition(rowIndexPosition);
    }

    @Override
    void recordPosition(PositionRecorder recorder) throws IOException {
      super.recordPosition(recorder);
      lengths.getPosition(recorder);
    }
  }

  private static class MapTreeWriter extends TreeWriter {
    private final RunLengthIntegerWriter lengths;

    MapTreeWriter(int columnId,
                  ObjectInspector inspector,
                  StreamFactory writer,
                  boolean nullable, Configuration conf,
                  boolean useVInts, boolean lowMemoryMode,
                  MemoryEstimate memoryEstimate) throws IOException {
      super(columnId, inspector, writer, nullable, conf, useVInts, memoryEstimate);
      MapObjectInspector insp = (MapObjectInspector) inspector;
      childrenWriters = new TreeWriter[2];
      childrenWriters[0] =
        createTreeWriter(insp.getMapKeyObjectInspector(), writer, true, conf, useVInts,
            lowMemoryMode, memoryEstimate);
      childrenWriters[1] =
        createTreeWriter(insp.getMapValueObjectInspector(), writer, true, conf, useVInts,
            lowMemoryMode, memoryEstimate);
      lengths =
        new RunLengthIntegerWriter(writer.createStream(columnId,
            OrcProto.Stream.Kind.LENGTH), false, INT_BYTE_SIZE, useVInts);
      recordPosition(rowIndexPosition);
    }

    @Override
    void write(Object obj) throws IOException {
      long rawDataSize = 0;
      if (obj != null) {
        MapObjectInspector insp = (MapObjectInspector) inspector;
        // This is inefficient, but it will have to do until we can get a
        // better accessor in the MapObjectInspector.
        Map<?, ?> valueMap = insp.getMap(obj);
        // Don't use getMapSize(); it's inconsistent for some object inspectors
        int len = valueMap.size();
        lengths.write(len);
        for(Map.Entry<?, ?> entry: valueMap.entrySet()) {
          childrenWriters[0].write(entry.getKey());
          childrenWriters[1].write(entry.getValue());
          rawDataSize += childrenWriters[0].getRowRawDataSize();
          rawDataSize += childrenWriters[1].getRowRawDataSize();
        }
      }
      super.write(obj, rawDataSize);
    }

    @Override
    void writeStripe(OrcProto.StripeFooter.Builder builder,
                     int requiredIndexEntries) throws IOException {
      super.writeStripe(builder, requiredIndexEntries);
      lengths.flush();
      for(TreeWriter child: childrenWriters) {
        child.writeStripe(builder, requiredIndexEntries);
      }
      recordPosition(rowIndexPosition);
    }

    @Override
    void recordPosition(PositionRecorder recorder) throws IOException {
      super.recordPosition(recorder);
      lengths.getPosition(recorder);
    }
  }

  private static class UnionTreeWriter extends TreeWriter {
    private final RunLengthByteWriter tags;

    UnionTreeWriter(int columnId,
                  ObjectInspector inspector,
                  StreamFactory writer,
                  boolean nullable, Configuration conf,
                  boolean useVInts, boolean lowMemoryMode,
                  MemoryEstimate memoryEstimate) throws IOException {
      super(columnId, inspector, writer, nullable, conf, useVInts, memoryEstimate);
      UnionObjectInspector insp = (UnionObjectInspector) inspector;
      List<ObjectInspector> choices = insp.getObjectInspectors();
      childrenWriters = new TreeWriter[choices.size()];
      for(int i=0; i < childrenWriters.length; ++i) {
        childrenWriters[i] = createTreeWriter(choices.get(i), writer, true, conf, useVInts,
            lowMemoryMode, memoryEstimate);
      }
      tags =
        new RunLengthByteWriter(writer.createStream(columnId,
            OrcProto.Stream.Kind.DATA));
      recordPosition(rowIndexPosition);
    }

    @Override
    void write(Object obj) throws IOException {
      long rawDataSize = 0;
      if (obj != null) {
        UnionObjectInspector insp = (UnionObjectInspector) inspector;
        byte tag = insp.getTag(obj);
        tags.write(tag);
        childrenWriters[tag].write(insp.getField(obj));
        // raw data size is size of tag (1) + size of value
        rawDataSize = childrenWriters[tag].getRowRawDataSize() + RawDatasizeConst.UNION_TAG_SIZE;
      }
      super.write(obj, rawDataSize);
    }

    @Override
    void writeStripe(OrcProto.StripeFooter.Builder builder,
                     int requiredIndexEntries) throws IOException {
      super.writeStripe(builder, requiredIndexEntries);
      tags.flush();
      for(TreeWriter child: childrenWriters) {
        child.writeStripe(builder, requiredIndexEntries);
      }
      recordPosition(rowIndexPosition);
    }

    @Override
    void recordPosition(PositionRecorder recorder) throws IOException {
      super.recordPosition(recorder);
      tags.getPosition(recorder);
    }
  }

  private static TreeWriter createTreeWriter(ObjectInspector inspector,
      StreamFactory streamFactory, boolean nullable, Configuration conf, boolean useVInts,
      boolean lowMemoryMode, MemoryEstimate memoryEstimate) throws IOException {
    switch (inspector.getCategory()) {
      case PRIMITIVE:
        switch (((PrimitiveObjectInspector) inspector).getPrimitiveCategory()) {
          case BOOLEAN:
            return new BooleanTreeWriter(streamFactory.getNextColumnId(),
                inspector, streamFactory, nullable, conf, useVInts, lowMemoryMode, memoryEstimate);
          case BYTE:
            return new ByteTreeWriter(streamFactory.getNextColumnId(),
                inspector, streamFactory, nullable, conf, useVInts, lowMemoryMode, memoryEstimate);
          case SHORT:
            return new IntegerTreeWriter(streamFactory.getNextColumnId(),
                inspector, streamFactory, nullable, conf, useVInts, SHORT_BYTE_SIZE,
                lowMemoryMode, memoryEstimate);
          case INT:
            return new IntegerTreeWriter(streamFactory.getNextColumnId(),
                inspector, streamFactory, nullable, conf, useVInts, INT_BYTE_SIZE,
                lowMemoryMode, memoryEstimate);
          case LONG:
            return new IntegerTreeWriter(streamFactory.getNextColumnId(),
                inspector, streamFactory, nullable, conf, useVInts, LONG_BYTE_SIZE,
                lowMemoryMode, memoryEstimate);
          case FLOAT:
            return new FloatTreeWriter(streamFactory.getNextColumnId(),
                inspector, streamFactory, nullable, conf, useVInts, lowMemoryMode, memoryEstimate);
          case DOUBLE:
            return new DoubleTreeWriter(streamFactory.getNextColumnId(),
                inspector, streamFactory, nullable, conf, useVInts, lowMemoryMode, memoryEstimate);
          case STRING:
            return new StringTreeWriter(streamFactory.getNextColumnId(),
                inspector, streamFactory, nullable, conf, useVInts, lowMemoryMode, memoryEstimate);
          case BINARY:
            return new BinaryTreeWriter(streamFactory.getNextColumnId(),
                inspector, streamFactory, nullable, conf, useVInts, lowMemoryMode, memoryEstimate);
          case TIMESTAMP:
            return new TimestampTreeWriter(streamFactory.getNextColumnId(),
                inspector, streamFactory, nullable, conf, useVInts, lowMemoryMode, memoryEstimate);
          default:
            throw new IllegalArgumentException("Bad primitive category " +
              ((PrimitiveObjectInspector) inspector).getPrimitiveCategory());
        }
      case STRUCT:
        return new StructTreeWriter(streamFactory.getNextColumnId(), inspector,
            streamFactory, nullable, conf, useVInts, lowMemoryMode, memoryEstimate);
      case MAP:
        return new MapTreeWriter(streamFactory.getNextColumnId(), inspector,
            streamFactory, nullable, conf, useVInts, lowMemoryMode, memoryEstimate);
      case LIST:
        return new ListTreeWriter(streamFactory.getNextColumnId(), inspector,
            streamFactory, nullable, conf, useVInts, lowMemoryMode, memoryEstimate);
      case UNION:
        return new UnionTreeWriter(streamFactory.getNextColumnId(), inspector,
            streamFactory, nullable, conf, useVInts, lowMemoryMode, memoryEstimate);
      default:
        throw new IllegalArgumentException("Bad category: " +
          inspector.getCategory());
    }
  }
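
  // Editorial example: column ids are handed out by streamFactory in
  // construction order, so assuming getNextColumnId() starts at 0, a row type
  // of struct<a:int, b:map<string,double>> produces the pre-order numbering
  //   0 struct, 1 int (a), 2 map (b), 3 string (map key), 4 double (map value)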

  private static void writeTypes(OrcProto.Footer.Builder builder,
                                 TreeWriter treeWriter) {
    OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
    switch (treeWriter.inspector.getCategory()) {
      case PRIMITIVE:
        switch (((PrimitiveObjectInspector) treeWriter.inspector).
                 getPrimitiveCategory()) {
          case BOOLEAN:
            type.setKind(OrcProto.Type.Kind.BOOLEAN);
            break;
          case BYTE:
            type.setKind(OrcProto.Type.Kind.BYTE);
            break;
          case SHORT:
            type.setKind(OrcProto.Type.Kind.SHORT);
            break;
          case INT:
            type.setKind(OrcProto.Type.Kind.INT);
            break;
          case LONG:
            type.setKind(OrcProto.Type.Kind.LONG);
            break;
          case FLOAT:
            type.setKind(OrcProto.Type.Kind.FLOAT);
            break;
          case DOUBLE:
            type.setKind(OrcProto.Type.Kind.DOUBLE);
            break;
          case STRING:
            type.setKind(OrcProto.Type.Kind.STRING);
            break;
          case BINARY:
            type.setKind(OrcProto.Type.Kind.BINARY);
            break;
          case TIMESTAMP:
            type.setKind(OrcProto.Type.Kind.TIMESTAMP);
            break;
          default:
            throw new IllegalArgumentException("Unknown primitive category: " +
              ((PrimitiveObjectInspector) treeWriter.inspector).
                getPrimitiveCategory());
        }
        break;
      case LIST:
        type.setKind(OrcProto.Type.Kind.LIST);
        type.addSubtypes(treeWriter.childrenWriters[0].id);
        break;
      case MAP:
        type.setKind(OrcProto.Type.Kind.MAP);
        type.addSubtypes(treeWriter.childrenWriters[0].id);
        type.addSubtypes(treeWriter.childrenWriters[1].id);
        break;
      case STRUCT:
        type.setKind(OrcProto.Type.Kind.STRUCT);
        for(TreeWriter child: treeWriter.childrenWriters) {
          type.addSubtypes(child.id);
        }
        for(StructField field: ((StructTreeWriter) treeWriter).fields) {
          type.addFieldNames(field.getFieldName());
        }
        break;
      case UNION:
        type.setKind(OrcProto.Type.Kind.UNION);
        for(TreeWriter child: treeWriter.childrenWriters) {
          type.addSubtypes(child.id);
        }
        break;
      default:
        throw new IllegalArgumentException("Unknown category: " +
          treeWriter.inspector.getCategory());
    }
    builder.addTypes(type);
    for(TreeWriter child: treeWriter.childrenWriters) {
      writeTypes(builder, child);
    }
  }
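
  // Editorial example: writeTypes flattens the tree in the same pre-order as
  // the column ids above, so the struct<a:int, b:map<string,double>> example
  // yields footer types
  //   STRUCT(subtypes 1,2; fieldNames a,b), INT, MAP(subtypes 3,4), STRING, DOUBLE
  // which lets a reader rebuild the schema from the subtype indices alone.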

  private void ensureWriter() throws IOException {
    if (rawWriter == null) {
      rawWriter = fs.create(path, false, HDFS_BUFFER_SIZE,
        fs.getDefaultReplication(),
          // Use the largest value less than or equal to Integer.MAX_VALUE which is divisible
          // by dfsBytesPerChecksum as an upper bound (otherwise HDFS will throw an exception)
          Math.min(stripeSize * 2L, (Integer.MAX_VALUE / dfsBytesPerChecksum) * dfsBytesPerChecksum));
      rawWriter.writeBytes(OrcFile.MAGIC);
      headerLength = rawWriter.getPos();
      writer = new OutStream("metadata", bufferSize, codec,
        new DirectStream(rawWriter), memoryEstimate);
      protobufWriter = CodedOutputStream.newInstance(writer);
    }
  }
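
  // Editorial example: with HDFS's default dfsBytesPerChecksum of 512, the
  // upper bound above is (Integer.MAX_VALUE / 512) * 512 = 2,147,483,136,
  // the largest multiple of 512 that still fits in an int; a 256MB stripeSize
  // therefore yields a 512MB block, while an oversized stripeSize is clamped.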

  private void createRowIndexEntry() throws IOException {
    treeWriter.flush();
    treeWriter.createRowIndexEntry();
    rowsInIndex = 0;
  }

  public void addStripe(StripeInformation si, byte[] data) throws IOException {
    ensureWriter();
    OrcProto.StripeInformation dirEntry =
      OrcProto.StripeInformation.newBuilder()
          .setOffset(rawWriter.getPos())
          .setIndexLength(si.getIndexLength())
          .setDataLength(si.getDataLength())
          .setNumberOfRows(si.getNumberOfRows())
          .setFooterLength(si.getFooterLength())
          .setRawDataSize(si.getRawDataSize()).build();
    stripes.add(dirEntry);
    rowCount += si.getNumberOfRows();
    rawWriter.write(data);
  }

  // Before we write the streams out to disk, do some cleanup.  For now this
  // consists of two things:
  // 1) Ignore any suppressed streams; this is done by leaving them out of the
  //    returned list and clearing their contents.
  // 2) If a stream has dictionary data, put the length stream before the
  //    dictionary data in the DICTIONARY area, because that more closely
  //    matches the order in which the data is actually read.
  private List<Map.Entry<StreamName, BufferedStream>> cleanUpStreams() throws IOException {
    List<Map.Entry<StreamName, BufferedStream>> streamList =
      new ArrayList<Map.Entry<StreamName, BufferedStream>>(streams.size());
    Map<StreamName, Integer> indexMap = new HashMap<StreamName, Integer>(streams.size());

    int increment = 0;

    for(Map.Entry<StreamName, BufferedStream> pair: streams.entrySet()) {
      if (!pair.getValue().isSuppressed()) {
        StreamName name = pair.getKey();
        if (name.getKind() == Kind.LENGTH) {
          Integer index = indexMap.get(new StreamName(name.getColumn(), Kind.DICTIONARY_DATA));
          if (index != null) {
            streamList.add(index + increment, pair);
            increment++;
            continue;
          }
        }

        indexMap.put(name, Integer.valueOf(streamList.size()));
        streamList.add(pair);
      } else {
        pair.getValue().clear();
      }
    }

    return streamList;
  }
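
  // Editorial example: if column 1's streams arrive in the order
  //   [DICTIONARY_DATA, DATA, LENGTH]
  // the LENGTH stream is spliced in at the index saved for DICTIONARY_DATA,
  // producing [LENGTH, DICTIONARY_DATA, DATA]; increment compensates for
  // earlier splices having shifted the saved indices to the right.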

  protected void flushStripe() throws IOException {
    ensureWriter();

    ReaderWriterProfiler.start(ReaderWriterProfiler.Counter.ENCODING_TIME);
    treeWriter.flush();

    if (buildIndex && rowsInIndex != 0) {
      createRowIndexEntry();
    }
    ReaderWriterProfiler.end(ReaderWriterProfiler.Counter.ENCODING_TIME);
    if (rowsInStripe != 0) {
      int requiredIndexEntries = rowIndexStride == 0 ? 0 :
          (int) ((rowsInStripe + rowIndexStride - 1) / rowIndexStride);
      OrcProto.StripeFooter.Builder builder =
          OrcProto.StripeFooter.newBuilder();
      long stripeRawDataSize = treeWriter.getStripeRawDataSize();

      ReaderWriterProfiler.start(ReaderWriterProfiler.Counter.SERIALIZATION_TIME);
      treeWriter.writeStripe(builder, requiredIndexEntries);
      ReaderWriterProfiler.end(ReaderWriterProfiler.Counter.SERIALIZATION_TIME);

      long start = rawWriter.getPos();
      long section = start;
      long indexEnd = start;
      List<Map.Entry<StreamName, BufferedStream>> streamList = cleanUpStreams();
      for (Map.Entry<StreamName, BufferedStream> pair : streamList) {
        BufferedStream stream = pair.getValue();
        stream.flush(true);
        stream.spillTo(rawWriter);
        long end = rawWriter.getPos();
        StreamName name = pair.getKey();
        builder.addStreams(OrcProto.Stream.newBuilder()
            .setColumn(name.getColumn())
            .setKind(name.getKind())
            .setLength(end-section)
            .setUseVInts(useVInts));
        section = end;
        if (StreamName.Area.INDEX == name.getArea()) {
          indexEnd = end;
        }
        stream.clear();
      }
      builder.build().writeTo(protobufWriter);
      protobufWriter.flush();
      writer.flush();
      long end = rawWriter.getPos();
      OrcProto.StripeInformation dirEntry =
          OrcProto.StripeInformation.newBuilder()
              .setOffset(start)
              .setIndexLength(indexEnd - start)
              .setDataLength(section - indexEnd)
              .setNumberOfRows(rowsInStripe)
              .setFooterLength(end - section)
              .setRawDataSize(stripeRawDataSize).build();
      stripes.add(dirEntry);
      rowCount += rowsInStripe;
      rawDataSize += stripeRawDataSize;
      rowsInStripe = 0;
    }
  }
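
  // Editorial sketch of the stripe produced above:
  //   [INDEX-area streams][DATA-area streams][StripeFooter]
  // indexEnd marks the INDEX/DATA boundary and section tracks the running end
  // of the last stream written, so the directory entry records
  //   indexLength  = indexEnd - start
  //   dataLength   = section - indexEnd
  //   footerLength = end - section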

  private OrcProto.CompressionKind writeCompressionKind(CompressionKind kind) {
    switch (kind) {
      case NONE: return OrcProto.CompressionKind.NONE;
      case ZLIB: return OrcProto.CompressionKind.ZLIB;
      case SNAPPY: return OrcProto.CompressionKind.SNAPPY;
      case LZO: return OrcProto.CompressionKind.LZO;
      default:
        throw new IllegalArgumentException("Unknown compression " + kind);
    }
  }

  private int writeFileStatistics(OrcProto.Footer.Builder builder,
      TreeWriter writer, ColumnStatisticsImpl[] columnStats, int column) {
    if (columnStats != null) {
      writer.fileStatistics.merge(columnStats[column++]);
    }
    builder.addStatistics(writer.fileStatistics.serialize());
    for(TreeWriter child: writer.getChildrenWriters()) {
      column = writeFileStatistics(builder, child, columnStats, column);
    }
    return column;
  }

  private int writeFooter(long bodyLength, ColumnStatisticsImpl[] columnStats) throws IOException {
    ensureWriter();
    OrcProto.Footer.Builder builder = OrcProto.Footer.newBuilder();
    builder.setContentLength(bodyLength);
    builder.setHeaderLength(headerLength);
    builder.setNumberOfRows(rowCount);
    builder.setRawDataSize(rawDataSize);
    builder.setRowIndexStride(rowIndexStride);
    // serialize the types
    writeTypes(builder, treeWriter);
    // add the stripe information
    for(OrcProto.StripeInformation stripe: stripes) {
      builder.addStripes(stripe);
    }
    // add the column statistics
    writeFileStatistics(builder, treeWriter, columnStats, 0);
    // add all of the user metadata
    for(Map.Entry<String, ByteString> entry: userMetadata.entrySet()) {
      builder.addMetadata(OrcProto.UserMetadataItem.newBuilder()
        .setName(entry.getKey()).setValue(entry.getValue()));
    }
    long startPosn = rawWriter.getPos();
    builder.build().writeTo(protobufWriter);
    protobufWriter.flush();
    writer.flush();
    return (int) (rawWriter.getPos() - startPosn);
  }

  private int writePostScript(int footerLength) throws IOException {
    OrcProto.PostScript.Builder builder =
      OrcProto.PostScript.newBuilder()
        .setCompression(writeCompressionKind(compress))
        .setFooterLength(footerLength);
    if (compress != CompressionKind.NONE) {
      builder.setCompressionBlockSize(bufferSize);
    }
    OrcProto.PostScript ps = builder.build();
    // need to write this uncompressed
    long startPosn = rawWriter.getPos();
    ps.writeTo(rawWriter);
    long length = rawWriter.getPos() - startPosn;
    if (length > 255) {
      throw new IllegalArgumentException("PostScript too large at " + length);
    }
    return (int) length;
  }
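
  // Editorial note: close() writes the PostScript immediately after the
  // footer and then appends its length as the final single byte of the file
  // (hence the 255 cap above), so a reader can locate the PostScript by
  // reading that last byte first.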

  @Override
  public synchronized void addUserMetadata(String name, ByteBuffer value) {
    userMetadata.put(name, ByteString.copyFrom(value));
  }

  @Override
  public void addRow(Object row) throws IOException {

    ReaderWriterProfiler.start(ReaderWriterProfiler.Counter.ENCODING_TIME);
    synchronized (this) {
      treeWriter.write(row);
      rowsInStripe += 1;
      if (buildIndex) {
        rowsInIndex += 1;

        if (rowsInIndex >= rowIndexStride) {
          createRowIndexEntry();
        }
      }
    }
    if (memoryManager.shouldFlush(memoryEstimate, path, stripeSize, maxDictSize)) {
      flushStripe();
    }
    memoryManager.addedRow();
    ReaderWriterProfiler.end(ReaderWriterProfiler.Counter.ENCODING_TIME);
  }
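
  // Editorial note: every row follows the same path: it is encoded into the
  // tree of writers, an index entry is cut every rowIndexStride rows, and a
  // stripe is flushed whenever the memory manager reports that buffered data
  // has outgrown stripeSize (or that dictionaries exceed maxDictSize).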

  @Override
  public long getRowRawDataSize() {
    return treeWriter.getRowRawDataSize();
  }

  @Override
  public void close() throws IOException {
    close(null);
  }

  public void close(ColumnStatisticsImpl[] columnStats) throws IOException {
    // remove us from the memory manager so that we don't get any callbacks
    memoryManager.removeWriter(path);
    // actually close the file
    synchronized (this) {
      flushStripe();
      int footerLength = writeFooter(rawWriter.getPos(), columnStats);
      rawWriter.writeByte(writePostScript(footerLength));
      rawWriter.flush();
      rawWriter.close();
    }
  }
}