Package org.apache.hadoop.hive.ql.exec.vector

Examples of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch


    return result;
  }

  @Override
  public VectorizedRowBatch nextBatch(VectorizedRowBatch previous) throws IOException {
    VectorizedRowBatch result = null;
    if (rowInStripe >= rowCountInStripe) {
      currentStripe += 1;
      readStripe();
    }

    long batchSize = 0;

    // In case of PPD, batch size should be aware of row group boundaries. If only a subset of row
    // groups are selected then marker position is set to the end of range (subset of row groups
    // within strip). Batch size computed out of marker position makes sure that batch size is
    // aware of row group boundary and will not cause overflow when reading rows
    // illustration of this case is here https://issues.apache.org/jira/browse/HIVE-6287
    if (rowIndexStride != 0 && includedRowGroups != null && rowInStripe < rowCountInStripe) {
      int startRowGroup = (int) (rowInStripe / rowIndexStride);
      if (!includedRowGroups[startRowGroup]) {
        while (startRowGroup < includedRowGroups.length && !includedRowGroups[startRowGroup]) {
          startRowGroup += 1;
        }
      }

      int endRowGroup = startRowGroup;
      while (endRowGroup < includedRowGroups.length && includedRowGroups[endRowGroup]) {
        endRowGroup += 1;
      }

      final long markerPosition = (endRowGroup * rowIndexStride) < rowCountInStripe ? (endRowGroup * rowIndexStride)
          : rowCountInStripe;
      batchSize = Math.min(VectorizedRowBatch.DEFAULT_SIZE, (markerPosition - rowInStripe));

      if (LOG.isDebugEnabled() && batchSize < VectorizedRowBatch.DEFAULT_SIZE) {
        LOG.debug("markerPosition: " + markerPosition + " batchSize: " + batchSize);
      }
    } else {
      batchSize = Math.min(VectorizedRowBatch.DEFAULT_SIZE, (rowCountInStripe - rowInStripe));
    }

    rowInStripe += batchSize;
    if (previous == null) {
      ColumnVector[] cols = (ColumnVector[]) reader.nextVector(null, (int) batchSize);
      result = new VectorizedRowBatch(cols.length);
      result.cols = cols;
    } else {
      result = (VectorizedRowBatch) previous;
      result.selectedInUse = false;
      reader.nextVector(result.cols, (int) batchSize);
View Full Code Here


    assertEquals(1, splits.length);

    org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch>
        reader = inputFormat.getRecordReader(splits[0], conf, Reporter.NULL);
    NullWritable key = reader.createKey();
    VectorizedRowBatch value = reader.createValue();
    assertEquals(true, reader.next(key, value));
    assertEquals(10, value.count());
    LongColumnVector col0 = (LongColumnVector) value.cols[0];
    for(int i=0; i < 10; i++) {
      assertEquals("checking " + i, i, col0.vector[i]);
    }
    assertEquals(false, reader.next(key, value));
View Full Code Here

    assertEquals(1, splits.length);

    org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch>
        reader = inputFormat.getRecordReader(splits[0], conf, Reporter.NULL);
    NullWritable key = reader.createKey();
    VectorizedRowBatch value = reader.createValue();
    assertEquals(true, reader.next(key, value));
    assertEquals(10, value.count());
    LongColumnVector col0 = (LongColumnVector) value.cols[0];
    for(int i=0; i < 10; i++) {
      assertEquals("checking " + i, i, col0.vector[i]);
    }
    assertEquals(false, reader.next(key, value));
View Full Code Here

    assertEquals(1, splits.length);

    org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch>
          reader = inputFormat.getRecordReader(splits[0], conf, Reporter.NULL);
    NullWritable key = reader.createKey();
    VectorizedRowBatch value = reader.createValue();
    assertEquals(true, reader.next(key, value));
    assertEquals(100, value.count());
    LongColumnVector booleanColumn = (LongColumnVector) value.cols[0];
    LongColumnVector byteColumn = (LongColumnVector) value.cols[1];
    LongColumnVector shortColumn = (LongColumnVector) value.cols[2];
    LongColumnVector intColumn = (LongColumnVector) value.cols[3];
    LongColumnVector longColumn = (LongColumnVector) value.cols[4];
View Full Code Here

        OrcFile.readerOptions(conf));
    Reader reader = OrcFile.createReader(testFilePath,
        OrcFile.readerOptions(conf));
    RecordReaderImpl vrr = (RecordReaderImpl) vreader.rows();
    RecordReaderImpl rr = (RecordReaderImpl) reader.rows();
    VectorizedRowBatch batch = null;
    OrcStruct row = null;

    // Check Vectorized ORC reader against ORC row reader
    while (vrr.hasNext()) {
      batch = vrr.nextBatch(batch);
View Full Code Here

    assertEquals(1, splits.length);

    org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch>
        reader = inputFormat.getRecordReader(splits[0], conf, Reporter.NULL);
    NullWritable key = reader.createKey();
    VectorizedRowBatch value = reader.createValue();
    assertEquals(true, reader.next(key, value));
    assertEquals(10, value.count());
    LongColumnVector col0 = (LongColumnVector) value.cols[0];
    for(int i=0; i < 10; i++) {
      assertEquals("checking " + i, i, col0.vector[i]);
    }
    assertEquals(false, reader.next(key, value));
View Full Code Here

    assertEquals(1, splits.length);

    org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch>
        reader = inputFormat.getRecordReader(splits[0], conf, Reporter.NULL);
    NullWritable key = reader.createKey();
    VectorizedRowBatch value = reader.createValue();
    assertEquals(true, reader.next(key, value));
    assertEquals(10, value.count());
    LongColumnVector col0 = (LongColumnVector) value.cols[0];
    for(int i=0; i < 10; i++) {
      assertEquals("checking " + i, i, col0.vector[i]);
    }
    assertEquals(false, reader.next(key, value));
View Full Code Here

    assertEquals(1, splits.length);

    org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch>
          reader = inputFormat.getRecordReader(splits[0], conf, Reporter.NULL);
    NullWritable key = reader.createKey();
    VectorizedRowBatch value = reader.createValue();
    assertEquals(true, reader.next(key, value));
    assertEquals(100, value.count());
    LongColumnVector booleanColumn = (LongColumnVector) value.cols[0];
    LongColumnVector byteColumn = (LongColumnVector) value.cols[1];
    LongColumnVector shortColumn = (LongColumnVector) value.cols[2];
    LongColumnVector intColumn = (LongColumnVector) value.cols[3];
    LongColumnVector longColumn = (LongColumnVector) value.cols[4];
View Full Code Here

    return NullWritable.get();
  }

  @Override
  public VectorizedRowBatch createValue() {
    VectorizedRowBatch result = null;
    try {
      result = rbCtx.createVectorizedRowBatch();
    } catch (HiveException e) {
      new RuntimeException("Error creating a batch", e);
    }
View Full Code Here

    return result;
  }

  @Override
  public VectorizedRowBatch nextBatch(VectorizedRowBatch previous) throws IOException {
    VectorizedRowBatch result = null;
    if (rowInStripe >= rowCountInStripe) {
      currentStripe += 1;
      readStripe();
    }

    long batchSize = 0;

    // In case of PPD, batch size should be aware of row group boundaries. If only a subset of row
    // groups are selected then marker position is set to the end of range (subset of row groups
    // within strip). Batch size computed out of marker position makes sure that batch size is
    // aware of row group boundary and will not cause overflow when reading rows
    // illustration of this case is here https://issues.apache.org/jira/browse/HIVE-6287
    if (rowIndexStride != 0 && includedRowGroups != null && rowInStripe < rowCountInStripe) {
      int startRowGroup = (int) (rowInStripe / rowIndexStride);
      if (!includedRowGroups[startRowGroup]) {
        while (startRowGroup < includedRowGroups.length && !includedRowGroups[startRowGroup]) {
          startRowGroup += 1;
        }
      }

      int endRowGroup = startRowGroup;
      while (endRowGroup < includedRowGroups.length && includedRowGroups[endRowGroup]) {
        endRowGroup += 1;
      }

      final long markerPosition = (endRowGroup * rowIndexStride) < rowCountInStripe ? (endRowGroup * rowIndexStride)
          : rowCountInStripe;
      batchSize = Math.min(VectorizedRowBatch.DEFAULT_SIZE, (markerPosition - rowInStripe));

      if (LOG.isDebugEnabled() && batchSize < VectorizedRowBatch.DEFAULT_SIZE) {
        LOG.debug("markerPosition: " + markerPosition + " batchSize: " + batchSize);
      }
    } else {
      batchSize = Math.min(VectorizedRowBatch.DEFAULT_SIZE, (rowCountInStripe - rowInStripe));
    }

    rowInStripe += batchSize;
    if (previous == null) {
      ColumnVector[] cols = (ColumnVector[]) reader.nextVector(null, (int) batchSize);
      result = new VectorizedRowBatch(cols.length);
      result.cols = cols;
    } else {
      result = (VectorizedRowBatch) previous;
      result.selectedInUse = false;
      reader.nextVector(result.cols, (int) batchSize);
View Full Code Here

TOP

Related Classes of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.