Package org.apache.drill.exec.record

Examples of org.apache.drill.exec.record.VectorContainer


      tunnel.sendRecordBatch(statusHandler, context, writableBatch);
    } else {
      logger.debug("Flush requested on an empty outgoing record batch" + (isLast ? " (last batch)" : ""));
      if (isLast) {
        // if the last batch is empty, it must not contain any value vectors.
        vectorContainer = new VectorContainer();

        // send final batch
        FragmentWritableBatch writableBatch = new FragmentWritableBatch(isLast,
                                                                        handle.getQueryId(),
                                                                        handle.getMajorFragmentId(),
View Full Code Here


   * Create a new output schema and allocate space for value vectors based on the incoming record batch.
   */
  public void initializeBatch() {
    isLast = false;
    recordCapacity = incoming.getRecordCount();
    vectorContainer = new VectorContainer();

    SchemaBuilder bldr = BatchSchema.newBuilder().setSelectionVectorMode(BatchSchema.SelectionVectorMode.NONE);
    for (VectorWrapper<?> v : incoming) {

      // add field to the output schema
View Full Code Here

  private boolean retain = false;

  public VectorAccessibleSerializable(BufferAllocator allocator) {
    this.allocator = allocator;
    this.va = new VectorContainer();
  }
View Full Code Here

   * @param input the InputStream to read from
   * @throws IOException
   */
  @Override
  public void readFromStream(InputStream input) throws IOException {
    VectorContainer container = new VectorContainer();
    UserBitShared.RecordBatchDef batchDef = UserBitShared.RecordBatchDef.parseDelimitedFrom(input);
    recordCount = batchDef.getRecordCount();
    if (batchDef.hasCarriesTwoByteSelectionVector() && batchDef.getCarriesTwoByteSelectionVector()) {

      if (sv2 == null) {
        sv2 = new SelectionVector2(allocator);
      }
      sv2.allocateNew(recordCount * SelectionVector2.RECORD_SIZE);
      sv2.getBuffer().setBytes(0, input, recordCount * SelectionVector2.RECORD_SIZE);
      svMode = BatchSchema.SelectionVectorMode.TWO_BYTE;
    }
    List<ValueVector> vectorList = Lists.newArrayList();
    List<SerializedField> fieldList = batchDef.getFieldList();
    for (SerializedField metaData : fieldList) {
      int dataLength = metaData.getBufferLength();
      MaterializedField field = MaterializedField.create(metaData);
      ByteBuf buf = allocator.buffer(dataLength);
      buf.writeBytes(input, dataLength);
      ValueVector vector = TypeHelper.getNewVector(field, allocator);
      vector.load(metaData, buf);
      buf.release();
      vectorList.add(vector);
    }
    container.addCollection(vectorList);
    container.buildSchema(svMode);
    container.setRecordCount(recordCount);
    va = container;
  }
View Full Code Here

  }

  private void getTwoBatches() throws IOException {
    firstContainer.zeroVectors();
    if (spilledBatches > 0) {
      VectorContainer c = getBatch();
      Iterator<VectorWrapper<?>> wrapperIterator = c.iterator();
      for (VectorWrapper w : firstContainer) {
        TransferPair pair = wrapperIterator.next().getValueVector().makeTransferPair(w.getValueVector());
        pair.transfer();
      }
      firstContainer.setRecordCount(c.getRecordCount());
      c.zeroVectors();
    } else {
      batchPointer = -1;
      pointer = -1;
      firstContainer.zeroVectors();
      secondContainer.zeroVectors();
    }
    if (spilledBatches > 0) {
      VectorContainer c = getBatch();
      Iterator<VectorWrapper<?>> wrapperIterator = c.iterator();
      for (VectorWrapper w : secondContainer) {
        TransferPair pair = wrapperIterator.next().getValueVector().makeTransferPair(w.getValueVector());
        pair.transfer();
      }
      secondContainer.setRecordCount(c.getRecordCount());
      c.zeroVectors();
    } else {
      secondContainer.zeroVectors();
      hasSecond = false;
    }
    batchPointer = 0;
View Full Code Here

  private int recordCount;
  private PreAllocator svAllocator;
  private JoinStatus status;

  public MergeJoinBatchBuilder(BufferAllocator allocator, JoinStatus status) {
    this.container = new VectorContainer();
    this.status = status;
    this.svAllocator = allocator.getNewPreAllocator();
  }
View Full Code Here

      builder.add(incoming);
      recordsSampled += incoming.getRecordCount();
      if (upstream == IterOutcome.NONE)
        break;
    }
  VectorContainer sortedSamples = new VectorContainer();
    builder.build(context, sortedSamples);

    // Sort the records according the orderings given in the configuration

    Sorter sorter = SortBatch.createNewSorter(context, popConfig.getOrderings(), sortedSamples);
    SelectionVector4 sv4 = builder.getSv4();
    sorter.setup(context, sv4, sortedSamples);
    sorter.sort(sv4, sortedSamples);

    // Project every Nth record to a new vector container, where N = recordsSampled/(samplingFactor * partitions).
    // Uses the
    // the expressions from the Orderings to populate each column. There is one column for each Ordering in
    // popConfig.orderings.

    VectorContainer containerToCache = new VectorContainer();
    List<ValueVector> localAllocationVectors = Lists.newArrayList();
    SampleCopier copier = getCopier(sv4, sortedSamples, containerToCache, popConfig.getOrderings(), localAllocationVectors);
    int allocationSize = 50;
    while (true) {
      for (ValueVector vv : localAllocationVectors) {
        AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
      }
      if (copier.copyRecords(recordsSampled / (samplingFactor * partitions), 0, samplingFactor * partitions)) {
        break;
      } else {
        containerToCache.zeroVectors();
        allocationSize *= 2;
      }
    }
    for (VectorWrapper<?> vw : containerToCache) {
      vw.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
    }
    containerToCache.setRecordCount(copier.getOutputRecords());

    // Get a distributed multimap handle from the distributed cache, and put the vectors from the new vector container
    // into a serializable wrapper object, and then add to distributed map

    WritableBatch batch = WritableBatch.getBatchNoHVWrap(containerToCache.getRecordCount(), containerToCache, false);
    CachedVectorContainer sampleToSave = new CachedVectorContainer(batch, context.getAllocator());

    mmap.put(mapKey, sampleToSave);
    this.sampledIncomingBatches = builder.getHeldRecordBatches();
    builder.clear();
    batch.clear();
    containerToCache.clear();
    sampleToSave.clear();
    return true;


  }
View Full Code Here

    SortRecordBatchBuilder containerBuilder = new SortRecordBatchBuilder(context.getAllocator(), MAX_SORT_BYTES);
    for (CachedVectorContainer w : mmap.get(mapKey)) {
      containerBuilder.add(w.get());
    }
    VectorContainer allSamplesContainer = new VectorContainer();
    containerBuilder.build(context, allSamplesContainer);

    List<Ordering> orderDefs = Lists.newArrayList();
    int i = 0;
    for (Ordering od : popConfig.getOrderings()) {
      SchemaPath sp = SchemaPath.getSimplePath("f" + i++);
      orderDefs.add(new Ordering(od.getDirection(), new FieldReference(sp)));
    }

    // sort the data incoming samples.
    SelectionVector4 newSv4 = containerBuilder.getSv4();
    Sorter sorter = SortBatch.createNewSorter(context, orderDefs, allSamplesContainer);
    sorter.setup(context, newSv4, allSamplesContainer);
    sorter.sort(newSv4, allSamplesContainer);

    // Copy every Nth record from the samples into a candidate partition table, where N = totalSampledRecords/partitions
    // Attempt to push this to the distributed map. Only the first candidate to get pushed will be used.
    VectorContainer candidatePartitionTable = new VectorContainer();
    SampleCopier copier = null;
    List<ValueVector> localAllocationVectors = Lists.newArrayList();
    copier = getCopier(newSv4, allSamplesContainer, candidatePartitionTable, orderDefs, localAllocationVectors);
    int allocationSize = 50;
    while (true) {
      for (ValueVector vv : localAllocationVectors) {
        AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
      }
      int skipRecords = containerBuilder.getSv4().getTotalCount() / partitions;
      if (copier.copyRecords(skipRecords, skipRecords, partitions - 1)) {
        assert copier.getOutputRecords() == partitions - 1 : String.format("output records: %d partitions: %d", copier.getOutputRecords(), partitions);
        for (VectorWrapper<?> vw : candidatePartitionTable) {
          vw.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
        }
        break;
      } else {
        candidatePartitionTable.zeroVectors();
        allocationSize *= 2;
      }
    }
    candidatePartitionTable.setRecordCount(copier.getOutputRecords());
    WritableBatch batch = WritableBatch.getBatchNoHVWrap(candidatePartitionTable.getRecordCount(), candidatePartitionTable, false);
    CachedVectorContainer wrap = new CachedVectorContainer(batch, context.getDrillbitContext().getAllocator());
    tableMap.putIfAbsent(mapKey + "final", wrap, 1, TimeUnit.MINUTES);

    candidatePartitionTable.clear();
    allSamplesContainer.clear();
    containerBuilder.clear();
    wrap.clear();

  }
View Full Code Here

    if (upstreamNone && (batchQueue == null || batchQueue.size() == 0))
      return IterOutcome.NONE;

    // if there are batches on the queue, process them first, rather than calling incoming.next()
    if (batchQueue != null && batchQueue.size() > 0) {
      VectorContainer vc = batchQueue.poll();
      recordCount = vc.getRecordCount();
      try {

        // Must set up a new schema each time, because ValueVectors are not reused between containers in queue
        setupNewSchema(vc);
      } catch (SchemaChangeException ex) {
        kill(false);
        logger.error("Failure during query", ex);
        context.fail(ex);
        return IterOutcome.STOP;
      }
      doWork(vc);
      vc.zeroVectors();
      return IterOutcome.OK_NEW_SCHEMA;
    }

    // Reaching this point, either this is the first iteration, or there are no batches left on the queue and there are
    // more incoming
    IterOutcome upstream = next(incoming);

    if (this.first && upstream == IterOutcome.OK) {
      throw new RuntimeException("Invalid state: First batch should have OK_NEW_SCHEMA");
    }

    // If this is the first iteration, we need to generate the partition vectors before we can proceed
    if (this.first && upstream == IterOutcome.OK_NEW_SCHEMA) {
      if (!getPartitionVectors()){
        cleanup();
        return IterOutcome.STOP;
      }

      batchQueue = new LinkedBlockingQueue<>(this.sampledIncomingBatches);
      first = false;

      // Now that we have the partition vectors, we immediately process the first batch on the queue
      VectorContainer vc = batchQueue.poll();
      try {
        setupNewSchema(vc);
      } catch (SchemaChangeException ex) {
        kill(false);
        logger.error("Failure during query", ex);
        context.fail(ex);
        return IterOutcome.STOP;
      }
      doWork(vc);
      vc.zeroVectors();
      recordCount = vc.getRecordCount();
      return IterOutcome.OK_NEW_SCHEMA;
    }

    // if this now that all the batches on the queue are processed, we begin processing the incoming batches. For the
    // first one
View Full Code Here

    }
  }

  public void mergeAndSpill() throws SchemaChangeException {
    logger.debug("Copier allocator current allocation {}", copierAllocator.getAllocatedMemory());
    VectorContainer hyperBatch = new VectorContainer();
    VectorContainer outputContainer = new VectorContainer();
    List<BatchGroup> batchGroupList = Lists.newArrayList();
    int recordCount = 0;
    for (int i = 0; i < SPILL_BATCH_GROUP_SIZE; i++) {
      if (batchGroups.size() == 0) {
        break;
      }
      if (batchGroups.peekLast().getSecondContainer() != null) {
        break;
      }
      BatchGroup batch = batchGroups.pollLast();
      recordCount += batch.getSv2().getCount();
      batchGroupList.add(batch);
    }
    if (batchGroupList.size() == 0) {
      return;
    }
    constructHyperBatch(batchGroupList, hyperBatch);
    createCopier(hyperBatch, batchGroupList, outputContainer, recordCount);

    int count = copier.next();
    assert count > 0;

    VectorContainer c1 = VectorContainer.getTransferClone(outputContainer);
    c1.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    c1.setRecordCount(count);

    count = copier.next();
    assert count > 0;


    VectorContainer c2 = VectorContainer.getTransferClone(outputContainer);
    c2.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    c2.setRecordCount(count);

    String outputFile = String.format(Utilities.getFileNameForQueryFragment(context, dirs.next(), "spill" + uid + "_" + spillCount++));
    BatchGroup newGroup = new BatchGroup(c1, c2, fs, outputFile, oContext.getAllocator());

    try {
View Full Code Here

TOP

Related Classes of org.apache.drill.exec.record.VectorContainer

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.