// --- Lazy init, phase 1: pull the first non-empty batch from every sender ---
// For each incoming fragment provider, fetch batches until a non-empty one (or
// end-of-stream, signalled by null) is found. Survivors go into rawBatches.
// The first empty batch encountered is remembered in emptyBatch so that a
// schema-only result can still be produced if every sender is empty.
// set up each (non-empty) incoming record batch
List<RawFragmentBatch> rawBatches = Lists.newArrayList();
// NOTE(review): firstBatch is never set to false within this span -- confirm
// it is cleared elsewhere, otherwise the firstBatch check below is always true.
boolean firstBatch = true;
for (RawFragmentBatchProvider provider : fragProviders) {
RawFragmentBatch rawBatch = null;
try {
rawBatch = getNext(provider);
// A null batch combined with a cancelled context means the query was stopped.
if (rawBatch == null && context.isCancelled()) {
return IterOutcome.STOP;
}
} catch (IOException e) {
// Propagate the failure to the fragment context and stop iteration.
context.fail(e);
return IterOutcome.STOP;
}
// NOTE(review): if getNext() returned null while the context is NOT
// cancelled, the dereference below throws NPE -- confirm null can only
// occur on cancellation at this point.
if (rawBatch.getHeader().getDef().getRecordCount() != 0) {
rawBatches.add(rawBatch);
} else {
// Remember the first empty batch seen; its definition is used later to
// build a schema when all senders turn out to be empty.
if (emptyBatch == null) {
emptyBatch = rawBatch;
}
try {
// Drain empty batches until a non-empty one or end-of-stream (null).
while ((rawBatch = getNext(provider)) != null && rawBatch.getHeader().getDef().getRecordCount() == 0);
if (rawBatch == null && context.isCancelled()) {
return IterOutcome.STOP;
}
} catch (IOException e) {
context.fail(e);
return IterOutcome.STOP;
}
if (rawBatch != null) {
rawBatches.add(rawBatch);
} else {
// Sender exhausted with only empty batches: contribute the remembered
// empty batch so this sender still participates in schema construction.
rawBatches.add(emptyBatch);
}
}
}
// --- Lazy init, phase 2: handle the no-data case ---
// allocate the incoming record batch loaders
senderCount = rawBatches.size();
if (senderCount == 0) {
// No batches were collected at all.
if (firstBatch) {
// Emit a single empty batch that carries only the schema (taken from the
// remembered empty batch) so downstream operators can learn the schema.
RecordBatchLoader loader = new RecordBatchLoader(oContext.getAllocator());
try {
loader.load(emptyBatch.getHeader().getDef(), emptyBatch.getBody());
} catch (SchemaChangeException e) {
// A schema change while loading the very first (empty) batch is
// unexpected; surface it as an unchecked failure.
throw new RuntimeException(e);
}
// NOTE(review): raw type here -- VectorWrapper<?> would avoid the
// unchecked-warning; left unchanged to keep this edit comment-only.
for (VectorWrapper w : loader) {
outgoingContainer.add(w.getValueVector());
}
outgoingContainer.buildSchema(SelectionVectorMode.NONE);
done = true;
return IterOutcome.OK_NEW_SCHEMA;
}
// Not the first batch: nothing more to deliver.
return IterOutcome.NONE;
}
// --- Lazy init, phase 3: allocate per-sender state and load first batches ---
// One slot per sender: the current raw batch, a consumed-batch counter, and a
// RecordBatchLoader that materializes value vectors from the raw bytes.
incomingBatches = new RawFragmentBatch[senderCount];
batchOffsets = new int[senderCount];
batchLoaders = new RecordBatchLoader[senderCount];
for (int i = 0; i < senderCount; ++i) {
incomingBatches[i] = rawBatches.get(i);
batchLoaders[i] = new RecordBatchLoader(oContext.getAllocator());
}
int i = 0;
for (RawFragmentBatch batch : incomingBatches) {
// initialize the incoming batchLoaders
UserBitShared.RecordBatchDef rbd = batch.getHeader().getDef();
try {
batchLoaders[i].load(rbd, batch.getBody());
} catch(SchemaChangeException e) {
// NOTE(review): passing a Throwable to a "{}" placeholder logs only its
// toString; the conventional SLF4J form is logger.error("msg", e), which
// preserves the stack trace.
logger.error("MergingReceiver failed to load record batch from remote host. {}", e);
context.fail(e);
return IterOutcome.STOP;
}
// The loader has taken ownership of the data; release the raw buffer.
batch.release();
++batchOffsets[i];
++i;
}
// Canonicalize each incoming batch, so that vectors are alphabetically sorted based on SchemaPath.
for (RecordBatchLoader loader : batchLoaders) {
loader.canonicalize();
}
// Ensure all the incoming batches have the identical schema.
// The merge below assumes every sender produces the same column layout.
if (!isSameSchemaAmongBatches(batchLoaders)) {
// NOTE(review): "diffferent" is a typo in both the log and exception
// messages (runtime strings -- not altered by this comment-only edit).
logger.error("Incoming batches for merging receiver have diffferent schemas!");
context.fail(new SchemaChangeException("Incoming batches for merging receiver have diffferent schemas!"));
return IterOutcome.STOP;
}
// create the outgoing schema and vector container, and allocate the initial batch
// The schema is mirrored from the first loader (all loaders were just verified
// to share the same schema above).
SchemaBuilder bldr = BatchSchema.newBuilder().setSelectionVectorMode(BatchSchema.SelectionVectorMode.NONE);
int vectorCount = 0;
for (VectorWrapper<?> v : batchLoaders[0]) {
// add field to the output schema
bldr.addField(v.getField());
// allocate a new value vector
ValueVector outgoingVector = TypeHelper.getNewVector(v.getField(), oContext.getAllocator());
outgoingVector.allocateNew();
outgoingContainer.add(outgoingVector);
++vectorCount;
}
schema = bldr.build();
// BUG(review): schema.equals(schema) is a self-comparison -- the condition is
// always false and this branch is dead. It presumably should compare the new
// schema against a previously captured schema; confirm intent before fixing.
if (schema != null && !schema.equals(schema)) {
// TODO: handle case where one or more batches implicitly indicate schema change
logger.debug("Initial state has incoming batches with different schemas");
}
outgoingContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
// generate code for merge operations (copy and compare)
try {
merger = createMerger();
} catch (SchemaChangeException e) {
// NOTE(review): same SLF4J "{}"-with-Throwable pattern as above; the stack
// trace is lost -- logger.error("msg", e) is the conventional form.
logger.error("Failed to generate code for MergingReceiver. {}", e);
context.fail(e);
return IterOutcome.STOP;
}
// allocate the priority queue with the generated comparator
// Each queue entry identifies one record via a composite key built from the
// sender id and the record's index within that sender's current batch.
this.pqueue = new PriorityQueue<Node>(fragProviders.length, new Comparator<Node>() {
public int compare(Node node1, Node node2) {
// Composite index: high bits = batch id, low 16 bits = value index.
// NOTE(review): assumes valueIndex < 65536 per batch, otherwise the
// packed keys collide -- confirm batch size bound elsewhere.
int leftIndex = (node1.batchId << 16) + node1.valueIndex;
int rightIndex = (node2.batchId << 16) + node2.valueIndex;
// Delegates to the generated comparison code for the actual ordering.
return merger.doEval(leftIndex, rightIndex);
}
});
// populate the priority queue with initial values
// For each sender, skip any empty batches; a sender whose stream is exhausted
// gets its loader cleared and nulled so it is excluded from the queue.
for (int b = 0; b < senderCount; ++b) {
while (batchLoaders[b] != null && batchLoaders[b].getRecordCount() == 0) {
try {
RawFragmentBatch batch = getNext(fragProviders[b]);
incomingBatches[b] = batch;
if (batch != null) {
batchLoaders[b].load(batch.getHeader().getDef(), batch.getBody());
} else {
// Stream exhausted: release loader resources and mark this sender done.
batchLoaders[b].clear();
batchLoaders[b] = null;
if (context.isCancelled()) {
return IterOutcome.STOP;
}
}
} catch (IOException | SchemaChangeException e) {
context.fail(e);
return IterOutcome.STOP;
}
}
// Seed the queue with the first record (index 0) of each live sender.
if (batchLoaders[b] != null) {
pqueue.add(new Node(b, 0));
}
}
hasRun = true;
// finished lazy initialization
}
// --- Main merge loop: repeatedly emit the smallest record across senders ---
// (continues past the end of this view)
while (!pqueue.isEmpty()) {
// pop next value from pq and copy to outgoing batch
// peek-then-poll: the node is only removed from the queue after the copy
// succeeds, so a full outgoing batch leaves the record queued for next call.
Node node = pqueue.peek();
if (!copyRecordToOutgoingBatch(node)) {
logger.debug("Outgoing vectors space is full; breaking");
prevBatchWasFull = true;
break;
}
pqueue.poll();
// NOTE(review): commented-out alternative fullness check below -- consider
// deleting if it is no longer needed.
// if (isOutgoingFull()) {
// // set a flag so that we reallocate on the next iteration
// logger.debug("Outgoing vectors record batch size reached; breaking");
// prevBatchWasFull = true;
// }
if (node.valueIndex == batchLoaders[node.batchId].getRecordCount() - 1) {
// reached the end of an incoming record batch
// Fetch the sender's next non-empty batch (null means end-of-stream).
RawFragmentBatch nextBatch = null;
try {
nextBatch = getNext(fragProviders[node.batchId]);
while (nextBatch != null && nextBatch.getHeader().getDef().getRecordCount() == 0) {
nextBatch = getNext(fragProviders[node.batchId]);
}
if (nextBatch == null && context.isCancelled()) {
return IterOutcome.STOP;
}