case POStatus.STATUS_EOP:
break;
}
}
NullableTuple it = null;
// If we see a new NullableTupleIterator, materialize n-1 inputs and construct the ForEach
// input tuple res = (key, input#1, input#2, ..., input#n). The only missing value is input#n:
// we will get input#n one tuple at a time, fill in res, and feed it to ForEach.
// After this block, we have the first tuple of input#n in hand (kept in variable it)
if (newKey)
{
lastInputTuple = false;
//Put n-1 inputs into bags
dbs = new DataBag[numInputs];
for (int i = 0; i < numInputs - 1; i++) {
dbs[i] = useDefaultBag ? BagFactory.getInstance().newDefaultBag()
// In the very rare case where a POStream follows this
// POJoinPackage in the pipeline and also blocks the pipeline,
// the constructor argument should be 2 * numInputs. We do not want
// to pay that penalty all the time just for one obscure case.
: new InternalCachedBag(numInputs-1);
}
// For the last bag, we always use a NonSpillableDataBag.
dbs[lastBagIndex] = new NonSpillableDataBag((int)chunkSize);
//For each Nullable tuple in the input, put it
//into the corresponding bag based on the index,
// except for the last input, which we will stream
// The tuples will arrive in the order of the index,
// starting from index 0 and such that all tuples for
// a given index arrive before a tuple for the next
// index does.
while (tupIter.hasNext()) {
it = tupIter.next();
int itIndex = it.getIndex();
if (itIndex!= numInputs - 1)
{
dbs[itIndex].add(getValueTuple(it, itIndex));
}
else
{
lastInputTuple = true;
break;
}
if(reporter!=null) reporter.progress();
}
// If we don't have any tuple for input#n,
// no further processing is needed; return EOP
if (!lastInputTuple)
{
// we will return at this point because we ought
// to be having a flatten on this last input
// and we have an empty bag which should result
// in this key being taken out of the output
newKey = true;
return eopResult;
}
res = mTupleFactory.newTuple(numInputs+1);
for (int i = 0; i < dbs.length; i++)
res.set(i+1,dbs[i]);
res.set(0,key);
// if we have an inner anywhere and the corresponding
// bag is empty, we can just return
for (int i = 0; i < dbs.length - 1; i++) {
if(inner[i]&&dbs[i].size()==0){
detachInput();
return eopResult;
}
}
newKey = false;
// set up the bag with last input to contain
// a chunk of CHUNKSIZE values OR the entire bag if
// it has fewer than CHUNKSIZE values - the idea is that in most
// cases there are more than CHUNKSIZE values and in
// those cases we will be sending the last bag
// as a set of smaller chunked bags, thus holding less
// in memory
// the first tuple can be directly retrieved from "it"
dbs[lastBagIndex].add(getValueTuple(it, it.getIndex()));
for(int i = 0; i < chunkSize -1 && tupIter.hasNext(); i++) {
it = tupIter.next();
dbs[lastBagIndex].add(getValueTuple(it, it.getIndex()));
}
// Attach the input to forEach
forEach.attachInput(res);
// pull output tuple from ForEach
Result forEachResult = forEach.getNext(t1);
{
switch (forEachResult.returnStatus)
{
case POStatus.STATUS_OK:
case POStatus.STATUS_NULL:
case POStatus.STATUS_ERR:
return forEachResult;
case POStatus.STATUS_EOP:
break;
}
}
}
// Keep attaching input tuple to ForEach, until:
// 1. We can initialize ForEach.getNext();
// 2. There is no more input#n
while (true)
{
if (tupIter.hasNext()) {
// try setting up a bag of CHUNKSIZE OR
// the remainder of the bag of last input
// (if < CHUNKSIZE) to foreach
dbs[lastBagIndex].clear(); // clear last chunk
for(int i = 0; i < chunkSize && tupIter.hasNext(); i++) {
it = tupIter.next();
dbs[lastBagIndex].add(getValueTuple(it, it.getIndex()));
}
}
else
// if we do not have any more tuples for input#n, return EOP
{