visited.add(current);
if(current.isMaterialized()) { // condition for being a materialized input. This may change.
inputs.add(current);
continue;
}
DeferredOp op = current.getDeferredOp();
if(op instanceof MultipleParallelDo) { // second condition for being an input
MultipleParallelDo<?> mPDo = (MultipleParallelDo)current.getDeferredOp();
if(((LazyCollection<?>)mPDo.getOrigin()).isMaterialized()) {
inputs.add(mPDo.getOrigin()); // will be done in Mapper
} else if(op instanceof ParallelDo) {
inputs.add(current); // will be done in Reducer
} else {
inputs.add(mPDo.getOrigin()); // will be done in Mapper
}
// Check for bypass channels & output channels with no group-by
for(Map.Entry entry: mPDo.getDests().entrySet()) {
LazyCollection coll = (LazyCollection)entry.getKey();
if(coll.getDownOps() == null || coll.getDownOps().size() == 0) {
bypassChannels.add(coll); // leaf node
} else if(coll.getDownOps().get(0) instanceof MultipleParallelDo) {
bypassChannels.add(coll);
/*
* Case of an output channel that Flattens with no Group By
*/
} else if(coll.getDownOps().get(0) instanceof Flatten) {
Flatten<?> thisFlatten = (Flatten<?>)coll.getDownOps().get(0);
LazyCollection ldest = (LazyCollection)thisFlatten.getDest();
if(ldest.getDownOps() == null || ldest.getDownOps().size() == 0 ||
ldest.getDownOps().get(0) instanceof MultipleParallelDo) {
unGroupedOutputChannels.add(thisFlatten);
// Add the rest of this flatten's origins to the stack in order to possibly discover more output channels
for(PCollection<?> col: thisFlatten.getOrigins()) {
if(!visited.contains(col)) {
toVisit.push((LazyCollection<?>)col);
}
}
}
}
}
continue;
}
if(op instanceof GroupByKey) { // third condition for being an input - rare case when one GBK follows another
inputs.add(current);
continue;
}
if(op instanceof Flatten) {
Flatten<?> flatten = (Flatten<?>)op;
for(PCollection<?> input: flatten.getOrigins()) {
LazyCollection<?> in = (LazyCollection<?>)input;
if(!visited.contains(in)) {
toVisit.push(in);
}
}
continue;
}
if(op instanceof OneToOneOp) {
LazyCollection<?> input = (LazyCollection<?>)((OneToOneOp<?, ?>)op).getOrigin();
if(!visited.contains(input)) {
toVisit.push(input);
}
continue;
}
}
MSCR mscrToAdd = null;
// Check if there is already an MSCR that has at least one of these inputs
for(MSCR mscr: mscrs) {
for(PCollection<?> input: inputs) {
if(mscr.hasInput(input)) {
mscrToAdd = mscr;
break;
}
}
}
if(mscrToAdd == null) { // otherwise create new MSCR
mscrToAdd = new MSCR(mscrId);
mscrId++;
}
// Add all missing input channels to current MSCR
for(PCollection<?> input: inputs) {
if(!mscrToAdd.hasInput(input)) {
mscrToAdd.addInput(input);
}
}
// Add all missing bypass outputs to current MSCR
for(PCollection<?> col: bypassChannels) {
if(!mscrToAdd.hasOutputChannel(col)) {
// Create new by-pass channel
MSCR.OutputChannel oC = new MSCR.OutputChannel(col);
mscrToAdd.addOutputChannel(oC);
}
}
// Add all missing flatten-with-no-groupby outputs to current MSCR
for(Flatten flatten: unGroupedOutputChannels) {
if(!mscrToAdd.hasOutputChannel(flatten.getDest())) {
// Create new channel with flatten and nothing else
MSCR.OutputChannel oC = new MSCR.OutputChannel(flatten.getDest());
oC.output = flatten.getDest();
oC.flatten = flatten;
mscrToAdd.addOutputChannel(oC);
}
}
// Add all missing output channels to current MSCR
for(GroupByKey groupByKey: outputChannels) {
if(!mscrToAdd.hasOutputChannel(groupByKey.getOrigin())) {
// Create new channel with group by key. It might have combiner and reducer as well.
MSCR.OutputChannel oC = new MSCR.OutputChannel(groupByKey);
oC.output = groupByKey.getDest();
if(groupByKey.getOrigin().getDeferredOp() instanceof Flatten) {
oC.flatten = (Flatten)groupByKey.getOrigin().getDeferredOp();
}
if(groupByKey.getDest().getDownOps() != null && groupByKey.getDest().getDownOps().size() == 1) {
DeferredOp op = (DeferredOp)groupByKey.getDest().getDownOps().get(0);
if(op instanceof CombineValues) {
oC.combiner = (CombineValues)op;
oC.output = oC.combiner.getDest();
LazyCollection dest = (LazyCollection)oC.combiner.getDest();
if(dest.getDownOps() != null && dest.getDownOps().size() == 1) {