}
}
for (Entry<BaseWork,TezEdgeProperty> parentWorkMap : linkWorkMap.entrySet()) {
BaseWork parentWork = parentWorkMap.getKey();
LOG.debug("connecting "+parentWork.getName()+" with "+work.getName());
TezEdgeProperty edgeProp = parentWorkMap.getValue();
tezWork.connect(parentWork, work, edgeProp);
// need to set up output name for reduce sink now that we know the name
// of the downstream work
for (ReduceSinkOperator r:
context.linkWorkWithReduceSinkMap.get(parentWork)) {
if (r.getConf().getOutputName() != null) {
LOG.debug("Cloning reduce sink for multi-child broadcast edge");
// we've already set this one up. Need to clone for the next work.
r = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
(ReduceSinkDesc)r.getConf().clone(), r.getParentOperators());
context.clonedReduceSinks.add(r);
}
r.getConf().setOutputName(work.getName());
context.connectedReduceSinks.add(r);
}
}
}
}
}
// clear out the set. we don't need it anymore.
context.currentMapJoinOperators.clear();
}
// This is where we cut the tree as described above. We also remember that
// we might have to connect parent work with this work later.
for (Operator<?> parent: new ArrayList<Operator<?>>(root.getParentOperators())) {
context.leafOperatorToFollowingWork.put(parent, work);
LOG.debug("Removing " + parent + " as parent from " + root);
root.removeParent(parent);
}
if (!context.currentUnionOperators.isEmpty()) {
// if there are union all operators we need to add the work to the set
// of union operators.
UnionWork unionWork;
if (context.unionWorkMap.containsKey(operator)) {
// we've seen this terminal before and have created a union work object.
// just need to add this work to it. There will be no children of this one
// since we've passed this operator before.
assert operator.getChildOperators().isEmpty();
unionWork = (UnionWork) context.unionWorkMap.get(operator);
} else {
// first time through. we need to create a union work object and add this
// work to it. Subsequent work should reference the union and not the actual
// work.
unionWork = utils.createUnionWork(context, operator, tezWork);
}
// finally hook everything up
LOG.debug("Connecting union work ("+unionWork+") with work ("+work+")");
TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.CONTAINS);
tezWork.connect(unionWork, work, edgeProp);
unionWork.addUnionOperators(context.currentUnionOperators);
context.currentUnionOperators.clear();
context.workWithUnionOperators.add(work);
work = unionWork;
}
// We're scanning a tree from roots to leaf (this is not technically
// correct, demux and mux operators might form a diamond shape, but
// we will only scan one path and ignore the others, because the
// diamond shape is always contained in a single vertex). The scan
// is depth first and because we remove parents when we pack a pipeline
// into a vertex we will never visit any node twice. But because of that
// we might have a situation where we need to connect 'work' that comes after
// the 'work' we're currently looking at.
//
// Also note: the concept of leaf and root is reversed in hive for historical
// reasons. Roots are data sources, leaves are data sinks. I know.
if (context.leafOperatorToFollowingWork.containsKey(operator)) {
BaseWork followingWork = context.leafOperatorToFollowingWork.get(operator);
LOG.debug("Second pass. Leaf operator: "+operator
+" has common downstream work:"+followingWork);
// need to add this branch to the key + value info
assert operator instanceof ReduceSinkOperator
&& followingWork instanceof ReduceWork;
ReduceSinkOperator rs = (ReduceSinkOperator) operator;
ReduceWork rWork = (ReduceWork) followingWork;
GenMapRedUtils.setKeyAndValueDesc(rWork, rs);
// remember which parent belongs to which tag
rWork.getTagToInput().put(rs.getConf().getTag(), work.getName());
// remember the output name of the reduce sink
rs.getConf().setOutputName(rWork.getName());
if (!context.connectedReduceSinks.contains(rs)) {
// add dependency between the two work items
TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.SIMPLE_EDGE);
tezWork.connect(work, rWork, edgeProp);
context.connectedReduceSinks.add(rs);
}
} else {
LOG.debug("First pass. Leaf operator: "+operator);