if (!context.currentUnionOperators.isEmpty()) {
// if there are union all operators we need to add the work to the set
// of union operators.
UnionWork unionWork;
if (context.unionWorkMap.containsKey(operator)) {
// we've seen this terminal before and have created a union work object.
// just need to add this work to it. There will be no children of this one
// since we've passed this operator before.
assert operator.getChildOperators().isEmpty();
unionWork = (UnionWork) context.unionWorkMap.get(operator);
} else {
// first time through. we need to create a union work object and add this
// work to it. Subsequent work should reference the union and not the actual
// work.
unionWork = utils.createUnionWork(context, operator, tezWork);
// finally hook everything up
LOG.debug("Connecting union work ("+unionWork+") with work ("+work+")");
TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.CONTAINS);
tezWork.connect(unionWork, work, edgeProp);
work = unionWork;
// This is where we cut the tree as described above. We also remember that
// we might have to connect parent work with this work later.
boolean removeParents = false;
for (Operator<?> parent: new ArrayList<Operator<?>>(root.getParentOperators())) {
removeParents = true;
context.leafOperatorToFollowingWork.put(parent, work);
LOG.debug("Removing " + parent + " as parent from " + root);
if (removeParents) {
for (Operator<?> parent : new ArrayList<Operator<?>>(root.getParentOperators())) {
// We're scanning a tree from roots to leaf (this is not technically
// correct, demux and mux operators might form a diamond shape, but
// we will only scan one path and ignore the others, because the
// diamond shape is always contained in a single vertex). The scan
// is depth first and because we remove parents when we pack a pipeline
// into a vertex we will never visit any node twice. But because of that
// we might have a situation where we need to connect 'work' that comes after
// the 'work' we're currently looking at.
// Also note: the concepts of leaf and root are reversed in Hive for historical
// reasons. Roots are data sources, leaves are data sinks. I know.
if (context.leafOperatorToFollowingWork.containsKey(operator)) {
BaseWork followingWork = context.leafOperatorToFollowingWork.get(operator);
long bytesPerReducer = context.conf.getLongVar(HiveConf.ConfVars.BYTESPERREDUCER);
LOG.debug("Second pass. Leaf operator: "+operator
+" has common downstream work:"+followingWork);
if (operator instanceof DummyStoreOperator) {
// this is the small table side.
assert (followingWork instanceof MergeJoinWork);
MergeJoinWork mergeJoinWork = (MergeJoinWork) followingWork;
CommonMergeJoinOperator mergeJoinOp = mergeJoinWork.getMergeJoinOperator();
mergeJoinWork.addMergedWork(null, work);
tezWork.setVertexType(mergeJoinWork, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);
for (BaseWork parentWork : tezWork.getParents(work)) {
TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, work);
tezWork.disconnect(parentWork, work);
tezWork.connect(parentWork, mergeJoinWork, edgeProp);
work = mergeJoinWork;
} else {
// need to add this branch to the key + value info
assert operator instanceof ReduceSinkOperator
&& ((followingWork instanceof ReduceWork) || (followingWork instanceof MergeJoinWork)
|| followingWork instanceof UnionWork);
ReduceSinkOperator rs = (ReduceSinkOperator) operator;
ReduceWork rWork = null;
if (followingWork instanceof MergeJoinWork) {
MergeJoinWork mergeJoinWork = (MergeJoinWork) followingWork;
rWork = (ReduceWork) mergeJoinWork.getMainWork();
} else if (followingWork instanceof UnionWork) {
// this is only possible when the union work has a merge join work among its children
UnionWork unionWork = (UnionWork) followingWork;
int index = getMergeIndex(tezWork, unionWork, rs);
// guaranteed to be an instance of MergeJoinWork if the index is valid
BaseWork baseWork = tezWork.getChildren(unionWork).get(index);
if (baseWork instanceof MergeJoinWork) {
MergeJoinWork mergeJoinWork = (MergeJoinWork) baseWork;