public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
    throws SemanticException {
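  // Decide how this join should execute on Tez: a bucketed or broadcast map
  // join when conversion is enabled and a small side fits in memory, an SMB
  // join when applicable, or a fallback to the shuffle (merge) join.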
  OptimizeTezProcContext context = (OptimizeTezProcContext) procCtx;
  JoinOperator joinOp = (JoinOperator) nd;
  TezBucketJoinProcCtx tezBucketJoinProcCtx = new TezBucketJoinProcCtx(context.conf);
  if (!context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN)) {
    // We are just converting to a common merge join operator: the shuffle
    // join in the map-reduce case.
    Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx);
    if (retval == null) {
      return retval;
    } else {
      int pos = 0; // it doesn't matter which position we use in this case.
      convertJoinSMBJoin(joinOp, context, pos, 0, false, false);
      return null;
    }
  }
  // if we have traits, and table info is present in the traits, we know the
  // exact number of buckets. Else choose the largest number of estimated
  // reducers from the parent operators.
  int numBuckets = -1;
  int estimatedBuckets = -1;
  if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)) {
    for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {
      if (parentOp.getOpTraits().getNumBuckets() > 0) {
        numBuckets = (numBuckets < parentOp.getOpTraits().getNumBuckets()) ?
            parentOp.getOpTraits().getNumBuckets() : numBuckets;
      }
      if (parentOp instanceof ReduceSinkOperator) {
        ReduceSinkOperator rs = (ReduceSinkOperator) parentOp;
        estimatedBuckets = (estimatedBuckets < rs.getConf().getNumReducers()) ?
            rs.getConf().getNumReducers() : estimatedBuckets;
      }
    }
    if (numBuckets <= 0) {
      numBuckets = estimatedBuckets;
      if (numBuckets <= 0) {
        numBuckets = 1;
      }
    }
  } else {
    numBuckets = 1;
  }
LOG.info("Estimated number of buckets " + numBuckets);
  int mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, numBuckets);
  if (mapJoinConversionPos < 0) {
    Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx);
    if (retval == null) {
      return retval;
    } else {
      // The only case is a full outer join with SMB enabled, which is not
      // possible. Convert to a regular join.
      convertJoinSMBJoin(joinOp, context, 0, 0, false, false);
      return null;
    }
  }
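  // A map join is possible; when the inputs are bucketed, prefer a bucket map
  // join so each task only loads the hash table for its own bucket.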
  if (numBuckets > 1) {
    if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)) {
      if (convertJoinBucketMapJoin(joinOp, context, mapJoinConversionPos, tezBucketJoinProcCtx)) {
        return null;
      }
    }
  }
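  // Bucket map join did not apply (single bucket, disabled, or conversion
  // failed); fall back to a plain map join without bucket scaling.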
LOG.info("Convert to non-bucketed map join");
// check if we can convert to map join no bucket scaling.
mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, 1);
if (mapJoinConversionPos < 0) {
// we are just converting to a common merge join operator. The shuffle
// join in map-reduce case.
int pos = 0; // it doesn't matter which position we use in this case.
convertJoinSMBJoin(joinOp, context, pos, 0, false, false);
return null;
}
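  // Convert to a broadcast map join: the small side(s) are built into hash
  // tables and shipped to every task processing the big table.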
  MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversionPos);
  // map join operator by default has no bucket cols
  mapJoinOp.setOpTraits(new OpTraits(null, -1, null));
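  // Carry the original join's statistics over to the new map join operator.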
  mapJoinOp.setStatistics(joinOp.getStatistics());
  // propagate this change till the next RS
  for (Operator<? extends OperatorDesc> childOp : mapJoinOp.getChildOperators()) {
    setAllChildrenTraitsToNull(childOp);
  }