*
*/
public static class LocalMapJoinProcessor implements NodeProcessor {
/**
 * Rewrites a map join so that its small-table inputs feed a newly created
 * {@link HashTableSinkOperator}, while the map join itself reads those inputs
 * through {@link HashTableDummyOperator} placeholders.
 *
 * <p>Nodes whose name is not {@code "MAPJOIN"} are ignored. For every parent
 * of the map join other than the one at the big-table position, the parent is
 * re-pointed at the hash table sink and a dummy operator carrying a matching
 * table descriptor takes its place as the map join's parent. The created dummy
 * operators are registered on the {@link LocalMapJoinProcCtx}.
 *
 * @param nd the node being visited; only handled when it is a map join
 * @param stack the walker's node stack (not used here)
 * @param ctx the processor context; must be a {@link LocalMapJoinProcCtx}
 * @param nodeOutputs outputs of previously processed nodes (not used here)
 * @return always {@code null}
 * @throws SemanticException if a small-table parent has no row schema and is
 *         not a table scan, so no table descriptor can be derived for it
 */
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
    throws SemanticException {
  LocalMapJoinProcCtx context = (LocalMapJoinProcCtx) ctx;
  if (!nd.getName().equals("MAPJOIN")) {
    return null;
  }
  MapJoinOperator mapJoinOp = (MapJoinOperator) nd;
  try {
    hasGroupBy(mapJoinOp, context);
  } catch (Exception e) {
    // Best effort: a failure here is not fatal. The stack trace is printed and
    // the rewrite continues with whatever context.isFollowedByGroupBy() reports.
    e.printStackTrace();
  }
  // mapjoin should not be affected by join reordering
  mapJoinOp.getConf().resetOrder();
  HashTableSinkDesc hashTableSinkDesc = new HashTableSinkDesc(mapJoinOp.getConf());
  HashTableSinkOperator hashTableSinkOp = (HashTableSinkOperator) OperatorFactory
      .get(hashTableSinkDesc);
  // set hashtable memory usage; a separate limit applies when followed by a group by
  float hashtableMemoryUsage;
  if (context.isFollowedByGroupBy()) {
    hashtableMemoryUsage = context.getParseCtx().getConf().getFloatVar(
        HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
  } else {
    hashtableMemoryUsage = context.getParseCtx().getConf().getFloatVar(
        HiveConf.ConfVars.HIVEHASHTABLEMAXMEMORYUSAGE);
  }
  hashTableSinkOp.getConf().setHashtableMemoryUsage(hashtableMemoryUsage);
  // position of the big table among the map join's parents
  int bigTable = mapJoinOp.getConf().getPosBigTable();
  // the parent ops for hashTableSinkOp
  List<Operator<? extends OperatorDesc>> smallTablesParentOp =
      new ArrayList<Operator<? extends OperatorDesc>>();
  List<Operator<? extends OperatorDesc>> dummyOperators =
      new ArrayList<Operator<? extends OperatorDesc>>();
  // get all parents
  List<Operator<? extends OperatorDesc>> parentsOp = mapJoinOp.getParentOperators();
  for (int i = 0; i < parentsOp.size(); i++) {
    if (i == bigTable) {
      // placeholder so the remaining parents keep their original positions
      smallTablesParentOp.add(null);
      continue;
    }
    Operator<? extends OperatorDesc> parent = parentsOp.get(i);
    // Resolve the table descriptor first: this is the only step that can fail,
    // so a failure does not leave this parent already rewired to the sink.
    TableDesc tbl = resolveDummyTableDesc(parent);
    // let hashtable Op be the child of this parent
    parent.replaceChild(mapJoinOp, hashTableSinkOp);
    // keep the parent id correct
    smallTablesParentOp.add(parent);
    // create a new operator: HashTable DummyOperator, which shares the table desc
    HashTableDummyDesc desc = new HashTableDummyDesc();
    HashTableDummyOperator dummyOp = (HashTableDummyOperator) OperatorFactory.get(desc);
    dummyOp.getConf().setTbl(tbl);
    // let the dummy op be the parent of mapjoin op
    mapJoinOp.replaceParent(parent, dummyOp);
    List<Operator<? extends OperatorDesc>> dummyChildren =
        new ArrayList<Operator<? extends OperatorDesc>>();
    dummyChildren.add(mapJoinOp);
    dummyOp.setChildOperators(dummyChildren);
    // add this dummy op to the dummy operator list
    dummyOperators.add(dummyOp);
  }
  hashTableSinkOp.setParentOperators(smallTablesParentOp);
  for (Operator<? extends OperatorDesc> op : dummyOperators) {
    context.addDummyParentOp(op);
  }
  return null;
}

/**
 * Derives the table descriptor to attach to the dummy operator that stands in
 * for {@code parent}: built from the parent's row schema when it has one,
 * otherwise taken from the parent itself when it is a table scan.
 *
 * @param parent a small-table parent of the map join
 * @return the table descriptor for the dummy operator replacing {@code parent}
 * @throws SemanticException if the parent has no row schema and is not a table scan
 */
private static TableDesc resolveDummyTableDesc(Operator<? extends OperatorDesc> parent)
    throws SemanticException {
  RowSchema rowSchema = parent.getSchema();
  if (rowSchema != null) {
    return PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(
        rowSchema, ""));
  }
  if (parent instanceof TableScanOperator) {
    return ((TableScanOperator) parent).getTableDesc();
  }
  throw new SemanticException("Cannot derive a table descriptor for the map join's dummy parent: "
      + parent.getClass().getSimpleName()
      + " has no row schema and is not a table scan operator");
}