}
/**
 * Builds a {@link MapJoinDesc} from the {@link JoinDesc} of the given join operator.
 *
 * <p>The method collects the join's parent operators (cast to {@link ReduceSinkOperator}),
 * rewrites the join's value and filter expressions via
 * {@code ExprNodeDescUtils.backtrack(..., op, parent)}, derives key/value table descriptors
 * for every input position, and assembles the resulting descriptor.
 *
 * <p>Side effects visible in this method: entries of the map returned by
 * {@code op.getColumnExprMap()} are overwritten, and {@code setFilters} is called on the
 * descriptor returned by {@code op.getConf()}.
 *
 * @param hconf configuration handed to {@code PlanUtils.getMapJoinKeyTableDesc}
 * @param opParseCtxMap not referenced anywhere in the visible body
 * @param op the join operator whose descriptor, parents, schema and column expression map are read
 * @param joinTree supplies the join source, the base sources and the map aliases
 * @param mapJoinPos input position that keeps its full value expression list and whose key
 *        columns define the key table descriptor (presumably the streamed/big-table input --
 *        TODO confirm against callers)
 * @param noCheckOuterJoin when {@code true}, the {@code checkMapJoin} validation is skipped
 * @return the newly constructed map-join descriptor
 * @throws SemanticException with {@code ErrorMsg.NO_OUTER_MAPJOIN} when the check is enabled
 *         and {@code checkMapJoin(mapJoinPos, condns)} returns a negative value
 */
public static MapJoinDesc getMapJoinDesc(HiveConf hconf,
LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtxMap,
JoinOperator op, QBJoinTree joinTree, int mapJoinPos, boolean noCheckOuterJoin) throws SemanticException {
JoinDesc desc = op.getConf();
JoinCondDesc[] condns = desc.getConds();
Byte[] tagOrder = desc.getTagOrder();
// outer join cannot be performed on a table which is being cached
if (!noCheckOuterJoin) {
// a negative result from checkMapJoin is treated as "map join not allowed at this position"
if (checkMapJoin(mapJoinPos, condns) < 0) {
throw new SemanticException(ErrorMsg.NO_OUTER_MAPJOIN.getMsg());
}
}
// Walk over all the sources (which are guaranteed to be reduce sink
// operators).
// The join outputs a concatenation of all the inputs.
QBJoinTree leftSrc = joinTree.getJoinSrc();
List<ReduceSinkOperator> oldReduceSinkParentOps =
new ArrayList<ReduceSinkOperator>(op.getNumParent());
// When the join tree has a join source, parent 0 of the join is recorded first.
if (leftSrc != null) {
// assert mapJoinPos == 0;
Operator<? extends OperatorDesc> parentOp = op.getParentOperators().get(0);
assert parentOp.getParentOperators().size() == 1;
oldReduceSinkParentOps.add((ReduceSinkOperator) parentOp);
}
// pos advances once per base source entry (null or not); only non-null entries
// contribute the parent operator found at that same position.
byte pos = 0;
for (String src : joinTree.getBaseSrc()) {
if (src != null) {
Operator<? extends OperatorDesc> parentOp = op.getParentOperators().get(pos);
assert parentOp.getParentOperators().size() == 1;
oldReduceSinkParentOps.add((ReduceSinkOperator) parentOp);
}
pos++;
}
Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap();
// Local copy of the signature: entries are nulled below as they are matched, without
// touching the list returned by op.getSchema().getSignature().
List<ColumnInfo> schema = new ArrayList<ColumnInfo>(op.getSchema().getSignature());
Map<Byte, List<ExprNodeDesc>> valueExprs = op.getConf().getExprs();
Map<Byte, List<ExprNodeDesc>> newValueExprs = new HashMap<Byte, List<ExprNodeDesc>>();
for (Map.Entry<Byte, List<ExprNodeDesc>> entry : valueExprs.entrySet()) {
byte tag = entry.getKey();
// The tag is used directly as an index into the collected parent list.
Operator<?> terminal = oldReduceSinkParentOps.get(tag);
List<ExprNodeDesc> values = entry.getValue();
// NOTE(review): backtrack presumably re-expresses each value in terms of the parent
// operator's inputs -- confirm against ExprNodeDescUtils.
List<ExprNodeDesc> newValues = ExprNodeDescUtils.backtrack(values, op, terminal);
newValueExprs.put(tag, newValues);
// Re-point every still-unmatched output column whose expression belongs to this tag
// at the corresponding backtracked expression.
for (int i = 0; i < schema.size(); i++) {
ColumnInfo column = schema.get(i);
if (column == null) {
// already matched while processing an earlier tag
continue;
}
ExprNodeDesc expr = colExprMap.get(column.getInternalName());
int index = ExprNodeDescUtils.indexOf(expr, values);
if (index >= 0) {
colExprMap.put(column.getInternalName(), newValues.get(index));
// mark as handled so later tags skip this column
schema.set(i, null);
}
}
}
// rewrite value index for mapjoin
Map<Byte, int[]> valueIndices = new HashMap<Byte, int[]>();
// get the join keys from old parent ReduceSink operators
Map<Byte, List<ExprNodeDesc>> keyExprMap = new HashMap<Byte, List<ExprNodeDesc>>();
// construct valueTableDescs and valueFilteredTableDescs
List<TableDesc> valueTableDescs = new ArrayList<TableDesc>();
List<TableDesc> valueFilteredTableDescs = new ArrayList<TableDesc>();
int[][] filterMap = desc.getFilterMap();
// One iteration per parent of the join; pos is reused as the input position / tag.
for (pos = 0; pos < op.getParentOperators().size(); pos++) {
ReduceSinkOperator inputRS = oldReduceSinkParentOps.get(pos);
List<ExprNodeDesc> keyCols = inputRS.getConf().getKeyCols();
List<ExprNodeDesc> valueCols = newValueExprs.get(pos);
if (pos != mapJoinPos) {
// remove values in key exprs for value table schema
// value expression for hashsink will be modified in
// LocalMapJoinProcessor
int[] valueIndex = new int[valueCols.size()];
List<ExprNodeDesc> valueColsInValueExpr = new ArrayList<ExprNodeDesc>();
// Encoding of valueIndex[i]:
//   >= 0 : the value expression is also a key column; the entry is its index in keyCols
//   <  0 : the value stays in the value list at index n, stored as -n - 1
for (int i = 0; i < valueIndex.length; i++) {
ExprNodeDesc expr = valueCols.get(i);
int kindex = ExprNodeDescUtils.indexOf(expr, keyCols);
if (kindex >= 0) {
valueIndex[i] = kindex;
} else {
valueIndex[i] = -valueColsInValueExpr.size() - 1;
valueColsInValueExpr.add(expr);
}
}
// NOTE(review): needValueIndex is defined outside this view; presumably it reports
// whether the index array differs from a trivial mapping -- confirm.
if (needValueIndex(valueIndex)) {
valueIndices.put(pos, valueIndex);
}
// From here on only the values that are not key columns are used for this input.
valueCols = valueColsInValueExpr;
}
// deep copy expr node desc
List<ExprNodeDesc> valueFilteredCols = ExprNodeDescUtils.clone(valueCols);
// Inputs other than mapJoinPos that have a filter map entry get an extra smallint
// column named "filter" appended to the filtered value schema only.
if (filterMap != null && filterMap[pos] != null && pos != mapJoinPos) {
ExprNodeColumnDesc isFilterDesc =
new ExprNodeColumnDesc(
TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME), "filter",
"filter", false);
valueFilteredCols.add(isFilterDesc);
}
// Two descriptors per input: one from the plain value columns and one from the
// (possibly filter-extended) copy; both use the "mapjoinvalue" field prefix.
TableDesc valueTableDesc =
PlanUtils.getMapJoinValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(valueCols,
"mapjoinvalue"));
TableDesc valueFilteredTableDesc =
PlanUtils.getMapJoinValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(
valueFilteredCols, "mapjoinvalue"));
valueTableDescs.add(valueTableDesc);
valueFilteredTableDescs.add(valueFilteredTableDesc);
keyExprMap.put(pos, keyCols);
}
// Backtrack the per-input filter expressions the same way as the value expressions.
Map<Byte, List<ExprNodeDesc>> filters = desc.getFilters();
Map<Byte, List<ExprNodeDesc>> newFilters = new HashMap<Byte, List<ExprNodeDesc>>();
for (Map.Entry<Byte, List<ExprNodeDesc>> entry : filters.entrySet()) {
byte srcTag = entry.getKey();
List<ExprNodeDesc> filter = entry.getValue();
Operator<?> terminal = oldReduceSinkParentOps.get(srcTag);
newFilters.put(srcTag, ExprNodeDescUtils.backtrack(filter, op, terminal));
}
// Stores the rewritten filters on the join's descriptor and rebinds the local variable,
// so the MapJoinDesc constructed below receives the rewritten filters as well.
desc.setFilters(filters = newFilters);
// create dumpfile prefix needed to create descriptor
// Result is either "<alias1><alias2>...-<count>" or "mapfile<count>".
String dumpFilePrefix = "";
if (joinTree.getMapAliases() != null) {
for (String mapAlias : joinTree.getMapAliases()) {
dumpFilePrefix = dumpFilePrefix + mapAlias;
}
dumpFilePrefix = dumpFilePrefix + "-" + PlanUtils.getCountForMapJoinDumpFilePrefix();
} else {
dumpFilePrefix = "mapfile" + PlanUtils.getCountForMapJoinDumpFilePrefix();
}
// The key table descriptor is derived from the key columns of the mapJoinPos input.
List<ExprNodeDesc> keyCols = keyExprMap.get((byte) mapJoinPos);
List<String> outputColumnNames = op.getConf().getOutputColumnNames();
TableDesc keyTableDesc =
PlanUtils.getMapJoinKeyTableDesc(hconf,
PlanUtils.getFieldSchemasFromColumnList(keyCols, MAPJOINKEY_FIELDPREFIX));
// Read again from op.getConf(), the same call that produced desc at the top of the method.
JoinCondDesc[] joinCondns = op.getConf().getConds();
MapJoinDesc mapJoinDescriptor =
new MapJoinDesc(keyExprMap, keyTableDesc, newValueExprs, valueTableDescs,
valueFilteredTableDescs, outputColumnNames, mapJoinPos, joinCondns, filters, op
.getConf().getNoOuterJoin(), dumpFilePrefix);
// Carry over the remaining settings from the original join descriptor.
mapJoinDescriptor.setStatistics(op.getConf().getStatistics());
mapJoinDescriptor.setTagOrder(tagOrder);
mapJoinDescriptor.setNullSafes(desc.getNullSafes());
mapJoinDescriptor.setFilterMap(desc.getFilterMap());
// Value indices are only set when at least one input needed an index array.
if (!valueIndices.isEmpty()) {
mapJoinDescriptor.setValueIndices(valueIndices);
}
return mapJoinDescriptor;