if (checkMapJoin(mapJoinPos, condns) < 0) {
throw new SemanticException(ErrorMsg.NO_OUTER_MAPJOIN.getMsg());
}
}
RowResolver outputRS = opParseCtxMap.get(op).getRowResolver();
Map<Byte, List<ExprNodeDesc>> keyExprMap = new HashMap<Byte, List<ExprNodeDesc>>();
// Walk over all the sources (which are guaranteed to be reduce sink
// operators).
// The join outputs a concatenation of all the inputs.
QBJoinTree leftSrc = joinTree.getJoinSrc();
List<Operator<? extends OperatorDesc>> parentOps = op.getParentOperators();
List<Operator<? extends OperatorDesc>> newParentOps =
new ArrayList<Operator<? extends OperatorDesc>>();
List<Operator<? extends OperatorDesc>> oldReduceSinkParentOps =
new ArrayList<Operator<? extends OperatorDesc>>();
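// If the join tree has a nested join source (leftSrc), its reduce sink is the parent at
// position 0; record that reduce sink and the operator feeding it.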
if (leftSrc != null) {
// assert mapJoinPos == 0;
Operator<? extends OperatorDesc> parentOp = parentOps.get(0);
assert parentOp.getParentOperators().size() == 1;
Operator<? extends OperatorDesc> grandParentOp =
parentOp.getParentOperators().get(0);
oldReduceSinkParentOps.add(parentOp);
newParentOps.add(grandParentOp);
}
byte pos = 0;
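// Collect the parent reduce-sink operator of every base source together with its own
// parent; the reduce sinks are detached from those parents further down.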
for (String src : joinTree.getBaseSrc()) {
if (src != null) {
Operator<? extends OperatorDesc> parentOp = parentOps.get(pos);
assert parentOp.getParentOperators().size() == 1;
Operator<? extends OperatorDesc> grandParentOp =
parentOp.getParentOperators().get(0);
oldReduceSinkParentOps.add(parentOp);
newParentOps.add(grandParentOp);
}
pos++;
}
// get the join keys from old parent ReduceSink operators
for (pos = 0; pos < newParentOps.size(); pos++) {
ReduceSinkOperator oldPar = (ReduceSinkOperator) oldReduceSinkParentOps.get(pos);
ReduceSinkDesc rsconf = oldPar.getConf();
List<ExprNodeDesc> keys = rsconf.getKeyCols();
keyExprMap.put(pos, keys);
}
// When the reduce sinks (RS) are removed, only the ExprNodeDescs change (key/value/filter
// expressions and colExprMap); everything else (output column names, RR, schema) stays intact.
Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap();
List<String> outputColumnNames = op.getConf().getOutputColumnNames();
List<ColumnInfo> schema = new ArrayList<ColumnInfo>(op.getSchema().getSignature());
Map<Byte, List<ExprNodeDesc>> valueExprs = op.getConf().getExprs();
Map<Byte, List<ExprNodeDesc>> newValueExprs = new HashMap<Byte, List<ExprNodeDesc>>();
for (Map.Entry<Byte, List<ExprNodeDesc>> entry : valueExprs.entrySet()) {
byte tag = entry.getKey();
Operator<?> terminal = oldReduceSinkParentOps.get(tag);
List<ExprNodeDesc> values = entry.getValue();
List<ExprNodeDesc> newValues = ExprNodeDescUtils.backtrack(values, op, terminal);
newValueExprs.put(tag, newValues);
for (int i = 0; i < schema.size(); i++) {
ColumnInfo column = schema.get(i);
if (column == null) {
continue;
}
ExprNodeDesc expr = colExprMap.get(column.getInternalName());
int index = ExprNodeDescUtils.indexOf(expr, values);
if (index >= 0) {
colExprMap.put(column.getInternalName(), newValues.get(index));
schema.set(i, null);
}
}
}
Map<Byte, List<ExprNodeDesc>> filters = desc.getFilters();
Map<Byte, List<ExprNodeDesc>> newFilters = new HashMap<Byte, List<ExprNodeDesc>>();
for (Map.Entry<Byte, List<ExprNodeDesc>> entry : filters.entrySet()) {
byte srcTag = entry.getKey();
List<ExprNodeDesc> filter = entry.getValue();
Operator<?> terminal = oldReduceSinkParentOps.get(srcTag);
newFilters.put(srcTag, ExprNodeDescUtils.backtrack(filter, op, terminal));
}
desc.setFilters(filters = newFilters);
// remove old parents
for (pos = 0; pos < newParentOps.size(); pos++) {
newParentOps.get(pos).removeChild(oldReduceSinkParentOps.get(pos));
}
JoinCondDesc[] joinCondns = op.getConf().getConds();
Operator[] newPar = new Operator[newParentOps.size()];
pos = 0;
for (Operator<? extends OperatorDesc> o : newParentOps) {
newPar[pos++] = o;
}
List<ExprNodeDesc> keyCols = keyExprMap.get(Byte.valueOf((byte) 0));
StringBuilder keyOrder = new StringBuilder();
for (int i = 0; i < keyCols.size(); i++) {
keyOrder.append("+");
}
TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(PlanUtils
.getFieldSchemasFromColumnList(keyCols, MAPJOINKEY_FIELDPREFIX));
List<TableDesc> valueTableDescs = new ArrayList<TableDesc>();
List<TableDesc> valueFiltedTableDescs = new ArrayList<TableDesc>();
int[][] filterMap = desc.getFilterMap();
for (pos = 0; pos < newParentOps.size(); pos++) {
List<ExprNodeDesc> valueCols = newValueExprs.get(pos);
int length = valueCols.size();
List<ExprNodeDesc> valueFilteredCols = new ArrayList<ExprNodeDesc>(length);
// deep copy expr node desc
for (int i = 0; i < length; i++) {
valueFilteredCols.add(valueCols.get(i).clone());
}
if (filterMap != null && filterMap[pos] != null && pos != mapJoinPos) {
ExprNodeColumnDesc isFilterDesc = new ExprNodeColumnDesc(TypeInfoFactory
.getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME), "filter", "filter", false);
valueFilteredCols.add(isFilterDesc);
}
keyOrder = new StringBuilder();
for (int i = 0; i < valueCols.size(); i++) {
keyOrder.append("+");
}
TableDesc valueTableDesc = PlanUtils.getMapJoinValueTableDesc(PlanUtils
.getFieldSchemasFromColumnList(valueCols, "mapjoinvalue"));
TableDesc valueFilteredTableDesc = PlanUtils.getMapJoinValueTableDesc(PlanUtils
.getFieldSchemasFromColumnList(valueFilteredCols, "mapjoinvalue"));
valueTableDescs.add(valueTableDesc);
valueFiltedTableDescs.add(valueFilteredTableDesc);
}
String dumpFilePrefix = "";
if( joinTree.getMapAliases() != null ) {
for(String mapAlias : joinTree.getMapAliases()) {
dumpFilePrefix = dumpFilePrefix + mapAlias;
}
dumpFilePrefix = dumpFilePrefix+"-"+PlanUtils.getCountForMapJoinDumpFilePrefix();
} else {
dumpFilePrefix = "mapfile"+PlanUtils.getCountForMapJoinDumpFilePrefix();
}
MapJoinDesc mapJoinDescriptor = new MapJoinDesc(keyExprMap, keyTableDesc, newValueExprs,
valueTableDescs, valueFiltedTableDescs, outputColumnNames, mapJoinPos, joinCondns,
filters, op.getConf().getNoOuterJoin(), dumpFilePrefix);
mapJoinDescriptor.setTagOrder(tagOrder);
mapJoinDescriptor.setNullSafes(desc.getNullSafes());
mapJoinDescriptor.setFilterMap(desc.getFilterMap());
MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(
mapJoinDescriptor, new RowSchema(outputRS.getColumnInfos()), newPar);
OpParseContext ctx = new OpParseContext(outputRS);
opParseCtxMap.put(mapJoinOp, ctx);
mapJoinOp.getConf().setReversedExprs(op.getConf().getReversedExprs());