public static MapJoinOperator convertMapJoin(
LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtxMap,
JoinOperator op, QBJoinTree joinTree, int mapJoinPos, boolean noCheckOuterJoin)
throws SemanticException {
// outer join cannot be performed on a table which is being cached
JoinDesc desc = op.getConf();
JoinCondDesc[] condns = desc.getConds();
Byte[] tagOrder = desc.getTagOrder();
if (!noCheckOuterJoin) {
checkMapJoin(mapJoinPos, condns);
}
RowResolver oldOutputRS = opParseCtxMap.get(op).getRowResolver();
RowResolver outputRS = new RowResolver();
ArrayList<String> outputColumnNames = new ArrayList<String>();
Map<Byte, List<ExprNodeDesc>> keyExprMap = new HashMap<Byte, List<ExprNodeDesc>>();
Map<Byte, List<ExprNodeDesc>> valueExprMap = new HashMap<Byte, List<ExprNodeDesc>>();
// Walk over all the sources (which are guaranteed to be reduce sink
// operators).
// The join outputs a concatenation of all the inputs.
QBJoinTree leftSrc = joinTree.getJoinSrc();
List<Operator<? extends OperatorDesc>> parentOps = op.getParentOperators();
List<Operator<? extends OperatorDesc>> newParentOps =
new ArrayList<Operator<? extends OperatorDesc>>();
List<Operator<? extends OperatorDesc>> oldReduceSinkParentOps =
new ArrayList<Operator<? extends OperatorDesc>>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
HashMap<Byte, HashMap<String, ExprNodeDesc>> columnTransfer =
new HashMap<Byte, HashMap<String, ExprNodeDesc>>();
// found a source which is not to be stored in memory
if (leftSrc != null) {
// assert mapJoinPos == 0;
Operator<? extends OperatorDesc> parentOp = parentOps.get(0);
assert parentOp.getParentOperators().size() == 1;
Operator<? extends OperatorDesc> grandParentOp =
parentOp.getParentOperators().get(0);
oldReduceSinkParentOps.add(parentOp);
grandParentOp.removeChild(parentOp);
newParentOps.add(grandParentOp);
}
int pos = 0;
// Remove parent reduce-sink operators
for (String src : joinTree.getBaseSrc()) {
if (src != null) {
Operator<? extends OperatorDesc> parentOp = parentOps.get(pos);
assert parentOp.getParentOperators().size() == 1;
Operator<? extends OperatorDesc> grandParentOp =
parentOp.getParentOperators().get(0);
grandParentOp.removeChild(parentOp);
oldReduceSinkParentOps.add(parentOp);
newParentOps.add(grandParentOp);
}
pos++;
}
// get the join keys from old parent ReduceSink operators
for (pos = 0; pos < newParentOps.size(); pos++) {
ReduceSinkOperator oldPar = (ReduceSinkOperator) oldReduceSinkParentOps.get(pos);
ReduceSinkDesc rsconf = oldPar.getConf();
Byte tag = (byte) rsconf.getTag();
List<ExprNodeDesc> keys = rsconf.getKeyCols();
keyExprMap.put(tag, keys);
// set column transfer
HashMap<String, ExprNodeDesc> map = (HashMap<String, ExprNodeDesc>) oldPar.getColumnExprMap();
columnTransfer.put(tag, map);
}
// create the map-join operator
for (pos = 0; pos < newParentOps.size(); pos++) {
RowResolver inputRS = opParseCtxMap.get(newParentOps.get(pos)).getRowResolver();
List<ExprNodeDesc> values = new ArrayList<ExprNodeDesc>();
Iterator<String> keysIter = inputRS.getTableNames().iterator();
while (keysIter.hasNext()) {
String key = keysIter.next();
HashMap<String, ColumnInfo> rrMap = inputRS.getFieldMap(key);
Iterator<String> fNamesIter = rrMap.keySet().iterator();
while (fNamesIter.hasNext()) {
String field = fNamesIter.next();
ColumnInfo valueInfo = inputRS.get(key, field);
ColumnInfo oldValueInfo = oldOutputRS.get(key, field);
if (oldValueInfo == null) {
continue;
}
String outputCol = oldValueInfo.getInternalName();
if (outputRS.get(key, field) == null) {
outputColumnNames.add(outputCol);
ExprNodeDesc colDesc = new ExprNodeColumnDesc(valueInfo.getType(), valueInfo
.getInternalName(), valueInfo.getTabAlias(), valueInfo.getIsVirtualCol());
values.add(colDesc);
outputRS.put(key, field, new ColumnInfo(outputCol, valueInfo.getType(), valueInfo
.getTabAlias(), valueInfo.getIsVirtualCol(), valueInfo.isHiddenVirtualCol()));
colExprMap.put(outputCol, colDesc);
}
}
}
valueExprMap.put(Byte.valueOf((byte) pos), values);
}
Map<Byte, List<ExprNodeDesc>> filters = desc.getFilters();
for (Map.Entry<Byte, List<ExprNodeDesc>> entry : filters.entrySet()) {
Byte srcAlias = entry.getKey();
List<ExprNodeDesc> columnDescList = entry.getValue();
for (ExprNodeDesc nodeExpr : columnDescList) {
ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) nodeExpr;
for (ExprNodeDesc childDesc : funcDesc.getChildExprs()) {
if (!(childDesc instanceof ExprNodeColumnDesc)) {
continue;
}
ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) childDesc;
// reset columns
String column = columnDesc.getColumn();
String newColumn = null;
HashMap<String, ExprNodeDesc> map = columnTransfer.get(srcAlias);
ExprNodeColumnDesc tmpDesc = (ExprNodeColumnDesc) map.get(column);
if (tmpDesc != null) {
newColumn = tmpDesc.getColumn();
}
if (newColumn == null) {
throw new SemanticException("No Column name found in parent reduce sink op");
}
columnDesc.setColumn(newColumn);
}
}
}
JoinCondDesc[] joinCondns = op.getConf().getConds();
Operator[] newPar = new Operator[newParentOps.size()];
pos = 0;
for (Operator<? extends OperatorDesc> o : newParentOps) {
newPar[pos++] = o;
}
List<ExprNodeDesc> keyCols = keyExprMap.get(Byte.valueOf((byte) 0));
StringBuilder keyOrder = new StringBuilder();
for (int i = 0; i < keyCols.size(); i++) {
keyOrder.append("+");
}
TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(PlanUtils
.getFieldSchemasFromColumnList(keyCols, "mapjoinkey"));
List<TableDesc> valueTableDescs = new ArrayList<TableDesc>();
List<TableDesc> valueFiltedTableDescs = new ArrayList<TableDesc>();
int[][] filterMap = desc.getFilterMap();
for (pos = 0; pos < newParentOps.size(); pos++) {
List<ExprNodeDesc> valueCols = valueExprMap.get(Byte.valueOf((byte) pos));
int length = valueCols.size();
List<ExprNodeDesc> valueFilteredCols = new ArrayList<ExprNodeDesc>(length);
// deep copy expr node desc
for (int i = 0; i < length; i++) {
valueFilteredCols.add(valueCols.get(i).clone());
}
if (filterMap != null && filterMap[pos] != null && pos != mapJoinPos) {
ExprNodeColumnDesc isFilterDesc = new ExprNodeColumnDesc(TypeInfoFactory
.getPrimitiveTypeInfo(serdeConstants.TINYINT_TYPE_NAME), "filter", "filter", false);
valueFilteredCols.add(isFilterDesc);
}
keyOrder = new StringBuilder();
for (int i = 0; i < valueCols.size(); i++) {
keyOrder.append("+");
}
TableDesc valueTableDesc = PlanUtils.getMapJoinValueTableDesc(PlanUtils
.getFieldSchemasFromColumnList(valueCols, "mapjoinvalue"));
TableDesc valueFilteredTableDesc = PlanUtils.getMapJoinValueTableDesc(PlanUtils
.getFieldSchemasFromColumnList(valueFilteredCols, "mapjoinvalue"));
valueTableDescs.add(valueTableDesc);
valueFiltedTableDescs.add(valueFilteredTableDesc);
}
String dumpFilePrefix = "";
if( joinTree.getMapAliases() != null ) {
for(String mapAlias : joinTree.getMapAliases()) {
dumpFilePrefix = dumpFilePrefix + mapAlias;
}
dumpFilePrefix = dumpFilePrefix+"-"+PlanUtils.getCountForMapJoinDumpFilePrefix();
} else {
dumpFilePrefix = "mapfile"+PlanUtils.getCountForMapJoinDumpFilePrefix();
}
MapJoinDesc mapJoinDescriptor = new MapJoinDesc(keyExprMap, keyTableDesc, valueExprMap,
valueTableDescs, valueFiltedTableDescs, outputColumnNames, mapJoinPos, joinCondns,
filters, op.getConf().getNoOuterJoin(), dumpFilePrefix);
mapJoinDescriptor.setTagOrder(tagOrder);
mapJoinDescriptor.setNullSafes(desc.getNullSafes());
mapJoinDescriptor.setFilterMap(desc.getFilterMap());
MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(
mapJoinDescriptor, new RowSchema(outputRS.getColumnInfos()), newPar);
OpParseContext ctx = new OpParseContext(outputRS);