* enhanced to keep the big table bucket -> small table buckets mapping.
*/
protected void convertMapJoinToBucketMapJoin(
    MapJoinOperator mapJoinOp,
    BucketJoinProcCtx context) throws SemanticException {
  // Everything computed below is recorded on the operator's MapJoinDesc: the per-alias
  // bucket file mapping, the big table alias, (for a partitioned big table) the
  // partition-to-file mapping, and finally the bucket-map-join flag.
  MapJoinDesc joinDesc = mapJoinOp.getConf();
  Map<String, Map<String, List<String>>> bucketFileMappingByAlias =
      new LinkedHashMap<String, Map<String, List<String>>>();

  // Bucketing information gathered in the context.
  Map<String, List<Integer>> bucketCountsByAlias =
      context.getTblAliasToNumberOfBucketsInEachPartition();
  Map<String, List<List<String>>> bucketFilesByAlias =
      context.getTblAliasToBucketedFilePathsInEachPartition();
  Map<Partition, List<String>> bigPartitionFiles =
      context.getBigTblPartsToBucketFileNames();
  Map<Partition, Integer> bigPartitionBucketCounts =
      context.getBigTblPartsToBucketNumber();
  List<String> aliases = context.getJoinAliases();
  String bigAlias = context.getBaseBigAlias();

  // Sort (in place) the bucket file names of every big table partition.
  for (List<String> fileNames : bigPartitionFiles.values()) {
    Collections.sort(fileNames);
  }

  // Build, for every alias other than the big table, the mapping from big table
  // bucket file names to the bucket file names of that alias.
  for (String alias : aliases) {
    if (alias.equals(bigAlias)) {
      // the big table is never mapped onto itself
      continue;
    }

    // Sort (in place) the bucket file names of each partition of this small table.
    List<List<String>> smallFilesPerPartition = bucketFilesByAlias.get(alias);
    for (List<String> fileNames : smallFilesPerPartition) {
      Collections.sort(fileNames);
    }
    List<Integer> smallBucketCounts = bucketCountsByAlias.get(alias);

    // Register the (still empty) mapping for this alias first; it is filled in below.
    Map<String, List<String>> bigFileToSmallFiles =
        new LinkedHashMap<String, List<String>>();
    bucketFileMappingByAlias.put(alias, bigFileToSmallFiles);

    // The big table may have several partitions; the mapping is filled once per
    // partition. The file-name map and the bucket-number map are paired entry by
    // entry, by iteration position.
    Iterator<Entry<Partition, Integer>> bucketCountCursor =
        bigPartitionBucketCounts.entrySet().iterator();
    for (Entry<Partition, List<String>> partitionFiles : bigPartitionFiles.entrySet()) {
      assert bucketCountCursor.hasNext();
      int bigBucketCount = bucketCountCursor.next().getValue();
      List<String> bigFileNames = partitionFiles.getValue();
      fillMappingBigTableBucketFileNameToSmallTableBucketFileNames(
          smallBucketCounts,
          smallFilesPerPartition,
          bigFileToSmallFiles,
          bigBucketCount,
          bigFileNames,
          joinDesc.getBigTableBucketNumMapping());
    }
  }

  joinDesc.setAliasBucketFileNameMapping(bucketFileMappingByAlias);
  joinDesc.setBigTableAlias(bigAlias);
  if (context.isBigTablePartitioned()) {
    joinDesc.setBigTablePartSpecToFileMapping(convert(bigPartitionFiles));
  }

  // mark the join as successfully converted to a bucket map join
  joinDesc.setBucketMapJoin(true);
}