// Splits the aliases of newWork into the big table (left in newWork) and the
// small tables (moved into a new MapredLocalWork, each with its own FetchWork).
//
// Small-table aliases are only recorded while iterating over
// newWork.getAliasToWork(); they are removed from that map after the loop,
// because removing during the iteration would raise a
// ConcurrentModificationException.
ArrayList<String> smallTableAliasList = new ArrayList<String>();
// alias whose table scan feeds the join at bigTablePos (assigned in the loop below)
String bigTableAlias = null;
// create a new MapredLocalWork
MapredLocalWork newLocalWork = new MapredLocalWork(
    new LinkedHashMap<String, Operator<? extends Serializable>>(),
    new LinkedHashMap<String, FetchWork>());
for (Map.Entry<String, Operator<? extends Serializable>> entry : newWork.getAliasToWork()
    .entrySet()) {
  String alias = entry.getKey();
  Operator<? extends Serializable> op = entry.getValue();

  // Trace down the operator tree from the table scan operator until the map
  // join operator is reached. An operator without children ends the walk with
  // childOp == null, so that the SemanticException below is raised instead of
  // an IndexOutOfBoundsException / NullPointerException from get(0).
  List<? extends Operator<? extends Serializable>> children = op.getChildOperators();
  Operator<? extends Serializable> parentOp = op;
  Operator<? extends Serializable> childOp =
      (children == null || children.isEmpty()) ? null : children.get(0);
  while ((childOp != null) && (!childOp.equals(mapJoinOp))) {
    parentOp = childOp;
    children = parentOp.getChildOperators();
    if (children == null || children.isEmpty()) {
      // dead end: this branch never reaches the map join operator
      childOp = null;
    } else {
      assert children.size() == 1;
      childOp = children.get(0);
    }
  }
  if (childOp == null) {
    throw new SemanticException(
        "Cannot find join op by tracing down the table scan operator tree");
  }

  // parentOp is the operator directly above the join on this branch; its index
  // among the join's parents tells which join input this alias feeds.
  // The big table is not fetched locally, so it is skipped here.
  int i = childOp.getParentOperators().indexOf(parentOp);
  if (i == bigTablePos) {
    bigTableAlias = alias;
    continue;
  }

  // register the small table with the local work and remember its alias
  newLocalWork.getAliasToWork().put(alias, op);
  smallTableAliasList.add(alias);

  // Collect every input path of this alias and detach the alias from
  // pathToAliases, because these paths will be read by a fetch operator.
  LinkedHashMap<String, ArrayList<String>> pathToAliases = newWork.getPathToAliases();
  // all input paths that were associated with this alias
  HashSet<String> pathSet = new HashSet<String>();
  // paths left without any alias; removed after the iteration to avoid
  // modifying pathToAliases while iterating over it
  HashSet<String> emptyPath = new HashSet<String>();
  for (Map.Entry<String, ArrayList<String>> entry2 : pathToAliases.entrySet()) {
    String path = entry2.getKey();
    ArrayList<String> list = entry2.getValue();
    if (list.contains(alias)) {
      pathSet.add(path);
      // remove this alias from the alias list
      list.remove(alias);
      if (list.isEmpty()) {
        emptyPath.add(path);
      }
    }
  }
  // remove the paths with which no alias is associated any more
  for (String path : emptyPath) {
    pathToAliases.remove(path);
  }

  // create fetch work
  FetchWork fetchWork = null;
  List<String> partDir = new ArrayList<String>();
  List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
  for (String tablePath : pathSet) {
    PartitionDesc partitionDesc = newWork.getPathToPartitionInfo().get(tablePath);
    if (partitionDesc == null) {
      // fail with a descriptive error rather than a NullPointerException below
      throw new SemanticException("Cannot find partition info for path " + tablePath
          + " of alias " + alias);
    }
    // an empty partition spec means a non partitioned table: a single path
    // based fetch work is enough, the remaining paths are not inspected
    if (partitionDesc.getPartSpec() == null || partitionDesc.getPartSpec().size() == 0) {
      fetchWork = new FetchWork(tablePath, partitionDesc.getTableDesc());
      break;
    }
    // partitioned table: accumulate the partition directory and its descriptor
    partDir.add(tablePath);
    partDesc.add(partitionDesc);
  }
  // create fetch work for a partitioned table
  if (fetchWork == null) {
    fetchWork = new FetchWork(partDir, partDesc);
  }
  // set alias to fetch work
  newLocalWork.getAliasToFetchWork().put(alias, fetchWork);
}
// remove the small table aliases from aliasToWork; done after the loop above
// to avoid a concurrent modification of the map being iterated
for (String alias : smallTableAliasList) {
  newWork.getAliasToWork().remove(alias);
}