private ConditionalTask processCurrentTask(MapRedTask currTask,
ConditionalTask conditionalTask, Context context)
throws SemanticException {
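// Try to convert this common join task into a conditional task: one map join
// task is generated per big-table candidate, and the original common join task
// is kept as the backup plan chosen at runtime when no candidate qualifies.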
// check whether the task contains a common join operator; if not, there is nothing to convert
JoinOperator joinOp = getJoinOp(currTask);
if (joinOp == null) {
return null;
}
currTask.setTaskTag(Task.COMMON_JOIN);
MapredWork currWork = currTask.getWork();
// create conditional work list and task list
List<Serializable> listWorks = new ArrayList<Serializable>();
List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();
// create alias to task mapping and alias to input file mapping for resolver
HashMap<String, Task<? extends Serializable>> aliasToTask = new HashMap<String, Task<? extends Serializable>>();
HashMap<String, ArrayList<String>> pathToAliases = currWork.getPathToAliases();
// get the parse context for this join operator
ParseContext parseCtx = physicalContext.getParseContext();
QBJoinTree joinTree = parseCtx.getJoinContext().get(joinOp);
// start to generate multiple map join tasks
JoinDesc joinDesc = joinOp.getConf();
Byte[] order = joinDesc.getTagOrder();
int numAliases = order.length;
long aliasTotalKnownInputSize = 0;
HashMap<String, Long> aliasToSize = new HashMap<String, Long>();
try {
// go over all the input paths once to compute their sizes; the call populates
// the content summary cache that context.getCS(path) reads from below
Utilities.getInputSummary(context, currWork, null);
// record the known size of each alias: once a table is chosen as the big
// table, these sizes determine the total size of the remaining tables, which
// become the small tables
for (Map.Entry<String, ArrayList<String>> entry : pathToAliases.entrySet()) {
String path = entry.getKey();
List<String> aliasList = entry.getValue();
ContentSummary cs = context.getCS(path);
if (cs != null) {
long size = cs.getLength();
for (String alias : aliasList) {
aliasTotalKnownInputSize += size;
Long es = aliasToSize.get(alias);
if (es == null) {
es = Long.valueOf(0);
}
es += size;
aliasToSize.put(alias, es);
}
}
}
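// at this point aliasToSize maps each alias to its known input size and
// aliasTotalKnownInputSize holds the overall total; note that a path shared
// by several aliases contributes its size once per alias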
HashSet<Integer> bigTableCandidates = MapJoinProcessor.getBigTableCandidates(joinDesc.getConds());
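// bigTableCandidates holds the join positions that may legally be streamed
// as the big table given the join conditions (outer joins restrict the
// choice; a full outer join leaves no candidate at all)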
// no table could be the big table; there is no need to convert
if (bigTableCandidates == null) {
return null;
}
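// snapshot the current work (with its operator parse contexts and join tree)
// as XML; each candidate below is deep-copied by deserializing this snapshot
// so it can be rewritten independently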
currWork.setOpParseCtxMap(parseCtx.getOpParseCtx());
currWork.setJoinTree(joinTree);
String xml = currWork.toXML();
String bigTableAlias = null;
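// upper bound on the summed size of the small tables: a candidate map join
// only qualifies if everything except the big table fits under this limit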
long ThresholdOfSmallTblSizeSum = HiveConf.getLongVar(context.getConf(),
HiveConf.ConfVars.HIVESMALLTABLESFILESIZE);
for (int i = 0; i < numAliases; i++) {
// skip positions that cannot serve as the big table
if (!bigTableCandidates.contains(i)) {
continue;
}
// create a map join task with table i as the big table:
// deep copy a new MapredWork from the XML snapshot
InputStream in = new ByteArrayInputStream(xml.getBytes("UTF-8"));
MapredWork newWork = Utilities.deserializeMapRedWork(in, physicalContext.getConf());
// create a mapred task for this work
MapRedTask newTask = (MapRedTask) TaskFactory.get(newWork, physicalContext
.getParseContext().getConf());
JoinOperator newJoinOp = getJoinOp(newTask);
// rewrite newWork as a map join, assuming the big table is at position i
bigTableAlias = MapJoinProcessor.genMapJoinOpAndLocalWork(newWork, newJoinOp, i);
Long aliasKnownSize = aliasToSize.get(bigTableAlias);
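// aliasKnownSize is the known input size of the chosen big table; subtracting
// it from aliasTotalKnownInputSize gives the combined size of the would-be
// small tables, which is checked against the threshold above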