public static void splitTasks(Operator<? extends Serializable> op,
Task<? extends Serializable> parentTask,
Task<? extends Serializable> childTask,
GenMRProcContext opProcCtx, boolean setReducer,
boolean local, int posn) throws SemanticException {
mapredWork plan = (mapredWork) childTask.getWork();
Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();
ParseContext parseCtx = opProcCtx.getParseCtx();
parentTask.addDependentTask(childTask);
// Root Task cannot depend on any other task, therefore childTask cannot be a root Task
List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks();
if (rootTasks.contains(childTask))
rootTasks.remove(childTask);
// generate the temporary file
Context baseCtx = parseCtx.getContext();
String taskTmpDir = baseCtx.getMRTmpFileURI();
Operator<? extends Serializable> parent = op.getParentOperators().get(posn);
tableDesc tt_desc =
PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));
// Create a file sink operator for this file name
boolean compressIntermediate = parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE);
fileSinkDesc desc = new fileSinkDesc(taskTmpDir, tt_desc, compressIntermediate);
if (compressIntermediate) {
desc.setCompressCodec(parseCtx.getConf().getVar(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC));
desc.setCompressType(parseCtx.getConf().getVar(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE));
}
Operator<? extends Serializable> fs_op = putOpInsertMap(OperatorFactory.get(desc, parent.getSchema()), null, parseCtx);
// replace the reduce child with this operator
List<Operator<? extends Serializable>> childOpList = parent.getChildOperators();
for (int pos = 0; pos < childOpList.size(); pos++) {
if (childOpList.get(pos) == op) {
childOpList.set(pos, fs_op);
break;
}
}
List<Operator<? extends Serializable>> parentOpList = new ArrayList<Operator<? extends Serializable>>();
parentOpList.add(parent);
fs_op.setParentOperators(parentOpList);
// create a dummy tableScan operator on top of op
Operator<? extends Serializable> ts_op =
putOpInsertMap(OperatorFactory.get(tableScanDesc.class, parent.getSchema()), null, parseCtx);
childOpList = new ArrayList<Operator<? extends Serializable>>();
childOpList.add(op);
ts_op.setChildOperators(childOpList);
op.getParentOperators().set(posn, ts_op);
Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx();
mapCurrCtx.put(ts_op, new GenMapRedCtx(childTask, null, null));
String streamDesc = taskTmpDir;
mapredWork cplan = (mapredWork) childTask.getWork();
if (setReducer) {
Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
if (reducer.getClass() == JoinOperator.class) {
String origStreamDesc;
streamDesc = "$INTNAME";
origStreamDesc = streamDesc;
int pos = 0;
while (cplan.getAliasToWork().get(streamDesc) != null)
streamDesc = origStreamDesc.concat(String.valueOf(++pos));
}
// TODO: Allocate work to remove the temporary files and make that
// dependent on the redTask
if (reducer.getClass() == JoinOperator.class)
cplan.setNeedsTagging(true);
}
// Add the path to alias mapping
setTaskPlan(taskTmpDir, streamDesc, ts_op, cplan, local, tt_desc);