ParseContext parseCtx = ctx.getParseCtx();
boolean chDir = false;
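// chDir is set when this sink's output files should be merged afterwards;
// in that case the sink is redirected to a temporary directory (see the
// processFS call below) and a follow-up merge job produces the final files.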
Task<? extends Serializable> currTask = ctx.getCurrTask();
ctx.addRootIfPossible(currTask);
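// This rule fires on file sinks, so the node being visited is the
// FileSinkOperator that terminates the current operator tree.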
FileSinkOperator fsOp = (FileSinkOperator) nd;
boolean isInsertTable = // is INSERT OVERWRITE TABLE
fsOp.getConf().getTableInfo().getTableName() != null &&
parseCtx.getQB().getParseInfo().isInsertToTable();
HiveConf hconf = parseCtx.getConf();
// Mark this task as a final map-reduce task (ignoring the optional merge task)
((MapredWork) currTask.getWork()).setFinalMapRed(true);
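// Linked file sinks are several sinks (e.g., the branches of a rewritten
// union) writing under the same final directory; they share one child task,
// created when the first of them is processed.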
// If this file sink desc has been processed due to a linked file sink desc,
// use that task
Map<FileSinkDesc, Task<? extends Serializable>> fileSinkDescs = ctx.getLinkedFileDescTasks();
if (fileSinkDescs != null) {
Task<? extends Serializable> childTask = fileSinkDescs.get(fsOp.getConf());
processLinkedFileDesc(ctx, childTask);
return true;
}
// Has the user enabled merging of files for map-only jobs or for all jobs?
if ((ctx.getMvTask() != null) && (!ctx.getMvTask().isEmpty())) {
List<Task<MoveWork>> mvTasks = ctx.getMvTask();
// In case of unions or map-joins, it is possible that this file sink has
// already been seen, so there is no need to attempt to merge the files again.
if ((ctx.getSeenFileSinkOps() == null)
|| (!ctx.getSeenFileSinkOps().contains(nd))) {
// No need to merge if the move is to the local file system.
MoveTask mvTask = (MoveTask) findMoveTask(mvTasks, fsOp);
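// If stats auto-gathering (hive.stats.autogather) is enabled and this is an
// INSERT OVERWRITE into a table, chain a StatsTask after the move task so
// that table/partition statistics are published once the data is in place.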
if (mvTask != null && isInsertTable && hconf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER)) {
addStatsTask(fsOp, mvTask, currTask, parseCtx.getConf());
}
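// Merge only when the destination is not on the local file system and the
// sink's output layout allows merging (FileSinkDesc.canBeMerged()).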
if ((mvTask != null) && !mvTask.isLocal() && fsOp.getConf().canBeMerged()) {
if (fsOp.getConf().isLinkedFileSink()) {
// If the user has HIVEMERGEMAPREDFILES set to false, the assumption is that
// the number of reducers is small, so the number of files is small anyway.
// However, this optimization may increase the number of files by a big
// margin, so merge aggressively: honor either merge flag.
if (hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) ||
hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES)) {
chDir = true;
}
} else {
// Separate configuration parameters control whether to merge the output of
// a map-only job or of a map-reduce job.
MapredWork currWork = (MapredWork) currTask.getWork();
boolean mergeMapOnly =
hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) && currWork.getReduceWork() == null;
boolean mergeMapRed =
hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES) &&
currWork.getReduceWork() != null;
if (mergeMapOnly || mergeMapRed) {
chDir = true;
}
}
}
}
}
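// processFS finalizes this sink's output path. With chDir set, the sink is
// repointed at a temporary directory and finalName receives the original
// destination, which the merge job created below will populate.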
String finalName = processFS(fsOp, stack, opProcCtx, chDir);
if (chDir) {
// Merge the files in the destination table/partitions by creating a map-only
// merge job. If the underlying data is in RCFile format, an RCFileBlockMerge
// task is created instead.
LOG.info("using CombineHiveInputformat for the merge job");
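// The merge job reads the sink's temporary output and writes merged files to
// finalName; CombineHiveInputFormat packs many small files into few splits.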
createMRWorkForMergingFiles(fsOp, ctx, finalName);
}
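// Record the child task for every FileSinkDesc linked to this one so that,
// when the other linked sinks are visited, they take the early-return path
// at the top of this method instead of planning another merge.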
FileSinkDesc fileSinkDesc = fsOp.getConf();
if (fileSinkDesc.isLinkedFileSink()) {
Map<FileSinkDesc, Task<? extends Serializable>> linkedFileDescTasks =
ctx.getLinkedFileDescTasks();
if (linkedFileDescTasks == null) {
linkedFileDescTasks = new HashMap<FileSinkDesc, Task<? extends Serializable>>();