*/
public static void addStatsTask(FileSinkOperator nd, MoveTask mvTask,
Task<? extends Serializable> currTask, HiveConf hconf) {
MoveWork mvWork = mvTask.getWork();
StatsWork statsWork = null;
if (mvWork.getLoadTableWork() != null) {
statsWork = new StatsWork(mvWork.getLoadTableWork());
} else if (mvWork.getLoadFileWork() != null) {
statsWork = new StatsWork(mvWork.getLoadFileWork());
}
assert statsWork != null : "Error when genereting StatsTask";
statsWork.setSourceTask(currTask);
statsWork.setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
if (currTask.getWork() instanceof MapredWork) {
MapredWork mrWork = (MapredWork) currTask.getWork();
mrWork.getMapWork().setGatheringStats(true);
if (mrWork.getReduceWork() != null) {
mrWork.getReduceWork().setGatheringStats(true);
}
} else {
TezWork work = (TezWork) currTask.getWork();
for (BaseWork w: work.getAllWork()) {
w.setGatheringStats(true);
}
}
// AggKey in StatsWork is used for stats aggregation while StatsAggPrefix
// in FileSinkDesc is used for stats publishing. They should be consistent.
statsWork.setAggKey(nd.getConf().getStatsAggPrefix());
Task<? extends Serializable> statsTask = TaskFactory.get(statsWork, hconf);
// mark the FileSinkOperator for gathering stats (the task's work was already marked above)
nd.getConf().setGatherStats(true);
nd.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));