// let the job retry several times, which eventually leads to failure.
if (fatal) {
continue; // wait until rj.isComplete
}
Counters ctrs = th.getCounters();
if (fatal = checkFatalErrors(ctrs, errMsg)) {
console.printError("[Fatal Error] " + errMsg.toString() + ". Killing the job.");
rj.killJob();
continue;
}
errMsg.setLength(0);
updateCounters(ctrs, rj);
// Prepare data for Client Stat Publishers (if any present) and execute them
if (clientStatPublishers.size() > 0 && ctrs != null) {
Map<String, Double> exctractedCounters = extractAllCounterValues(ctrs);
for (ClientStatsPublisher clientStatPublisher : clientStatPublishers) {
try {
clientStatPublisher.run(exctractedCounters, rj.getID().toString());
} catch (RuntimeException runtimeException) {
LOG.error("Exception " + runtimeException.getClass().getCanonicalName()
+ " thrown when running clientStatsPublishers. The stack trace is: ",
runtimeException);
}
}
}
String report = " " + getId() + " map = " + mapProgress + "%, reduce = " + reduceProgress
+ "%";
if (!report.equals(lastReport)
|| System.currentTimeMillis() >= reportTime + maxReportInterval) {
// Look up the cumulative CPU time (in milliseconds) from the job counters.
// If the counters or the CPU counter are unavailable, or the value is not
// positive, skip appending the CPU time to the report.
if (ctrs != null) {
Counter counterCpuMsec = ctrs.findCounter("org.apache.hadoop.mapred.Task$Counter",
"CPU_MILLISECONDS");
if (counterCpuMsec != null) {
long newCpuMSec = counterCpuMsec.getValue();
if (newCpuMSec > 0) {
cpuMsec = newCpuMSec;
report += ", Cumulative CPU "
+ (cpuMsec / 1000D) + " sec";
}
}
}
// write out serialized plan with counters to log file
// LOG.info(queryPlan);
String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
SessionState ss = SessionState.get();
if (ss != null) {
ss.getHiveHistory().setTaskCounters(SessionState.get().getQueryId(), getId(), ctrs);
ss.getHiveHistory().setTaskProperty(SessionState.get().getQueryId(), getId(),
Keys.TASK_HADOOP_PROGRESS, output);
ss.getHiveHistory().progressTask(SessionState.get().getQueryId(), this.task);
this.callBackObj.logPlanProgress(ss);
}
console.printInfo(output);
lastReport = report;
reportTime = System.currentTimeMillis();
}
}
if (cpuMsec > 0) {
console.printInfo("MapReduce Total cumulative CPU time: "
+ Utilities.formatMsecToStr(cpuMsec));
}
boolean success;
Counters ctrs = th.getCounters();
if (fatal) {
success = false;
} else {
// check for fatal error again in case it occurred after
// the last check before the job is completed
if (checkFatalErrors(ctrs, errMsg)) {
console.printError("[Fatal Error] " + errMsg.toString());
success = false;
} else {
success = rj.isSuccessful();
}
}
if (ctrs != null) {
Counter counterCpuMsec = ctrs.findCounter("org.apache.hadoop.mapred.Task$Counter",
"CPU_MILLISECONDS");
if (counterCpuMsec != null) {
long newCpuMSec = counterCpuMsec.getValue();
if (newCpuMSec > cpuMsec) {
cpuMsec = newCpuMSec;