//
// Now, actually submit the job (using the submit name)
//
JobClient jobClient = execEngine.getJobClient();
RunningJob status = jobClient.submitJob(conf);
log.debug("submitted job: " + status.getJobID());
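// Poll the submitted job once a second, logging Hadoop map/reduce progress and
// overall Pig progress; the last* variables track previously reported values so
// progress is only logged when it changes.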
long sleepTime = 1000;
double lastQueryProgress = -1.0;
int lastJobsQueued = -1;
double lastMapProgress = -1.0;
double lastReduceProgress = -1.0;
while (true) {
    try {
        Thread.sleep(sleepTime);
    } catch (InterruptedException e) {
        // interrupted while sleeping; just poll again
    }
    if (status.isComplete()) {
        success = status.isSuccessful();
        if (log.isDebugEnabled()) {
            StringBuilder sb = new StringBuilder();
            sb.append("Job finished ");
            sb.append((success ? "" : "un"));
            sb.append("successfully");
            log.debug(sb.toString());
        }
        if (success) {
            mrJobNumber++;
        }
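        // This job is done: overall progress is the count of completed jobs over the total number of MR jobs.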
        double queryProgress = ((double) mrJobNumber) / ((double) numMRJobs);
        if (queryProgress > lastQueryProgress) {
            if (log.isInfoEnabled()) {
                StringBuilder sbProgress = new StringBuilder();
                sbProgress.append("Pig progress = ");
                sbProgress.append(((int) (queryProgress * 100)));
                sbProgress.append("%");
                log.info(sbProgress.toString());
            }
            lastQueryProgress = queryProgress;
        }
        break;
    }
    else { // still running
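        // Still running: log this job's map and reduce progress whenever either changes.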
        double mapProgress = status.mapProgress();
        double reduceProgress = status.reduceProgress();
        if (lastMapProgress != mapProgress || lastReduceProgress != reduceProgress) {
            if (log.isDebugEnabled()) {
                StringBuilder sbProgress = new StringBuilder();
                sbProgress.append("Hadoop job progress: Map=");
                sbProgress.append((int) (mapProgress * 100));
                sbProgress.append("% Reduce=");
                sbProgress.append((int) (reduceProgress * 100));
                sbProgress.append("%");
                log.debug(sbProgress.toString());
            }
            lastMapProgress = mapProgress;
            lastReduceProgress = reduceProgress;
        }
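        // Overall query progress = (completed jobs + this job's progress) / total MR jobs,
        // where this job's progress is the average of its map and reduce progress.
        // For example, with numMRJobs = 4, one job done, map at 100% and reduce at 50%:
        // (1 + 0.75) / 4 = 0.4375, reported as 43%.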
        double numJobsCompleted = mrJobNumber;
        double thisJobProgress = (mapProgress + reduceProgress) / 2.0;
        double queryProgress = (numJobsCompleted + thisJobProgress) / ((double) numMRJobs);
        if (queryProgress > lastQueryProgress) {
            if (log.isInfoEnabled()) {
                StringBuilder sbProgress = new StringBuilder();
                sbProgress.append("Pig progress = ");
                sbProgress.append(((int) (queryProgress * 100)));
                sbProgress.append("%");
                log.info(sbProgress.toString());
            }
            lastQueryProgress = queryProgress;
        }
    }
}
// bug 1030028: if the input file is empty, Hadoop doesn't create the output file!
Path outputFile = conf.getOutputPath();
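// The output name may carry a ':'-delimited suffix; strip it to recover the actual output path.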
String outputName = outputFile.getName();
int colon = outputName.indexOf(':');
if (colon != -1) {
    outputFile = new Path(outputFile.getParent(), outputName.substring(0, colon));
}
try {
    ElementDescriptor descriptor =
        ((HDataStorage) (pom.pigContext.getDfs())).asElement(outputFile.toString());
    if (success && !descriptor.exists()) {
        // create an empty output file
        PigFile f = new PigFile(outputFile.toString(), false);
        f.store(BagFactory.getInstance().newDefaultBag(),
                new PigStorage(),
                pom.pigContext);
    }
}
catch (DataStorageException e) {
    throw WrappedIOException.wrap("Failed to obtain descriptor for " + outputFile.toString(), e);
}
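// On failure, pull error messages out of the map and reduce task reports;
// on success, total up the time spent in the individual tasks.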
if (!success) {
    // go find the error messages
    getErrorMessages(jobClient.getMapTaskReports(status.getJobID()), "map");
    getErrorMessages(jobClient.getReduceTaskReports(status.getJobID()), "reduce");
}
else {
    long timeSpent = 0;
    // NOTE: these calls can crash due to a known bug in Hadoop; the patch has not been applied yet.
    TaskReport[] mapReports = jobClient.getMapTaskReports(status.getJobID());
    TaskReport[] reduceReports = jobClient.getReduceTaskReports(status.getJobID());
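    // Sum the run time of every map and reduce task; since tasks run in parallel,
    // this can exceed the job's elapsed wall-clock time.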
    for (TaskReport r : mapReports) {
        timeSpent += (r.getFinishTime() - r.getStartTime());
    }
    for (TaskReport r : reduceReports) {
        timeSpent += (r.getFinishTime() - r.getStartTime());