// ---- Bookkeeping for the launch loop below ----
// Jobs reported as failed by a JobControl run, accumulated over all runs.
List<Job> failedJobs = new LinkedList<Job>();
// Native mapreduce operators whose runJob() threw an IOException.
List<NativeMapReduceOper> failedNativeMR = new LinkedList<NativeMapReduceOper>();
// Failed jobs of the current run only: cleared before each run and then
// handed to jcc.updateMROpPlan once the run is over.
List<Job> completeFailedJobsInThisRun = new LinkedList<Job>();
// Jobs reported as successful by a JobControl run, accumulated over all runs.
List<Job> succJobs = new LinkedList<Job>();
// JobControl returned by jcc.compile for the current run.
JobControl jc;
// Size of the plan at launch time; used as the denominator of the progress fraction.
int totalMRJobs = mrp.size();
// Numerator of the progress fraction: incremented for each native job that
// ran without error and by the value returned from jcc.updateMROpPlan.
int numMRJobsCompl = 0;
// Progress value passed to the previous notifyProgress call; -1 before the first call.
double lastProg = -1;
//create the exception handler for the job control thread
//and register the handler with the job control thread
// (the registration itself happens each time a job control thread is created)
JobControlThreadExceptionHandler jctExceptionHandler = new JobControlThreadExceptionHandler();
// True only when the "stop.on.failure" property is exactly "true" (default "false").
// When set, a failed native job or failed map-reduce job raises an
// ExecException instead of letting the loop continue.
boolean stop_on_failure =
pc.getProperties().getProperty("stop.on.failure", "false").equals("true");
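// Loop until the plan is empty. jcc.compile may return null while the plan is
// still non-empty; in that case the native mapreduce roots are run directly.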
while(mrp.size() != 0) {
jc = jcc.compile(mrp, grpName);
// No JobControl was produced for a non-empty plan: run the native mapreduce
// roots of the plan directly, drop them from the plan, then go back to the
// top of the loop so the remaining plan is compiled again.
if (jc == null) {
    // Iterate over a copy of the roots; presumably this keeps the iteration
    // safe while operators are removed from mrp inside the loop.
    List<MapReduceOper> roots = new LinkedList<MapReduceOper>();
    roots.addAll(mrp.getRoots());

    // run the native mapreduce roots first then run the rest of the roots
    for (MapReduceOper mro : roots) {
        if (mro instanceof NativeMapReduceOper) {
            NativeMapReduceOper natOp = (NativeMapReduceOper) mro;
            try {
                ScriptState.get().emitJobsSubmittedNotification(1);
                natOp.runJob();
                numMRJobsCompl++;
            } catch (IOException e) {
                // The native job failed: cut everything that depends on it
                // out of the plan and remember the operator as failed.
                mrp.trimBelow(natOp);
                failedNativeMR.add(natOp);
                String msg = "Error running native mapreduce" +
                        " operator job :" + natOp.getJobId() + e.getMessage();

                String stackTrace = getStackStraceStr(e);
                LogUtils.writeLog(msg,
                        stackTrace,
                        pc.getProperties().getProperty("pig.logfile"),
                        log
                );
                log.info(msg);

                if (stop_on_failure) {
                    int errCode = 6017;
                    // Keep the original IOException as the cause so its stack
                    // trace is not lost to callers of this method.
                    throw new ExecException(msg, errCode,
                            PigException.REMOTE_ENVIRONMENT, e);
                }
            }
            // Report progress whether the job succeeded or failed; only a
            // successful run has incremented numMRJobsCompl.
            double prog = ((double) numMRJobsCompl) / totalMRJobs;
            notifyProgress(prog, lastProg);
            lastProg = prog;
            mrp.remove(natOp);
        }
    }
    continue;
}
// Every job of a freshly compiled JobControl starts out waiting, so the
// waiting list is also the list of jobs that have no Hadoop job id yet.
List<Job> jobsWithoutIds = jc.getWaitingJobs();
int waitingCount = jobsWithoutIds.size();
log.info(waitingCount + " map-reduce job(s) waiting for submission.");

// Tell the listeners how many jobs are about to be submitted.
ScriptState.get().emitJobsSubmittedNotification(waitingCount);

// Build "<job tracker host>:<http port>" from the configuration of the first job.
// NOTE(review): get(0) assumes the JobControl holds at least one waiting job - confirm.
String jobTrackerLoc;
JobConf jobConf = jobsWithoutIds.get(0).getJobConf();
try {
    String httpAddress = jobConf.get("mapred.job.tracker.http.address");
    String trackerAddress = jobConf.get(HExecutionEngine.JOB_TRACKER_LOCATION);
    // Host part of the tracker address (text before the first ':').
    String trackerHost = trackerAddress.substring(0, trackerAddress.indexOf(":"));
    // ":port" part of the http address (text from the first ':' onwards).
    String httpPortSuffix = httpAddress.substring(httpAddress.indexOf(":"));
    jobTrackerLoc = trackerHost + httpPortSuffix;
} catch (Exception e) {
    // A missing or malformed setting ends up here. Most probably we are
    // running in local mode, where a job tracker location is meaningless,
    // so no location is recorded and none is printed later.
    jobTrackerLoc = null;
    log.debug("Failed to get job tracker location.");
}
// Start this run with an empty "failed in this run" list.
completeFailedJobsInThisRun.clear();

// Run the JobControl on its own thread, with the shared exception handler
// installed so failures of that thread can be picked up afterwards.
Thread controlThread = new Thread(jc);
controlThread.setUncaughtExceptionHandler(jctExceptionHandler);
controlThread.setContextClassLoader(LogicalPlanBuilder.classloader);
controlThread.start();

// Poll until the JobControl reports that all jobs are finished.
while (!jc.allFinished()) {
    try {
        Thread.sleep(sleepTime);
    } catch (InterruptedException e) {
        // NOTE(review): the interruption is ignored and polling simply goes on;
        // confirm that this is the intended behaviour.
    }

    // Announce each job that has received a Hadoop job id since the last poll.
    List<Job> newlyIdentified = new ArrayList<Job>();
    for (Job job : jobsWithoutIds) {
        if (job.getAssignedJobID() == null) {
            // This job is not assigned an id yet; look at it again next poll.
            continue;
        }
        newlyIdentified.add(job);
        log.info("HadoopJobId: " + job.getAssignedJobID());
        if (jobTrackerLoc != null) {
            log.info("More information at: http://" + jobTrackerLoc
                    + "/jobdetails.jsp?jobid=" + job.getAssignedJobID());
        }
        ScriptState.get().emitJobStartedNotification(
                job.getAssignedJobID().toString());
    }
    // Announced jobs need no further attention.
    jobsWithoutIds.removeAll(newlyIdentified);

    // Overall progress: work already completed plus the progress of this run.
    double progress = (numMRJobsCompl + calculateProgress(jc, jobClient)) / totalMRJobs;
    notifyProgress(progress, lastProg);
    lastProg = progress;
}
// Look at the job controller's own exception before looking at the jobs:
// if the controller failed before launching anything, there are no jobs
// to inspect for failure.
if (jobControlException != null) {
    if (!(jobControlException instanceof PigException)) {
        // Anything that is not a PigException is wrapped and reported as a bug.
        int errCode = 2117;
        String msg = "Unexpected error when launching map reduce job.";
        throw new ExecException(msg, errCode, PigException.BUG,
                jobControlException);
    }

    // A PigException is rethrown unchanged, after its recorded stack trace
    // (if there is one) has been written to the Pig log file.
    if (jobControlExceptionStackTrace != null) {
        LogUtils.writeLog("Error message from job controller",
                jobControlExceptionStackTrace,
                pc.getProperties().getProperty("pig.logfile"),
                log);
    }
    throw jobControlException;
}
// Fetch the failed jobs once, so that the error message, the log output and
// the bookkeeping below all work on the same list instead of asking the
// JobControl again on every loop iteration.
List<Job> failedInThisRun = jc.getFailedJobs();
if (!failedInThisRun.isEmpty()) {
    if (stop_on_failure) {
        // Abort the whole launch: the exception message holds the first line
        // of each failed job's message, one job per line.
        int errCode = 6017;
        StringBuilder msg = new StringBuilder();
        boolean first = true;
        for (Job j : failedInThisRun) {
            if (!first) {
                msg.append("\n");
            }
            msg.append(getFirstLineFromMessage(j.getMessage()));
            first = false;
        }
        throw new ExecException(msg.toString(), errCode,
                PigException.REMOTE_ENVIRONMENT);
    }

    // If we only have one store and that job fails, then we are sure
    // that the job failed completely, and we shall stop dependent jobs.
    for (Job job : failedInThisRun) {
        completeFailedJobsInThisRun.add(job);
        log.info("job " + job.getAssignedJobID() + " has failed! Stop running all dependent jobs");
    }
    failedJobs.addAll(failedInThisRun);
}
// Record the job ids of the jobs that just completed in Pig stats' job DAG.
PigStatsUtil.updateJobMroMap(jcc.getJobMroMap());

// Update the plan for this run's outcome; the value returned by
// updateMROpPlan is added to the completed count used for progress.
numMRJobsCompl += jcc.updateMROpPlan(completeFailedJobsInThisRun);

// Move the results of the successful jobs and remember those jobs.
List<Job> succeededInThisRun = jc.getSuccessfulJobs();
jcc.moveResults(succeededInThisRun);
succJobs.addAll(succeededInThisRun);

// Collect statistics for this run, then stop the JobControl.
PigStatsUtil.accumulateStats(jc);
jc.stop();
}
// Reached once the loop above has ended, i.e. the plan is empty:
// report 100% to the registered listeners and to the log.
ScriptState.get().emitProgressUpdatedNotification(100);
log.info( "100% complete");