/**
 * Submits a job graph for execution.
 *
 * <p>The job is registered with the library cache manager, an execution graph is
 * looked up for the job ID (or created and published in {@code currentJobs} if none
 * exists), every vertex is initialized on the master, the topologically sorted
 * vertices are attached to the execution graph, and the graph is handed to the
 * scheduler.
 *
 * <p>Any {@link Throwable} raised inside the submission path is logged and reported
 * through the returned result rather than thrown. In that case the execution graph
 * (if one was obtained) is failed, this method waits up to 10000 (presumably
 * milliseconds) for the job to end, and the graph is removed from {@code currentJobs}
 * if it is still present.
 *
 * @param job the job graph to submit; {@code null} or a graph without vertices is
 *            rejected with an error result
 * @return a result with return code {@code SUCCESS} and a {@code null} description
 *         when the job was scheduled, otherwise a result with return code
 *         {@code ERROR} and a description of the problem
 * @throws IOException declared by the signature; failures inside the submission path
 *                     are reported through the returned result instead
 */
@Override
public JobSubmissionResult submitJob(JobGraph job) throws IOException {
    // First check the basics
    if (job == null) {
        return new JobSubmissionResult(AbstractJobResult.ReturnCode.ERROR, "Submitted job is null!");
    }
    if (job.getNumberOfVertices() == 0) {
        return new JobSubmissionResult(AbstractJobResult.ReturnCode.ERROR, "Job is empty.");
    }

    ExecutionGraph executionGraph = null;

    try {
        if (LOG.isInfoEnabled()) {
            LOG.info(String.format("Received job %s (%s)", job.getJobID(), job.getName()));
        }

        // Register this job with the library cache manager
        libraryCacheManager.registerJob(job.getJobID(), job.getUserJarBlobKeys());

        // get the existing execution graph (if we attach), or construct a new empty one to attach
        executionGraph = this.currentJobs.get(job.getJobID());
        if (executionGraph == null) {
            if (LOG.isInfoEnabled()) {
                LOG.info("Creating new execution graph for job " + job.getJobID() + " (" + job.getName() + ')');
            }

            executionGraph = new ExecutionGraph(job.getJobID(), job.getName(),
                    job.getJobConfiguration(), job.getUserJarBlobKeys(), this.executorService);

            // a negative retry count on the job means "use the configured default"
            executionGraph.setNumberOfRetriesLeft(job.getNumberOfExecutionRetries() >= 0 ?
                    job.getNumberOfExecutionRetries() : this.defaultExecutionRetries);
            executionGraph.setDelayBeforeRetrying(this.delayBetweenRetries);

            // publish atomically; a non-null result means another submission won the race
            ExecutionGraph previous = this.currentJobs.putIfAbsent(job.getJobID(), executionGraph);
            if (previous != null) {
                throw new JobException("Concurrent submission of a job with the same jobId: " + job.getJobID());
            }
        }
        else {
            if (LOG.isInfoEnabled()) {
                LOG.info(String.format("Found existing execution graph for id %s, attaching this job.", job.getJobID()));
            }
        }

        // Register for updates on the job status
        executionGraph.registerJobStatusListener(this);

        // grab the class loader for user-defined code
        final ClassLoader userCodeLoader = libraryCacheManager.getClassLoader(job.getJobID());
        if (userCodeLoader == null) {
            throw new JobException("The user code class loader could not be initialized.");
        }

        // first, perform the master initialization of the nodes
        if (LOG.isDebugEnabled()) {
            LOG.debug(String.format("Running master initialization of job %s (%s)", job.getJobID(), job.getName()));
        }

        for (AbstractJobVertex vertex : job.getVertices()) {
            // check that the vertex has an executable class
            String executableClass = vertex.getInvokableClassName();
            if (executableClass == null || executableClass.length() == 0) {
                throw new JobException(String.format("The vertex %s (%s) has no invokable class.", vertex.getID(), vertex.getName()));
            }

            // master side initialization
            vertex.initializeOnMaster(userCodeLoader);
        }

        // topologically sort the job vertices to form the basis of creating the execution graph
        List<AbstractJobVertex> topoSorted = job.getVerticesSortedTopologicallyFromSources();

        // convert this job graph to an execution graph
        if (LOG.isDebugEnabled()) {
            LOG.debug(String.format("Adding %d vertices from job graph %s (%s)", topoSorted.size(), job.getJobID(), job.getName()));
        }
        executionGraph.attachJobGraph(topoSorted);

        if (LOG.isDebugEnabled()) {
            LOG.debug(String.format("Successfully created execution graph from job graph %s (%s)", job.getJobID(), job.getName()));
        }

        // should the job fail if a vertex cannot be deployed immediately (streams, closed iterations)
        executionGraph.setQueuedSchedulingAllowed(job.getAllowQueuedScheduling());

        // Register job with the progress collector
        if (this.eventCollector != null) {
            this.eventCollector.registerJob(executionGraph, false, System.currentTimeMillis());
        }

        // Schedule job
        if (LOG.isInfoEnabled()) {
            LOG.info("Scheduling job " + job.getName());
        }
        executionGraph.scheduleForExecution(this.scheduler);

        return new JobSubmissionResult(AbstractJobResult.ReturnCode.SUCCESS, null);
    }
    catch (Throwable t) {
        LOG.error("Job submission failed.", t);

        boolean interrupted = false;

        if (executionGraph != null) {
            executionGraph.fail(t);

            try {
                executionGraph.waitForJobEnd(10000);
            }
            catch (InterruptedException e) {
                // remember the interruption; the flag is restored after cleanup
                interrupted = true;
                LOG.error("Interrupted while waiting for job to finish canceling.");
            }

            // The job was not properly removed by the fail call: clean up here.
            // The lookup must go by key; the legacy contains(Object) method of the JDK
            // hash tables tests values, so it can never match a job ID. The two-argument
            // remove only evicts the graph this call worked with, so a submission that
            // lost the putIfAbsent race does not evict the graph of the winner.
            if (currentJobs.remove(job.getJobID(), executionGraph)) {
                libraryCacheManager.unregisterJob(job.getJobID());
            }
        }

        if (interrupted) {
            // do not swallow the interruption; let callers observe it
            Thread.currentThread().interrupt();
        }

        return new JobSubmissionResult(AbstractJobResult.ReturnCode.ERROR, StringUtils.stringifyException(t));
    }
}