}
/**
 * Event loop of the local (uber-mode) container launcher.
 *
 * <p>Repeatedly takes a {@link ContainerLauncherEvent} from
 * {@code eventQueue} until the current thread is interrupted, then:
 * <ul>
 *   <li>for {@code CONTAINER_REMOTE_LAUNCH}: reports the attempt as launched
 *       (with port -1), updates the uber-task job counters and runs the
 *       subtask in this thread via {@code runSubtask}; a
 *       {@link RuntimeException} from that work is reported as a failed
 *       container, an {@link IOException} terminates the JVM;</li>
 *   <li>for {@code CONTAINER_REMOTE_CLEANUP}: immediately reports the
 *       container as cleaned, since there is no real container to kill;</li>
 *   <li>for any other type: logs a warning and ignores the event.</li>
 * </ul>
 *
 * <p>If the thread is interrupted while waiting for an event, the interrupt
 * status is restored before returning so callers can still observe it.
 */
@SuppressWarnings("unchecked")
@Override
public void run() {
  ContainerLauncherEvent event = null;

  // Collect locations of map outputs to give to reduces
  Map<TaskAttemptID, MapOutputFile> localMapFiles =
      new HashMap<TaskAttemptID, MapOutputFile>();

  // _must_ either run subtasks sequentially or accept expense of new JVMs
  // (i.e., fork()), else will get weird failures when maps try to create/
  // write same dirname or filename:  no chdir() in Java
  while (!Thread.currentThread().isInterrupted()) {
    try {
      event = eventQueue.take();
    } catch (InterruptedException e) {  // mostly via T_KILL?  JOB_KILL?
      LOG.error("Returning, interrupted : " + e);
      // take() cleared the interrupt flag when it threw; set it again so
      // the interruption is not silently lost.
      Thread.currentThread().interrupt();
      return;
    }

    LOG.info("Processing the event " + event);

    if (event.getType() == EventType.CONTAINER_REMOTE_LAUNCH) {

      ContainerRemoteLaunchEvent launchEv =
          (ContainerRemoteLaunchEvent) event;
      TaskAttemptId attemptID = launchEv.getTaskAttemptID();

      Job job = context.getAllJobs().get(attemptID.getTaskId().getJobId());
      int numMapTasks = job.getTotalMaps();
      int numReduceTasks = job.getTotalReduces();

      // YARN (tracking) Task:
      org.apache.hadoop.mapreduce.v2.app.job.Task ytask =
          job.getTask(attemptID.getTaskId());
      // classic mapred Task:
      org.apache.hadoop.mapred.Task remoteTask = launchEv.getRemoteTask();

      // after "launching," send launched event to task attempt to move
      // state from ASSIGNED to RUNNING (also nukes "remoteTask", so must
      // do getRemoteTask() call first)

      // There is no port number because we are not really talking to a task
      // tracker.  The shuffle is just done through local files.  So the
      // port number is set to -1 in this case.
      context.getEventHandler().handle(
          new TaskAttemptContainerLaunchedEvent(attemptID, -1));

      // A job without maps has nothing to wait for before its reduces.
      if (numMapTasks == 0) {
        doneWithMaps = true;
      }

      try {
        if (remoteTask.isMapOrReduce()) {
          // Account for the launched uber subtask in the job counters.
          JobCounterUpdateEvent jce =
              new JobCounterUpdateEvent(attemptID.getTaskId().getJobId());
          jce.addCounterUpdate(JobCounter.TOTAL_LAUNCHED_UBERTASKS, 1);
          if (remoteTask.isMapTask()) {
            jce.addCounterUpdate(JobCounter.NUM_UBER_SUBMAPS, 1);
          } else {
            jce.addCounterUpdate(JobCounter.NUM_UBER_SUBREDUCES, 1);
          }
          context.getEventHandler().handle(jce);
        }
        runSubtask(remoteTask, ytask.getType(), attemptID, numMapTasks,
                   (numReduceTasks > 0), localMapFiles);

      } catch (RuntimeException re) {
        JobCounterUpdateEvent jce =
            new JobCounterUpdateEvent(attemptID.getTaskId().getJobId());
        jce.addCounterUpdate(JobCounter.NUM_FAILED_UBERTASKS, 1);
        context.getEventHandler().handle(jce);
        // this is our signal that the subtask failed in some way, so
        // simulate a failed JVM/container and send a container-completed
        // event to task attempt (i.e., move state machine from RUNNING
        // to FAIL_CONTAINER_CLEANUP [and ultimately to FAILED])
        context.getEventHandler().handle(new TaskAttemptEvent(attemptID,
            TaskAttemptEventType.TA_CONTAINER_COMPLETED));
      } catch (IOException ioe) {
        // if umbilical itself barfs (in error-handler of runSubMap()),
        // we're pretty much hosed, so do what YarnChild main() does
        // (i.e., exit clumsily--but can never happen, so no worries!)
        LOG.fatal("oopsie...  this can never happen: "
            + StringUtils.stringifyException(ioe));
        System.exit(-1);
      }

    } else if (event.getType() == EventType.CONTAINER_REMOTE_CLEANUP) {

      // no container to kill, so just send "cleaned" event to task attempt
      // to move us from SUCCESS_CONTAINER_CLEANUP to SUCCEEDED state
      // (or {FAIL|KILL}_CONTAINER_CLEANUP to {FAIL|KILL}_TASK_CLEANUP)
      context.getEventHandler().handle(
          new TaskAttemptEvent(event.getTaskAttemptID(),
              TaskAttemptEventType.TA_CONTAINER_CLEANED));

    } else {
      LOG.warn("Ignoring unexpected event " + event);
    }
  }
}