/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.corona.CoronaClient;
import org.apache.hadoop.corona.InetAddress;
import org.apache.hadoop.corona.CoronaConf;
import org.apache.hadoop.corona.PoolInfo;
import org.apache.hadoop.corona.ResourceGrant;
import org.apache.hadoop.corona.ResourceRequest;
import org.apache.hadoop.corona.ResourceType;
import org.apache.hadoop.corona.SessionDriver;
import org.apache.hadoop.corona.SessionDriverService;
import org.apache.hadoop.corona.SessionHistoryManager;
import org.apache.hadoop.corona.SessionPriority;
import org.apache.hadoop.corona.SessionStatus;
import org.apache.hadoop.corona.Utilities;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.http.HttpServer;
import org.apache.hadoop.ipc.ProtocolSignature;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.util.VersionInfo;
/**
* The Corona Job Tracker (CJT) can work in one of three modes
* - In-process: In this mode, the CJT performs its entire functionality in
* the same process as the JobClient
* - Forwarding: In this case, the CJT just forwards the calls to a remote CJT.
* - Standalone: This is the remote CJT that is serving the calls from the
* forwarding CJT.
* The CoronaJobTracker (CJT) is responsible for running a single map-reduce
* job in Corona. It is similar to the classic Map-Reduce JobTracker (JT) class,
* except that it deals with only one job. Unlike the JT, the CJT does not
* track/manage the nodes that run the map/reduce tasks. The CJT gets all that
* functionality from the ClusterManager (CM). It communicates the resource
* needs of its job to the CM, and uses the resources provided by the CM to
* launch tasks.
* <p/>
* Cluster Resource Flow in CJT
* <p/>
* When the CJT starts, it obtains a session ID in the constructor.
* This session ID is used to derive the job ID and that does not change during
* the lifetime of the CJT. When the job is started through
* JobSubmissionProtocol#submitJob call, the resource flow is started. First the
* job is initialized through CoronaJobInProgress#initTasks(). Then
* CoronaJobTracker#startJob() does the work to create the initial set of
* resource requests to be sent to the CM. CoronaJobTracker#updateResources is
* responsible for actually sending the resource requests to the CM, and it is
* invoked periodically to update the CM with requested and released resources.
* <p/>
* Apart from the initial set of resource requests, the CJT may send additional
* resource requests. This is needed to run speculative task attempts and to
* re-run task attempts that have failed and need to be run on a different
* machine. In these cases, the machine that ran the original attempt is
* specified as an excluded host in the resource request.
* <p/>
* The process of releasing resources back to the CM is a little involved.
* The resources given by the CM to the CJT are for the CJT to use for as long
* as needed, except if the resource is revoked by the CM through
* SessionDriverService#Iface#revokeResource. So once a task is finished on the
* granted machine, the CJT is allowed to reuse the machine to run other tasks.
* The decision of reusing a resource vs not is done in
* CoronaJobTracker#processTaskResource, which does the following:
* - if the task succeeded: reuse the resource if possible, otherwise release it
* - if the task failed: get a new request for running the task, and mark the
* resource as bad so that it can be excluded from future requests.
* <p/>
 * When the job finishes, the resources active at that point are not explicitly
 * returned to the CM; instead, a session-end notification is sent to the CM,
 * which effectively releases the resources for the job. Also a job end
 * notification
* is sent to the task trackers that ran tasks, so that they can clean up their
* state (see CoronaJobTracker#close)
*/
@SuppressWarnings("deprecation")
public class CoronaJobTracker extends JobTrackerTraits
implements JobSubmissionProtocol,
SessionDriverService.Iface,
InterTrackerProtocol,
ResourceTracker.ResourceProcessor,
TaskStateChangeListener {
/** Threshold on number of map tasks for automatically choosing remote mode
 * for a job. If the number of map tasks in the job is more than this,
 * start a remote mode tracker
 */
public static final String STANDALONE_CJT_THRESHOLD_CONF =
  "mapred.coronajobtracker.remote.threshold";
/** Default threshold for automatically choosing remote mode for a job. */
public static final int STANDALONE_CJT_THRESHOLD_DEFAULT = 1000;
/** Timeout for connecting to a task tracker. */
public static final String TT_CONNECT_TIMEOUT_MSEC_KEY =
  "corona.tasktracker.connect.timeout.msec";
/** RPC timeout for RPCs to a task tracker. */
public static final String TT_RPC_TIMEOUT_MSEC_KEY =
  "corona.tasktracker.rpc.timeout.msec";
/** Interval between heartbeats to the parent corona job tracker. */
public static final String HEART_BEAT_INTERVAL_KEY =
  "corona.jobtracker.heartbeat.interval";
/** Number of grants processed under the global lock at a time. */
public static final String GRANTS_PROCESS_PER_ITERATION =
  "corona.jobtracker.resources.per.iteration";
/** Limit on number of task completion events to hand out in a single RPC. */
public static final String TASK_COMPLETION_EVENTS_PER_RPC =
  "corona.jobtracker.tce.per.rpc";
/** Corona system directory. */
public static final String SYSTEM_DIR_KEY = "corona.system.dir";
/** Default corona system directory. */
public static final String DEFAULT_SYSTEM_DIR = "/tmp/hadoop/mapred/system";
/** Number of handlers used by the RPC server.*/
public static final String RPC_SERVER_HANDLER_COUNT =
  "mapred.job.tracker.handler.count";
/**
 * The number of handlers used by the RPC server in
 * standalone mode. The standalone mode is used for large jobs, so should
 * use more threads.
 */
public static final String RPC_SERVER_HANDLER_COUNT_STANDALONE =
  "mapred.coronajobtracker.remote.thread.standalone";
/**
 * If a remote JT is running, stop the local RPC server after this timeout
 * past the completion of the job.
 */
public static final String RPC_SERVER_STOP_TIMEOUT =
  "mapred.coronajobtracker.rpcserver.stop.timeout";
/** Logger. */
private static final Log LOG = LogFactory.getLog(CoronaJobTracker.class);
// An uncaught exception in any CJT thread is fatal: terminate the process
// rather than continue in an inconsistent state.
static {
  Utilities.makeProcessExitOnUncaughtException(LOG);
}
/** Configuration. */
private JobConf conf;
/** Filesystem. */
private FileSystem fs;
/** Running "standalone" (in the cluster). */
private final boolean isStandalone;
/** The remote job tracker. */
private volatile RemoteJTProxy remoteJT;
/** Grants to process in an iteration. */
private final int grantsPerIteration;
/** Limit on number of task completion events to hand out in a single RPC. */
private final int maxEventsPerRpc;
/** Handles the session with the cluster manager. */
private SessionDriver sessionDriver;
/** Session ID. */
private String sessionId;
/** Session End Status. */
private SessionStatus sessionEndStatus = null;
/** Counter used to derive the job ID; will always be 1 since a CJT runs
 * exactly one job. */
private AtomicInteger jobCounter = new AtomicInteger();
/** Identifier for the current job. */
private JobID jobId;
/** The job. */
private CoronaJobInProgress job;
/** The grants to revoke. */
private List<ResourceGrant> grantsToRevoke = new ArrayList<ResourceGrant>();
/** The dead nodes. */
private List<String> deadNodes = new ArrayList<String>();
/** Is the job tracker running? */
private volatile boolean running = true;
/** Has {@link #close(boolean)} been called? */
private volatile boolean closed = false;
/** The thread to assign tasks. */
private Thread assignTasksThread;
/** The resource tracker. */
private ResourceTracker resourceTracker;
/** The RPC server address. */
private InetSocketAddress jobTrackerAddress;
/** The RPC server. */
private volatile Server interTrackerServer;
/** The HTTP server. */
private HttpServer infoServer;
/** The HTTP server port. */
private int infoPort;
/** The task lookup table. Replaced by a fresh instance in
 * startFullTracker(). */
private TaskLookupTable taskLookupTable = new TaskLookupTable();
/** Task tracker status map. */
private Map<String, TaskTrackerStatus> taskTrackerStatus =
  new ConcurrentHashMap<String, TaskTrackerStatus>();
/** Task tracker statistics. */
private final TrackerStats trackerStats;
/** Cache of RPC clients to task trackers. */
private TrackerClientCache trackerClientCache;
/** The resource updater. */
private ResourceUpdater resourceUpdater = new ResourceUpdater();
/** The resource updater thread. */
private Thread resourceUpdaterThread;
/** The global lock. Guards the task lookup table and task/resource
 * bookkeeping maps. */
private final Object lockObject = new Object();
/** Mutex for closing. */
private final Object closeLock = new Object();
/** The job history. */
private CoronaJobHistory jobHistory;
/** Interval between heartbeats to the parent. */
private final int heartbeatInterval;
/** Has a full-fledged tracker started. */
private volatile boolean fullTrackerStarted = false;
/** The task launcher. */
private CoronaTaskLauncher taskLauncher;
/** This provides information about the resource needs of each task (TIP). */
private HashMap<TaskInProgress, TaskContext> taskToContextMap =
  new HashMap<TaskInProgress, TaskContext>();
/** Maintains the inverse of taskToContextMap. */
private HashMap<Integer, TaskInProgress> requestToTipMap =
  new HashMap<Integer, TaskInProgress>();
/** Keeping track of the speculated Maps. */
private HashSet<TaskInProgress> speculatedMaps =
  new HashSet<TaskInProgress>();
/** Keeping track of the speculated Reduces. */
private HashSet<TaskInProgress> speculatedReduces =
  new HashSet<TaskInProgress>();
/** The task launch expiry logic. */
private ExpireTasks expireTasks;
/**
 * Get the task expiry interval for this job.
 * Reads "mapred.task.timeout" from the job configuration, defaulting
 * to ten minutes.
 * @return the timeout in milliseconds after which a launched task is expired
 */
public long getTaskExpiryInterval() {
  final long defaultTimeoutMsec = 10 * 60 * 1000;
  return this.job.getConf().getLong("mapred.task.timeout", defaultTimeoutMsec);
}
/** Maintain information about resource requests for a TIP. */
/** Maintain information about resource requests for a TIP. */
private static class TaskContext {
  /** The resource requests made for this TIP (the initial request plus any
   * re-requests, e.g. for speculation or retries on other hosts). */
  private final List<ResourceRequest> resourceRequests;
  /** Hosts excluded from future requests for this TIP. */
  private final Set<String> excludedHosts;
  /**
   * Constructor.
   * @param req The initial resource request for the TIP.
   */
  TaskContext(ResourceRequest req) {
    // The collections are mutated later but the references never change,
    // so the fields are final.
    resourceRequests = new ArrayList<ResourceRequest>();
    resourceRequests.add(req);
    excludedHosts = new HashSet<String>();
  }
}
/**
* An Attempt and it's corresponding TaskInProgress
* There is a unique TIP per Attempt. Hence the attempt
* can be used as the unique key to identify this tuple
* (in a Collection for example)
*/
/**
 * An Attempt and it's corresponding TaskInProgress
 * There is a unique TIP per Attempt. Hence the attempt
 * can be used as the unique key to identify this tuple
 * (in a Collection for example)
 */
public static final class TaskAttemptIDWithTip
    implements Comparable<TaskAttemptIDWithTip> {
  /** The attempt ID; sole basis for equality and ordering. */
  private final TaskAttemptID attemptId;
  /** The TIP. May be null when the object is used purely as a lookup key
   * (see TaskLookupTable.removeTaskEntry). */
  private final TaskInProgress tip;
  /**
   * Constructor.
   * @param attemptId The attempt ID.
   * @param tip The TIP.
   */
  public TaskAttemptIDWithTip(TaskAttemptID attemptId, TaskInProgress tip) {
    this.attemptId = attemptId;
    this.tip = tip;
  }
  @Override
  public boolean equals(Object o) {
    // Fix: the previous implementation cast unconditionally, throwing
    // ClassCastException for foreign types and NullPointerException for
    // null, violating the Object.equals() contract. Equality remains
    // based on the attempt ID only (tip is deliberately ignored).
    if (this == o) {
      return true;
    }
    if (!(o instanceof TaskAttemptIDWithTip)) {
      return false;
    }
    TaskAttemptIDWithTip that = (TaskAttemptIDWithTip) o;
    return this.attemptId.equals(that.attemptId);
  }
  @Override
  public int hashCode() {
    return attemptId.hashCode();
  }
  @Override
  public int compareTo(TaskAttemptIDWithTip that) {
    return this.attemptId.compareTo(that.attemptId);
  }
}
/**
* Look up information about tasks.
*/
/**
 * Look up information about tasks: which tracker an attempt ran on, which
 * TIP an attempt belongs to, and which resource grant an attempt is using.
 * All maps are guarded by the enclosing tracker's global lockObject.
 */
class TaskLookupTable {
  /** Where did the attempt run? Entries are never removed (see
   * removeTaskEntry). */
  private Map<TaskAttemptID, String> taskIdToTrackerMap =
    new HashMap<TaskAttemptID, String>();
  /** Reverse lookup from attempt to TIP. Entries are never removed. */
  private Map<TaskAttemptID, TaskInProgress> taskIdToTIPMap =
    new HashMap<TaskAttemptID, TaskInProgress>();
  /** What did the tracker run? */
  private Map<String, Set<TaskAttemptIDWithTip>> trackerToTaskMap =
    new HashMap<String, Set<TaskAttemptIDWithTip>>();
  /** Find out the successful attempts on a tracker. */
  private Map<String, Set<TaskAttemptID>> trackerToSucessfulTaskMap =
    new HashMap<String, Set<TaskAttemptID>>();
  /** Find the grant used for an attempt. */
  private Map<TaskAttemptID, Integer> taskIdToGrantMap =
    new HashMap<TaskAttemptID, Integer>();
  /**
   * Create a task entry, recording the attempt in all lookup maps.
   * @param taskId The attempt ID.
   * @param taskTracker The task tracker.
   * @param tip The TIP.
   * @param grant The resource grant.
   */
  public void createTaskEntry(
    TaskAttemptID taskId, String taskTracker, TaskInProgress tip,
    Integer grant) {
    LOG.info("Adding task (" + tip.getAttemptType(taskId) + ") " +
      "'" + taskId + "' to tip " +
      tip.getTIPId() + ", for tracker '" + taskTracker + "' grant:" + grant);
    synchronized (lockObject) {
      // taskId --> tracker
      taskIdToTrackerMap.put(taskId, taskTracker);
      // tracker --> taskId
      Set<TaskAttemptIDWithTip> taskset = trackerToTaskMap.get(taskTracker);
      if (taskset == null) {
        taskset = new HashSet<TaskAttemptIDWithTip>();
        trackerToTaskMap.put(taskTracker, taskset);
      }
      taskset.add(new TaskAttemptIDWithTip(taskId, tip));
      // taskId --> TIP
      // We never remove this entry.
      taskIdToTIPMap.put(taskId, tip);
      taskIdToGrantMap.put(taskId, grant);
    }
  }
  /**
   * Find the successful tasks on a tracker.
   * @param node The tracker.
   * @return The successful attempts (a defensive copy; safe to use outside
   *         the lock).
   */
  public List<TaskAttemptID> getSuccessfulTasksForNode(String node) {
    List<TaskAttemptID> attempts = new ArrayList<TaskAttemptID>();
    synchronized (lockObject) {
      Set<TaskAttemptID> set = trackerToSucessfulTaskMap.get(node);
      if (set != null) {
        attempts.addAll(set);
      }
    }
    return attempts;
  }
  /**
   * Record a successful task attempt.
   * @param taskId The attempt ID.
   * @param node The tracker.
   */
  public void addSuccessfulTaskEntry(TaskAttemptID taskId, String node) {
    synchronized (lockObject) {
      Set<TaskAttemptID> attempts = trackerToSucessfulTaskMap.get(node);
      if (attempts == null) {
        attempts = new HashSet<TaskAttemptID>();
        trackerToSucessfulTaskMap.put(node, attempts);
      }
      attempts.add(taskId);
    }
  }
  /**
   * Remove the entry for a task from the tracker and grant maps.
   * NOTE(review): taskIdToTrackerMap and taskIdToTIPMap entries are kept
   * forever — presumably so getTIP()/getAssignedTracker() keep working after
   * removal; confirm this is deliberate (it is a slow per-attempt leak).
   * @param taskId The attempt ID.
   */
  public void removeTaskEntry(TaskAttemptID taskId) {
    LOG.info("Removing task '" + taskId + "'");
    synchronized (lockObject) {
      // taskId --> tracker
      String tracker = taskIdToTrackerMap.get(taskId);
      // tracker --> taskId
      if (tracker != null) {
        Set<TaskAttemptIDWithTip> taskset = trackerToTaskMap.get(tracker);
        if (taskset != null) {
          // TaskAttemptIDWithTip.equals() uses attemptId equality.
          taskset.remove(new TaskAttemptIDWithTip(taskId, null));
        }
      }
      taskIdToGrantMap.remove(taskId);
    }
  }
  /**
   * Find the TIP for an attempt.
   * @param taskId The attempt ID.
   * @return The TIP, or null if the attempt is unknown.
   */
  public TaskInProgress getTIP(TaskAttemptID taskId) {
    synchronized (lockObject) {
      return taskIdToTIPMap.get(taskId);
    }
  }
  /**
   * Find the task attempt for a resource grant.
   * @param grant The grant.
   * @return The attempt ID.
   */
  public TaskAttemptID taskForGrant(ResourceGrant grant) {
    return taskForGrantId(grant.getId());
  }
  /**
   * Find the task attempt for a resource grant.
   * Linear scan over all running attempts; the map only stores the
   * attempt->grant direction.
   * @param grantId The grant ID.
   * @return The attempt ID, or null if no attempt uses the grant.
   */
  public TaskAttemptID taskForGrantId(Integer grantId) {
    synchronized (lockObject) {
      for (Map.Entry<TaskAttemptID, Integer> entry :
        taskIdToGrantMap.entrySet()) {
        if (entry.getValue().equals(grantId)) {
          return entry.getKey();
        }
      }
    }
    return null;
  }
  /**
   * Find the grants in use on a tracker.
   * @param trackerName the tracker.
   * @return The grants in use on the tracker (a fresh set).
   */
  public Set<Integer> grantsInUseOnTracker(String trackerName) {
    synchronized (lockObject) {
      Set<Integer> grants = new HashSet<Integer>();
      if (trackerToTaskMap.containsKey(trackerName)) {
        for (TaskAttemptIDWithTip tip : trackerToTaskMap.get(trackerName)) {
          grants.add(taskIdToGrantMap.get(tip.attemptId));
        }
      }
      return grants;
    }
  }
  /**
   * Find the tasks to be killed on a tracker. Reads the enclosing tracker's
   * job field to skip killing once the job is complete.
   * @param taskTracker The tracker.
   * @return The tasks to kill.
   */
  List<KillTaskAction> getTasksToKill(String taskTracker) {
    synchronized (lockObject) {
      Set<TaskAttemptIDWithTip> taskset = trackerToTaskMap.get(taskTracker);
      List<KillTaskAction> killList = new ArrayList<KillTaskAction>();
      if (taskset != null) {
        for (TaskAttemptIDWithTip onetask : taskset) {
          TaskAttemptID killTaskId = onetask.attemptId;
          TaskInProgress tip = onetask.tip;
          if (tip == null) {
            continue;
          }
          if (tip.shouldClose(killTaskId)) {
            //
            // This is how the JobTracker ends a task at the TaskTracker.
            // It may be successfully completed, or may be killed in
            // mid-execution.
            //
            if (job != null && !job.getStatus().isJobComplete()) {
              killList.add(new KillTaskAction(killTaskId));
              LOG.debug(taskTracker + " -> KillTaskAction: " + killTaskId);
            }
          }
        }
      }
      return killList;
    }
  }
  /**
   * Find the grant for an attempt.
   * @param taskId The attempt ID.
   * @return The grant ID, or null if the attempt has no grant.
   */
  public Integer getGrantIdForTask(TaskAttemptID taskId) {
    synchronized (lockObject) {
      return taskIdToGrantMap.get(taskId);
    }
  }
  /**
   * Find the tracker for a task attempt.
   * @param attempt The attempt ID.
   * @return The tracker.
   */
  public String getAssignedTracker(TaskAttemptID attempt) {
    synchronized (lockObject) {
      return taskIdToTrackerMap.get(attempt);
    }
  }
}
/**
 * Constructor for the remote job tracker (running in cluster).
 * Creates a session, starts the full tracker (RPC + HTTP servers, worker
 * threads), and begins heartbeating to the parent CJT.
 * @param conf Configuration
 * @param jobId Job ID.
 * @param attemptId attempt ID
 * @param parentAddr Address of the parent job tracker
 * @throws IOException
 */
public CoronaJobTracker(
  JobConf conf,
  JobID jobId,
  TaskAttemptID attemptId,
  InetSocketAddress parentAddr) throws IOException {
  this.isStandalone = true;
  this.heartbeatInterval = conf.getInt(HEART_BEAT_INTERVAL_KEY, 3000);
  this.grantsPerIteration = conf.getInt(GRANTS_PROCESS_PER_ITERATION, 100);
  this.maxEventsPerRpc = conf.getInt(TASK_COMPLETION_EVENTS_PER_RPC, 100);
  this.remoteJT = null;
  // This is already a standalone (remote) CJT, unset the flag.
  conf.setBoolean("mapred.coronajobtracker.forceremote", false);
  this.conf = conf;
  this.trackerStats = new TrackerStats(conf);
  this.fs = FileSystem.get(conf);
  this.jobId = jobId;
  createSession();
  startFullTracker();
  // In remote mode, we have a parent JT that we need to communicate with.
  ParentHeartbeat parentHeartbeat = new ParentHeartbeat(
    conf, attemptId, jobTrackerAddress, parentAddr, sessionId);
  try {
    // Perform an initial heartbeat to confirm that we can go ahead.
    // If this throws an exception, the rest of the threads are daemon
    // threads, so the stand-alone CJT will exit.
    parentHeartbeat.initialHeartbeat();
    // Start the thread to do periodic heartbeats.
    // This thread is not a daemon thread, so the process will hang around
    // while it is alive.
    Thread parentHeartbeatThread = new Thread(parentHeartbeat);
    parentHeartbeatThread.setDaemon(false);
    parentHeartbeatThread.setName("Parent Heartbeat");
    parentHeartbeatThread.start();
  } catch (IOException e) {
    // NOTE(review): after close(false) the constructor still returns a
    // (closed) instance rather than rethrowing e — confirm callers expect
    // this rather than a construction failure.
    LOG.error("Closing CJT after initial heartbeat error" , e);
    try {
      close(false);
    } catch (InterruptedException e1) {
      throw new IOException(e1);
    }
  }
}
/**
 * Constructor for the in-process job tracker.
 * Only reads configuration; servers and threads are started later
 * (see startFullTracker()/startRestrictedTracker()).
 * @param conf Configuration.
 * @throws IOException
 */
public CoronaJobTracker(JobConf conf) throws IOException {
  this.isStandalone = false;
  this.heartbeatInterval = conf.getInt(HEART_BEAT_INTERVAL_KEY, 3000);
  this.grantsPerIteration = conf.getInt(GRANTS_PROCESS_PER_ITERATION, 100);
  this.maxEventsPerRpc = conf.getInt(TASK_COMPLETION_EVENTS_PER_RPC, 100);
  this.conf = conf;
  this.trackerStats = new TrackerStats(conf);
  this.fs = FileSystem.get(conf);
}
/**
 * Derive the job ID from a session ID. A CJT runs exactly one job,
 * so the job number is always 1.
 * @param sessionId The session ID.
 * @return The job ID for the session.
 */
public static JobID jobIdFromSessionId(String sessionId) {
  return new JobID(sessionId, 1);
}
/**
 * Recover the session ID embedded in a job ID (the inverse of
 * jobIdFromSessionId).
 * @param jobId The job ID.
 * @return The session ID.
 */
public static String sessionIdFromJobID(JobID jobId) {
  return jobId.getJtIdentifier();
}
/**
 * Mark a task attempt as failed or killed on the job, unless the attempt is
 * no longer running (e.g. it finished and was preempted at the same time).
 * @param taskId The attempt to fail.
 * @param reason Human-readable failure reason.
 * @param isFailed true to count as a failure, false to count as killed.
 */
private void failTask(TaskAttemptID taskId, String reason,
  boolean isFailed) {
  TaskInProgress tip = taskLookupTable.getTIP(taskId);
  Integer grantId = taskLookupTable.getGrantIdForTask(taskId);
  ResourceGrant grant = resourceTracker.getGrant(grantId);
  synchronized (lockObject) {
    if (!tip.isAttemptRunning(taskId)) {
      /*
       * This attempt is not running so we should not be killing/failing it
       * The reason we might try to fail the task that is not running is if it
       * has finished and was preempted at the same time.
       */
      return;
    }
  }
  // A running attempt must have a resource; asserts only fire with -ea.
  assert grant != null : "Task " + taskId +
    " is running but has no associated resource";
  String trackerName = grant.getNodeName();
  TaskTrackerStatus trackerStatus =
    getTaskTrackerStatus(trackerName);
  // NOTE(review): reduce attempts are reported in phase STARTING regardless
  // of actual progress — confirm this is intended.
  TaskStatus.Phase phase =
    tip.isMapTask() ? TaskStatus.Phase.MAP : TaskStatus.Phase.STARTING;
  CoronaJobTracker.this.job.failedTask(
    tip, taskId, reason, phase, isFailed, trackerName, trackerStatus);
}
/** @return the session driver handling communication with the cluster
 * manager (null until createSession() has run). */
public SessionDriver getSessionDriver() {
  return sessionDriver;
}
/** @return the session ID obtained from the cluster manager (null until
 * createSession() has run). */
public String getSessionId() {
  return sessionId;
}
/**
 * Create the session driver and obtain a session ID.
 * This contacts the cluster manager, so it can fail with IOException.
 * @throws IOException if the cluster manager cannot be reached
 */
private void createSession() throws IOException {
  // Create the session driver. This will contact the cluster manager.
  sessionDriver = new SessionDriver(conf, this);
  sessionId = sessionDriver.getSessionId();
}
/**
 * Start the full-fledged tracker: session, RPC server, HTTP info server,
 * worker threads (task assignment, resource updates, task expiry), the task
 * launcher and job history. Idempotent — returns immediately if already
 * started. The startup order matters: the RPC server must be up before the
 * parent heartbeat reports our address.
 * @throws IOException on server/session startup failure
 */
private void startFullTracker() throws IOException {
  if (fullTrackerStarted) {
    return;
  }
  sessionDriver.startSession();
  this.resourceTracker = new ResourceTracker(lockObject);
  this.trackerClientCache = new TrackerClientCache(conf);
  startRPCServer(this);
  startInfoServer();
  // Replaces the instance created at field initialization.
  this.taskLookupTable = new TaskLookupTable();
  assignTasksThread = new Thread(new AssignTasksThread());
  assignTasksThread.setName("assignTasks Thread");
  assignTasksThread.setDaemon(true);
  assignTasksThread.start();
  resourceUpdaterThread = new Thread(resourceUpdater);
  resourceUpdaterThread.setName("Resource Updater");
  resourceUpdaterThread.setDaemon(true);
  resourceUpdaterThread.start();
  expireTasks = new ExpireTasks(this);
  expireTasks.setName("Expire launching tasks");
  expireTasks.setDaemon(true);
  expireTasks.start();
  taskLauncher = new CoronaTaskLauncher(conf, this, expireTasks);
  String sessionLogPath = null;
  if (isStandalone) {
    // If this is the remote job tracker, we need to use the session log
    // path of the parent job tracker, since we use the job ID specified
    // by the parent job tracker.
    String parentSessionId = CoronaJobTracker.sessionIdFromJobID(jobId);
    SessionHistoryManager sessionHistoryManager = new SessionHistoryManager();
    sessionHistoryManager.setConf(conf);
    sessionLogPath = sessionHistoryManager.getLogPath(parentSessionId);
    LOG.info("Using session log path " + sessionLogPath + " based on jobId " +
      jobId);
  } else {
    sessionLogPath = sessionDriver.getSessionLog();
  }
  jobHistory = new CoronaJobHistory(conf, jobId, sessionLogPath);
  // Initialize history DONE folder
  if (!jobHistory.isDisabled()) {
    String historyLogDir =
      jobHistory.getCompletedJobHistoryLocation().toString();
    infoServer.setAttribute("historyLogDir", historyLogDir);
    infoServer.setAttribute("conf", conf);
  }
  fullTrackerStarted = true;
}
/**
 * Start a restricted (forwarding) tracker: instead of running the job
 * locally, create a proxy to a remote CJT and serve RPC calls through it.
 * @param jobId The job ID.
 * @param jobConf The job configuration.
 * @throws IOException on session or server startup failure
 */
private void startRestrictedTracker(JobID jobId, JobConf jobConf)
  throws IOException {
  sessionDriver.startSession();
  this.resourceTracker = new ResourceTracker(lockObject);
  this.trackerClientCache = new TrackerClientCache(conf);
  remoteJT = new RemoteJTProxy(this, jobId, jobConf);
  // The RPC server dispatches to the proxy, not to this object.
  startRPCServer(remoteJT);
}
/**
 * Start the inter-tracker RPC server on an OS-assigned port and record the
 * final address in jobTrackerAddress. No-op if the server already exists.
 * NOTE(review): the null check is not synchronized — appears safe because
 * callers invoke this from a single startup path; confirm.
 * @param instance The object that serves the RPC calls (this tracker, or a
 *                 RemoteJTProxy in forwarding mode).
 * @throws IOException on server startup failure
 */
private void startRPCServer(Object instance) throws IOException {
  if (interTrackerServer != null) {
    return;
  }
  int handlerCount = conf.getInt(RPC_SERVER_HANDLER_COUNT, 10);
  if (isStandalone) {
    // Standalone mode is used for large jobs; use more handler threads.
    handlerCount = conf.getInt(RPC_SERVER_HANDLER_COUNT_STANDALONE, 100);
  }
  // Use the DNS hostname so that Task Trackers can connect to JT.
  jobTrackerAddress = NetUtils.createSocketAddr(
    java.net.InetAddress.getLocalHost().getCanonicalHostName(),
    0);
  interTrackerServer = RPC.getServer(instance,
    jobTrackerAddress.getHostName(), jobTrackerAddress.getPort(),
    handlerCount, false, conf);
  interTrackerServer.start();
  // Re-resolve the address with the actual port the listener bound to
  // (port 0 above asks the OS for a free port).
  jobTrackerAddress = new InetSocketAddress(
    jobTrackerAddress.getHostName(),
    interTrackerServer.getListenerAddress().getPort());
  LOG.info("CoronaJobTracker up at " + jobTrackerAddress);
}
/**
 * Start the HTTP info (web UI) server on an OS-assigned port and publish
 * its address into the configuration for downstream consumers.
 * @throws IOException on server startup failure
 */
private void startInfoServer() throws IOException {
  InetSocketAddress infoSocAddr = NetUtils.createSocketAddr(
    java.net.InetAddress.getLocalHost().getCanonicalHostName(),
    0);
  String infoBindAddress = infoSocAddr.getHostName();
  int tmpInfoPort = infoSocAddr.getPort();
  // Port 0 means "find a free port", so allow the server to search.
  infoServer = new HttpServer("jt", infoBindAddress, tmpInfoPort,
    tmpInfoPort == 0, conf);
  infoServer.setAttribute("job.tracker", this);
  infoServer.start();
  this.infoPort = this.infoServer.getPort();
  String hostname =
    java.net.InetAddress.getLocalHost().getCanonicalHostName();
  // Publish the final host:port so JSP pages and clients can find the UI.
  this.conf.set(
    "mapred.job.tracker.http.address", hostname + ":" + this.infoPort);
  this.conf.setInt("mapred.job.tracker.info.port", this.infoPort);
  this.conf.set("mapred.job.tracker.info.bindAddress", hostname);
  LOG.info("JobTracker webserver: " + this.infoPort);
}
/** @return the hostname the RPC server is bound to. */
public String getJobTrackerMachine() {
  return jobTrackerAddress.getHostName();
}
/**
 * Get the proxied web UI URL for this job's details page.
 * @return the job details URL
 * @throws IOException if the session log location cannot be obtained
 */
public String getUrl() throws IOException {
  // Kept for its failure behavior: getSessionLog() may throw, and
  // new Path(null) throws IllegalArgumentException. The previous code also
  // called historyDir.getName() and discarded the result — dead code,
  // removed.
  new Path(sessionDriver.getSessionLog());
  return getProxyUrl(conf, "coronajobdetails.jsp?jobid=" + jobId);
}
/**
 * Map a job run-state to the session status reported to the cluster
 * manager. An explicitly recorded end status takes precedence.
 * @param jobState one of the JobStatus run-state constants
 * @return the corresponding SessionStatus
 * @throws RuntimeException for an unrecognized job state
 */
public SessionStatus getSessionEndStatus(int jobState) {
  if (sessionEndStatus != null) {
    return sessionEndStatus;
  }
  if (jobState == JobStatus.PREP || jobState == JobStatus.RUNNING) {
    return SessionStatus.RUNNING;
  }
  if (jobState == JobStatus.SUCCEEDED) {
    return SessionStatus.SUCCESSFUL;
  }
  if (jobState == JobStatus.FAILED) {
    return SessionStatus.FAILED;
  }
  if (jobState == JobStatus.KILLED) {
    return SessionStatus.KILLED;
  }
  throw new RuntimeException("Unknown job state: " + jobState);
}
/** @return the address the inter-tracker RPC server is listening on. */
public InetSocketAddress getJobTrackerAddress() {
  return jobTrackerAddress;
}
/** @return the resource tracker (null until a tracker is started). */
public ResourceTracker getResourceTracker() {
  return resourceTracker;
}
/** @return per-tracker statistics for this job. */
public TrackerStats getTrackerStats() {
  return trackerStats;
}
/**
 * Get (or create) a cached RPC client to a task tracker.
 * @param host The tracker host.
 * @param port The tracker port.
 * @return The RPC client.
 * @throws IOException if the client cannot be created
 */
public CoronaTaskTrackerProtocol getTaskTrackerClient(String host, int port)
  throws IOException {
  return trackerClientCache.getClient(host, port);
}
/**
 * Drop the cached RPC client for a task tracker (e.g. after an RPC error)
 * so the next call creates a fresh connection.
 * @param host The tracker host.
 * @param port The tracker port.
 */
public void resetTaskTrackerClient(String host, int port) {
  trackerClientCache.resetClient(host, port);
}
/**
 * Close the tracker if the job has completed.
 * @param closeFromWebUI Indicates whether called from web UI.
 * @throws IOException if closing is interrupted
 */
protected void closeIfComplete(boolean closeFromWebUI) throws IOException {
  // Prevent multiple simultaneous executions of this function. We could have
  // the Web UI and JobSubmissionProtocol.killJob() call this, for example.
  // NOTE(review): the mutual exclusion is actually provided inside
  // close(boolean, boolean) via closeLock/running, not here — confirm the
  // comment above still reflects the intent.
  if (this.job.getStatus().isJobComplete()) {
    try {
      close(closeFromWebUI);
    } catch (InterruptedException e) {
      throw new IOException(e);
    }
  }
}
/**
 * Cleanup after CoronaJobTracker operation.
 * If remote CJT error occured use overloaded version.
 * @param closeFromWebUI Indicates whether called from web UI.
 * @throws IOException
 * @throws InterruptedException
 */
void close(boolean closeFromWebUI) throws IOException, InterruptedException {
  // Convenience overload: no remote-JT failure.
  close(closeFromWebUI, false);
}
/**
 * Cleanup after CoronaJobTracker operation.
 * Shutdown order: job, history, session driver (reporting final status),
 * worker threads, task launcher, HTTP server, and finally the RPC server
 * (last, because this method may itself be invoked over RPC).
 * Idempotent: only the first call past the running check does anything.
 * @param closeFromWebUI Indicates whether called from web UI.
 * @param remoteJTFailure Indicates whether the remote CJT failed or
 *        is unreachable.
 * @throws IOException
 * @throws InterruptedException
 */
void close(boolean closeFromWebUI, boolean remoteJTFailure)
  throws IOException, InterruptedException {
  synchronized (closeLock) {
    if (!running) {
      return;
    }
    running = false;
    if (job != null) {
      job.close();
    }
    reportJobStats();
    if (jobHistory != null) {
      try {
        jobHistory.markCompleted();
      } catch (IOException ioe) {
        LOG.warn("Failed to mark job " + jobId + " as completed!", ioe);
      }
      jobHistory.shutdown();
    }
    if (sessionDriver != null) {
      int jobState = 0;
      if (job == null) {
        if (remoteJTFailure) {
          // There will be no feedback from remote JT because it died.
          LOG.warn("JobTracker died or is unreachable." +
            "Reporting to ClusterManager.");
          sessionDriver.stop(SessionStatus.FAILED_JOBTRACKER);
        } else {
          // The remote JT will have the real status.
          jobState = JobStatus.SUCCEEDED;
          sessionDriver.stop(getSessionEndStatus(jobState));
        }
      } else {
        jobState = job.getStatus().getRunState();
        if (jobState != JobStatus.SUCCEEDED) {
          // We will report task failure counts only if the job succeeded.
          trackerStats.resetFailedCount();
        }
        sessionDriver.stop(
          getSessionEndStatus(jobState),
          ResourceTracker.resourceTypes(),
          trackerStats.getNodeUsageReports());
      }
    }
    // Stop and join the worker threads; running is already false so their
    // loops exit once interrupted.
    if (expireTasks != null) {
      expireTasks.shutdown();
      expireTasks.interrupt();
      expireTasks.join();
    }
    if (resourceUpdaterThread != null) {
      resourceUpdaterThread.interrupt();
      resourceUpdaterThread.join();
    }
    if (assignTasksThread != null) {
      assignTasksThread.interrupt();
      assignTasksThread.join();
    }
    if (sessionDriver != null) {
      sessionDriver.join();
    }
    if (taskLauncher != null) {
      // Tell every tracker that ran tasks to clean up job state.
      taskLauncher.killJob(jobId, resourceTracker.allTrackers());
    }
    if (infoServer != null) {
      if (closeFromWebUI) {
        // If we are being called from the web UI, this function is executing
        // in a web-server thread. Give some time to the web-server to
        // clean up.
        infoServer.setGracefulShutdown(1000);
      }
      try {
        // Unavoidable catch-all because of AbstractLifeCycle.stop().
        infoServer.stop();
      } catch (Exception ex) {
        LOG.warn("Exception shutting down web server ", ex);
      }
    }
    // Stop RPC server. This is done near the end of the function
    // since this could be called through a RPC heartbeat call.
    // If (standalone == true)
    // - dont stop the RPC server at all. When this cannot talk to the parent,
    //   it will exit the process.
    // if (standalone == false)
    // - if there is no remote JT, close right away
    // - if there is a remote JT, close after 1min.
    if (interTrackerServer != null) {
      if (!isStandalone) {
        if (remoteJT == null) {
          interTrackerServer.stop();
        } else {
          final int timeout = conf.getInt(RPC_SERVER_STOP_TIMEOUT, 0);
          if (timeout > 0) {
            LOG.info("Starting async thread to stop RPC server for " + jobId);
            Thread async = new Thread(new Runnable() {
              @Override
              public void run() {
                try {
                  Thread.sleep(timeout);
                  LOG.info("Stopping RPC server for " + jobId);
                  interTrackerServer.stop();
                  remoteJT.close();
                } catch (InterruptedException e) {
                  // NOTE(review): if interrupted during the sleep, the
                  // server stop and remoteJT.close() are skipped entirely —
                  // confirm this leak is acceptable (daemon thread, process
                  // likely exiting).
                  LOG.warn(
                    "Interrupted during wait before stopping RPC server");
                }
              }
            });
            async.setDaemon(true);
            async.start();
          }
        }
      }
    }
    // Wake up anyone waiting for the tracker to finish closing.
    synchronized (lockObject) {
      closed = true;
      lockObject.notifyAll();
    }
  }
}
/**
 * Report final job statistics and counters to the corona job aggregator.
 * Best-effort: any IOException is logged and ignored, so shutdown proceeds
 * even if the aggregator is unreachable.
 */
private void reportJobStats() {
  if (job == null) {
    return;
  }
  Counters jobCounters = job.getCounters();
  JobStats jobStats = job.getJobStats();
  String pool = null;
  if (sessionDriver != null) {
    pool = PoolInfo.createStringFromPoolInfo(sessionDriver.getPoolInfo());
  }
  try {
    CoronaConf coronaConf = new CoronaConf(conf);
    InetSocketAddress aggregatorAddr = NetUtils.createSocketAddr(
      coronaConf.getProxyJobTrackerAddress());
    long timeout = 5000; // Can make configurable later.
    // NOTE(review): the proxy is never explicitly released
    // (RPC.stopProxy) — presumably acceptable at shutdown; confirm.
    CoronaJobAggregator aggregator = RPC.waitForProxy(
      CoronaJobAggregator.class,
      CoronaJobAggregator.versionID,
      aggregatorAddr,
      conf,
      timeout);
    LOG.info("Reporting job stats with jobId=" + jobId +
      ", pool=" + pool + ", jobStats=" + jobStats + ", " +
      "jobCounters=" + jobCounters);
    aggregator.reportJobStats(jobId.toString(), pool, jobStats, jobCounters);
  } catch (IOException e) {
    LOG.warn("Ignoring error in reportJobStats ", e);
  }
}
/**
 * Worker loop that repeatedly assigns tasks to granted resources.
 * Runs until the tracker's running flag is cleared; an interrupt is used
 * only to re-check that flag (no re-interrupt needed since this is the
 * loop's own termination mechanism).
 */
class AssignTasksThread implements Runnable {
  @Override
  public void run() {
    while (running) {
      try {
        assignTasks();
      } catch (InterruptedException e) {
        // ignore and let loop check running flag
        continue;
      }
    }
    LOG.info("Terminating AssignTasksThread");
  }
}
/**
 * This thread performs heartbeats to the parent CJT. It has two purposes -
 * notify the parent of the RPC host:port information of this CJT - detect if
 * the parent has died, and terminate this CJT in that case.
 */
class ParentHeartbeat implements Runnable {
  /** Our own RPC address, reported to the parent on every heartbeat. */
  private final InetSocketAddress myAddr;
  /** The parent CJT's address. */
  private final InetSocketAddress parentAddr;
  /** RPC proxy to the parent. */
  private final InterCoronaJobTrackerProtocol parent;
  /** The attempt ID under which this remote CJT runs. */
  private final TaskAttemptID attemptId;
  /** The session ID reported to the parent. */
  private final String sessionId;
  /**
   * Constructor. Connects to the parent eagerly, so construction fails fast
   * if the parent is unreachable.
   * @param conf Configuration.
   * @param attemptId The attempt ID of this remote CJT.
   * @param myAddr This CJT's RPC address.
   * @param parentAddr The parent CJT's address.
   * @param sessionId The session ID.
   * @throws IOException if the parent proxy cannot be created
   */
  public ParentHeartbeat(
    Configuration conf,
    TaskAttemptID attemptId,
    InetSocketAddress myAddr,
    InetSocketAddress parentAddr,
    String sessionId) throws IOException {
    this.attemptId = attemptId;
    this.myAddr = myAddr;
    this.parentAddr = parentAddr;
    this.sessionId = sessionId;
    long connectTimeout = RemoteJTProxy.getRemotJTTimeout(conf);
    parent = RPC.waitForProxy(
      InterCoronaJobTrackerProtocol.class,
      InterCoronaJobTrackerProtocol.versionID,
      parentAddr,
      conf,
      connectTimeout);
  }
  /**
   * Perform one heartbeat synchronously, to confirm the parent accepts us
   * before the periodic thread starts.
   * @throws IOException if the parent cannot be reached
   */
  public void initialHeartbeat() throws IOException {
    parent.reportRemoteCoronaJobTracker(
      attemptId.toString(),
      myAddr.getHostName(),
      myAddr.getPort(),
      sessionId);
  }
  @Override
  public void run() {
    // Heartbeat forever; an IOException means the parent is gone, in which
    // case we kill the job and exit the process.
    while (true) {
      try {
        parent.reportRemoteCoronaJobTracker(
          attemptId.toString(),
          myAddr.getHostName(),
          myAddr.getPort(),
          sessionId);
        LOG.info("Performed heartbeat to parent at " + parentAddr);
        // NOTE(review): interval is hard-coded to 1000 ms rather than using
        // heartbeatInterval (HEART_BEAT_INTERVAL_KEY, default 3000) —
        // confirm whether the config key was meant to apply here.
        Thread.sleep(1000);
      } catch (IOException e) {
        LOG.error("Could not communicate with parent, closing this CJT ", e);
        CoronaJobTracker jt = CoronaJobTracker.this;
        try {
          jt.killJob(jt.jobId);
        } catch (IOException e1) {
          LOG.error("Error in closing on timeout ", e1);
        } finally {
          System.exit(1);
        }
      } catch (InterruptedException e) {
        // Ignore and check running flag.
        continue;
      }
    }
  }
}
@Override
public boolean processAvailableResource(ResourceGrant grant) {
// Returns true when the grant was consumed (task launched) or returned
// (bad / unneeded); false when no task could be obtained for it.
if (isBadResource(grant)) {
LOG.info("Resource " + grant.getId() + " nodename " +
grant.getNodeName() + " is bad");
processBadResource(grant.getId(), true);
// return true since this request was bad and will be returned
// so it should no longer be available
return true;
} else if (!isResourceNeeded(grant)) {
// This resource is no longer needed, but it is not a fault
// of the host
LOG.info("Resource " + grant.getId() + " nodename " +
grant.getNodeName() + " is not needed");
processBadResource(grant.getId(), false);
return true;
}
InetAddress addr =
Utilities.appInfoToAddress(grant.appInfo);
String trackerName = grant.getNodeName();
boolean isMapGrant =
grant.getType().equals(ResourceType.MAP);
// Setup/cleanup tasks take priority over regular map/reduce tasks.
Task task = getSetupAndCleanupTasks(trackerName, addr.host, isMapGrant);
if (task == null) {
TaskInProgress tip = null;
synchronized (lockObject) {
tip = requestToTipMap.get(grant.getId());
}
// NOTE(review): tip is dereferenced without a null check here; this
// presumably relies on every grant id having a mapping in
// requestToTipMap — confirm that invariant holds.
if (tip.isMapTask()) {
task = job.obtainNewMapTaskForTip(trackerName, addr.host, tip);
} else {
task = job.obtainNewReduceTaskForTip(trackerName, addr.host, tip);
}
}
if (task != null) {
// Record the attempt->grant binding before launching so status
// reports from the TT can be correlated.
TaskAttemptID taskId = task.getTaskID();
taskLookupTable.createTaskEntry(taskId, trackerName,
job.getTaskInProgress(taskId.getTaskID()), grant.getId());
taskLauncher.launchTask(task, trackerName, addr);
trackerStats.recordTask(trackerName);
return true;
}
return false;
}
/**
 * A grant is "bad" when its tracker is faulty, the job cannot use that
 * tracker for this TIP, or it would be a pointless speculative resource.
 */
public boolean isBadResource(ResourceGrant grant) {
  String trackerName = grant.getNodeName();
  InetAddress address = grant.address;
  TaskInProgress tip = requestToTipMap.get(grant.getId());
  if (trackerStats.isFaulty(trackerName)) {
    return true;
  }
  if (!job.canTrackerBeUsed(trackerName, address.host, tip)) {
    return true;
  }
  return job.isBadSpeculativeResource(tip, trackerName, address.host);
}
/**
 * A grant is still needed when the TIP is running and worth speculating,
 * not running but still runnable, awaiting task cleanup, or slated to have
 * its resource reused.
 */
public boolean isResourceNeeded(ResourceGrant grant) {
  InetAddress address = grant.address;
  String trackerName = grant.getNodeName();
  TaskInProgress tip = requestToTipMap.get(grant.getId());
  if (tip.isRunning()) {
    // 1. Running task for which we can still launch a speculative attempt.
    if (job.confirmSpeculativeTask(tip, trackerName, address.host)) {
      return true;
    }
  } else if (tip.isRunnable()) {
    // 2. Not running yet, but still runnable.
    return true;
  }
  // 3. Needs cleanup, or the resource is about to be reused elsewhere.
  return job.needsTaskCleanup(tip) || job.shouldReuseTaskResource(tip);
}
/**
 * Return this grant and request a different one.
 * This can happen because the task has failed, was killed
 * or the job tracker decided that the resource is bad.
 *
 * @param grant the grant identifier
 * @param abandonHost if true the grant's host is excluded from the list of
 *        possibilities for the replacement request
 */
public void processBadResource(int grant, boolean abandonHost) {
  synchronized (lockObject) {
    TaskInProgress tip = requestToTipMap.get(grant);
    if (!job.canLaunchJobCleanupTask()) {
      boolean speculative =
          speculatedMaps.contains(tip) || speculatedReduces.contains(tip);
      // The TIP no longer needs a resource when it is not runnable
      // (job done/killed/failed or task finished), or it is running and
      // this was a speculative resource that is no longer wanted.
      if (!tip.isRunnable() || (tip.isRunning() && !speculative)) {
        resourceTracker.releaseResource(grant);
        return;
      }
    }
    Set<String> excludedHosts = null;
    if (abandonHost) {
      ResourceGrant resource = resourceTracker.getGrant(grant);
      String hostToExclude = resource.getAddress().getHost();
      TaskContext tipContext = taskToContextMap.get(tip);
      tipContext.excludedHosts.add(hostToExclude);
      excludedHosts = tipContext.excludedHosts;
    }
    // Swap the old grant for a fresh request (optionally excluding hosts)
    // and record the new request against the same TIP.
    ResourceRequest replacement =
        resourceTracker.releaseAndRequestResource(grant, excludedHosts);
    requestToTipMap.put(replacement.getId(), tip);
    TaskContext context = taskToContextMap.get(tip);
    if (context == null) {
      context = new TaskContext(replacement);
    } else {
      context.resourceRequests.add(replacement);
    }
    taskToContextMap.put(tip, context);
  }
}
/**
 * One iteration of core logic: hand up to grantsPerIteration available
 * grants to processAvailableResource() via the resource tracker.
 *
 * @throws InterruptedException if interrupted while waiting for grants
 */
void assignTasks() throws InterruptedException {
resourceTracker.processAvailableGrants(this, this.grantsPerIteration);
}
/**
 * Handle trackers declared dead: record the fault, re-run successful maps
 * whose output lived on the dead node, and fail attempts whose grants were
 * in use there. Clears the deadNodes set afterwards.
 */
void processDeadNodes() {
if (job == null) {
return;
}
synchronized (lockObject) {
for (String deadNode : deadNodes) {
trackerStats.recordDeadTracker(deadNode);
List<TaskAttemptID> attempts =
taskLookupTable.getSuccessfulTasksForNode(deadNode);
for (TaskAttemptID attempt : attempts) {
TaskInProgress tip = taskLookupTable.getTIP(attempt);
if (tip.isMapTask()) {
// Only the map task needs to be rerun if there was a failure
job.failedTask(tip, attempt, "Lost task tracker",
TaskStatus.Phase.MAP, false, deadNode, null);
}
}
Set<Integer> grantIds = taskLookupTable.grantsInUseOnTracker(deadNode);
for (int grantId : grantIds) {
TaskAttemptID attempt = taskLookupTable.taskForGrantId(grantId);
// We are just failing the tasks, since if they are still
// to be launched the launcher will check with the trackerStats,
// see that the tracker is dead, and not launch them in the
// first place.
failTask(attempt, "TaskTracker is dead", false);
}
}
deadNodes.clear();
}
}
/**
 * Process grants the cluster manager has asked us to give back: tasks not
 * yet launched are failed directly; launched tasks are killed and their
 * grants released once the TT acknowledges the kill.
 */
void processGrantsToRevoke() {
  if (job == null) {
    return;
  }
  // Grants whose kill was issued in this pass; removed from grantsToRevoke
  // at the end, outside the per-grant loop.
  Map<ResourceGrant, TaskAttemptID> processed =
      new HashMap<ResourceGrant, TaskAttemptID>();
  Set<String> nodesOfGrants = new HashSet<String>();
  synchronized (lockObject) {
    for (ResourceGrant grant : grantsToRevoke) {
      TaskAttemptID attemptId = taskLookupTable.taskForGrant(grant);
      if (attemptId != null) {
        // Look up the TIP only after the null check: the original code
        // called getTIP(attemptId) before verifying attemptId != null,
        // risking a lookup with a null key for grants with no task.
        TaskInProgress tip = taskLookupTable.getTIP(attemptId);
        if (taskLauncher.removeLaunchingTask(attemptId)) {
          // Kill the task in the job since it never got launched
          job.failedTask(tip, attemptId, "", TaskStatus.Phase.MAP,
              false, grant.getNodeName(), null);
          continue;
        }
        killTaskUnprotected(attemptId, false,
            "Request received to kill" +
            " task '" + attemptId + "' by cluster manager (grant revoked)");
        processed.put(grant, attemptId);
        nodesOfGrants.add(grant.getNodeName());
        // Grant will get removed from the resource tracker
        // when the kill takes effect and we get a response from TT.
      }
    }
    for (String ttNode : nodesOfGrants) {
      queueKillActions(ttNode);
    }
  }
  for (Map.Entry<ResourceGrant, TaskAttemptID> entry : processed.entrySet()) {
    LOG.info("Revoking resource " + entry.getKey().getId() +
        " task: " + entry.getValue());
    grantsToRevoke.remove(entry.getKey());
  }
}
/**
 * Handle a tracker action that could not be delivered: an undeliverable
 * launch expires the attempt, an undeliverable kill fails it outright.
 */
void processTaskLaunchError(TaskTrackerAction ttAction) {
  if (ttAction instanceof LaunchTaskAction) {
    TaskAttemptID attempt =
        ((LaunchTaskAction) ttAction).getTask().getTaskID();
    expiredLaunchingTask(attempt);
  } else if (ttAction instanceof KillTaskAction) {
    TaskAttemptID attempt = ((KillTaskAction) ttAction).getTaskID();
    failTask(attempt, "TaskTracker is dead", false);
  }
}
/**
 * A thread to update resource requests/releases.
 * Wakes up every second (or when notified), detects session failure,
 * processes revoked grants / speculation / dead nodes, and pushes pending
 * resource requests and releases to the session driver.
 */
protected class ResourceUpdater implements Runnable {
// Wake the updater loop immediately instead of waiting for the 1s timeout.
void notifyThread() {
synchronized (this) {
this.notify();
}
}
// Sleep until notified, or at most one second.
void waitToBeNotified() throws InterruptedException {
synchronized (this) {
this.wait(1000L);
}
}
@Override
public void run() {
while (running) {
try {
// Check if session had errors in heartbeating.
// We need this to detect lost sessions early.
if (sessionDriver != null) {
IOException sessionException = sessionDriver.getFailed();
if (sessionException != null) {
killJobOnSessionError(sessionException, SessionStatus.KILLED);
return;
}
}
waitToBeNotified();
// Order matters: revoke grants and refresh speculation before
// computing the resource delta sent to the session driver.
processGrantsToRevoke();
updateSpeculativeResources();
processDeadNodes();
try {
updateResources();
} catch (IOException e) {
killJobOnSessionError(e, SessionStatus.KILLED_ABORTED);
return;
}
} catch (InterruptedException ie) {
// ignore. if shutting down, while cond. will catch it
continue;
}
}
}
// Record the end status and kill the job asynchronously.
private void killJobOnSessionError(IOException e, SessionStatus s) {
sessionEndStatus = s;
// Just log the exception name, the stack trace would have been logged
// earlier.
LOG.error("Killing job because session indicated error " + e);
// Kill the job in a new thread, since killJob() will call
// close() eventually, and that will try to join() all the
// existing threads, including the thread calling this function.
new Thread(new Runnable() {
@Override
public void run() {
try {
killJob(CoronaJobTracker.this.jobId);
} catch (IOException ignored) {
LOG.warn("Ignoring exception while killing job", ignored);
}
}
}).start();
}
/**
 * Send newly wanted resources and pending releases to the session driver,
 * then sanity-check that every runnable task still has a request.
 *
 * @throws IOException if a runnable TIP has no outstanding requests
 */
public void updateResources() throws IOException {
if (job == null) {
return;
}
if (sessionDriver != null) {
List<ResourceRequest> newRequests =
resourceTracker.getWantedResources();
if (!newRequests.isEmpty()) {
sessionDriver.requestResources(newRequests);
}
List<ResourceRequest> toRelease =
resourceTracker.getResourcesToRelease();
if (!toRelease.isEmpty()) {
sessionDriver.releaseResources(toRelease);
}
}
// Check that all resources make sense
checkTasksResource(TaskType.MAP);
checkTasksResource(TaskType.REDUCE);
}
/**
 * This method copies the requests and adds all the hosts
 * currently used to run the attempts of the TIP to the list
 * of excluded and removes them from the list of requested.
 * This way when we request a resource for speculation it will
 * not be given on the host that is already running an attempt.
 *
 * Must be called with lockObject held (hence "Unprotected").
 *
 * @param req the request to copy
 * @param tip the task in progress of this request. It is being used
 * to figure out which hosts are running attempts of this task.
 */
private void excludeHostsUnprotected(ResourceRequest req,
TaskInProgress tip) {
Set<String> excludedHosts = new HashSet<String>();
excludedHosts.addAll(taskToContextMap.get(tip).excludedHosts);
for (TaskAttemptID tid : tip.getAllTaskAttemptIDs()) {
Integer runningGrant = taskLookupTable.getGrantIdForTask(tid);
if (runningGrant == null) {
// This task attempt is no longer running
continue;
}
ResourceGrant resource = resourceTracker.getGrant(runningGrant);
String tidHost = resource.getAddress().getHost();
excludedHosts.add(tidHost);
}
req.setExcludeHosts(new ArrayList<String>(excludedHosts));
List<String> newHosts = new ArrayList<String>();
if (req.getHosts() != null) {
// Drop excluded hosts from the preferred-host list as well.
for (String host : req.getHosts()) {
if (!excludedHosts.contains(host)) {
newHosts.add(host);
}
}
req.setHosts(newHosts);
}
}
/**
 * Request one extra resource for each newly chosen speculation candidate
 * (maps and reduces), excluding hosts already running attempts of the TIP.
 */
public void updateSpeculativeResources() {
if (job == null) {
return;
}
// Update resource requests based on speculation.
if (job.getStatus().getRunState() == JobStatus.RUNNING) {
job.updateSpeculationCandidates();
}
synchronized (lockObject) {
List<TaskInProgress> maps = job.getSpeculativeCandidates(TaskType.MAP);
if (maps != null) {
for (TaskInProgress tip : maps) {
// Only request for candidates not already speculated last round.
if (!speculatedMaps.contains(tip)) {
// Speculate the tip
ResourceRequest req =
resourceTracker.newMapRequest(tip.getSplitLocations());
excludeHostsUnprotected(req, tip);
registerNewRequestForTip(tip, req);
}
}
speculatedMaps.clear();
speculatedMaps.addAll(maps);
}
List<TaskInProgress> reduces = job
.getSpeculativeCandidates(TaskType.REDUCE);
if (reduces != null) {
for (TaskInProgress tip : reduces) {
if (!speculatedReduces.contains(tip)) {
// Speculate the tip
ResourceRequest req = resourceTracker.newReduceRequest();
excludeHostsUnprotected(req, tip);
registerNewRequestForTip(tip, req);
}
}
speculatedReduces.clear();
speculatedReduces.addAll(reduces);
}
}
}
/**
 * Verify every runnable TIP of the given type still has at least one
 * outstanding resource request; otherwise it could never run again.
 *
 * @param type MAP or REDUCE
 * @throws IOException if a runnable TIP has no requests
 */
private void checkTasksResource(TaskType type) throws IOException {
synchronized (lockObject) {
if (!job.inited()) {
return;
}
// Reduce requests are created lazily; skip the check until then.
if (type == TaskType.REDUCE && !job.areReducersInitialized()) {
return;
}
TaskInProgress[] tasks = job.getTasks(type);
for (TaskInProgress tip : tasks) {
// Check that tip is either:
if (tip.isRunnable()) {
// There should be requests for this tip since it is not done yet
List<ResourceRequest> requestIds =
taskToContextMap.get(tip).resourceRequests;
if (requestIds == null || requestIds.size() == 0) {
// This task should be runnable, but it doesn't
// have requests which means it will never run
throw new IOException("Tip " + tip.getTIPId() +
" doesn't have resources " + "requested");
}
}
}
}
}
}
/**
 * Obtain a job-cleanup task if one is pending, otherwise a job-setup task.
 *
 * @return the obtained task, or null if neither is available
 */
Task getSetupAndCleanupTasks(String taskTrackerName, String hostName,
    boolean isMapGrant) {
  Task task = job.obtainJobCleanupTask(taskTrackerName, hostName, isMapGrant);
  if (task != null) {
    return task;
  }
  return job.obtainJobSetupTask(taskTrackerName, hostName, isMapGrant);
}
/**
 * Apply all task status reports carried by a TT heartbeat: refresh the
 * expiry bookkeeping, push each report into the job, lazily set up reduce
 * requests, and process any reported fetch failures.
 */
void updateTaskStatuses(TaskTrackerStatus status) {
  String trackerName = status.getTrackerName();
  for (TaskStatus report : status.getTaskReports()) {
    report.setTaskTracker(trackerName);
    TaskAttemptID taskId = report.getTaskID();
    // Remove it from the expired task list
    if (report.getRunState() != TaskStatus.State.UNASSIGNED) {
      expireTasks.removeTask(taskId);
    }
    if (report.getRunState() == TaskStatus.State.RUNNING) {
      expireTasks.updateTask(taskId);
    }
    if (!this.jobId.equals(taskId.getJobID())) {
      LOG.warn("Task " + taskId +
          " belongs to unknown job " + taskId.getJobID());
      continue;
    }
    TaskInProgress tip = taskLookupTable.getTIP(taskId);
    if (tip == null) {
      continue;
    }
    // Clone TaskStatus object here, because CoronaJobInProgress
    // or TaskInProgress can modify this object and
    // the changes should not get reflected in TaskTrackerStatus.
    // An old TaskTrackerStatus is used later in countMapTasks, etc.
    job.updateTaskStatus(tip, (TaskStatus) report.clone(), status);
    setupReduceRequests(job);
    // Called for its side effects (notifying the job of fetch failures);
    // the returned list was previously stored in an unused local.
    processFetchFailures(report);
  }
}
@Override
public void taskStateChange(TaskStatus.State state, TaskInProgress tip,
TaskAttemptID taskid) {
// Callback invoked when an attempt changes state; only terminating
// states trigger resource processing (see processTaskResource).
LOG.info("The state of " + taskid + " changed to " + state);
processTaskResource(state, tip, taskid);
}
/**
 * Handle the resource side of an attempt reaching a terminating state:
 * update tracker stats, record successes, and either reuse, release, or
 * re-request the grant that backed the attempt.
 */
private void processTaskResource(TaskStatus.State state, TaskInProgress tip,
TaskAttemptID taskid) {
if (!TaskStatus.TERMINATING_STATES.contains(state)) {
return;
}
expireTasks.finishedTask(taskid);
Integer grantId = taskLookupTable.getGrantIdForTask(taskid);
// The TIP that this grant was issued for originally
// if tip is not equal to assignedTip then the grant was borrowed
TaskInProgress assignedTip = requestToTipMap.get(grantId);
taskLookupTable.removeTaskEntry(taskid);
ResourceGrant grant = resourceTracker.getGrant(grantId);
String trackerName = null;
if (grant != null) {
trackerName = grant.nodeName;
}
// Per-tracker success/failure accounting, only when the grant is known.
if (trackerName != null) {
if (state == TaskStatus.State.SUCCEEDED) {
trackerStats.recordSucceededTask(trackerName);
} else if (state == TaskStatus.State.FAILED_UNCLEAN) {
trackerStats.recordFailedTask(trackerName);
} else if (state == TaskStatus.State.KILLED_UNCLEAN) {
trackerStats.recordKilledTask(trackerName);
}
}
if (state == TaskStatus.State.SUCCEEDED) {
assert grantId != null : "Grant for task id " + taskid + " is null!";
TaskType taskType = tip.getAttemptType(taskid);
if (taskType == TaskType.MAP || taskType == TaskType.REDUCE) {
// Ignore cleanup tasks types.
taskLookupTable.addSuccessfulTaskEntry(taskid, trackerName);
}
// Keep the grant if the job wants to reuse it, or if it was borrowed
// from another TIP; otherwise give it back.
if (job.shouldReuseTaskResource(tip) || !assignedTip.equals(tip)) {
resourceTracker.reuseGrant(grantId);
} else {
resourceTracker.releaseResource(grantId);
}
} else {
if (grantId == null) {
// grant could be null if the task reached a terminating state twice,
// e.g. succeeded then failed due to a fetch failure. Or if a TT
// dies after after a success
if (tip.isMapTask()) {
registerNewRequestForTip(tip,
resourceTracker.newMapRequest(tip.getSplitLocations()));
} else {
registerNewRequestForTip(tip, resourceTracker.newReduceRequest());
}
} else {
// Exclude the host unless the attempt was merely killed (a kill is
// not the host's fault).
boolean excludeResource = state != TaskStatus.State.KILLED &&
state != TaskStatus.State.KILLED_UNCLEAN;
processBadResource(grantId, excludeResource);
}
}
}
/**
 * Notify the job of any fetch failures reported in a task status.
 *
 * @param taskStatus status report possibly carrying failed-fetch map ids
 * @return the map TIPs the job decided to fail due to repeated failures
 */
private List<TaskInProgress> processFetchFailures(TaskStatus taskStatus) {
  List<TaskInProgress> failedMaps = new ArrayList<TaskInProgress>();
  List<TaskAttemptID> failedFetchMaps = taskStatus.getFetchFailedMaps();
  if (failedFetchMaps != null) {
    TaskAttemptID reportingAttempt = taskStatus.getTaskID();
    for (TaskAttemptID mapTaskId : failedFetchMaps) {
      TaskInProgress failedFetchMap = taskLookupTable.getTIP(mapTaskId);
      if (failedFetchMap != null) {
        // Gather information about the map which has to be failed, if need be
        String failedFetchTrackerName =
            taskLookupTable.getAssignedTracker(mapTaskId);
        if (failedFetchTrackerName == null) {
          failedFetchTrackerName = "Lost task tracker";
        }
        if (job.fetchFailureNotification(reportingAttempt, failedFetchMap,
            mapTaskId, failedFetchTrackerName)) {
          failedMaps.add(failedFetchMap);
        }
      } else {
        // Log the attempt id we failed to resolve; the original logged
        // failedFetchMap, which is always null in this branch.
        LOG.warn("Could not find TIP for " + mapTaskId);
      }
    }
  }
  return failedMaps;
}
/**
 * A tracker wants to know if any of its Tasks can be committed.
 *
 * @param tts The task tracker status
 * @return The commit actions.
 */
List<CommitTaskAction> getCommitActions(TaskTrackerStatus tts) {
  synchronized (lockObject) {
    List<CommitTaskAction> saveList = new ArrayList<CommitTaskAction>();
    List<TaskStatus> taskStatuses = tts.getTaskReports();
    if (taskStatuses != null) {
      for (TaskStatus taskStatus : taskStatuses) {
        if (taskStatus.getRunState() != TaskStatus.State.COMMIT_PENDING) {
          continue;
        }
        TaskAttemptID taskId = taskStatus.getTaskID();
        TaskInProgress tip = taskLookupTable.getTIP(taskId);
        if (tip == null || !tip.shouldCommit(taskId)) {
          continue;
        }
        // Only attempts still backed by a grant are committed.
        Integer grant = taskLookupTable.getGrantIdForTask(taskId);
        if (grant != null) {
          // (Removed an InetAddress lookup that was computed here but
          // never used.)
          CommitTaskAction commitAction = new CommitTaskAction(taskId);
          saveList.add(commitAction);
          LOG.debug(tts.getTrackerName() +
              " -> CommitTaskAction: " + taskId);
        }
      }
    }
    return saveList;
  }
}
/**
 * Create the single CoronaJobInProgress this tracker will run.
 *
 * @param jobId must match this tracker's job id (checked)
 * @param defaultConf job configuration
 * @throws IOException on id mismatch or job construction failure
 */
CoronaJobInProgress createJob(JobID jobId, JobConf defaultConf)
throws IOException {
checkJobId(jobId);
return new CoronaJobInProgress(
lockObject, jobId, new Path(getSystemDir()), defaultConf,
taskLookupTable, this, jobHistory, getUrl());
}
/**
 * Record a new resource request for a TIP: index the request id back to
 * the TIP, append the request to the TIP's context, and hand it to the
 * resource tracker.
 */
private void registerNewRequestForTip(
    TaskInProgress tip, ResourceRequest req) {
  requestToTipMap.put(req.getId(), tip);
  TaskContext context = taskToContextMap.get(tip);
  if (context != null) {
    context.resourceRequests.add(req);
  } else {
    context = new TaskContext(req);
  }
  taskToContextMap.put(tip, context);
  resourceTracker.recordRequest(req);
}
/** Create one locality-aware resource request per map task of the job. */
private void setupMapRequests(CoronaJobInProgress jip) {
  synchronized (lockObject) {
    for (TaskInProgress mapTip : jip.getTasks(TaskType.MAP)) {
      registerNewRequestForTip(
          mapTip, resourceTracker.newMapRequest(mapTip.getSplitLocations()));
    }
  }
}
/**
 * Lazily create reduce resource requests once the job decides reduces
 * should be scheduled; initializeReducers() guards against doing it twice.
 */
private void setupReduceRequests(CoronaJobInProgress jip) {
  synchronized (lockObject) {
    if (!jip.scheduleReducesUnprotected() || jip.initializeReducers()) {
      return;
    }
    for (TaskInProgress reduceTip : jip.getTasks(TaskType.REDUCE)) {
      registerNewRequestForTip(reduceTip, resourceTracker.newReduceRequest());
    }
  }
}
/**
 * Start the (single) job on this tracker: short-circuit empty jobs,
 * optionally skip setup/cleanup, create the initial resource requests and
 * wake the resource updater.
 *
 * @param jip the job to start
 * @param driver session driver (currently unused here)
 * @return the job's status after starting
 */
JobStatus startJob(CoronaJobInProgress jip, SessionDriver driver)
throws IOException {
synchronized (lockObject) {
this.job = jip;
}
if (job.isJobEmpty()) {
// No tasks at all: complete immediately without requesting resources.
job.completeEmptyJob();
closeIfComplete(false);
return job.getStatus();
} else if (!job.isSetupCleanupRequired()) {
job.completeSetup();
}
setupMapRequests(job);
setupReduceRequests(job);
// Push the new requests to the session driver right away.
resourceUpdater.notifyThread();
return job.getStatus();
}
/** @return the single job this tracker runs (null before startJob). */
CoronaJobInProgress getJob() {
return job;
}
@Override
public JobInProgressTraits getJobInProgress(JobID jobId) {
// Only the tracker's own job id is valid; checkJobId throws otherwise.
checkJobId(jobId);
return this.job;
}
/**
 * Report the RPC version for the two protocols this tracker serves.
 *
 * @throws IOException for any protocol name other than the two supported
 */
@Override
public long getProtocolVersion(String protocol, long clientVersion)
    throws IOException {
  if (JobSubmissionProtocol.class.getName().equals(protocol)) {
    return JobSubmissionProtocol.versionID;
  }
  if (InterTrackerProtocol.class.getName().equals(protocol)) {
    return InterTrackerProtocol.versionID;
  }
  throw new IOException("Unknown protocol " + protocol);
}
/**
 * Kill the job in response to a Web UI request and close the tracker if
 * the job is now complete.
 *
 * @param jobId must match this tracker's job id (checked)
 */
public void killJobFromWebUI(JobID jobId) throws IOException {
checkJobId(jobId);
LOG.info("Killing job from Web UI " + jobId);
job.kill();
closeIfComplete(true);
}
/**
 * Decide whether this process should run the job tracker itself (local)
 * or delegate to a remote JT: standalone mode always runs locally;
 * otherwise remote is used when forced or when the job has more maps than
 * the configured threshold.
 */
private boolean canStartLocalJT(JobConf jobConf) {
  // If we are running in standalone (remote) mode, start the tracker.
  if (isStandalone) {
    return true;
  }
  // We are running in the client process; remote may still be forced.
  boolean forceRemote = jobConf.getBoolean(
      "mapred.coronajobtracker.forceremote", false);
  if (forceRemote) {
    return false;
  }
  // Remote mode is not forced: go remote only for large map counts.
  int threshold = jobConf.getInt(STANDALONE_CJT_THRESHOLD_CONF,
      STANDALONE_CJT_THRESHOLD_DEFAULT);
  return jobConf.getNumMapTasks() <= threshold;
}
//////////////////////////////////////////////////////////////////////////////
// JobSubmissionProtocol
//////////////////////////////////////////////////////////////////////////////
/**
 * Returns a unique JobID for a new job.
 * CoronaJobTracker can only run a single job and its id is fixed a-priori
 * by the session id.
 *
 * @return the job ID.
 */
@Override
public JobID getNewJobId() throws IOException {
  int submissions = jobCounter.incrementAndGet();
  if (submissions > 1) {
    // A second submission is a programming error on the caller's side.
    throw new RuntimeException(
        "CoronaJobTracker can only run one job! (value=" + submissions + ")");
  }
  createSession();
  // The job id is derived deterministically from the session id.
  jobId = CoronaJobTracker.jobIdFromSessionId(sessionId);
  return jobId;
}
/**
 * Submit the job: run it in-process when canStartLocalJT() allows,
 * otherwise upload splits, start a remote JT and delegate submission.
 */
@Override
public JobStatus submitJob(JobID jobId) throws IOException {
// In stand-alone mode, the parent would have submitted the correct
// configuration and we can be comfortable about using the configuration.
// Otherwise, the job client is in the same process as this, and we must
// be able to get a cached configuration.
JobConf jobConf = isStandalone ? this.conf :
JobClient.getAndRemoveCachedJobConf(jobId);
if (canStartLocalJT(jobConf)) {
startFullTracker();
CoronaJobInProgress jip = createJob(jobId, jobConf);
if (sessionDriver != null) {
// Advertise the job's metadata to the cluster manager session.
sessionDriver.setName(jobConf.getJobName());
sessionDriver.setUrl(getUrl());
sessionDriver.setPriority(jip.getPriority());
sessionDriver.setDeadline(jip.getJobDeadline());
}
jip.initTasks();
return startJob(jip, sessionDriver);
} else {
if (sessionDriver != null) {
sessionDriver.setName("Launch pending for " + jobConf.getJobName());
}
// Splits must be available remotely before the remote JT starts.
CoronaJobInProgress.uploadCachedSplits(jobId, jobConf, getSystemDir());
startRestrictedTracker(jobId, jobConf);
remoteJT.waitForJTStart(jobConf);
JobStatus status = remoteJT.submitJob(jobId);
String url = remoteJT.getJobProfile(jobId).getURL().toString();
if (sessionDriver != null) {
sessionDriver.setName("Launched session " +
remoteJT.getRemoteSessionId());
sessionDriver.setUrl(url);
}
return status;
}
}
@Override
public ClusterStatus getClusterStatus(boolean detailed) throws IOException {
// A per-job tracker has no cluster-wide view to report.
throw new UnsupportedOperationException(
"getClusterStatus is not supported by CoronaJobTracker");
}
/**
 * Kill a job. For our own job this kills it locally or via the remote JT
 * (then closes this tracker); for any other job id it kills the whole
 * Corona session that the id maps to.
 */
@Override
public void killJob(JobID jobId) throws IOException {
if (jobId.equals(this.jobId)) {
LOG.info("Killing owned job " + jobId);
if (remoteJT == null) {
job.kill();
closeIfComplete(false);
} else {
remoteJT.killJob(jobId);
LOG.info("Successfully killed " + jobId + " on remote JT, closing");
try {
close(false);
} catch (InterruptedException e) {
// Surface the interrupt as an IOException to RPC callers.
throw new IOException(e);
}
}
} else {
String sessionId = sessionIdFromJobID(jobId);
LOG.info("Killing session " + sessionId + " for non-owned job " + jobId);
CoronaClient.killSession(sessionId, conf);
}
}
/**
 * Change the job's priority, propagating it to the session driver when
 * one exists.
 *
 * @throws IOException if jobId is not this tracker's job
 */
@Override
public void setJobPriority(JobID jobId, String priority) throws IOException {
  if (!this.jobId.equals(jobId)) {
    throw new IOException("JobId " + jobId +
        " does not match the expected id of: " + this.jobId);
  }
  SessionPriority newPrio = SessionPriority.valueOf(priority);
  // Guard against a null session driver, consistent with every other use
  // of sessionDriver in this class (it is null in some tracker modes).
  if (sessionDriver != null) {
    sessionDriver.setPriority(newPrio);
  }
  job.setPriority(newPrio);
}
/**
 * Kill (or fail) a single task attempt, delegating to the remote JT when
 * this tracker runs in restricted mode.
 */
@Override
public boolean killTask(TaskAttemptID taskId, boolean shouldFail)
    throws IOException {
  if (remoteJT != null) {
    return remoteJT.killTask(taskId, shouldFail);
  }
  String diagnostic = "Request received to " +
      (shouldFail ? "fail" : "kill") +
      " task '" + taskId + "' by user";
  synchronized (lockObject) {
    return killTaskUnprotected(taskId, shouldFail, diagnostic);
  }
}
// Kill/fail one attempt. Caller must hold lockObject (hence "Unprotected").
private boolean killTaskUnprotected(TaskAttemptID taskId, boolean shouldFail,
String diagnosticInfo) {
TaskInProgress tip = taskLookupTable.getTIP(taskId);
return tip.killTask(taskId, shouldFail, diagnosticInfo);
}
/**
 * Profile of this tracker's job, fetched remotely in restricted mode.
 * Returns null for any other job id.
 */
@Override
public JobProfile getJobProfile(JobID jobId) throws IOException {
  if (!this.jobId.equals(jobId)) {
    return null;
  }
  return (remoteJT == null)
      ? this.job.getProfile()
      : remoteJT.getJobProfile(jobId);
}
/**
 * Status of this tracker's job. For a local job that has completed, this
 * call BLOCKS until the tracker has fully closed, so the client observes
 * a consistent final state; for a remote job, completion triggers close().
 * Returns null for any other job id.
 */
@Override
public JobStatus getJobStatus(JobID jobId) throws IOException {
JobStatus status = null;
if (this.jobId.equals(jobId)) {
if (remoteJT == null) {
status = this.job.getStatus();
if (status.isJobComplete()) {
// Wait for the close() sequence to finish before returning the
// final status to the client.
synchronized (lockObject) {
while (!closed) {
try {
lockObject.wait();
} catch (InterruptedException iex) {
throw new IOException(iex);
}
}
}
}
} else {
status = remoteJT.getJobStatus(jobId);
if (status.isJobComplete()) {
try {
close(false);
} catch (InterruptedException e) {
throw new IOException(e);
}
}
}
}
return status;
}
/**
 * Counters of this tracker's job, fetched remotely in restricted mode.
 * Returns null for any other job id.
 */
@Override
public Counters getJobCounters(JobID jobId) throws IOException {
  if (!this.jobId.equals(jobId)) {
    return null;
  }
  return (remoteJT == null)
      ? this.job.getCounters()
      : remoteJT.getJobCounters(jobId);
}
/** Map task reports, delegated to the remote JT in restricted mode. */
@Override
public TaskReport[] getMapTaskReports(JobID jobId) throws IOException {
  checkJobId(jobId);
  if (remoteJT != null) {
    return remoteJT.getMapTaskReports(jobId);
  }
  synchronized (lockObject) {
    return super.getMapTaskReportsImpl(jobId);
  }
}
/** Reduce task reports, delegated to the remote JT in restricted mode. */
@Override
public TaskReport[] getReduceTaskReports(JobID jobId) throws IOException {
  checkJobId(jobId);
  if (remoteJT != null) {
    return remoteJT.getReduceTaskReports(jobId);
  }
  synchronized (lockObject) {
    return super.getReduceTaskReportsImpl(jobId);
  }
}
/** Cleanup task reports, delegated to the remote JT in restricted mode. */
@Override
public TaskReport[] getCleanupTaskReports(JobID jobId) throws IOException {
  checkJobId(jobId);
  if (remoteJT != null) {
    return remoteJT.getCleanupTaskReports(jobId);
  }
  synchronized (lockObject) {
    return super.getCleanupTaskReportsImpl(jobId);
  }
}
/** Setup task reports, delegated to the remote JT in restricted mode. */
@Override
public TaskReport[] getSetupTaskReports(JobID jobId) throws IOException {
  checkJobId(jobId);
  if (remoteJT != null) {
    return remoteJT.getSetupTaskReports(jobId);
  }
  synchronized (lockObject) {
    return super.getSetupTaskReportsImpl(jobId);
  }
}
@Override
public String getFilesystemName() throws IOException {
// Not meaningful for a per-job tracker; clients must not rely on it.
return null;
}
// Multi-job queries are unsupported: this tracker only ever runs one job.
@Override
public JobStatus[] jobsToComplete() { return null; }
@Override
public JobStatus[] getAllJobs() { return null; }
/**
 * Completion events for this tracker's job, capped at maxEventsPerRpc per
 * call; delegated to the remote JT in restricted mode. Any other job id
 * yields an empty array.
 */
@Override
public TaskCompletionEvent[] getTaskCompletionEvents(JobID jobid,
    int fromEventId, int maxEvents) throws IOException {
  maxEvents = Math.min(maxEvents, maxEventsPerRpc);
  // Compare against the *parameter* jobid. The original compared the field
  // this.jobId with itself, so the guard could never reject a foreign id.
  if (!this.jobId.equals(jobid)) {
    return TaskCompletionEvent.EMPTY_ARRAY;
  } else {
    if (remoteJT == null) {
      return job.getTaskCompletionEvents(fromEventId, maxEvents);
    } else {
      return remoteJT.getTaskCompletionEvents(jobid,
          fromEventId, maxEvents);
    }
  }
}
/** Diagnostics for one attempt, fetched remotely in restricted mode. */
@Override
public String[] getTaskDiagnostics(TaskAttemptID taskId) throws IOException {
  if (remoteJT != null) {
    return remoteJT.getTaskDiagnostics(taskId);
  }
  synchronized (lockObject) {
    return super.getTaskDiagnosticsImpl(taskId);
  }
}
@Override
public String getSystemDir() {
// Delegates to the static helper using this tracker's fs and conf.
return getSystemDir(fs, conf);
}
/**
 * Resolve the system directory from configuration, qualifying it against
 * the given filesystem when the configured path has no scheme/authority.
 */
public static String getSystemDir(FileSystem fs, Configuration conf) {
  Path sysDir = new Path(conf.get(SYSTEM_DIR_KEY, DEFAULT_SYSTEM_DIR));
  java.net.URI uri = sysDir.toUri();
  boolean fullyQualified =
      uri.getScheme() != null && uri.getAuthority() != null;
  return fullyQualified
      ? sysDir.toString()
      : fs.makeQualified(sysDir).toString();
}
// Queue-related queries are unsupported: a per-job tracker has no queues.
@Override
public JobQueueInfo[] getQueues() { return null; }
@Override
public JobQueueInfo getQueueInfo(String queue) { return null; }
@Override
public JobStatus[] getJobsFromQueue(String queue) { return null; }
@Override
public QueueAclsInfo[] getQueueAclsForCurrentUser() throws IOException {
return null;
}
//////////////////////////////////////////////////////////////////////////////
// SessionDriverService.Iface
//////////////////////////////////////////////////////////////////////////////
/**
 * Callback from the session driver delivering newly granted resources;
 * they are queued on the resource tracker for the assign-tasks loop.
 */
@Override
public void grantResource(String handle, List<ResourceGrant> granted) {
  String msg = "Received " + granted.size() + " new grants ";
  // Full grant details only at debug level to keep the info log compact.
  if (LOG.isDebugEnabled()) {
    LOG.debug(msg + granted.toString());
  } else {
    LOG.info(msg);
  }
  resourceTracker.addNewGrants(granted);
}
/**
 * Callback from the session driver revoking grants; they are queued and
 * acted on later by processGrantsToRevoke() on the updater thread.
 */
@Override
public void revokeResource(String handle,
List<ResourceGrant> revoked, boolean force) {
synchronized (lockObject) {
grantsToRevoke.addAll(revoked);
}
LOG.info("Giving up " + revoked.size() + " grants: " +
revoked.toString());
}
/**
 * Callback from the session driver reporting a lost node; queued for
 * processDeadNodes() on the updater thread.
 */
@Override
public void processDeadNode(String handle, String deadNode) {
// CM declared the node as lost so we can process it quickly
synchronized (lockObject) {
deadNodes.add(deadNode);
}
LOG.info("Node " + deadNode + " declared dead by the CM");
}
/////////////////////////////////////////////////////////////////////////////
// InterTrackerProtocol
/////////////////////////////////////////////////////////////////////////////
/** @return the Hadoop build version string reported to task trackers. */
@Override
public String getBuildVersion() throws IOException {
return VersionInfo.getBuildVersion();
}
/**
 * Handle a task tracker heartbeat: apply status reports, record the
 * tracker's latest status, send commit actions, queue kill actions, and
 * return an empty response (launch/kill actions are delivered separately).
 */
@Override
public HeartbeatResponse heartbeat(TaskTrackerStatus status,
boolean restarted, boolean initialContact, boolean acceptNewTasks,
short responseId) throws IOException {
updateTaskStatuses(status);
String trackerName = status.getTrackerName();
// remember the last known status of this task tracker
// This is a ConcurrentHashMap, so no lock required.
taskTrackerStatus.put(trackerName, status);
// Check for tasks whose outputs can be saved
List<CommitTaskAction> commitActions = getCommitActions(status);
for (CommitTaskAction action: commitActions) {
taskLauncher.commitTask(
trackerName, resourceTracker.getTrackerAddr(trackerName), action);
}
// Return an empty response since the actions are sent separately.
short newResponseId = (short) (responseId + 1);
HeartbeatResponse response =
new HeartbeatResponse(newResponseId, new TaskTrackerAction[0]);
response.setHeartbeatInterval(getNextHeartbeatInterval());
queueKillActions(trackerName);
// The job may have just finished; shut down if so.
closeIfComplete(false);
return response;
}
/** Send any pending kill actions for the given tracker via the launcher. */
private void queueKillActions(String trackerName) {
  List<KillTaskAction> killActions =
      taskLookupTable.getTasksToKill(trackerName);
  InetAddress trackerAddr = resourceTracker.getTrackerAddr(trackerName);
  taskLauncher.killTasks(trackerName, trackerAddr, killActions);
}
private int getNextHeartbeatInterval() {
return heartbeatInterval;
}
@Override
public void reportTaskTrackerError(String taskTrackerName, String errorClass,
String errorMessage) throws IOException {
LOG.warn("reportTaskTrackerError is not implemented in Corona JT, " +
"params are " + taskTrackerName + "," + errorClass + "," + errorMessage);
}
@Override
public ProtocolSignature getProtocolSignature(String protocol,
long clientVersion, int clientMethodsHash) throws IOException {
return ProtocolSignature.getProtocolSignature(
this, protocol, clientVersion, clientMethodsHash);
}
/**
 * Returns the port of this job tracker's HTTP info server.
 */
public int getInfoPort() {
return infoPort;
}
/**
 * Returns the last known status reported by the given task tracker, or
 * null if none has been recorded.
 *
 * NOTE(review): heartbeat() writes taskTrackerStatus without holding
 * lockObject (relying on it being a ConcurrentHashMap); the lock here is
 * presumably for consistency with other state guarded by lockObject —
 * confirm whether it is actually required for this read.
 *
 * @param trackerID name of the task tracker
 * @return the tracker's most recent status, or null if unknown
 */
public TaskTrackerStatus getTaskTrackerStatus(String trackerID) {
synchronized (lockObject) {
return taskTrackerStatus.get(trackerID);
}
}
/**
 * Builds a snapshot of resource grants for the given resource type,
 * pairing each grant id with the task attempt currently using it, plus an
 * entry for every granted-but-unused resource. Used by
 * coronajobresources.jsp for debugging which resources are in use.
 *
 * @param resourceType "map" or "reduce"
 * @return resource reports sorted by grant id
 */
public List<ResourceReport> getResourceReportList(String resourceType) {
  // TreeMap keeps the reports sorted by grant id.
  Map<Integer, ResourceReport> reportsByGrant =
      new TreeMap<Integer, ResourceReport>();
  boolean wantMaps = resourceType.equals("map");
  boolean wantReduces = resourceType.equals("reduce");
  synchronized (lockObject) {
    for (Map.Entry<TaskAttemptID, Integer> entry :
        taskLookupTable.taskIdToGrantMap.entrySet()) {
      TaskAttemptID attempt = entry.getKey();
      Integer grantId = entry.getValue();
      if (attempt.isMap() ? wantMaps : wantReduces) {
        reportsByGrant.put(grantId,
            new ResourceReport(grantId, attempt.toString()));
      }
    }
    // Grants with no task attached are reported as available.
    for (Integer grantId : resourceTracker.availableResources) {
      if (!reportsByGrant.containsKey(grantId)) {
        reportsByGrant.put(grantId,
            new ResourceReport(grantId, "Available (currently not in use)"));
      }
    }
  }
  return new ArrayList<ResourceReport>(reportsByGrant.values());
}
/**
 * Returns the proxied form of the given relative URL using this tracker's
 * configuration. See {@link #getProxyUrl(Configuration, String)}.
 *
 * @param relativeUrl the URL to wrap
 * @return the proxied URL, or relativeUrl when no proxy is configured
 */
public String getProxyUrl(String relativeUrl) {
return getProxyUrl(conf, relativeUrl);
}
/**
 * Returns the proxy job tracker address from this tracker's configuration.
 * See {@link #getProxyJTAddr(Configuration)}.
 */
public String getProxyJTAddr() {
return getProxyJTAddr(conf);
}
/**
 * Returns the configured Corona proxy job tracker address
 * ("mapred.job.tracker.corona.proxyaddr"), defaulting to "localhost".
 *
 * @param conf configuration to read the proxy address from
 * @return the proxy job tracker address
 */
public static String getProxyJTAddr(Configuration conf) {
return conf.get("mapred.job.tracker.corona.proxyaddr", "localhost");
}
/**
 * Converts a job-tracker-relative URL into one routed through the Corona
 * proxy, preserving any query parameters from the relative URL. When no
 * proxy address is configured, the relative URL is returned unchanged.
 *
 * @param conf configuration holding the proxy and info-server addresses
 * @param relativeUrl the URL (optionally with a query string) to wrap
 * @return the proxied URL, or relativeUrl when no proxy is configured
 */
public static String getProxyUrl(Configuration conf, String relativeUrl) {
  String proxyJtAddr = getProxyJTAddr(conf);
  if (proxyJtAddr == null || proxyJtAddr.length() == 0) {
    return relativeUrl;
  }
  StringBuilder url = new StringBuilder("http://")
      .append(proxyJtAddr)
      .append("/proxy?host=")
      .append(conf.get("mapred.job.tracker.info.bindAddress"))
      .append("&port=")
      .append(conf.get("mapred.job.tracker.info.port"))
      .append("&path=");
  int qIndex = relativeUrl.indexOf('?');
  if (qIndex == -1) {
    // No query string: the whole relative URL is the path.
    return url.append(relativeUrl).toString();
  }
  url.append(relativeUrl.substring(0, qIndex));
  // Re-attach the query string (if non-empty) with '&' rather than '?',
  // because the proxy URL already contains a '?'.
  if (qIndex < relativeUrl.length() - 1) {
    url.append('&').append(relativeUrl.substring(qIndex + 1));
  }
  return url.toString();
}
/**
 * Returns the ClusterManager web UI URL built from
 * "cm.server.http.address", or the literal string "NONE" when the address
 * is not configured.
 */
public String getClusterManagerUrl() {
  String httpAddr = conf.get("cm.server.http.address");
  return (httpAddr == null) ? "NONE" : "http://" + httpAddr;
}
/**
 * Returns the per-tracker statistics object (timeouts, failures, etc.).
 */
public TrackerStats getStats() {
return trackerStats;
}
/**
 * Verifies that the supplied job id matches the single job this tracker
 * manages.
 *
 * @param jobId the job id supplied by the caller
 * @throws IllegalArgumentException if the id does not match this tracker's
 *         job id
 */
private void checkJobId(JobID jobId) {
  if (!this.jobId.equals(jobId)) {
    // IllegalArgumentException is more descriptive than a raw
    // RuntimeException and, being a RuntimeException subclass, remains
    // backward-compatible for any caller catching RuntimeException.
    throw new IllegalArgumentException("JobId " + jobId +
        " does not match the expected id of: " + this.jobId);
  }
}
/**
 * Gets a snapshot of the current resource usage from the resource tracker,
 * mainly for displaying on the web server.
 *
 * @return snapshot of resource usage
 */
public ResourceUsage getResourceUsage() {
return resourceTracker.getResourceUsage();
}
/**
 * Entry point for running a CoronaJobTracker as a separate process.
 * Usage: JOBID ATTEMPTID PARENTHOST PARENTPORT. Loads the localized job
 * configuration from "&lt;jobId&gt;.xml" in the working directory, starts
 * the tracker, and keeps the process alive until the tracker stops.
 *
 * @param args command-line arguments as described above
 * @throws IOException if the tracker fails to start
 * @throws InterruptedException if the wait loop is interrupted
 */
public static void main(String[] args)
throws IOException, InterruptedException {
if (args.length < 4) {
System.err.println(
"Usage: java CoronaJobTracker JOBID ATTEMPTID PARENTHOST PARENTPORT");
System.exit(-1);
}
JobID jobId = JobID.forName(args[0]);
TaskAttemptID attemptId = TaskAttemptID.forName(args[1]);
InetSocketAddress parentAddr =
new InetSocketAddress(args[2], Integer.parseInt(args[3]));
// Use the localized configuration in the working directory.
JobConf conf = new JobConf(new Path(jobId + ".xml"));
Task.loadStaticResolutions(conf);
// Propagate the system dir from the JVM property into the job conf.
conf.set("mapred.system.dir", System.getProperty("mapred.system.dir"));
CoronaJobTracker cjt = new CoronaJobTracker(
conf, jobId, attemptId, parentAddr);
// Keep the process alive while the tracker runs; all real work happens
// on the tracker's own threads.
while (cjt.running) {
Thread.sleep(1000);
}
}
/**
 * Handles a task attempt that never launched on its tracker: records a
 * timeout against the granted node (when one is known) and fails the
 * attempt.
 *
 * @param taskId the task attempt that could not be launched
 */
public void expiredLaunchingTask(TaskAttemptID taskId) {
  synchronized (lockObject) {
    Integer grantId = taskLookupTable.getGrantIdForTask(taskId);
    ResourceGrant grant =
        (grantId == null) ? null : resourceTracker.getGrant(grantId);
    if (grant != null) {
      trackerStats.recordTimeout(grant.getNodeName());
    }
    failTask(taskId, "Error launching task", false);
  }
}
/**
 * Handles a running task attempt that stopped heartbeating: records a
 * timeout against the granted node (when one is known) and fails the
 * attempt.
 *
 * @param taskId the task attempt that timed out
 */
public void expiredRunningTask(TaskAttemptID taskId) {
  synchronized (lockObject) {
    Integer grantId = taskLookupTable.getGrantIdForTask(taskId);
    ResourceGrant grant =
        (grantId == null) ? null : resourceTracker.getGrant(grantId);
    if (grant != null) {
      trackerStats.recordTimeout(grant.getNodeName());
    }
    failTask(taskId, "Timeout running task", false);
  }
}
}