/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import java.io.DataInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.Vector;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.CleanupQueue.PathDeletionContext;
import org.apache.hadoop.mapred.JobHistory.Values;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.server.jobtracker.TaskTracker;
import org.apache.hadoop.metrics.MetricsContext;
import org.apache.hadoop.metrics.MetricsRecord;
import org.apache.hadoop.metrics.MetricsUtil;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.net.NetworkTopology;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.util.StringUtils;
/*************************************************************
* JobInProgress maintains all the info for keeping
* a Job on the straight and narrow. It keeps its JobProfile
* and its latest JobStatus, plus a set of tables for
* doing bookkeeping of its Tasks.
* ***********************************************************
*/
public class JobInProgress extends JobInProgressTraits {
/**
* Used when a kill is issued to a job which is still initializing.
*/
static class KillInterruptedException extends InterruptedException {
private static final long serialVersionUID = 1L;
public KillInterruptedException(String msg) {
super(msg);
}
}
static final Log LOG = LogFactory.getLog(JobInProgress.class);
static final Log countersLog = LogFactory.getLog("Counters");
JobProfile profile;
JobStatus status;
Path jobFile = null;
Path localJobFile = null;
int numMapTasks = 0;
int numReduceTasks = 0;
long memoryPerMap;
long memoryPerReduce;
volatile int numSlotsPerMap = 1;
volatile int numSlotsPerReduce = 1;
int maxTaskFailuresPerTracker;
volatile long totalMapWaitTime = 0L;
volatile long totalReduceWaitTime = 0L;
volatile long firstMapStartTime = 0;
volatile long firstReduceStartTime = 0;
// Counters to track currently running/finished/failed Map/Reduce task-attempts
int runningMapTasks = 0;
int runningReduceTasks = 0;
int pendingMapTasks = 0;
int pendingReduceTasks = 0;
int neededMapTasks = 0;
int neededReduceTasks = 0;
int finishedMapTasks = 0;
int finishedReduceTasks = 0;
int failedMapTasks = 0;
int failedReduceTasks = 0;
int killedMapTasks = 0;
int killedReduceTasks = 0;
static final float DEFAULT_COMPLETED_MAPS_PERCENT_FOR_REDUCE_SLOWSTART = 0.05f;
int completedMapsForReduceSlowstart = 0;
int rushReduceReduces = 5;
int rushReduceMaps = 5;
// Speculate when the percentage of the unfinished maps is lower than this
public static final String SPECULATIVE_MAP_UNFINISHED_THRESHOLD_KEY =
"mapred.map.tasks.speculation.unfinished.threshold";
private float speculativeMapUnfininshedThreshold = 0.001F;
// Speculate when the percentage of the unfinished reduces is lower than this
public static final String SPECULATIVE_REDUCE_UNFINISHED_THRESHOLD_KEY =
"mapred.reduce.tasks.speculation.unfinished.threshold";
private float speculativeReduceUnfininshedThreshold = 0.001F;
// runningMapTasks include speculative tasks, so we need to capture
// speculative tasks separately
int speculativeMapTasks = 0;
int speculativeReduceTasks = 0;
boolean garbageCollected = false;
private static AtomicInteger totalSpeculativeMapTasks = new AtomicInteger(0);
private static AtomicInteger totalSpeculativeReduceTasks =
new AtomicInteger(0);
int mapFailuresPercent = 0;
int reduceFailuresPercent = 0;
int failedMapTIPs = 0;
int failedReduceTIPs = 0;
private volatile boolean launchedCleanup = false;
private volatile boolean launchedSetup = false;
private volatile boolean jobKilled = false;
private volatile boolean jobFailed = false;
boolean jobSetupCleanupNeeded;
boolean jobFinishWhenReducesDone;
boolean taskCleanupNeeded;
JobPriority priority = JobPriority.NORMAL;
JobTracker jobtracker;
// Map of NetworkTopology Node to the set of non-running map TIPs
Map<Node, List<TaskInProgress>> nonRunningMapCache;
// Map of NetworkTopology Node to set of running TIPs
Map<Node, Set<TaskInProgress>> runningMapCache;
// A list of non-local non-running maps
List<TaskInProgress> nonLocalMaps;
// A set of non-local running maps
Set<TaskInProgress> nonLocalRunningMaps;
// A list of non-running reduce TIPs
List<TaskInProgress> nonRunningReduces;
// A set of running reduce TIPs
Set<TaskInProgress> runningReduces;
// A list of cleanup tasks for the map task attempts, to be launched
List<TaskAttemptID> mapCleanupTasks = new LinkedList<TaskAttemptID>();
// A list of cleanup tasks for the reduce task attempts, to be launched
List<TaskAttemptID> reduceCleanupTasks = new LinkedList<TaskAttemptID>();
int maxLevel;
/**
* A special value indicating that
* {@link #findNewMapTask(TaskTrackerStatus, int, int, int)} should
* schedule any available map tasks for this job, including speculative tasks.
*/
int anyCacheLevel;
/**
* A special value indicating that
* {@link #findNewMapTask(TaskTrackerStatus, int, int, int)} should
* schedule only off-switch and speculative map tasks for this job.
*/
private static final int NON_LOCAL_CACHE_LEVEL = -1;
private int taskCompletionEventTracker = 0;
List<TaskCompletionEvent> taskCompletionEvents;
// The maximum percentage of trackers in cluster added to the 'blacklist'.
private static final double CLUSTER_BLACKLIST_PERCENT = 0.25;
// The maximum percentage of fetch failures allowed for a map
private static final double MAX_ALLOWED_FETCH_FAILURES_PERCENT = 0.5;
// No. of tasktrackers in the cluster
private volatile int clusterSize = 0;
// The no. of tasktrackers where >= conf.getMaxTaskFailuresPerTracker()
// tasks have failed
private volatile int flakyTaskTrackers = 0;
// Map of trackerHostName -> list of task-failure reasons
private final Map<String, List<String>> trackerToFailuresMap =
new TreeMap<String, List<String>>();
//Confine estimation algorithms to an "oracle" class that JIP queries.
ResourceEstimator resourceEstimator;
volatile long startTime;
long launchTime;
long finishTime;
// Indicates how many times the job got restarted
int restartCount;
JobConf conf;
AtomicBoolean tasksInited = new AtomicBoolean(false);
private final JobInitKillStatus jobInitKillStatus = new JobInitKillStatus();
LocalFileSystem localFs;
JobID jobId;
volatile private boolean hasSpeculativeMaps;
volatile private boolean hasSpeculativeReduces;
long inputLength = 0;
private String user;
private String historyFile = "";
private boolean historyFileCopied;
// Per-job counters
public static enum Counter {
NUM_FAILED_MAPS,
NUM_FAILED_REDUCES,
TOTAL_LAUNCHED_MAPS,
TOTAL_LAUNCHED_REDUCES,
OTHER_LOCAL_MAPS,
DATA_LOCAL_MAPS,
RACK_LOCAL_MAPS,
SLOTS_MILLIS_MAPS,
SLOTS_MILLIS_REDUCES,
SLOTS_MILLIS_REDUCES_COPY,
SLOTS_MILLIS_REDUCES_SORT,
SLOTS_MILLIS_REDUCES_REDUCE,
FALLOW_SLOTS_MILLIS_MAPS,
FALLOW_SLOTS_MILLIS_REDUCES,
LOCAL_MAP_INPUT_BYTES,
RACK_MAP_INPUT_BYTES,
TOTAL_MAP_WAIT_MILLIS,
TOTAL_REDUCE_WAIT_MILLIS,
}
Counters jobCounters = new Counters();
MetricsRecord jobMetrics;
// Maximum no. of fetch-failure notifications after which
// the map task is killed
private static final int MAX_FETCH_FAILURES_NOTIFICATIONS = 3;
private static final int MAX_FETCH_FAILURES_PER_MAP_DEFAULT = 50;
private static final String MAX_FETCH_FAILURES_PER_MAP_KEY =
"mapred.job.per.map.maxfetchfailures";
private int maxFetchFailuresPerMapper;
// Map of mapTaskId -> no. of fetch failures
private final Map<TaskAttemptID, Integer> mapTaskIdToFetchFailuresMap =
new TreeMap<TaskAttemptID, Integer>();
private Object schedulingInfo;
// Don't lower speculativeCap below one TT's worth (for small clusters)
private static final int MIN_SPEC_CAP = 10;
private static final float MIN_SLOTS_CAP = 0.01f;
private static final float TOTAL_SPECULATIVECAP = 0.1f;
public static final String SPECULATIVE_SLOWTASK_THRESHOLD =
"mapreduce.job.speculative.slowtaskthreshold";
public static final String RUSH_REDUCER_MAP_THRESHOLD =
"mapred.job.rushreduce.map.threshold";
public static final String RUSH_REDUCER_REDUCE_THRESHOLD =
"mapred.job.rushreduce.reduce.threshold";
public static final String SPECULATIVECAP =
"mapreduce.job.speculative.speculativecap";
public static final String SPECULATIVE_SLOWNODE_THRESHOLD =
"mapreduce.job.speculative.slownodethreshold";
public static final String REFRESH_TIMEOUT =
"mapreduce.job.refresh.timeout";
public static final String SPECULATIVE_STDDEVMEANRATIO_MAX =
"mapreduce.job.speculative.stddevmeanratio.max";
//thresholds for speculative execution
float slowTaskThreshold;
float speculativeCap;
float slowNodeThreshold;
//Statistics are maintained for a couple of things
//mapTaskStats is used for maintaining statistics about
//the completion time of map tasks on the trackers. On a per
//tracker basis, the mean time for task completion is maintained
private final DataStatistics mapTaskStats = new DataStatistics();
//reduceTaskStats is used for maintaining statistics about
//the completion time of reduce tasks on the trackers. On a per
//tracker basis, the mean time for task completion is maintained
private final DataStatistics reduceTaskStats = new DataStatistics();
//trackerMapStats maintains a mapping from the tracker to the
//statistics about the completion time of map tasks
private Map<String,DataStatistics> trackerMapStats =
new HashMap<String,DataStatistics>();
//trackerReduceStats maintains a mapping from the tracker to the
//statistics about the completion time of reduce tasks
private Map<String,DataStatistics> trackerReduceStats =
new HashMap<String,DataStatistics>();
//runningMapTaskStats maintains the RUNNING map tasks' statistics
private final DataStatistics runningMapTaskStats = new DataStatistics();
//runningReduceTaskStats maintains the RUNNING reduce tasks' statistics
private final DataStatistics runningReduceTaskStats = new DataStatistics();
//Stores stats for processing rates for all the tasks in each phase
private DataStatistics runningTaskMapByteProcessingRateStats =
new DataStatistics();
private DataStatistics runningTaskMapRecordProcessingRateStats =
new DataStatistics();
private DataStatistics runningTaskCopyProcessingRateStats =
new DataStatistics();
private DataStatistics runningTaskSortProcessingRateStats =
new DataStatistics();
private DataStatistics runningTaskReduceProcessingRateStats =
new DataStatistics();
private static class FallowSlotInfo {
long timestamp;
int numSlots;
public FallowSlotInfo(long timestamp, int numSlots) {
this.timestamp = timestamp;
this.numSlots = numSlots;
}
public long getTimestamp() {
return timestamp;
}
public void setTimestamp(long timestamp) {
this.timestamp = timestamp;
}
public int getNumSlots() {
return numSlots;
}
public void setNumSlots(int numSlots) {
this.numSlots = numSlots;
}
}
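// Fallow-slot accounting: a FallowSlotInfo records when slots on a tracker
// were reserved and how many. When the reservation changes or is released,
// the fallow time is metered as (now - timestamp) * numSlots and added to
// the FALLOW_SLOTS_MILLIS_* counters (see reserveTaskTracker and
// unreserveTaskTracker below).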
private final Map<TaskTracker, FallowSlotInfo> trackersReservedForMaps =
new HashMap<TaskTracker, FallowSlotInfo>();
private final Map<TaskTracker, FallowSlotInfo> trackersReservedForReduces =
new HashMap<TaskTracker, FallowSlotInfo>();
private long lastRefresh;
private final long refreshTimeout;
private final float speculativeStddevMeanRatioMax;
private List<TaskInProgress> candidateSpeculativeMaps, candidateSpeculativeReduces;
// For tracking what task caused the job to fail.
private TaskID taskIdThatCausedFailure = null;
public static final String ENABLE_NO_FETCH_MAP_OUTPUTS = "mapred.enable.no.fetch.map.outputs";
private final boolean enableNoFetchEmptyMapOutputs;
/**
* Create an almost empty JobInProgress, which can be used only for tests
*/
protected JobInProgress(JobID jobid, JobConf conf, JobTracker tracker) {
this.conf = conf;
this.jobId = jobid;
this.numMapTasks = conf.getNumMapTasks();
this.numReduceTasks = conf.getNumReduceTasks();
this.maxLevel = NetworkTopology.DEFAULT_HOST_LEVEL;
this.anyCacheLevel = this.maxLevel+1;
this.jobtracker = tracker;
this.restartCount = 0;
this.status = new JobStatus(jobid, 0.0f, 0.0f, JobStatus.PREP);
this.profile = new JobProfile(conf.getUser(), jobid, "", "",
conf.getJobName(), conf.getQueueName());
this.memoryPerMap = conf.getMemoryForMapTask();
this.memoryPerReduce = conf.getMemoryForReduceTask();
this.maxTaskFailuresPerTracker = conf.getMaxTaskFailuresPerTracker();
this.nonLocalMaps = new LinkedList<TaskInProgress>();
this.nonLocalRunningMaps = new LinkedHashSet<TaskInProgress>();
this.runningMapCache = new IdentityHashMap<Node, Set<TaskInProgress>>();
this.nonRunningReduces = new LinkedList<TaskInProgress>();
this.runningReduces = new LinkedHashSet<TaskInProgress>();
this.resourceEstimator = new ResourceEstimator(this);
this.jobSetupCleanupNeeded = true;
this.jobFinishWhenReducesDone = false;
this.taskCompletionEvents = new ArrayList<TaskCompletionEvent>
(numMapTasks + numReduceTasks + 10);
this.slowTaskThreshold = Math.max(0.0f,
conf.getFloat(JobInProgress.SPECULATIVE_SLOWTASK_THRESHOLD,1.0f));
this.speculativeCap = conf.getFloat(
JobInProgress.SPECULATIVECAP,0.1f);
this.slowNodeThreshold = conf.getFloat(
JobInProgress.SPECULATIVE_SLOWNODE_THRESHOLD,1.0f);
this.refreshTimeout = conf.getLong(JobInProgress.REFRESH_TIMEOUT, 5000L);
this.speculativeStddevMeanRatioMax = conf.getFloat(
JobInProgress.SPECULATIVE_STDDEVMEANRATIO_MAX, 0.33f);
this.speculativeMapUnfininshedThreshold = conf.getFloat(
SPECULATIVE_MAP_UNFINISHED_THRESHOLD_KEY,
speculativeMapUnfininshedThreshold);
this.speculativeReduceUnfininshedThreshold = conf.getFloat(
SPECULATIVE_REDUCE_UNFINISHED_THRESHOLD_KEY,
speculativeReduceUnfininshedThreshold);
hasSpeculativeMaps = conf.getMapSpeculativeExecution();
hasSpeculativeReduces = conf.getReduceSpeculativeExecution();
LOG.info(jobId + ": hasSpeculativeMaps = " + hasSpeculativeMaps +
", hasSpeculativeReduces = " + hasSpeculativeReduces);
enableNoFetchEmptyMapOutputs = conf.getBoolean(ENABLE_NO_FETCH_MAP_OUTPUTS, false);
LOG.info(jobId + ": enableNoFetchEmptyMapOutputs = " + enableNoFetchEmptyMapOutputs);
}
/**
* Create a JobInProgress with the given job file, plus a handle
* to the tracker.
*/
public JobInProgress(JobID jobid, JobTracker jobtracker,
JobConf default_conf) throws IOException {
this(jobid, jobtracker, default_conf, 0);
}
public JobInProgress(JobID jobid, JobTracker jobtracker,
JobConf default_conf, int rCount) throws IOException {
this(jobid, jobtracker, default_conf, null, rCount);
}
JobInProgress(JobID jobid, JobTracker jobtracker,
JobConf default_conf, String user, int rCount)
throws IOException {
this.restartCount = rCount;
this.jobId = jobid;
String url = "http://" + jobtracker.getJobTrackerMachine() + ":"
+ jobtracker.getInfoPort() + "/jobdetails.jsp?jobid=" + jobid;
this.jobtracker = jobtracker;
this.status = new JobStatus(jobid, 0.0f, 0.0f, JobStatus.PREP);
this.jobtracker.getInstrumentation().addPrepJob(conf, jobid);
this.startTime = JobTracker.getClock().getTime();
status.setStartTime(startTime);
this.localFs = FileSystem.getLocal(default_conf);
JobConf default_job_conf = new JobConf(default_conf);
this.localJobFile = default_job_conf.getLocalPath(JobTracker.SUBDIR
+"/"+jobid + ".xml");
if (user == null) {
this.user = conf.getUser();
} else {
this.user = user;
}
LOG.info("User : " + this.user);
Path jobDir = jobtracker.getSystemDirectoryForJob(jobId);
FileSystem fs = jobDir.getFileSystem(default_conf);
jobFile = new Path(jobDir, "job.xml");
if (!localFs.exists(localJobFile)) {
fs.copyToLocalFile(jobFile, localJobFile);
}
conf = new JobConf(localJobFile);
this.priority = conf.getJobPriority();
this.status.setJobPriority(this.priority);
this.profile = new JobProfile(user, jobid,
jobFile.toString(), url, conf.getJobName(),
conf.getQueueName());
this.numMapTasks = conf.getNumMapTasks();
this.numReduceTasks = conf.getNumReduceTasks();
this.memoryPerMap = conf.getMemoryForMapTask();
this.memoryPerReduce = conf.getMemoryForReduceTask();
this.taskCompletionEvents = new ArrayList<TaskCompletionEvent>
(numMapTasks + numReduceTasks + 10);
this.jobSetupCleanupNeeded = conf.getJobSetupCleanupNeeded();
this.jobFinishWhenReducesDone = conf.getJobFinishWhenReducesDone();
this.taskCleanupNeeded = conf.getTaskCleanupNeeded();
LOG.info("Setup and cleanup tasks: jobSetupCleanupNeeded = " +
jobSetupCleanupNeeded + ", taskCleanupNeeded = " + taskCleanupNeeded);
this.mapFailuresPercent = conf.getMaxMapTaskFailuresPercent();
this.reduceFailuresPercent = conf.getMaxReduceTaskFailuresPercent();
this.maxTaskFailuresPerTracker = conf.getMaxTaskFailuresPerTracker();
MetricsContext metricsContext = MetricsUtil.getContext("mapred");
this.jobMetrics = MetricsUtil.createRecord(metricsContext, "job");
this.jobMetrics.setTag("user", conf.getUser());
this.jobMetrics.setTag("sessionId", conf.getSessionId());
this.jobMetrics.setTag("jobName", conf.getJobName());
this.jobMetrics.setTag("jobId", jobid.toString());
hasSpeculativeMaps = conf.getMapSpeculativeExecution();
hasSpeculativeReduces = conf.getReduceSpeculativeExecution();
this.maxLevel = jobtracker.getNumTaskCacheLevels();
this.anyCacheLevel = this.maxLevel+1;
this.nonLocalMaps = new LinkedList<TaskInProgress>();
this.nonLocalRunningMaps = new LinkedHashSet<TaskInProgress>();
this.runningMapCache = new IdentityHashMap<Node, Set<TaskInProgress>>();
this.nonRunningReduces = new LinkedList<TaskInProgress>();
this.runningReduces = new LinkedHashSet<TaskInProgress>();
this.resourceEstimator = new ResourceEstimator(this);
this.slowTaskThreshold = Math.max(0.0f,
conf.getFloat(SPECULATIVE_SLOWTASK_THRESHOLD,1.0f));
this.speculativeCap = conf.getFloat(SPECULATIVECAP,0.1f);
this.slowNodeThreshold = conf.getFloat(SPECULATIVE_SLOWNODE_THRESHOLD,1.0f);
this.refreshTimeout = conf.getLong(JobInProgress.REFRESH_TIMEOUT,
jobtracker.getJobTrackerReconfigurable().
getInitialJobRefreshTimeoutMs());
this.speculativeStddevMeanRatioMax = conf.getFloat(
JobInProgress.SPECULATIVE_STDDEVMEANRATIO_MAX, 0.33f);
this.speculativeMapUnfininshedThreshold = conf.getFloat(
SPECULATIVE_MAP_UNFINISHED_THRESHOLD_KEY,
speculativeMapUnfininshedThreshold);
this.speculativeReduceUnfininshedThreshold = conf.getFloat(
SPECULATIVE_REDUCE_UNFINISHED_THRESHOLD_KEY,
speculativeReduceUnfininshedThreshold);
enableNoFetchEmptyMapOutputs = conf.getBoolean(ENABLE_NO_FETCH_MAP_OUTPUTS, false);
LOG.info(jobId + ": enableNoFetchEmptyMapOutputs = " + enableNoFetchEmptyMapOutputs);
}
public static void copyJobFileLocally(Path jobDir, JobID jobid,
JobConf default_conf) throws IOException {
FileSystem fs = jobDir.getFileSystem(default_conf);
JobConf default_job_conf = new JobConf(default_conf);
Path localJobFile = default_job_conf.getLocalPath(JobTracker.SUBDIR + "/"
+ jobid + ".xml");
Path jobFile = new Path(jobDir, "job.xml");
fs.copyToLocalFile(jobFile, localJobFile);
}
/**
* Called periodically by JobTrackerMetrics to update the metrics for
* this job.
*/
public void updateMetrics() {
Counters counters = getCounters();
for (Counters.Group group : counters) {
jobMetrics.setTag("group", group.getDisplayName());
for (Counters.Counter counter : group) {
jobMetrics.setTag("counter", counter.getDisplayName());
jobMetrics.setMetric("value", (float) counter.getCounter());
jobMetrics.update();
}
}
}
/**
* Called when the job is complete
*/
public void cleanUpMetrics() {
// Deletes all metric data for this job (in internal table in metrics package).
// This frees up RAM and possibly saves network bandwidth, since otherwise
// the metrics package implementation might continue to send these job metrics
// after the job has finished.
jobMetrics.removeTag("group");
jobMetrics.removeTag("counter");
jobMetrics.remove();
}
private void printCache (Map<Node, List<TaskInProgress>> cache) {
LOG.info("The taskcache info:");
for (Map.Entry<Node, List<TaskInProgress>> n : cache.entrySet()) {
List <TaskInProgress> tips = n.getValue();
LOG.info("Cached TIPs on node: " + n.getKey());
for (TaskInProgress tip : tips) {
LOG.info("tip : " + tip.getTIPId());
}
}
}
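/**
* Build the cache of non-running map TIPs keyed by topology node.
* For every split, each of its hosts is resolved to a Node and the TIP is
* added to the lists of that node and of its ancestors up to maxLevel
* (e.g. host and rack), so locality lookups can be done at any cache level.
* Splits with no locations go into nonLocalMaps instead.
*/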
Map<Node, List<TaskInProgress>> createCache(JobClient.RawSplit[] splits,
int maxLevel) {
Map<Node, List<TaskInProgress>> cache =
new IdentityHashMap<Node, List<TaskInProgress>>(maxLevel);
for (int i = 0; i < splits.length; i++) {
String[] splitLocations = splits[i].getLocations();
if (splitLocations.length == 0) {
nonLocalMaps.add(maps[i]);
continue;
}
for(String host: splitLocations) {
Node node = jobtracker.getNode(host);
if (node == null) {
node = jobtracker.resolveAndAddToTopology(host);
}
LOG.debug("tip:" + maps[i].getTIPId() + " has split on node:" + node);
for (int j = 0; j < maxLevel; j++) {
List<TaskInProgress> hostMaps = cache.get(node);
if (hostMaps == null) {
hostMaps = new ArrayList<TaskInProgress>();
cache.put(node, hostMaps);
hostMaps.add(maps[i]);
}
//check whether the hostMaps already contains an entry for a TIP
//This will be true for nodes that are racks and multiple nodes in
//the rack contain the input for a tip. Note that if it already
//exists in the hostMaps, it must be the last element there since
//we process one TIP at a time sequentially in the split-size order
if (hostMaps.get(hostMaps.size() - 1) != maps[i]) {
hostMaps.add(maps[i]);
}
node = node.getParent();
}
}
}
return cache;
}
/**
* Check if the job has been initialized.
* @return <code>true</code> if the job has been initialized,
* <code>false</code> otherwise
*/
@Override
public boolean inited() {
return tasksInited.get();
}
boolean hasRestarted() {
return restartCount > 0;
}
/**
* Get the number of slots required to run a single map task-attempt.
* @return the number of slots required to run a single map task-attempt
*/
int getNumSlotsPerMap() {
return numSlotsPerMap;
}
/**
* Set the number of slots required to run a single map task-attempt.
* This is typically set by schedulers which support high-ram jobs.
* @param numSlotsPerMap the number of slots required to run a single map task-attempt
*/
void setNumSlotsPerMap(int numSlotsPerMap) {
this.numSlotsPerMap = numSlotsPerMap;
}
/**
* Get the number of slots required to run a single reduce task-attempt.
* @return the number of slots required to run a single reduce task-attempt
*/
int getNumSlotsPerReduce() {
return numSlotsPerReduce;
}
/**
* Set the number of slots required to run a single reduce task-attempt.
* This is typically set by schedulers which support high-ram jobs.
* @param numSlotsPerReduce the number of slots required to run a single
* reduce task-attempt
*/
void setNumSlotsPerReduce(int numSlotsPerReduce) {
this.numSlotsPerReduce = numSlotsPerReduce;
}
/**
* Construct the splits, etc. This is invoked from an async
* thread so that split-computation doesn't block anyone.
*/
public synchronized void initTasks()
throws IOException, KillInterruptedException {
if (tasksInited.get() || isComplete()) {
return;
}
synchronized(jobInitKillStatus){
if(jobInitKillStatus.killed || jobInitKillStatus.initStarted) {
return;
}
jobInitKillStatus.initStarted = true;
}
LOG.info("Initializing " + jobId);
// log job info
JobHistory.JobInfo.logSubmitted(getJobID(), conf, jobFile.toString(),
this.startTime, hasRestarted());
// log the job priority
setPriority(this.priority);
//
// read input splits and create a map task per split
//
String jobFile = profile.getJobFile();
Path sysDir = new Path(this.jobtracker.getSystemDir());
FileSystem fs = sysDir.getFileSystem(conf);
DataInputStream splitFile =
fs.open(new Path(conf.get("mapred.job.split.file")));
JobClient.RawSplit[] splits;
try {
splits = JobClient.readSplitFile(splitFile);
} finally {
splitFile.close();
}
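// The number of map tasks is taken from the number of input splits,
// overriding whatever value was set in the job configuration.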
numMapTasks = splits.length;
// if the total number of tasks is larger than the configured limit
// then fail the job.
int maxTasks = jobtracker.getMaxTasksPerJob();
if (maxTasks > 0 && numMapTasks + numReduceTasks > maxTasks) {
throw new IOException(
"The number of tasks for this job " +
(numMapTasks + numReduceTasks) +
" exceeds the configured limit " + maxTasks);
}
jobtracker.getInstrumentation().addWaitingMaps(getJobID(), numMapTasks);
jobtracker.getInstrumentation().addWaitingReduces(getJobID(), numReduceTasks);
maps = new TaskInProgress[numMapTasks];
for(int i=0; i < numMapTasks; ++i) {
inputLength += splits[i].getDataLength();
maps[i] = new TaskInProgress(jobId, jobFile,
splits[i],
conf, this, i, numSlotsPerMap);
}
LOG.info("Input size for job " + jobId + " = " + inputLength
+ ". Number of splits = " + splits.length);
if (numMapTasks > 0) {
nonRunningMapCache = createCache(splits, maxLevel);
}
// set the launch time
this.launchTime = JobTracker.getClock().getTime();
jobtracker.getInstrumentation().addLaunchedJobs(
this.launchTime - this.startTime);
//
// Create reduce tasks
//
this.reduces = new TaskInProgress[numReduceTasks];
for (int i = 0; i < numReduceTasks; i++) {
reduces[i] = new TaskInProgress(jobId, jobFile,
numMapTasks, i,
conf, this, numSlotsPerReduce);
nonRunningReduces.add(reduces[i]);
}
// Calculate the minimum number of maps to be complete before
// we should start scheduling reduces
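// (e.g. with the default fraction of 0.05 and 200 map tasks,
// ceil(0.05 * 200) = 10 maps must finish before reduces are considered,
// unless the rush-reduce thresholds below apply)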
completedMapsForReduceSlowstart =
(int)Math.ceil(
(conf.getFloat("mapred.reduce.slowstart.completed.maps",
DEFAULT_COMPLETED_MAPS_PERCENT_FOR_REDUCE_SLOWSTART) *
numMapTasks));
// The thresholds of total maps and reduces for scheduling reducers
// immediately.
rushReduceMaps =
conf.getInt(RUSH_REDUCER_MAP_THRESHOLD, rushReduceMaps);
rushReduceReduces =
conf.getInt(RUSH_REDUCER_REDUCE_THRESHOLD, rushReduceReduces);
maxFetchFailuresPerMapper = conf.getInt(MAX_FETCH_FAILURES_PER_MAP_KEY,
MAX_FETCH_FAILURES_PER_MAP_DEFAULT);
initSetupCleanupTasks(jobFile);
synchronized(jobInitKillStatus){
jobInitKillStatus.initDone = true;
if(jobInitKillStatus.killed) {
throw new KillInterruptedException("Job " + jobId + " killed in init");
}
}
tasksInited.set(true);
JobHistory.JobInfo.logInited(profile.getJobID(), this.launchTime,
numMapTasks, numReduceTasks);
// Log the number of map and reduce tasks
LOG.info("Job " + jobId + " initialized successfully with " + numMapTasks
+ " map tasks and " + numReduceTasks + " reduce tasks.");
refreshIfNecessary();
}
// Returns true if the job is empty (0 maps, 0 reduces and no setup-cleanup),
// false otherwise.
synchronized boolean isJobEmpty() {
return maps.length == 0 && reduces.length == 0 && !jobSetupCleanupNeeded;
}
// Should be called once the init is done. This will complete the job
// because the job is empty (0 maps, 0 reduces and no setup-cleanup).
synchronized void completeEmptyJob() {
jobComplete();
}
synchronized void completeSetup() {
setupComplete();
}
private void initSetupCleanupTasks(String jobFile) {
if (!jobSetupCleanupNeeded) {
LOG.info("Setup/Cleanup not needed for job" + jobId);
// nothing to initialize
return;
}
// create two cleanup tips, one map and one reduce.
cleanup = new TaskInProgress[2];
// cleanup map tip. This map doesn't use any splits. Just assign an empty
// split.
JobClient.RawSplit emptySplit = new JobClient.RawSplit();
cleanup[0] = new TaskInProgress(jobId, jobFile, emptySplit,
conf, this, numMapTasks, 1);
cleanup[0].setJobCleanupTask();
// cleanup reduce tip.
cleanup[1] = new TaskInProgress(jobId, jobFile, numMapTasks,
numReduceTasks, conf, this, 1);
cleanup[1].setJobCleanupTask();
// create two setup tips, one map and one reduce.
setup = new TaskInProgress[2];
// setup map tip. This map doesn't use any split. Just assign an empty
// split.
setup[0] = new TaskInProgress(jobId, jobFile, emptySplit,
conf, this, numMapTasks + 1, 1);
setup[0].setJobSetupTask();
// setup reduce tip.
setup[1] = new TaskInProgress(jobId, jobFile, numMapTasks,
numReduceTasks + 1, conf, this, 1);
setup[1].setJobSetupTask();
}
synchronized boolean isSetupCleanupRequired() {
return jobSetupCleanupNeeded;
}
void setupComplete() {
status.setSetupProgress(1.0f);
if (this.status.getRunState() == JobStatus.PREP) {
changeStateTo(JobStatus.RUNNING);
JobHistory.JobInfo.logStarted(profile.getJobID());
}
}
/////////////////////////////////////////////////////
// Accessors for the JobInProgress
/////////////////////////////////////////////////////
public String getConf(String key) {
return this.conf.get(key);
}
public JobProfile getProfile() {
return profile;
}
@Override
public JobStatus getStatus() {
return status;
}
public synchronized long getLaunchTime() {
return launchTime;
}
public long getStartTime() {
return startTime;
}
public long getFinishTime() {
return finishTime;
}
public int desiredMaps() {
return numMapTasks;
}
public int desiredReduces() {
return numReduceTasks;
}
public boolean getMapSpeculativeExecution() {
return hasSpeculativeMaps;
}
public boolean getReduceSpeculativeExecution() {
return hasSpeculativeReduces;
}
long getMemoryForMapTask() {
return memoryPerMap;
}
long getMemoryForReduceTask() {
return memoryPerReduce;
}
public synchronized int finishedMaps() {
return finishedMapTasks;
}
public synchronized int finishedReduces() {
return finishedReduceTasks;
}
public synchronized int runningMaps() {
return runningMapTasks;
}
public synchronized int runningReduces() {
return runningReduceTasks;
}
public synchronized int pendingMaps() {
return pendingMapTasks;
}
public synchronized int pendingReduces() {
return pendingReduceTasks;
}
public synchronized int neededMaps() {
return neededMapTasks;
}
public synchronized int neededReduces() {
return neededReduceTasks;
}
// These are used in the UI only, so the variable
// is volatile and not synchronized
public long getTotalMapWaitTime() {
return totalMapWaitTime;
}
// These are used in the UI only, so the variable
// is volatile and not synchronized
public long getTotalReduceWaitTime() {
return totalReduceWaitTime;
}
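// Wait time before the first map started; if no map has started yet,
// it is measured from the job start time up to the current time.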
public long getFirstMapWaitTime() {
long startTime = getStartTime();
if (firstMapStartTime == 0) {
return JobTracker.getClock().getTime() - startTime;
} else {
return firstMapStartTime - startTime;
}
}
public long getFirstReduceWaitTime() {
long startTime = getStartTime();
if (firstReduceStartTime == 0) {
return JobTracker.getClock().getTime() - startTime;
} else {
return firstReduceStartTime - startTime;
}
}
public int getNumSlotsPerTask(TaskType taskType) {
if (taskType == TaskType.MAP) {
return numSlotsPerMap;
} else if (taskType == TaskType.REDUCE) {
return numSlotsPerReduce;
} else {
return 1;
}
}
public JobPriority getPriority() {
return this.priority;
}
public void setPriority(JobPriority priority) {
if(priority == null) {
priority = JobPriority.NORMAL;
}
synchronized (this) {
this.priority = priority;
status.setJobPriority(priority);
}
// log the change to the job's priority
JobHistory.JobInfo.logJobPriority(jobId, priority);
}
// Update the job start/launch time (upon restart) and log to history
synchronized void updateJobInfo(long startTime, long launchTime) {
// log the change to the job's start/launch time
this.startTime = startTime;
this.launchTime = launchTime;
JobHistory.JobInfo.logJobInfo(jobId, startTime, launchTime);
}
/**
* Get the number of times the job has restarted
*/
@Override
public int getNumRestarts() {
return restartCount;
}
long getInputLength() {
return inputLength;
}
boolean isCleanupLaunched() {
return launchedCleanup;
}
boolean isSetupLaunched() {
return launchedSetup;
}
/**
* Get all the tasks of the desired type in this job.
* @param type {@link TaskType} of the tasks required
* @return An array of {@link TaskInProgress} matching the given type.
* Returns an empty array if no tasks are found for the given type.
*/
TaskInProgress[] getTasks(TaskType type) {
TaskInProgress[] tasks = null;
switch (type) {
case MAP:
{
tasks = maps;
}
break;
case REDUCE:
{
tasks = reduces;
}
break;
case JOB_SETUP:
{
tasks = setup;
}
break;
case JOB_CLEANUP:
{
tasks = cleanup;
}
break;
default:
{
tasks = new TaskInProgress[0];
}
break;
}
return tasks;
}
/**
* Return the nonLocalRunningMaps
* @return the set of non-local running map TIPs
*/
Set<TaskInProgress> getNonLocalRunningMaps()
{
return nonLocalRunningMaps;
}
/**
* Return the runningMapCache
* @return the map from topology node to the set of running map TIPs
*/
Map<Node, Set<TaskInProgress>> getRunningMapCache()
{
return runningMapCache;
}
/**
* Return runningReduces
* @return the set of running reduce TIPs
*/
Set<TaskInProgress> getRunningReduces()
{
return runningReduces;
}
/**
* Get the job configuration
* @return the job's configuration
*/
JobConf getJobConf() {
return conf;
}
/**
* Get the job user/owner
* @return the job's user/owner
*/
@Override
public String getUser() {
return user;
}
@Override
public synchronized Vector<TaskInProgress> reportTasksInProgress(boolean shouldBeMap,
boolean shouldBeComplete) {
return super.reportTasksInProgress(shouldBeMap, shouldBeComplete);
}
@Override
public synchronized Vector<TaskInProgress> reportCleanupTIPs(
boolean shouldBeComplete) {
return super.reportCleanupTIPs(shouldBeComplete);
}
@Override
public synchronized Vector<TaskInProgress> reportSetupTIPs(
boolean shouldBeComplete) {
return super.reportSetupTIPs(shouldBeComplete);
}
////////////////////////////////////////////////////
// Status update methods
////////////////////////////////////////////////////
/**
* Assuming {@link JobTracker} is locked on entry.
*/
public synchronized void updateTaskStatus(TaskInProgress tip,
TaskStatus status) {
double oldProgress = tip.getProgress(); // save old progress
boolean wasRunning = tip.isRunning();
boolean wasComplete = tip.isComplete();
boolean wasPending = tip.isOnlyCommitPending();
TaskAttemptID taskid = status.getTaskID();
boolean wasAttemptRunning = tip.isAttemptRunning(taskid);
// If the TIP is already completed and the task reports SUCCEEDED, then
// mark the task as KILLED.
// (For tasks that need no commit/promotion, the tasktracker marks them
// SUCCEEDED on its own.)
// Likewise, if the user has requested to kill the task but the TT still
// reported SUCCEEDED, mark the task KILLED.
if ((wasComplete || tip.wasKilled(taskid)) &&
(status.getRunState() == TaskStatus.State.SUCCEEDED)) {
status.setRunState(TaskStatus.State.KILLED);
}
// When a task has just reported its state as FAILED_UNCLEAN/KILLED_UNCLEAN,
// if the job is complete or cleanup task is switched off,
// make the task's state FAILED/KILLED without launching cleanup attempt.
// Note that if task is already a cleanup attempt,
// we don't change the state to make sure the task gets a killTaskAction
if ((this.isComplete() || jobFailed || jobKilled || !taskCleanupNeeded) &&
!tip.isCleanupAttempt(taskid)) {
if (status.getRunState() == TaskStatus.State.FAILED_UNCLEAN) {
status.setRunState(TaskStatus.State.FAILED);
} else if (status.getRunState() == TaskStatus.State.KILLED_UNCLEAN) {
status.setRunState(TaskStatus.State.KILLED);
}
}
boolean change = tip.updateStatus(status);
if (change) {
TaskStatus.State state = status.getRunState();
// get the TaskTrackerStatus where the task ran
TaskTracker taskTracker =
this.jobtracker.getTaskTracker(tip.machineWhereTaskRan(taskid));
TaskTrackerStatus ttStatus =
(taskTracker == null) ? null : taskTracker.getStatus();
String httpTaskLogLocation = null;
if (null != ttStatus){
String host;
if (NetUtils.getStaticResolution(ttStatus.getHost()) != null) {
host = NetUtils.getStaticResolution(ttStatus.getHost());
} else {
host = ttStatus.getHost();
}
httpTaskLogLocation = "http://" + host + ":" + ttStatus.getHttpPort();
//+ "/tasklog?plaintext=true&taskid=" + status.getTaskID();
}
TaskCompletionEvent taskEvent = null;
if (state == TaskStatus.State.SUCCEEDED) {
TaskCompletionEvent.Status taskCompletionStatus = TaskCompletionEvent.Status.SUCCEEDED;
// Check whether this is a genuine map task (not a setup/cleanup task).
boolean isMapTask = status.getIsMap() && !tip.isJobCleanupTask() && !tip.isJobSetupTask();
if (enableNoFetchEmptyMapOutputs && isMapTask) {
long outBytes = status.getCounters().getCounter(Task.Counter.MAP_OUTPUT_BYTES);
if (outBytes == 0) {
taskCompletionStatus = TaskCompletionEvent.Status.SUCCEEDED_NO_OUTPUT;
}
}
taskEvent = new TaskCompletionEvent(
taskCompletionEventTracker,
taskid,
tip.idWithinJob(),
isMapTask,
taskCompletionStatus,
httpTaskLogLocation
);
taskEvent.setTaskRunTime((int)(status.getFinishTime()
- status.getStartTime()));
tip.setSuccessEventNumber(taskCompletionEventTracker);
} else if (state == TaskStatus.State.COMMIT_PENDING) {
// If it is the first attempt reporting COMMIT_PENDING
// ask the task to commit.
if (!wasComplete && !wasPending) {
tip.doCommit(taskid);
}
return;
} else if (state == TaskStatus.State.FAILED_UNCLEAN ||
state == TaskStatus.State.KILLED_UNCLEAN) {
tip.incompleteSubTask(taskid, this.status);
// add this task, to be rescheduled as cleanup attempt
if (tip.isMapTask()) {
mapCleanupTasks.add(taskid);
} else {
reduceCleanupTasks.add(taskid);
}
// Remove the task entry from jobtracker
jobtracker.removeTaskEntry(taskid);
}
// For a failed/killed task, update the JT data structures.
else if (state == TaskStatus.State.FAILED ||
state == TaskStatus.State.KILLED) {
// Get the event number for the (possibly) previously successful
// task. If there exists one, then set that status to OBSOLETE
int eventNumber;
if ((eventNumber = tip.getSuccessEventNumber()) != -1) {
TaskCompletionEvent t =
this.taskCompletionEvents.get(eventNumber);
if (t.getTaskAttemptId().equals(taskid))
t.setTaskStatus(TaskCompletionEvent.Status.OBSOLETE);
}
// Tell the job to fail the relevant task
failedTask(tip, taskid, status, taskTracker,
wasRunning, wasComplete, wasAttemptRunning);
// Did the task failure lead to tip failure?
TaskCompletionEvent.Status taskCompletionStatus =
(state == TaskStatus.State.FAILED ) ?
TaskCompletionEvent.Status.FAILED :
TaskCompletionEvent.Status.KILLED;
if (tip.isFailed()) {
taskCompletionStatus = TaskCompletionEvent.Status.TIPFAILED;
}
taskEvent = new TaskCompletionEvent(taskCompletionEventTracker,
taskid,
tip.idWithinJob(),
status.getIsMap() &&
!tip.isJobCleanupTask() &&
!tip.isJobSetupTask(),
taskCompletionStatus,
httpTaskLogLocation
);
}
// Add the 'complete' task i.e. successful/failed
// It _is_ safe to add the TaskCompletionEvent.Status.SUCCEEDED
// *before* calling TIP.completedTask since:
// a. One and only one task of a TIP is declared as a SUCCESS, the
// other (speculative tasks) are marked KILLED by the TaskCommitThread
// b. TIP.completedTask *does not* throw _any_ exception at all.
if (taskEvent != null) {
this.taskCompletionEvents.add(taskEvent);
taskCompletionEventTracker++;
JobTrackerStatistics.TaskTrackerStat ttStat = jobtracker.
getStatistics().getTaskTrackerStat(tip.machineWhereTaskRan(taskid));
if(ttStat != null) { // ttStat can be null in case of lost tracker
ttStat.incrTotalTasks();
}
if (state == TaskStatus.State.SUCCEEDED) {
completedTask(tip, status);
if(ttStat != null) {
ttStat.incrSucceededTasks();
}
}
countersLog.info(status.getTaskID() + " completion counters "
+ status.getCounters().makeJsonString());
}
}
//
// Update JobInProgress status
//
if(LOG.isDebugEnabled()) {
LOG.debug("Taking progress for " + tip.getTIPId() + " from " +
oldProgress + " to " + tip.getProgress());
}
if (!tip.isJobCleanupTask() && !tip.isJobSetupTask()) {
double progressDelta = tip.getProgress() - oldProgress;
if (tip.isMapTask()) {
this.status.setMapProgress((float) (this.status.mapProgress() +
progressDelta / maps.length));
} else {
this.status.setReduceProgress((float) (this.status.reduceProgress() +
(progressDelta / reduces.length)));
}
}
}
String getHistoryFile() {
return historyFile;
}
synchronized void setHistoryFile(String file) {
this.historyFile = file;
}
boolean isHistoryFileCopied() {
return historyFileCopied;
}
synchronized void setHistoryFileCopied() {
this.historyFileCopied = true;
}
/**
* Returns the job-level counters.
*
* @return the job-level counters.
*/
public synchronized Counters getJobCounters() {
return jobCounters;
}
/**
* Returns map phase counters by summing over all map tasks in progress.
*/
public synchronized Counters getMapCounters() {
return incrementTaskCounters(new Counters(), maps);
}
/**
* Returns reduce phase counters by summing over all reduce tasks in progress.
*/
public synchronized Counters getReduceCounters() {
return incrementTaskCounters(new Counters(), reduces);
}
/**
* Returns the total job counters, by adding together the job,
* the map and the reduce counters.
*/
public Counters getCounters() {
Counters result = new Counters();
synchronized (this) {
result.incrAllCounters(getJobCounters());
}
incrementTaskCounters(result, maps);
return incrementTaskCounters(result, reduces);
}
/**
* Increments the counters with the counters from each task.
* @param counters the counters to increment
* @param tips the tasks to add in to counters
* @return counters the same object passed in as counters
*/
private Counters incrementTaskCounters(Counters counters,
TaskInProgress[] tips) {
for (TaskInProgress tip : tips) {
counters.incrAllCounters(tip.getCounters());
}
return counters;
}
/////////////////////////////////////////////////////
// Create/manage tasks
/////////////////////////////////////////////////////
/**
* Return a MapTask, if appropriate, to run on the given tasktracker
*/
public synchronized Task obtainNewMapTask(TaskTrackerStatus tts,
int clusterSize,
int numUniqueHosts
) throws IOException {
return obtainNewMapTask(tts, clusterSize, numUniqueHosts, anyCacheLevel);
}
/**
* Return a MapTask, if appropriate, to run on the given tasktracker
*/
public synchronized Task obtainNewMapTask(TaskTrackerStatus tts,
int clusterSize,
int numUniqueHosts,
int maxCacheLevel
) throws IOException {
if (status.getRunState() != JobStatus.RUNNING) {
LOG.info("Cannot create task split for " + profile.getJobID());
return null;
}
int target = findNewMapTask(tts, clusterSize, numUniqueHosts,
maxCacheLevel);
if (target == -1) {
return null;
}
Task result = maps[target].getTaskToRun(tts.getTrackerName());
if (result != null) {
addRunningTaskToTIP(maps[target], result.getTaskID(), tts, true);
}
return result;
}
public synchronized int neededSpeculativeMaps() {
if (!hasSpeculativeMaps)
return 0;
return (candidateSpeculativeMaps != null) ?
candidateSpeculativeMaps.size() : 0;
}
public synchronized int neededSpeculativeReduces() {
if (!hasSpeculativeReduces)
return 0;
return (candidateSpeculativeReduces != null) ?
candidateSpeculativeReduces.size() : 0;
}
/*
* Return task cleanup attempt if any, to run on a given tracker
*/
public Task obtainTaskCleanupTask(TaskTrackerStatus tts,
boolean isMapSlot)
throws IOException {
if (!tasksInited.get()) {
return null;
}
if (this.status.getRunState() != JobStatus.RUNNING ||
jobFailed || jobKilled) {
return null;
}
if (isMapSlot) {
if (mapCleanupTasks.isEmpty())
return null;
} else {
if (reduceCleanupTasks.isEmpty())
return null;
}
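// The unsynchronized checks above are only a fast path; they are
// repeated under the lock below before a cleanup attempt is handed out.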
synchronized (this) {
if (this.status.getRunState() != JobStatus.RUNNING ||
jobFailed || jobKilled) {
return null;
}
String taskTracker = tts.getTrackerName();
if (!shouldRunOnTaskTracker(taskTracker)) {
return null;
}
TaskAttemptID taskid = null;
TaskInProgress tip = null;
if (isMapSlot) {
if (!mapCleanupTasks.isEmpty()) {
taskid = mapCleanupTasks.remove(0);
tip = maps[taskid.getTaskID().getId()];
}
} else {
if (!reduceCleanupTasks.isEmpty()) {
taskid = reduceCleanupTasks.remove(0);
tip = reduces[taskid.getTaskID().getId()];
}
}
if (tip != null) {
return tip.addRunningTask(taskid, taskTracker, true);
}
return null;
}
}
public synchronized Task obtainNewLocalMapTask(TaskTrackerStatus tts,
int clusterSize,
int numUniqueHosts)
throws IOException {
if (!tasksInited.get()) {
LOG.info("Cannot create task split for " + profile.getJobID());
return null;
}
int target = findNewMapTask(tts, clusterSize, numUniqueHosts, maxLevel);
if (target == -1) {
return null;
}
Task result = maps[target].getTaskToRun(tts.getTrackerName());
if (result != null) {
addRunningTaskToTIP(maps[target], result.getTaskID(), tts, true);
}
return result;
}
public synchronized Task obtainNewNonLocalMapTask(TaskTrackerStatus tts,
int clusterSize,
int numUniqueHosts)
throws IOException {
if (!tasksInited.get()) {
LOG.info("Cannot create task split for " + profile.getJobID());
return null;
}
int target = findNewMapTask(tts, clusterSize, numUniqueHosts,
NON_LOCAL_CACHE_LEVEL);
if (target == -1) {
return null;
}
Task result = maps[target].getTaskToRun(tts.getTrackerName());
if (result != null) {
addRunningTaskToTIP(maps[target], result.getTaskID(), tts, true);
}
return result;
}
/**
* Return a CleanupTask, if appropriate, to run on the given tasktracker
*
*/
public Task obtainJobCleanupTask(TaskTrackerStatus tts,
int clusterSize,
int numUniqueHosts,
boolean isMapSlot
) throws IOException {
if(!tasksInited.get() || !jobSetupCleanupNeeded) {
return null;
}
synchronized(this) {
if (!canLaunchJobCleanupTask()) {
return null;
}
String taskTracker = tts.getTrackerName();
// Update the last-known clusterSize
this.clusterSize = clusterSize;
if (!shouldRunOnTaskTracker(taskTracker)) {
return null;
}
List<TaskInProgress> cleanupTaskList = new ArrayList<TaskInProgress>();
if (isMapSlot) {
cleanupTaskList.add(cleanup[0]);
} else {
cleanupTaskList.add(cleanup[1]);
}
TaskInProgress tip = findTaskFromList(cleanupTaskList,
tts, numUniqueHosts, false);
if (tip == null) {
return null;
}
// Now launch the cleanupTask
Task result = tip.getTaskToRun(tts.getTrackerName());
if (result != null) {
addRunningTaskToTIP(tip, result.getTaskID(), tts, true);
if (jobFailed) {
result.setJobCleanupTaskState
(org.apache.hadoop.mapreduce.JobStatus.State.FAILED);
} else if (jobKilled) {
result.setJobCleanupTaskState
(org.apache.hadoop.mapreduce.JobStatus.State.KILLED);
} else {
result.setJobCleanupTaskState
(org.apache.hadoop.mapreduce.JobStatus.State.SUCCEEDED);
}
}
return result;
}
}
/**
* Check whether cleanup task can be launched for the job.
*
* The cleanup task can be launched if setup has finished, it has not
* already been launched, and either the job has been killed/failed
* or all maps and reduces are complete.
* @return true/false
*/
private synchronized boolean canLaunchJobCleanupTask() {
// check if the job is running
if (status.getRunState() != JobStatus.RUNNING &&
status.getRunState() != JobStatus.PREP) {
return false;
}
// check if the cleanup task has already been launched, or if setup hasn't
// finished yet. The latter check is useful when the number of maps is
// zero.
if (launchedCleanup || !isSetupFinished()) {
return false;
}
// check if job has failed or killed
if (jobKilled || jobFailed) {
return true;
}
boolean mapsDone = ((finishedMapTasks + failedMapTIPs) == (numMapTasks));
boolean reducesDone = ((finishedReduceTasks + failedReduceTIPs) == numReduceTasks);
boolean mapOnlyJob = (numReduceTasks == 0);
if (mapOnlyJob) {
return mapsDone;
}
if (jobFinishWhenReducesDone) {
return reducesDone;
}
return mapsDone && reducesDone;
}
/**
* Return a SetupTask, if appropriate, to run on the given tasktracker
*
*/
public Task obtainJobSetupTask(TaskTrackerStatus tts,
int clusterSize,
int numUniqueHosts,
boolean isMapSlot
) throws IOException {
if(!tasksInited.get() || !jobSetupCleanupNeeded) {
return null;
}
synchronized(this) {
if (!canLaunchSetupTask()) {
return null;
}
String taskTracker = tts.getTrackerName();
// Update the last-known clusterSize
this.clusterSize = clusterSize;
if (!shouldRunOnTaskTracker(taskTracker)) {
return null;
}
List<TaskInProgress> setupTaskList = new ArrayList<TaskInProgress>();
if (isMapSlot) {
setupTaskList.add(setup[0]);
} else {
setupTaskList.add(setup[1]);
}
TaskInProgress tip = findTaskFromList(setupTaskList,
tts, numUniqueHosts, false);
if (tip == null) {
return null;
}
// Now launch the setupTask
Task result = tip.getTaskToRun(tts.getTrackerName());
if (result != null) {
addRunningTaskToTIP(tip, result.getTaskID(), tts, true);
}
return result;
}
}
/**
* Can we start scheduling reducers?
* @return true/false
*/
public synchronized boolean scheduleReduces() {
// Start scheduling reducers if we have enough maps finished or
// if the job has very few mappers or reducers.
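// e.g. with the default slowstart fraction of 0.05 and 1000 maps, reduces
// are considered once 50 maps have finished; jobs with at most
// rushReduceMaps maps or rushReduceReduces reduces skip the wait entirely.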
return numMapTasks <= rushReduceMaps ||
numReduceTasks <= rushReduceReduces ||
finishedMapTasks >= completedMapsForReduceSlowstart;
}
/**
* Check whether setup task can be launched for the job.
*
* The setup task can be launched after the tasks are inited,
* while the job is in the PREP state,
* if it has not already been launched
* and the job has not been killed or failed.
* @return true/false
*/
private synchronized boolean canLaunchSetupTask() {
return (tasksInited.get() && status.getRunState() == JobStatus.PREP &&
!launchedSetup && !jobKilled && !jobFailed);
}
/**
* Return a ReduceTask, if appropriate, to run on the given tasktracker.
* We don't have cache-sensitivity for reduce tasks, as they
* work on temporary MapRed files.
*/
public synchronized Task obtainNewReduceTask(TaskTrackerStatus tts,
int clusterSize,
int numUniqueHosts
) throws IOException {
if (status.getRunState() != JobStatus.RUNNING) {
LOG.info("Cannot create task split for " + profile.getJobID());
return null;
}
// Ensure we have sufficient map outputs ready to shuffle before
// scheduling reduces
if (!scheduleReduces()) {
return null;
}
int target = findNewReduceTask(tts, clusterSize, numUniqueHosts);
if (target == -1) {
return null;
}
Task result = reduces[target].getTaskToRun(tts.getTrackerName());
if (result != null) {
addRunningTaskToTIP(reduces[target], result.getTaskID(), tts, true);
}
return result;
}
// Returns the (cache) level at which the two nodes match.
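// e.g. in a typical host/rack topology two nodes on the same host match at
// level 0 and two hosts on the same rack match at level 1; if no common
// ancestor is found while walking up, maxLevel is returned.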
private int getMatchingLevelForNodes(Node n1, Node n2) {
int count = 0;
do {
if (n1.equals(n2)) {
return count;
}
++count;
n1 = n1.getParent();
n2 = n2.getParent();
} while (n1 != null && n2 != null);
return this.maxLevel;
}
/**
* Populate the data structures as a task is scheduled.
*
* Assuming {@link JobTracker} is locked on entry.
*
* @param tip The tip for which the task is added
* @param id The attempt-id for the task
* @param tts task-tracker status
* @param isScheduled Whether this task is scheduled from the JT or has
* joined back upon restart
*/
synchronized void addRunningTaskToTIP(TaskInProgress tip, TaskAttemptID id,
TaskTrackerStatus tts,
boolean isScheduled) {
// Make an entry in the tip if the attempt was not scheduled by the JT,
// i.e. it was added externally (e.g. joined back upon restart)
if (!isScheduled) {
tip.addRunningTask(id, tts.getTrackerName());
}
final JobTrackerInstrumentation metrics = jobtracker.getInstrumentation();
// keeping the earlier ordering intact
String name;
String splits = "";
Enum counter = null;
if (tip.isJobSetupTask()) {
launchedSetup = true;
name = Values.SETUP.name();
} else if (tip.isJobCleanupTask()) {
launchedCleanup = true;
name = Values.CLEANUP.name();
} else if (tip.isMapTask()) {
if (firstMapStartTime == 0) {
firstMapStartTime = JobTracker.getClock().getTime();
}
name = Values.MAP.name();
counter = Counter.TOTAL_LAUNCHED_MAPS;
splits = tip.getSplitNodes();
if (tip.getActiveTasks().size() > 1) {
speculativeMapTasks++;
if (!garbageCollected) {
totalSpeculativeMapTasks.incrementAndGet();
}
metrics.speculateMap(id, tip.isUsingProcessingRateForSpeculation());
}
metrics.launchMap(id);
} else {
if (firstReduceStartTime == 0) {
firstReduceStartTime = JobTracker.getClock().getTime();
}
name = Values.REDUCE.name();
counter = Counter.TOTAL_LAUNCHED_REDUCES;
if (tip.getActiveTasks().size() > 1) {
speculativeReduceTasks++;
if (!garbageCollected) {
totalSpeculativeReduceTasks.incrementAndGet();
}
metrics.speculateReduce(id, tip.isUsingProcessingRateForSpeculation());
}
metrics.launchReduce(id);
}
// Note that the logs are for the scheduled tasks only. Tasks that join on
// restart already have their logs in place.
if (tip.isFirstAttempt(id)) {
JobHistory.Task.logStarted(tip.getTIPId(), name,
tip.getExecStartTime(), splits);
}
if (!tip.isJobSetupTask() && !tip.isJobCleanupTask()) {
jobCounters.incrCounter(counter, 1);
}
//TODO The only problem with these counters would be on restart.
// The jobtracker updates the counter only when the task being scheduled
// is from a non-running tip and is local (data, rack ...). But upon restart
// as the reports come from the task tracker, there is no good way to infer
// when exactly to increment the locality counters. The only solution is to
// increment the counters for all the tasks irrespective of
// - whether the tip is running or not
// - whether its a speculative task or not
//
// So to simplify, increment the data locality counter whenever there is
// data locality.
if (tip.isMapTask() && !tip.isJobSetupTask() && !tip.isJobCleanupTask()) {
// increment the data locality counter for maps
Node tracker = jobtracker.getNode(tts.getHost());
int level = this.maxLevel;
// find the right level across split locations
for (String local : maps[tip.getIdWithinJob()].getSplitLocations()) {
Node datanode = jobtracker.getNode(local);
int newLevel = this.maxLevel;
if (tracker != null && datanode != null) {
newLevel = getMatchingLevelForNodes(tracker, datanode);
}
if (newLevel < level) {
level = newLevel;
// an optimization
if (level == 0) {
break;
}
}
}
switch (level) {
case 0 :
LOG.info("Choosing data-local task " + tip.getTIPId());
jobCounters.incrCounter(Counter.DATA_LOCAL_MAPS, 1);
metrics.launchDataLocalMap(id);
break;
case 1:
LOG.info("Choosing rack-local task " + tip.getTIPId());
jobCounters.incrCounter(Counter.RACK_LOCAL_MAPS, 1);
metrics.launchRackLocalMap(id);
break;
default :
// check if there is any locality
if (level != this.maxLevel) {
LOG.info("Choosing cached task at level " + level + tip.getTIPId());
jobCounters.incrCounter(Counter.OTHER_LOCAL_MAPS, 1);
}
break;
}
}
}
/**
* Record that a task has failed on a given tracker, and add the tracker
* to the blacklist iff not too many trackers in the cluster, i.e. fewer than
* (clusterSize * CLUSTER_BLACKLIST_PERCENT), have turned 'flaky' already.
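* For example, with 100 trackers and CLUSTER_BLACKLIST_PERCENT of 0.25,
* failures keep being recorded only while fewer than 25 trackers have
* turned 'flaky' for this job.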
*
* @param trackerName name of the task-tracker on which a task failed
* @param taskTracker the task-tracker on which a task failed
* @param lastFailureReason diagnostic reason for the latest failure
*/
synchronized void addTrackerTaskFailure(String trackerName,
TaskTracker taskTracker,
String lastFailureReason) {
if (flakyTaskTrackers < (clusterSize * CLUSTER_BLACKLIST_PERCENT)) {
String trackerHostName = convertTrackerNameToHostName(trackerName);
List<String> trackerFailures = trackerToFailuresMap.get(trackerHostName);
if (trackerFailures == null) {
trackerFailures = new LinkedList<String>();
trackerToFailuresMap.put(trackerHostName, trackerFailures);
}
trackerFailures.add(lastFailureReason);
// Check if this tasktracker has turned 'flaky'
if (trackerFailures.size() == maxTaskFailuresPerTracker) {
++flakyTaskTrackers;
// Cancel reservations if appropriate
if (taskTracker != null) {
if (trackersReservedForMaps.containsKey(taskTracker)) {
taskTracker.unreserveSlots(TaskType.MAP, this);
}
if (trackersReservedForReduces.containsKey(taskTracker)) {
taskTracker.unreserveSlots(TaskType.REDUCE, this);
}
}
LOG.info("TaskTracker at '" + trackerHostName + "' turned 'flaky'");
}
}
}
public synchronized void reserveTaskTracker(TaskTracker taskTracker,
TaskType type, int numSlots) {
Map<TaskTracker, FallowSlotInfo> map =
(type == TaskType.MAP) ? trackersReservedForMaps : trackersReservedForReduces;
long now = JobTracker.getClock().getTime();
FallowSlotInfo info = map.get(taskTracker);
int reservedSlots = 0;
if (info == null) {
info = new FallowSlotInfo(now, numSlots);
reservedSlots = numSlots;
} else {
// Increment metering info if the reservation is changing
if (info.getNumSlots() != numSlots) {
Enum<Counter> counter =
(type == TaskType.MAP) ?
Counter.FALLOW_SLOTS_MILLIS_MAPS :
Counter.FALLOW_SLOTS_MILLIS_REDUCES;
long fallowSlotMillis = (now - info.getTimestamp()) * info.getNumSlots();
jobCounters.incrCounter(counter, fallowSlotMillis);
// Update
reservedSlots = numSlots - info.getNumSlots();
info.setTimestamp(now);
info.setNumSlots(numSlots);
}
}
map.put(taskTracker, info);
if (type == TaskType.MAP) {
jobtracker.getInstrumentation().addReservedMapSlots(reservedSlots);
}
else {
jobtracker.getInstrumentation().addReservedReduceSlots(reservedSlots);
}
jobtracker.incrementReservations(type, reservedSlots);
}
public synchronized void unreserveTaskTracker(TaskTracker taskTracker,
TaskType type) {
Map<TaskTracker, FallowSlotInfo> map =
(type == TaskType.MAP) ? trackersReservedForMaps :
trackersReservedForReduces;
FallowSlotInfo info = map.get(taskTracker);
if (info == null) {
LOG.warn("Cannot find information about fallow slots for " +
taskTracker.getTrackerName());
return;
}
long now = JobTracker.getClock().getTime();
Enum<Counter> counter =
(type == TaskType.MAP) ?
Counter.FALLOW_SLOTS_MILLIS_MAPS :
Counter.FALLOW_SLOTS_MILLIS_REDUCES;
long fallowSlotMillis = (now - info.getTimestamp()) * info.getNumSlots();
jobCounters.incrCounter(counter, fallowSlotMillis);
map.remove(taskTracker);
if (type == TaskType.MAP) {
jobtracker.getInstrumentation().decReservedMapSlots(info.getNumSlots());
}
else {
jobtracker.getInstrumentation().decReservedReduceSlots(
info.getNumSlots());
}
jobtracker.decrementReservations(type, info.getNumSlots());
}
public int getNumReservedTaskTrackersForMaps() {
return trackersReservedForMaps.size();
}
public int getNumReservedTaskTrackersForReduces() {
return trackersReservedForReduces.size();
}
private int getTrackerTaskFailures(String trackerName) {
String trackerHostName = convertTrackerNameToHostName(trackerName);
List<String> failedTasks = trackerToFailuresMap.get(trackerHostName);
return (failedTasks != null) ? failedTasks.size() : 0;
}
/**
* Get the black listed trackers for the job and corresponding errors.
*
* @return Map of blacklisted tracker names and the errors for each tracker
* that triggered blacklisting
*/
Map<String, List<String>> getBlackListedTrackers() {
Map<String, List<String>> blackListedTrackers
= new HashMap<String, List<String>>();
for (Map.Entry<String,List<String>> e : trackerToFailuresMap.entrySet()) {
if (e.getValue().size() >= maxTaskFailuresPerTracker) {
blackListedTrackers.put(e.getKey(), e.getValue());
}
}
return blackListedTrackers;
}
/**
* Get the no. of 'flaky' tasktrackers for a given job.
*
* @return the no. of 'flaky' tasktrackers for a given job.
*/
int getNoOfBlackListedTrackers() {
return flakyTaskTrackers;
}
/**
* Get the information on tasktrackers and no. of errors which occurred
* on them for a given job.
*
* @return the map of tasktrackers and no. of errors which occurred
* on them for a given job.
*/
synchronized Map<String, List<String>> getTaskTrackerErrors() {
// Clone the 'trackerToFailuresMap' and return the copy
Map<String, List<String>> trackerErrors =
new TreeMap<String, List<String>>(trackerToFailuresMap);
return trackerErrors;
}
/**
* Remove a map TIP from the lists for running maps.
* Called when a map fails/completes (note if a map is killed,
* it won't be present in the list since it was completed earlier)
* @param tip the tip that needs to be retired
*/
private synchronized void retireMap(TaskInProgress tip) {
if (runningMapCache == null) {
LOG.warn("Running cache for maps missing!! "
+ "Job details are missing.");
return;
}
String[] splitLocations = tip.getSplitLocations();
// Remove the TIP from the list for running non-local maps
if (splitLocations.length == 0) {
nonLocalRunningMaps.remove(tip);
return;
}
// Remove from the running map caches
for(String host: splitLocations) {
Node node = jobtracker.getNode(host);
for (int j = 0; j < maxLevel; ++j) {
Set<TaskInProgress> hostMaps = runningMapCache.get(node);
if (hostMaps != null) {
hostMaps.remove(tip);
if (hostMaps.size() == 0) {
runningMapCache.remove(node);
}
}
node = node.getParent();
}
}
}
/**
* Remove a reduce TIP from the list for running-reduces
* Called when a reduce fails/completes
* @param tip the tip that needs to be retired
*/
private synchronized void retireReduce(TaskInProgress tip) {
if (runningReduces == null) {
LOG.warn("Running list for reducers missing!! "
+ "Job details are missing.");
return;
}
runningReduces.remove(tip);
}
/**
* Adds a map tip to the list of running maps.
* @param tip the tip that needs to be scheduled as running
*/
protected synchronized void scheduleMap(TaskInProgress tip) {
runningMapTaskStats.add(0.0f);
runningTaskMapByteProcessingRateStats.add(0.0f);
if (runningMapCache == null) {
LOG.warn("Running cache for maps is missing!! "
+ "Job details are missing.");
return;
}
String[] splitLocations = tip.getSplitLocations();
// Add the TIP to the list of non-local running TIPs
if (splitLocations.length == 0) {
nonLocalRunningMaps.add(tip);
return;
}
for(String host: splitLocations) {
Node node = jobtracker.getNode(host);
for (int j = 0; j < maxLevel; ++j) {
Set<TaskInProgress> hostMaps = runningMapCache.get(node);
if (hostMaps == null) {
// create a cache if needed
hostMaps = new LinkedHashSet<TaskInProgress>();
runningMapCache.put(node, hostMaps);
}
hostMaps.add(tip);
node = node.getParent();
}
}
}
/**
* Adds a reduce tip to the list of running reduces
* @param tip the tip that needs to be scheduled as running
*/
protected synchronized void scheduleReduce(TaskInProgress tip) {
runningReduceTaskStats.add(0.0f);
runningTaskCopyProcessingRateStats.add(0.0f);
runningTaskSortProcessingRateStats.add(0.0f);
runningTaskReduceProcessingRateStats.add(0.0f);
if (runningReduces == null) {
LOG.warn("Running cache for reducers missing!! "
+ "Job details are missing.");
return;
}
runningReduces.add(tip);
}
/**
* Adds the failed TIP in the front of the list for non-running maps
* @param tip the tip that needs to be failed
*/
private synchronized void failMap(TaskInProgress tip) {
if (nonRunningMapCache == null) {
LOG.warn("Non-running cache for maps missing!! "
+ "Job details are missing.");
return;
}
// 1. It's added everywhere since other nodes (having this split local)
// might have removed this tip from their local cache
// 2. Give high priority to failed tip - fail early
String[] splitLocations = tip.getSplitLocations();
// Add the TIP in the front of the list for non-local non-running maps
if (splitLocations.length == 0) {
nonLocalMaps.add(0, tip);
return;
}
for(String host: splitLocations) {
Node node = jobtracker.getNode(host);
for (int j = 0; j < maxLevel; ++j) {
List<TaskInProgress> hostMaps = nonRunningMapCache.get(node);
if (hostMaps == null) {
hostMaps = new LinkedList<TaskInProgress>();
nonRunningMapCache.put(node, hostMaps);
}
hostMaps.add(0, tip);
node = node.getParent();
}
}
}
/**
* Adds a failed TIP in the front of the list for non-running reduces
* @param tip the tip that needs to be failed
*/
private synchronized void failReduce(TaskInProgress tip) {
if (nonRunningReduces == null) {
LOG.warn("Failed cache for reducers missing!! "
+ "Job details are missing.");
return;
}
nonRunningReduces.add(0, tip);
}
/**
* Find a non-running task in the passed list of TIPs
* @param tips a collection of TIPs
* @param ttStatus the status of tracker that has requested a task to run
* @param numUniqueHosts number of unique hosts that run task trackers
* @param removeFailedTip whether to remove failed tips from the passed list
* @return a runnable TIP from the list, or <code>null</code> if none is found
*/
private synchronized TaskInProgress findTaskFromList(
Collection<TaskInProgress> tips, TaskTrackerStatus ttStatus,
int numUniqueHosts,
boolean removeFailedTip) {
Iterator<TaskInProgress> iter = tips.iterator();
while (iter.hasNext()) {
TaskInProgress tip = iter.next();
// Select a tip if
// 1. runnable : still needs to be run and is not completed
// 2. ~running : no other node is running it
// 3. earlier attempt failed : has not failed on this host
// and has failed on all the other hosts
// A TIP is removed from the list if
// (1) this tip is scheduled
// (2) if the passed list is a level 0 (host) cache
// (3) when the TIP is non-schedulable (running, killed, complete)
if (tip.isRunnable() && !tip.isRunning()) {
// check if the tip has failed on this host
if (!tip.hasFailedOnMachine(ttStatus.getHost()) ||
tip.getNumberOfFailedMachines() >= numUniqueHosts) {
// check if the tip has failed on all the nodes
iter.remove();
return tip;
} else if (removeFailedTip) {
// the case where we want to remove a failed tip from the host cache
// point#3 in the TIP removal logic above
iter.remove();
}
} else {
// see point#3 in the comment above for TIP removal logic
iter.remove();
}
}
return null;
}
@Override
public boolean hasSpeculativeMaps() {
return hasSpeculativeMaps;
}
@Override
public boolean hasSpeculativeReduces() {
return hasSpeculativeReduces;
}
@Override
public boolean shouldSpeculateAllRemainingMaps() {
if (speculativeMapUnfininshedThreshold == 0) {
return false;
}
int unfinished = desiredMaps() - finishedMaps();
if (unfinished < desiredMaps() * speculativeMapUnfininshedThreshold ||
unfinished == 1) {
return true;
}
return false;
}
@Override
public boolean shouldSpeculateAllRemainingReduces() {
if (speculativeReduceUnfininshedThreshold == 0) {
return false;
}
int unfinished = desiredReduces() - finishedReduces();
if (unfinished < desiredReduces() * speculativeReduceUnfininshedThreshold ||
unfinished == 1) {
return true;
}
return false;
}
/**
* Given a candidate set of tasks, find the ones that can be speculated,
* order them by estimated time remaining, and return them.
*/
protected synchronized List<TaskInProgress> findSpeculativeTaskCandidates
(Collection<TaskInProgress> list) {
ArrayList<TaskInProgress> candidates = new ArrayList<TaskInProgress>();
long now = JobTracker.getClock().getTime();
Iterator<TaskInProgress> iter = list.iterator();
while (iter.hasNext()) {
TaskInProgress tip = iter.next();
if (tip.canBeSpeculated(now)) {
candidates.add(tip);
}
}
if (candidates.size() > 0 ) {
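// Sort ascending by progressRate / (1 - progress) (see
// EstimatedTimeLeftComparator below), so the candidate expected to finish
// furthest in the future comes first in the returned list.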
Comparator<TaskInProgress> LateComparator =
new EstimatedTimeLeftComparator(now);
Collections.sort(candidates, LateComparator);
}
return candidates;
}
protected synchronized TaskInProgress findSpeculativeTask(
List<TaskInProgress> candidates, String taskTrackerName,
String taskTrackerHost, TaskType taskType) {
if ((candidates == null) || candidates.isEmpty()) {
return null;
}
if (isSlowTracker(taskTrackerName) || atSpeculativeCap(taskType)) {
return null;
}
long now = JobTracker.getClock().getTime();
Iterator<TaskInProgress> iter = candidates.iterator();
while (iter.hasNext()) {
TaskInProgress tip = iter.next();
if (tip.hasRunOnMachine(taskTrackerHost, taskTrackerName)) {
continue;
}
// either we are going to speculate this task or it's not speculatable
iter.remove();
if (!tip.canBeSpeculated(now)) {
// if it can't be speculated, then:
// A. it has completed/failed etc. - in which case makes sense to never
// speculate again
// B. its relative progress does not allow speculation. in this case
// it's fair to treat it as if it was never eligible for speculation
// to begin with.
continue;
}
if(tip.isUsingProcessingRateForSpeculation()) {
LOG.info("Using processing rate for speculation. Chose task " +
tip.getTIPId() + " to speculate." +
" Phase: " + tip.getProcessingPhase() +
" Statistics: Task's : " +
tip.getProcessingRate(tip.getProcessingPhase()) +
" Job's : " + getRunningTaskStatistics(tip.getProcessingPhase()));
} else {
LOG.info("Using progress rate for speculation. Chose task " +
tip.getTIPId() + " to speculate." +
" Statistics: Task's : " +
tip.getProgressRate() +
" Job's : " + (tip.isMapTask() ?
runningMapTaskStats : runningReduceTaskStats));
}
return tip;
}
return null;
}
/**
* Find new map task
* @param tts The task tracker that is asking for a task
* @param clusterSize The number of task trackers in the cluster
* @param numUniqueHosts The number of hosts that run task trackers
* @param maxCacheLevel The maximum topology level until which to schedule
* maps.
* A value of {@link #anyCacheLevel} implies any
* available task (node-local, rack-local, off-switch and
* speculative tasks).
* A value of {@link #NON_LOCAL_CACHE_LEVEL} implies only
* off-switch/speculative tasks should be scheduled.
* @return the index in tasks of the selected task (or -1 for no task)
*/
private synchronized int findNewMapTask(final TaskTrackerStatus tts,
final int clusterSize,
final int numUniqueHosts,
final int maxCacheLevel) {
if (numMapTasks == 0) {
if(LOG.isDebugEnabled()) {
LOG.debug("No maps to schedule for " + profile.getJobID());
}
return -1;
}
String taskTracker = tts.getTrackerName();
TaskInProgress tip = null;
//
// Update the last-known clusterSize
//
this.clusterSize = clusterSize;
if (!shouldRunOnTaskTracker(taskTracker)) {
return -1;
}
// Check to ensure this TaskTracker has enough resources to
// run tasks from this job
long outSize = resourceEstimator.getEstimatedMapOutputSize();
long availSpace = tts.getResourceStatus().getAvailableSpace();
final long SAFETY_BUFFER =
conf.getLong("mapred.map.reserved.disk.mb", 300) * 1024 * 1024;
if (availSpace < outSize + SAFETY_BUFFER) {
LOG.warn("No room for map task. Node " + tts.getHost() +
" has " + availSpace +
" bytes free; the safety buffer is " + SAFETY_BUFFER +
" bytes; but we expect the map output to take " + outSize);
return -1; //see if a different TIP might work better.
}
// For scheduling a map task, we have two caches and a list (optional)
// I) one for non-running task
// II) one for running task (this is for handling speculation)
// III) a list of TIPs that have empty locations (e.g., dummy splits),
// the list is empty if all TIPs have associated locations
// First a look up is done on the non-running cache and on a miss, a look
// up is done on the running cache. The order for lookup within the cache:
// 1. from local node to root [bottom up]
// 2. breadth wise for all the parent nodes at max level
// We fall to linear scan of the list (III above) if we have misses in the
// above caches
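// For example, with the common two-level topology (host -> rack), the
// lookup order for a tracker on host H in rack R is roughly:
// nonRunningMapCache[H], then nonRunningMapCache[R], then the other racks
// at the max level, then nonLocalMaps, and finally (if speculation is
// enabled) the running-map candidates.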
Node node = jobtracker.getNode(tts.getHost());
//
// I) Non-running TIP :
//
// 1. check from local node to the root [bottom up cache lookup]
// i.e if the cache is available and the host has been resolved
// (node!=null)
if (node != null) {
Node key = node;
int level = 0;
// maxCacheLevel might be greater than this.maxLevel if findNewMapTask is
// called to schedule any task (node-local, rack-local, off-switch or
// speculative), or it might be NON_LOCAL_CACHE_LEVEL (i.e. -1) if
// findNewMapTask is to only schedule off-switch/speculative tasks
int maxLevelToSchedule = Math.min(maxCacheLevel, maxLevel);
for (level = 0;level < maxLevelToSchedule; ++level) {
List <TaskInProgress> cacheForLevel = nonRunningMapCache.get(key);
if (cacheForLevel != null) {
tip = findTaskFromList(cacheForLevel, tts,
numUniqueHosts,level == 0);
if (tip != null) {
// Add to running cache
scheduleMap(tip);
// remove the cache if its empty
if (cacheForLevel.size() == 0) {
nonRunningMapCache.remove(key);
}
return tip.getIdWithinJob();
}
}
key = key.getParent();
}
// Check if we need to only schedule a local task (node-local/rack-local)
if (level == maxCacheLevel) {
return -1;
}
}
//2. Search breadth-wise across parents at max level for non-running
// TIP if
// - cache exists and there is a cache miss
// - node information for the tracker is missing (tracker's topology
// info not obtained yet)
// collection of node at max level in the cache structure
Collection<Node> nodesAtMaxLevel = jobtracker.getNodesAtMaxLevel();
// get the node parent at max level
Node nodeParentAtMaxLevel =
(node == null) ? null : JobTracker.getParentNode(node, maxLevel - 1);
for (Node parent : nodesAtMaxLevel) {
// skip the parent that has already been scanned
if (parent == nodeParentAtMaxLevel) {
continue;
}
List<TaskInProgress> cache = nonRunningMapCache.get(parent);
if (cache != null) {
tip = findTaskFromList(cache, tts, numUniqueHosts, false);
if (tip != null) {
// Add to the running cache
scheduleMap(tip);
// remove the cache if empty
if (cache.size() == 0) {
nonRunningMapCache.remove(parent);
}
LOG.info("Choosing a non-local task " + tip.getTIPId());
return tip.getIdWithinJob();
}
}
}
// 3. Search non-local tips for a new task
tip = findTaskFromList(nonLocalMaps, tts, numUniqueHosts, false);
if (tip != null) {
// Add to the running list
scheduleMap(tip);
LOG.info("Choosing a non-local task " + tip.getTIPId());
return tip.getIdWithinJob();
}
//
// II) Running TIP :
//
if (hasSpeculativeMaps) {
tip = getSpeculativeMap(tts.getTrackerName(), tts.getHost());
if (tip != null) {
LOG.info("Choosing a non-local task " + tip.getTIPId()
+ " for speculation");
return tip.getIdWithinJob();
}
}
return -1;
}
private synchronized TaskInProgress getSpeculativeMap(String taskTrackerName,
String taskTrackerHost) {
///////// Select a TIP to run on
TaskInProgress tip = findSpeculativeTask(candidateSpeculativeMaps, taskTrackerName,
taskTrackerHost, TaskType.MAP);
if (tip != null) {
LOG.info("Choosing map task " + tip.getTIPId() +
" for speculative execution");
} else {
if (LOG.isDebugEnabled()) {
LOG.debug("No speculative map task found for tracker " + taskTrackerName);
}
}
return tip;
}
/**
* Find new reduce task
* @param tts The task tracker that is asking for a task
* @param clusterSize The number of task trackers in the cluster
* @param numUniqueHosts The number of hosts that run task trackers
* @return the index in tasks of the selected task (or -1 for no task)
*/
private synchronized int findNewReduceTask(TaskTrackerStatus tts,
int clusterSize,
int numUniqueHosts) {
if (numReduceTasks == 0) {
if(LOG.isDebugEnabled()) {
LOG.debug("No reduces to schedule for " + profile.getJobID());
}
return -1;
}
String taskTracker = tts.getTrackerName();
TaskInProgress tip = null;
// Update the last-known clusterSize
this.clusterSize = clusterSize;
if (!shouldRunOnTaskTracker(taskTracker)) {
return -1;
}
long outSize = resourceEstimator.getEstimatedReduceInputSize();
long availSpace = tts.getResourceStatus().getAvailableSpace();
final long SAFETY_BUFFER =
conf.getLong("mapred.reduce.reserved.disk.mb", 300) * 1024 * 1024;
if (availSpace < outSize + SAFETY_BUFFER) {
LOG.warn("No room for reduce task. Node " + taskTracker +
" has " + availSpace +
" bytes free; the safety buffer is " + SAFETY_BUFFER +
" bytes; but we expect the reduce input to take " + outSize);
return -1; //see if a different TIP might work better.
}
// 1. check for a never-executed reduce tip
// reduces have no locality cache, so simply scan the non-running list
tip = findTaskFromList(nonRunningReduces, tts, numUniqueHosts, false);
if (tip != null) {
scheduleReduce(tip);
return tip.getIdWithinJob();
}
// 2. check for a reduce tip to be speculated
if (hasSpeculativeReduces) {
tip = getSpeculativeReduce(tts.getTrackerName(), tts.getHost());
if (tip != null) {
scheduleReduce(tip);
return tip.getIdWithinJob();
}
}
return -1;
}
private synchronized TaskInProgress getSpeculativeReduce(
String taskTrackerName, String taskTrackerHost) {
TaskInProgress tip = findSpeculativeTask(
candidateSpeculativeReduces, taskTrackerName, taskTrackerHost, TaskType.REDUCE);
if (tip != null) {
LOG.info("Choosing reduce task " + tip.getTIPId() +
" for speculative execution");
} else {
if (LOG.isDebugEnabled()) {
LOG.debug("No speculative reduce task found for tracker " + taskTrackerHost);
}
}
return tip;
}
private boolean shouldRunOnTaskTracker(String taskTracker) {
//
// Check if too many tasks of this job have failed on this
// tasktracker prior to assigning it a new one.
//
int taskTrackerFailedTasks = getTrackerTaskFailures(taskTracker);
if ((flakyTaskTrackers < (clusterSize * CLUSTER_BLACKLIST_PERCENT)) &&
taskTrackerFailedTasks >= maxTaskFailuresPerTracker) {
if (LOG.isDebugEnabled()) {
String flakyTracker = convertTrackerNameToHostName(taskTracker);
LOG.debug("Ignoring the black-listed tasktracker: '" + flakyTracker
+ "' for assigning a new task");
}
return false;
}
return true;
}
/**
* Metering: Occupied Slots * (Finish - Start)
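* For example, a reduce attempt that holds 2 slots for 60 seconds adds
* 2 * 60,000 = 120,000 to SLOTS_MILLIS_REDUCES (its shuffle/sort/reduce
* phases are similarly metered against the phase-specific counters).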
* @param tip {@link TaskInProgress} to be metered which just completed,
* cannot be <code>null</code>
* @param status {@link TaskStatus} of the completed task, cannot be
* <code>null</code>
*/
private void meterTaskAttempt(TaskInProgress tip, TaskStatus status) {
Counter slotCounter =
(tip.isMapTask()) ? Counter.SLOTS_MILLIS_MAPS :
Counter.SLOTS_MILLIS_REDUCES;
jobCounters.incrCounter(slotCounter,
tip.getNumSlotsRequired() *
(status.getFinishTime() - status.getStartTime()));
if (!tip.isMapTask()) {
jobCounters.incrCounter(Counter.SLOTS_MILLIS_REDUCES_COPY,
tip.getNumSlotsRequired() *
(status.getShuffleFinishTime() - status.getStartTime()));
jobCounters.incrCounter(Counter.SLOTS_MILLIS_REDUCES_SORT,
tip.getNumSlotsRequired() *
(status.getSortFinishTime() - status.getShuffleFinishTime()));
jobCounters.incrCounter(Counter.SLOTS_MILLIS_REDUCES_REDUCE,
tip.getNumSlotsRequired() *
(status.getFinishTime() - status.getSortFinishTime()));
}
}
/**
* A taskid assigned to this JobInProgress has reported in successfully.
*/
public synchronized boolean completedTask(TaskInProgress tip,
TaskStatus status)
{
TaskAttemptID taskid = status.getTaskID();
int oldNumAttempts = tip.getActiveTasks().size();
final JobTrackerInstrumentation metrics = jobtracker.getInstrumentation();
// Metering
meterTaskAttempt(tip, status);
// Sanity check: is the TIP already complete?
// It _is_ safe to not decrement running{Map|Reduce}Tasks and
// finished{Map|Reduce}Tasks variables here because one and only
// one task-attempt of a TIP gets to completedTask. This is because
// the TaskCommitThread in the JobTracker marks other, completed,
// speculative tasks as _complete_.
if (tip.isComplete()) {
// Mark this task as KILLED
tip.alreadyCompletedTask(taskid);
// Let the JobTracker cleanup this taskid if the job isn't running
if (this.status.getRunState() != JobStatus.RUNNING) {
jobtracker.markCompletedTaskAttempt(status.getTaskTracker(), taskid);
}
return false;
}
LOG.info("Task '" + taskid + "' has completed " + tip.getTIPId() +
" successfully.");
// Mark the TIP as complete
tip.completed(taskid);
resourceEstimator.updateWithCompletedTask(status, tip);
// Update jobhistory
TaskTrackerStatus ttStatus =
this.jobtracker.getTaskTrackerStatus(status.getTaskTracker());
String trackerHostname = jobtracker.getNode(ttStatus.getHost()).toString();
String taskType = getTaskType(tip);
if (status.getIsMap()){
JobHistory.MapAttempt.logStarted(status.getTaskID(), status.getStartTime(),
status.getTaskTracker(),
ttStatus.getHttpPort(),
taskType);
JobHistory.MapAttempt.logFinished(status.getTaskID(), status.getFinishTime(),
trackerHostname, taskType,
status.getStateString(),
status.getCounters());
}else{
JobHistory.ReduceAttempt.logStarted( status.getTaskID(), status.getStartTime(),
status.getTaskTracker(),
ttStatus.getHttpPort(),
taskType);
JobHistory.ReduceAttempt.logFinished(status.getTaskID(), status.getShuffleFinishTime(),
status.getSortFinishTime(), status.getFinishTime(),
trackerHostname,
taskType,
status.getStateString(),
status.getCounters());
}
JobHistory.Task.logFinished(tip.getTIPId(),
taskType,
tip.getExecFinishTime(),
status.getCounters());
int newNumAttempts = tip.getActiveTasks().size();
if (tip.isJobSetupTask()) {
// setup task has finished. kill the extra setup tip
killSetupTip(!tip.isMapTask());
setupComplete();
} else if (tip.isJobCleanupTask()) {
// cleanup task has finished. Kill the extra cleanup tip
if (tip.isMapTask()) {
// kill the reduce tip
cleanup[1].kill();
} else {
cleanup[0].kill();
}
//
// The Job is done
// if the job is failed, then mark the job failed.
if (jobFailed) {
terminateJob(JobStatus.FAILED);
}
// if the job is killed, then mark the job killed.
if (jobKilled) {
terminateJob(JobStatus.KILLED);
}
else {
jobComplete();
}
// The job has been killed/failed/successful
// JobTracker should cleanup this task
jobtracker.markCompletedTaskAttempt(status.getTaskTracker(), taskid);
} else if (tip.isMapTask()) {
// check if this was a speculative task
if (oldNumAttempts > 1) {
speculativeMapTasks -= (oldNumAttempts - newNumAttempts);
if (!garbageCollected) {
totalSpeculativeMapTasks.addAndGet(newNumAttempts - oldNumAttempts);
}
}
if (tip.isSpeculativeAttempt(taskid)) {
metrics.speculativeSucceededMap(taskid,
tip.isUsingProcessingRateForSpeculation());
}
int level = getLocalityLevel(tip, ttStatus);
long inputBytes = tip.getCounters()
.getGroup("org.apache.hadoop.mapred.Task$Counter")
.getCounter("Map input bytes");
switch (level) {
case 0: jobCounters.incrCounter(Counter.LOCAL_MAP_INPUT_BYTES,
inputBytes);
metrics.addLocalMapInputBytes(inputBytes);
break;
case 1: jobCounters.incrCounter(Counter.RACK_MAP_INPUT_BYTES,
inputBytes);
metrics.addRackMapInputBytes(inputBytes);
break;
default:metrics.addMapInputBytes(inputBytes);
break;
}
finishedMapTasks += 1;
metrics.completeMap(taskid);
if (!garbageCollected) {
if (!tip.isJobSetupTask() && hasSpeculativeMaps) {
updateTaskTrackerStats(tip,ttStatus,trackerMapStats,mapTaskStats);
}
}
// remove the completed map from the resp running caches
retireMap(tip);
if ((finishedMapTasks + failedMapTIPs) == (numMapTasks)) {
this.status.setMapProgress(1.0f);
}
} else {
if (oldNumAttempts > 1) {
speculativeReduceTasks -= (oldNumAttempts - newNumAttempts);
if (!garbageCollected) {
totalSpeculativeReduceTasks.addAndGet(newNumAttempts - oldNumAttempts);
}
}
if (tip.isSpeculativeAttempt(taskid)) {
metrics.speculativeSucceededReduce(taskid,
tip.isUsingProcessingRateForSpeculation());
}
finishedReduceTasks += 1;
metrics.completeReduce(taskid);
if (!garbageCollected) {
if (!tip.isJobSetupTask() && hasSpeculativeReduces) {
updateTaskTrackerStats(tip,ttStatus,trackerReduceStats,reduceTaskStats);
}
}
// remove the completed reduces from the running reducers set
retireReduce(tip);
if ((finishedReduceTasks + failedReduceTIPs) == (numReduceTasks)) {
this.status.setReduceProgress(1.0f);
}
}
// is job complete?
if (!jobSetupCleanupNeeded && canLaunchJobCleanupTask()) {
jobComplete();
}
return true;
}
/**
* Job state change must happen thru this call
*/
private void changeStateTo(int newState) {
int oldState = this.status.getRunState();
if (oldState == newState) {
return; //old and new states are same
}
this.status.setRunState(newState);
//update the metrics
if (oldState == JobStatus.PREP) {
this.jobtracker.getInstrumentation().decPrepJob(conf, jobId);
} else if (oldState == JobStatus.RUNNING) {
this.jobtracker.getInstrumentation().decRunningJob(conf, jobId);
}
if (newState == JobStatus.PREP) {
this.jobtracker.getInstrumentation().addPrepJob(conf, jobId);
} else if (newState == JobStatus.RUNNING) {
this.jobtracker.getInstrumentation().addRunningJob(conf, jobId);
}
}
/**
* The job is done since all its component tasks are either
* successful or have failed.
*/
private void jobComplete() {
final JobTrackerInstrumentation metrics = jobtracker.getInstrumentation();
//
// All tasks are complete, then the job is done!
//
if (this.status.getRunState() == JobStatus.RUNNING ||
this.status.getRunState() == JobStatus.PREP) {
changeStateTo(JobStatus.SUCCEEDED);
this.status.setCleanupProgress(1.0f);
if (maps.length == 0) {
this.status.setMapProgress(1.0f);
}
if (reduces.length == 0) {
this.status.setReduceProgress(1.0f);
}
this.finishTime = JobTracker.getClock().getTime();
LOG.info("Job " + this.status.getJobID() +
" has completed successfully.");
// Log the job summary (this should be done prior to logging to
// job-history to ensure job-counters are in-sync)
JobSummary.logJobSummary(this, jobtracker.getClusterStatus(false));
Counters counters = getCounters();
// Log job-history
JobHistory.JobInfo.logFinished(this.status.getJobID(), finishTime,
this.finishedMapTasks,
this.finishedReduceTasks, failedMapTasks,
failedReduceTasks, killedMapTasks,
killedReduceTasks, getMapCounters(),
getReduceCounters(), counters);
// Note that garbage collection below triggers finalization, which closes
// the job-history handles; history must therefore be logged first
garbageCollect();
metrics.completeJob(this.conf, this.status.getJobID());
}
}
private synchronized void terminateJob(int jobTerminationState) {
if ((status.getRunState() == JobStatus.RUNNING) ||
(status.getRunState() == JobStatus.PREP)) {
this.finishTime = JobTracker.getClock().getTime();
this.status.setMapProgress(1.0f);
this.status.setReduceProgress(1.0f);
this.status.setCleanupProgress(1.0f);
Counters counters = getCounters();
if (jobTerminationState == JobStatus.FAILED) {
changeStateTo(JobStatus.FAILED);
// Log the job summary
JobSummary.logJobSummary(this, jobtracker.getClusterStatus(false));
// Log to job-history
JobHistory.JobInfo.logFailed(this.status.getJobID(), finishTime,
this.finishedMapTasks,
this.finishedReduceTasks, counters);
} else {
changeStateTo(JobStatus.KILLED);
// Log the job summary
JobSummary.logJobSummary(this, jobtracker.getClusterStatus(false));
// Log to job-history
JobHistory.JobInfo.logKilled(this.status.getJobID(), finishTime,
this.finishedMapTasks,
this.finishedReduceTasks, counters);
}
garbageCollect();
jobtracker.getInstrumentation().terminateJob(
this.conf, this.status.getJobID());
if (jobTerminationState == JobStatus.FAILED) {
jobtracker.getInstrumentation().failedJob(
this.conf, this.status.getJobID());
} else {
jobtracker.getInstrumentation().killedJob(
this.conf, this.status.getJobID());
}
}
}
/**
* Terminate the job and all its component tasks.
* Calling this will lead to marking the job as failed/killed. Cleanup
* tip will be launched. If the job has not been initialized, terminateJob
* is called directly as there is no need to launch a cleanup tip.
* This method is reentrant.
* @param jobTerminationState job termination state
*/
private synchronized void terminate(int jobTerminationState) {
if(!tasksInited.get()) {
//init could not be done, we just terminate directly.
terminateJob(jobTerminationState);
return;
}
if ((status.getRunState() == JobStatus.RUNNING) ||
(status.getRunState() == JobStatus.PREP)) {
LOG.info("Killing job '" + this.status.getJobID() + "'");
if (jobTerminationState == JobStatus.FAILED) {
if(jobFailed) {//reentrant
return;
}
jobFailed = true;
} else if (jobTerminationState == JobStatus.KILLED) {
if(jobKilled) {//reentrant
return;
}
jobKilled = true;
}
// clear all unclean tasks
clearUncleanTasks();
//
// kill all TIPs.
//
for (int i = 0; i < setup.length; i++) {
setup[i].kill();
}
for (int i = 0; i < maps.length; i++) {
maps[i].kill();
}
for (int i = 0; i < reduces.length; i++) {
reduces[i].kill();
}
if (!jobSetupCleanupNeeded) {
terminateJob(jobTerminationState);
}
}
}
private void cancelReservedSlots() {
// Make a copy of the set of TaskTrackers to prevent a
// ConcurrentModificationException ...
Set<TaskTracker> tm =
new HashSet<TaskTracker>(trackersReservedForMaps.keySet());
for (TaskTracker tt : tm) {
tt.unreserveSlots(TaskType.MAP, this);
}
Set<TaskTracker> tr =
new HashSet<TaskTracker>(trackersReservedForReduces.keySet());
for (TaskTracker tt : tr) {
tt.unreserveSlots(TaskType.REDUCE, this);
}
}
private void clearUncleanTasks() {
TaskAttemptID taskid = null;
TaskInProgress tip = null;
while (!mapCleanupTasks.isEmpty()) {
taskid = mapCleanupTasks.remove(0);
tip = maps[taskid.getTaskID().getId()];
updateTaskStatus(tip, tip.getTaskStatus(taskid));
}
while (!reduceCleanupTasks.isEmpty()) {
taskid = reduceCleanupTasks.remove(0);
tip = reduces[taskid.getTaskID().getId()];
updateTaskStatus(tip, tip.getTaskStatus(taskid));
}
}
/**
* Kill the job and all its component tasks. This method should be called
* from the JobTracker and should return quickly, since the JobTracker is
* locked for the duration of the call.
*/
public void kill() {
boolean killNow = false;
synchronized(jobInitKillStatus) {
jobInitKillStatus.killed = true;
//if not in middle of init, terminate it now
if(!jobInitKillStatus.initStarted || jobInitKillStatus.initDone) {
//avoiding nested locking by setting flag
killNow = true;
}
}
if(killNow) {
terminate(JobStatus.KILLED);
}
}
/**
* Fails the job and all its component tasks. This should be called only from
* {@link JobInProgress} or {@link JobTracker}. Look at
* {@link JobTracker#failJob(JobInProgress)} for more details.
*/
synchronized void fail() {
terminate(JobStatus.FAILED);
}
/**
* A task assigned to this JobInProgress has reported in as failed.
* Most of the time, we'll just reschedule execution. However, after
* many repeated failures we may instead decide to allow the entire
* job to fail or succeed if the user doesn't care about a few tasks failing.
*
* Even if a task has reported as completed in the past, it might later
* be reported as failed. That's because the TaskTracker that hosts a map
* task might die before the entire job can complete. If that happens,
* we need to schedule reexecution so that downstream reduce tasks can
* obtain the map task's output.
*/
private void failedTask(TaskInProgress tip, TaskAttemptID taskid,
TaskStatus status,
TaskTracker taskTracker, boolean wasRunning,
boolean wasComplete, boolean wasAttemptRunning) {
this.jobtracker.getTaskErrorCollector().collect(
tip, taskid, taskTracker, JobTracker.getClock().getTime());
final JobTrackerInstrumentation metrics = jobtracker.getInstrumentation();
// check if the TIP is already failed
boolean wasFailed = tip.isFailed();
// Mark the taskid as FAILED or KILLED
tip.incompleteSubTask(taskid, this.status);
boolean isRunning = tip.isRunning();
boolean isComplete = tip.isComplete();
if (wasAttemptRunning) {
if (!tip.isJobCleanupTask() && !tip.isJobSetupTask()) {
boolean isSpeculative= tip.isSpeculativeAttempt(taskid);
boolean isUsingSpeculationByProcessingRate =
tip.isUsingProcessingRateForSpeculation();
long taskStartTime = status.getStartTime();
if (tip.isMapTask()) {
metrics.failedMap(taskid, wasFailed, isSpeculative,
isUsingSpeculationByProcessingRate, taskStartTime);
} else {
metrics.failedReduce(taskid, wasFailed, isSpeculative,
isUsingSpeculationByProcessingRate, taskStartTime);
}
}
// Metering
meterTaskAttempt(tip, status);
}
//update running count on task failure.
if (wasRunning && !isRunning) {
if (tip.isJobCleanupTask()) {
launchedCleanup = false;
} else if (tip.isJobSetupTask()) {
launchedSetup = false;
} else if (tip.isMapTask()) {
// remove from the running queue and put it in the non-running cache
// if the tip is not complete i.e if the tip still needs to be run
if (!isComplete) {
retireMap(tip);
failMap(tip);
}
} else {
// remove from the running queue and put in the failed queue if the tip
// is not complete
if (!isComplete) {
retireReduce(tip);
failReduce(tip);
}
}
}
// The case when the map was complete but the task tracker went down.
// However, we don't need to do any metering here...
if (wasComplete && !isComplete) {
if (tip.isMapTask()) {
// Put the task back in the cache. This will help locality for cases
// where we have a different TaskTracker from the same rack/switch
// asking for a task.
// We bother about only those TIPs that were successful
// earlier (wasComplete and !isComplete)
// (since they might have been removed from the cache of other
// racks/switches, if the input split blocks were present there too)
failMap(tip);
finishedMapTasks -= 1;
}
}
// update job history
// get taskStatus from tip
TaskStatus taskStatus = tip.getTaskStatus(taskid);
String taskTrackerName = taskStatus.getTaskTracker();
String taskTrackerHostName = convertTrackerNameToHostName(taskTrackerName);
int taskTrackerPort = -1;
TaskTrackerStatus taskTrackerStatus =
(taskTracker == null) ? null : taskTracker.getStatus();
if (taskTrackerStatus != null) {
taskTrackerPort = taskTrackerStatus.getHttpPort();
}
long startTime = taskStatus.getStartTime();
long finishTime = taskStatus.getFinishTime();
List<String> taskDiagnosticInfo = tip.getDiagnosticInfo(taskid);
String diagInfo = taskDiagnosticInfo == null ? "" :
StringUtils.arrayToString(taskDiagnosticInfo.toArray(new String[0]));
String taskType = getTaskType(tip);
if (taskStatus.getIsMap()) {
JobHistory.MapAttempt.logStarted(taskid, startTime,
taskTrackerName, taskTrackerPort, taskType);
if (taskStatus.getRunState() == TaskStatus.State.FAILED) {
JobHistory.MapAttempt.logFailed(taskid, finishTime,
taskTrackerHostName, diagInfo, taskType);
} else {
JobHistory.MapAttempt.logKilled(taskid, finishTime,
taskTrackerHostName, diagInfo, taskType);
}
} else {
JobHistory.ReduceAttempt.logStarted(taskid, startTime,
taskTrackerName, taskTrackerPort, taskType);
if (taskStatus.getRunState() == TaskStatus.State.FAILED) {
JobHistory.ReduceAttempt.logFailed(taskid, finishTime,
taskTrackerHostName, diagInfo, taskType);
} else {
JobHistory.ReduceAttempt.logKilled(taskid, finishTime,
taskTrackerHostName, diagInfo, taskType);
}
}
// After this, try to assign tasks with the one after this, so that
// the failed task goes to the end of the list.
if (!tip.isJobCleanupTask() && !tip.isJobSetupTask()) {
if (tip.isMapTask()) {
failedMapTasks++;
if (taskStatus.getRunState() != TaskStatus.State.FAILED) {
killedMapTasks++;
}
} else {
failedReduceTasks++;
if (taskStatus.getRunState() != TaskStatus.State.FAILED) {
killedReduceTasks++;
}
}
}
//
// Note down that a task has failed on this tasktracker
//
if (status.getRunState() == TaskStatus.State.FAILED) {
List<String> infos = tip.getDiagnosticInfo(status.getTaskID());
if (infos != null && infos.size() > 0) {
String lastFailureReason = infos.get(infos.size()-1);
addTrackerTaskFailure(taskTrackerName, taskTracker, lastFailureReason);
}
}
//
// Let the JobTracker know that this task has failed
//
jobtracker.markCompletedTaskAttempt(status.getTaskTracker(), taskid);
//
// Check if we need to kill the job because of too many failures or
// if the job is complete since all component tasks have completed
// We do it once per TIP and that too for the task that fails the TIP
if (!wasFailed && tip.isFailed()) {
//
// Allow up to 'mapFailuresPercent' of map tasks to fail or
// 'reduceFailuresPercent' of reduce tasks to fail
//
boolean killJob = tip.isJobCleanupTask() || tip.isJobSetupTask() ? true :
tip.isMapTask() ?
((++failedMapTIPs*100) > (mapFailuresPercent*numMapTasks)) :
((++failedReduceTIPs*100) > (reduceFailuresPercent*numReduceTasks));
if (killJob) {
LOG.info("Aborting job " + profile.getJobID());
// Record the task that caused the job failure for viewing on
// the job details page
recordTaskIdThatCausedFailure(tip.getTIPId());
JobHistory.Task.logFailed(tip.getTIPId(),
taskType,
finishTime,
diagInfo);
if (tip.isJobCleanupTask()) {
// kill the other tip
if (tip.isMapTask()) {
cleanup[1].kill();
} else {
cleanup[0].kill();
}
terminateJob(JobStatus.FAILED);
} else {
if (tip.isJobSetupTask()) {
// kill the other tip
killSetupTip(!tip.isMapTask());
}
fail();
}
}
//
// Update the counters
//
if (!tip.isJobCleanupTask() && !tip.isJobSetupTask()) {
if (tip.isMapTask()) {
jobCounters.incrCounter(Counter.NUM_FAILED_MAPS, 1);
} else {
jobCounters.incrCounter(Counter.NUM_FAILED_REDUCES, 1);
}
}
}
}
void killSetupTip(boolean isMap) {
if (isMap) {
setup[0].kill();
} else {
setup[1].kill();
}
}
boolean isSetupFinished() {
// if there is no setup to be launched, consider setup is finished.
if ((tasksInited.get() && setup.length == 0) ||
setup[0].isComplete() || setup[0].isFailed() || setup[1].isComplete()
|| setup[1].isFailed()) {
return true;
}
return false;
}
/**
* Fail a task with a given reason, but without a status object.
*
* Assuming {@link JobTracker} is locked on entry.
*
* @param tip The task's tip
* @param taskid The task id
* @param reason The reason that the task failed
* @param trackerName The task tracker the task failed on
*/
public void failedTask(TaskInProgress tip, TaskAttemptID taskid, String reason,
TaskStatus.Phase phase, TaskStatus.State state,
String trackerName) {
TaskStatus status = TaskStatus.createTaskStatus(tip.isMapTask(),
taskid,
0.0f,
tip.isMapTask() ?
numSlotsPerMap :
numSlotsPerReduce,
state,
reason,
reason,
trackerName, phase,
new Counters());
// update the actual start-time of the attempt
TaskStatus oldStatus = tip.getTaskStatus(taskid);
long startTime = oldStatus == null
? JobTracker.getClock().getTime()
: oldStatus.getStartTime();
status.setStartTime(startTime);
long finishTime = JobTracker.getClock().getTime();
// update finish time only if needed, as map tasks can fail after completion
if (tip.isMapTask() && oldStatus != null) {
long oldFinishTime = oldStatus.getFinishTime();
if (oldFinishTime > 0) {
finishTime = oldFinishTime;
}
}
status.setFinishTime(finishTime);
boolean wasComplete = tip.isComplete();
updateTaskStatus(tip, status);
boolean isComplete = tip.isComplete();
if (wasComplete && !isComplete) { // mark a successful tip as failed
String taskType = getTaskType(tip);
JobHistory.Task.logFailed(tip.getTIPId(), taskType,
tip.getExecFinishTime(), reason, taskid);
}
}
/**
* The job is dead. We're now GC'ing it, getting rid of the job
* from all tables. Be sure to remove all of this job's tasks
* from the various tables.
*/
synchronized void garbageCollect() {
// Cancel task tracker reservation
cancelReservedSlots();
// Remove the remaining speculative tasks counts
totalSpeculativeReduceTasks.addAndGet(-speculativeReduceTasks);
totalSpeculativeMapTasks.addAndGet(-speculativeMapTasks);
garbageCollected = true;
// Let the JobTracker know that a job is complete
jobtracker.getInstrumentation().decWaitingMaps(getJobID(), pendingMaps());
jobtracker.getInstrumentation().decWaitingReduces(getJobID(), pendingReduces());
jobtracker.storeCompletedJob(this);
jobtracker.finalizeJob(this);
try {
// Definitely remove the local-disk copy of the job file
if (localJobFile != null) {
localFs.delete(localJobFile, true);
localJobFile = null;
}
// clean up splits
for (int i = 0; i < maps.length; i++) {
maps[i].clearSplit();
}
// JobClient always creates a new directory with job files
// so we remove that directory to cleanup
// Delete temp dfs dirs created if any, e.g. in the case of
// speculative execution of reduces.
Path tempDir = jobtracker.getSystemDirectoryForJob(getJobID());
new CleanupQueue().addToQueue(new PathDeletionContext(
FileSystem.get(conf), tempDir.toUri().getPath()));
} catch (IOException e) {
LOG.warn("Error cleaning up "+profile.getJobID()+": "+e);
}
cleanUpMetrics();
// free up the memory used by the data structures
this.nonRunningMapCache = null;
this.runningMapCache = null;
this.nonRunningReduces = null;
this.runningReduces = null;
this.trackerMapStats = null;
this.trackerReduceStats = null;
}
@Override
public synchronized TaskInProgress getTaskInProgress(TaskID tipid) {
return super.getTaskInProgress(tipid);
}
/**
* Find the details of a successful attempt of a given map
* @param mapId the id of the map
* @return the task status of the successful attempt, or null if none succeeded
*/
public synchronized TaskStatus findFinishedMap(int mapId) {
TaskInProgress tip = maps[mapId];
if (tip.isComplete()) {
TaskStatus[] statuses = tip.getTaskStatuses();
for(int i=0; i < statuses.length; i++) {
if (statuses[i].getRunState() == TaskStatus.State.SUCCEEDED) {
return statuses[i];
}
}
}
return null;
}
synchronized int getNumTaskCompletionEvents() {
return taskCompletionEvents.size();
}
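/**
* Return a window of task-completion events. For example, with 10 recorded
* events, fromEventId=4 and maxEvents=3 returns events 4, 5 and 6; if fewer
* events are available past fromEventId, the window is truncated.
*/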
synchronized public TaskCompletionEvent[] getTaskCompletionEvents(
int fromEventId, int maxEvents) {
TaskCompletionEvent[] events = TaskCompletionEvent.EMPTY_ARRAY;
if (taskCompletionEvents.size() > fromEventId) {
int actualMax = Math.min(maxEvents,
(taskCompletionEvents.size() - fromEventId));
events = taskCompletionEvents.subList(fromEventId, actualMax + fromEventId).toArray(events);
}
return events;
}
synchronized public int getTaskCompletionEventsSize() {
return taskCompletionEvents.size();
}
synchronized void fetchFailureNotification(TaskAttemptID reportingAttempt,
TaskInProgress tip,
TaskAttemptID mapTaskId,
String trackerName) {
final JobTrackerInstrumentation metrics = jobtracker.getInstrumentation();
metrics.mapFetchFailure();
Integer fetchFailures = mapTaskIdToFetchFailuresMap.get(mapTaskId);
fetchFailures = (fetchFailures == null) ? 1 : (fetchFailures+1);
mapTaskIdToFetchFailuresMap.put(mapTaskId, fetchFailures);
LOG.info("Failed fetch notification #" + fetchFailures + " by " + reportingAttempt +
" for task " + mapTaskId + " tracker " + trackerName);
float failureRate = (float)fetchFailures / runningReduceTasks;
// declare faulty if fetch-failures >= max-allowed-failures
boolean isMapFaulty = (failureRate >= MAX_ALLOWED_FETCH_FAILURES_PERCENT) ||
fetchFailures > maxFetchFailuresPerMapper;
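// Illustrative example (the threshold constants are defined elsewhere in
// this class): if MAX_ALLOWED_FETCH_FAILURES_PERCENT were 0.5 and 20
// reduces are running, 10 fetch-failure notifications push failureRate to
// 0.5 and mark the map faulty, provided at least
// MAX_FETCH_FAILURES_NOTIFICATIONS notifications have arrived.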
if (fetchFailures >= MAX_FETCH_FAILURES_NOTIFICATIONS
&& isMapFaulty) {
String reason = "Too many fetch-failures (" + fetchFailures + ")" +
" at " + (new Date());
LOG.info(reason + " for " + mapTaskId + " ... killing it");
metrics.mapFailedByFetchFailures();
failedTask(tip, mapTaskId, reason,
(tip.isMapTask() ? TaskStatus.Phase.MAP :
TaskStatus.Phase.REDUCE),
TaskStatus.State.FAILED, trackerName);
mapTaskIdToFetchFailuresMap.remove(mapTaskId);
}
}
/**
* @return The JobID of this JobInProgress.
*/
public JobID getJobID() {
return jobId;
}
public synchronized Object getSchedulingInfo() {
return this.schedulingInfo;
}
public synchronized void setSchedulingInfo(Object schedulingInfo) {
this.schedulingInfo = schedulingInfo;
this.status.setSchedulingInfo(schedulingInfo.toString());
}
/**
* To keep track of kill and initTasks status of this job. initTasks() takes
* a lock on the JobInProgress object. kill should avoid waiting on the
* JobInProgress lock since initTasks() may take a while.
*/
private static class JobInitKillStatus {
//flag to be set if kill is called
boolean killed;
boolean initStarted;
boolean initDone;
}
boolean isComplete() {
return status.isJobComplete();
}
/**
* Get the task type for logging it to {@link JobHistory}.
*/
private String getTaskType(TaskInProgress tip) {
if (tip.isJobCleanupTask()) {
return Values.CLEANUP.name();
} else if (tip.isJobSetupTask()) {
return Values.SETUP.name();
} else if (tip.isMapTask()) {
return Values.MAP.name();
} else {
return Values.REDUCE.name();
}
}
/**
* Get the level of locality that a given task would have if launched on
* a particular TaskTracker. Returns 0 if the task has data on that machine,
* 1 if it has data on the same rack, etc (depending on number of levels in
* the network hierarchy).
*/
int getLocalityLevel(TaskInProgress tip, TaskTrackerStatus tts) {
Node tracker = jobtracker.getNode(tts.getHost());
int level = this.maxLevel;
// find the right level across split locations
for (String local : maps[tip.getIdWithinJob()].getSplitLocations()) {
Node datanode = jobtracker.getNode(local);
int newLevel = this.maxLevel;
if (tracker != null && datanode != null) {
newLevel = getMatchingLevelForNodes(tracker, datanode);
}
if (newLevel < level) {
level = newLevel;
// an optimization
if (level == 0) {
break;
}
}
}
return level;
}
/**
* Test method to set the cluster sizes
*/
void setClusterSize(int clusterSize) {
this.clusterSize = clusterSize;
}
static class JobSummary {
static final Log LOG = LogFactory.getLog(JobSummary.class);
// Escape sequences
static final char EQUALS = '=';
static final char[] charsToEscape =
{StringUtils.COMMA, EQUALS, StringUtils.ESCAPE_CHAR};
/**
* Log a summary of the job's runtime.
*
* @param job {@link JobInProgress} whose summary is to be logged, cannot
* be <code>null</code>.
* @param cluster {@link ClusterStatus} of the cluster on which the job was
* run, cannot be <code>null</code>
*/
public static void logJobSummary(JobInProgress job, ClusterStatus cluster) {
JobStatus status = job.getStatus();
JobProfile profile = job.getProfile();
String user = StringUtils.escapeString(profile.getUser(),
StringUtils.ESCAPE_CHAR,
charsToEscape);
String queue = StringUtils.escapeString(profile.getQueueName(),
StringUtils.ESCAPE_CHAR,
charsToEscape);
Counters jobCounters = job.getJobCounters();
long mapSlotSeconds =
(jobCounters.getCounter(Counter.SLOTS_MILLIS_MAPS) +
jobCounters.getCounter(Counter.FALLOW_SLOTS_MILLIS_MAPS)) / 1000;
long reduceSlotSeconds =
(jobCounters.getCounter(Counter.SLOTS_MILLIS_REDUCES) +
jobCounters.getCounter(Counter.FALLOW_SLOTS_MILLIS_REDUCES)) / 1000;
LOG.info("jobId=" + job.getJobID() + StringUtils.COMMA +
"submitTime" + EQUALS + job.getStartTime() + StringUtils.COMMA +
"launchTime" + EQUALS + job.getLaunchTime() + StringUtils.COMMA +
"finishTime" + EQUALS + job.getFinishTime() + StringUtils.COMMA +
"numMaps" + EQUALS + job.getTasks(TaskType.MAP).length +
StringUtils.COMMA +
"numSlotsPerMap" + EQUALS + job.getNumSlotsPerMap() +
StringUtils.COMMA +
"numReduces" + EQUALS + job.getTasks(TaskType.REDUCE).length +
StringUtils.COMMA +
"numSlotsPerReduce" + EQUALS + job.getNumSlotsPerReduce() +
StringUtils.COMMA +
"user" + EQUALS + user + StringUtils.COMMA +
"queue" + EQUALS + queue + StringUtils.COMMA +
"status" + EQUALS +
JobStatus.getJobRunState(status.getRunState()) +
StringUtils.COMMA +
"mapSlotSeconds" + EQUALS + mapSlotSeconds + StringUtils.COMMA +
"reduceSlotsSeconds" + EQUALS + reduceSlotSeconds +
StringUtils.COMMA +
"clusterMapCapacity" + EQUALS + cluster.getMaxMapTasks() +
StringUtils.COMMA +
"clusterReduceCapacity" + EQUALS + cluster.getMaxReduceTasks()
);
}
}
/**
* Check to see if the maximum number of speculative tasks are
* already being executed currently.
* @param type the type of task (MAP/REDUCE) that we are considering
* @return has the cap been reached?
*/
private boolean atSpeculativeCap(TaskType type) {
float numTasks = (type == TaskType.MAP) ?
(float)(runningMapTasks - speculativeMapTasks) :
(float)(runningReduceTasks - speculativeReduceTasks);
if (numTasks == 0){
return true; // avoid divide by zero
}
int speculativeTaskCount = type == TaskType.MAP ? speculativeMapTasks
: speculativeReduceTasks;
int totalSpeculativeTaskCount = type == TaskType.MAP ?
totalSpeculativeMapTasks.get() : totalSpeculativeReduceTasks.get();
// Speculation is never capped while the number of speculative tasks is
// below MIN_SPEC_CAP or below (MIN_SLOTS_CAP * total-slots); beyond that,
// it is capped once the overall speculative-task count reaches
// (TOTAL_SPECULATIVECAP * total-slots) or the job's ratio of speculative
// to non-speculative running tasks reaches speculativeCap.
if (speculativeTaskCount < MIN_SPEC_CAP) {
return false; // at least one slow tracker's worth of slots(default=10)
}
ClusterStatus c = jobtracker.getClusterStatus(false);
int numSlots = (type == TaskType.MAP ? c.getMaxMapTasks() : c.getMaxReduceTasks());
if (speculativeTaskCount < numSlots * MIN_SLOTS_CAP) {
return false;
}
// Check if the total CAP has been reached
if (totalSpeculativeTaskCount >= numSlots * TOTAL_SPECULATIVECAP) {
return true;
}
boolean atCap = (((speculativeTaskCount)/numTasks) >= speculativeCap);
if (LOG.isDebugEnabled()) {
LOG.debug("SpeculativeCap is "+speculativeCap+", specTasks/numTasks is " +
((speculativeTaskCount)/numTasks)+
", so atSpecCap() is returning "+atCap);
}
return atCap;
}
/**
* A class for comparing the estimated time to completion of two tasks
*/
static class EstimatedTimeLeftComparator implements Comparator<TaskInProgress> {
private final long time;
public EstimatedTimeLeftComparator(long now) {
this.time = now;
}
/**
* Estimated time to completion is measured as:
* % of task left to complete (1 - progress) / progress rate of the task.
*
* This assumes that tasks are linear in their progress, which is
* often wrong, especially since progress for reducers is currently
* calculated by evenly weighting their three stages (shuffle, sort, reduce)
* which rarely account for 1/3 each. This should be fixed in the future
* by calculating progressRate more intelligently or splitting these
* multi-phase tasks into individual tasks.
*
* The ordering this comparator defines is: task1 < task2 if task1 is
* estimated to finish farther in the future => compare(t1,t2) returns -1
*/
@Override
public int compare(TaskInProgress tip1, TaskInProgress tip2) {
//we have to use the Math.max in the denominator to avoid a divide-by-zero
//error because prog and progRate can both be zero (if one is zero,
//the other one will be zero too).
//We use the inverse of time_remaining = [(1 - prog) / progRate]
//so that (1 - prog) is in the denominator, because tasks can have
//arbitrarily low progRates in practice (e.g. a task that is half done
//after 1000 seconds will have a progRate of 0.0000005), so we would
//rather use Math.max on (1 - prog) by putting it in the denominator,
//which makes tasks with prog=1 look 99.99% done instead of 100%,
//which is okay.
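// Worked example: a task at 50% progress with progressRate 1e-6 yields
// 1e-6 / 0.5 = 2e-6, while a task at 90% progress with progressRate 1e-5
// yields 1e-5 / 0.1 = 1e-4; the first (smaller value) sorts earlier, i.e.
// it is estimated to finish furthest in the future.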
double t1 = tip1.getProgressRate() / Math.max(0.0001,
1.0 - tip1.getProgress());
double t2 = tip2.getProgressRate() / Math.max(0.0001,
1.0 - tip2.getProgress());
if (t1 < t2) return -1;
else if (t2 < t1) return 1;
else return 0;
}
}
/**
* Compares the average duration of tasks that have finished on this
* taskTracker to the average of all successful tasks thus far to see if
* this TT is too slow for speculating.
* slowNodeThreshold is the number of standard deviations by which a
* tracker's mean may exceed the overall mean before it is declared slow.
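* For example, if the overall mean of per-tracker means is 100s with a
* standard deviation of 20s and slowNodeThreshold is 1.0, a tracker whose
* own mean exceeds 120s is treated as slow (illustrative numbers only).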
* @param taskTracker the name of the TaskTracker we are checking
* @return is this TaskTracker slow
*/
protected boolean isSlowTracker(String taskTracker) {
if (trackerMapStats.get(taskTracker) != null &&
trackerMapStats.get(taskTracker).mean() -
mapTaskStats.mean() > mapTaskStats.std()*slowNodeThreshold) {
if (LOG.isDebugEnabled()) {
LOG.debug("Tracker " + taskTracker +
" declared slow. trackerMapStats.get(taskTracker).mean() :" + trackerMapStats.get(taskTracker).mean() +
" mapTaskStats :" + mapTaskStats);
}
return true;
}
if (trackerReduceStats.get(taskTracker) != null &&
trackerReduceStats.get(taskTracker).mean() -
reduceTaskStats.mean() > reduceTaskStats.std()*slowNodeThreshold) {
if (LOG.isDebugEnabled()) {
LOG.debug("Tracker " + taskTracker +
" declared slow. trackerReduceStats.get(taskTracker).mean() :" + trackerReduceStats.get(taskTracker).mean() +
" reduceTaskStats :" + reduceTaskStats);
}
return true;
}
return false;
}
private void updateTaskTrackerStats(TaskInProgress tip, TaskTrackerStatus ttStatus,
Map<String,DataStatistics> trackerStats, DataStatistics overallStats) {
float tipDuration = tip.getExecFinishTime() -
tip.getDispatchTime(tip.getSuccessfulTaskid());
DataStatistics ttStats =
trackerStats.get(ttStatus.getTrackerName());
double oldMean = 0.0d;
//We maintain the mean of TaskTrackers' means. That way, we get a single
//data-point for every tracker (used in the evaluation in isSlowTracker)
if (ttStats != null) {
oldMean = ttStats.mean();
ttStats.add(tipDuration);
overallStats.updateStatistics(oldMean, ttStats.mean());
} else {
trackerStats.put(ttStatus.getTrackerName(),
(ttStats = new DataStatistics(tipDuration)));
overallStats.add(tipDuration);
}
if (LOG.isDebugEnabled()) {
LOG.debug("Added mean of " +ttStats.mean() + " to trackerStats of type "+
(tip.isMapTask() ? "Map" : "Reduce") +
" on "+ttStatus.getTrackerName()+". DataStatistics is now: " +
trackerStats.get(ttStatus.getTrackerName()));
}
}
@Override
public DataStatistics getRunningTaskStatistics(boolean isMap) {
if (isMap) {
return runningMapTaskStats;
} else {
return runningReduceTaskStats;
}
}
@Override
public DataStatistics getRunningTaskStatistics(TaskStatus.Phase phase) {
switch(phase) {
case MAP: return runningTaskMapByteProcessingRateStats;
case SHUFFLE: return runningTaskCopyProcessingRateStats;
case SORT: return runningTaskSortProcessingRateStats;
case REDUCE: return runningTaskReduceProcessingRateStats;
}
LOG.error("No Statistics for phase " + phase.toString() + " in job " +
jobId);
return null;
}
@Override
public float getSlowTaskThreshold() {
return slowTaskThreshold;
}
@Override
public float getStddevMeanRatioMax() {
return speculativeStddevMeanRatioMax;
}
public static int getTotalSpeculativeMapTasks() {
return totalSpeculativeMapTasks.get();
}
public static int getTotalSpeculativeReduceTasks() {
return totalSpeculativeReduceTasks.get();
}
synchronized void refreshIfNecessary() {
if (getStatus().getRunState() != JobStatus.RUNNING) {
return;
}
long now = JobTracker.getClock().getTime();
if ((now - lastRefresh) > refreshTimeout) {
lastRefresh = now;
refresh(now);
}
}
/**
* Refresh speculative task candidates and running tasks. This needs to be
* called periodically to obtain fresh values.
*/
void refresh(long now) {
refreshCandidateSpeculativeMaps(now);
refreshCandidateSpeculativeReduces(now);
refreshTaskCountsAndWaitTime(TaskType.MAP, now);
refreshTaskCountsAndWaitTime(TaskType.REDUCE, now);
}
/**
* Refresh runningTasks, neededTasks and pendingTasks counters
* @param type TaskType to refresh
*/
protected void refreshTaskCountsAndWaitTime(TaskType type, long now) {
TaskInProgress[] allTips = getTasks(type);
int finishedTips = 0;
int runningTips = 0;
int runningTaskAttempts = 0;
long totalWaitTime = 0;
long jobStartTime = this.getStartTime();
for (TaskInProgress tip : allTips) {
if (tip.isComplete()) {
finishedTips += 1;
} else if(tip.isRunning()) {
runningTaskAttempts += tip.getActiveTasks().size();
runningTips += 1;
}
if (tip.getExecStartTime() > 0) {
totalWaitTime += tip.getExecStartTime() - jobStartTime;
} else {
totalWaitTime += now - jobStartTime;
}
}
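// Roughly: 'needed' counts TIPs that are neither running nor finished
// (plus any extra speculative attempts wanted), while 'pending' counts
// attempts still to be launched, net of failed TIPs, finished tasks and
// already-running speculative attempts.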
if (TaskType.MAP == type) {
totalMapWaitTime = totalWaitTime;
runningMapTasks = runningTaskAttempts;
neededMapTasks = numMapTasks - runningTips - finishedTips
+ neededSpeculativeMaps();
pendingMapTasks = numMapTasks - runningTaskAttempts
- failedMapTIPs - finishedMapTasks + speculativeMapTasks;
} else {
totalReduceWaitTime = totalWaitTime;
runningReduceTasks = runningTaskAttempts;
neededReduceTasks = numReduceTasks - runningTips - finishedTips
+ neededSpeculativeReduces();
pendingReduceTasks = numReduceTasks - runningTaskAttempts
- failedReduceTIPs - finishedReduceTasks + speculativeReduceTasks;
}
}
private void refreshCandidateSpeculativeMaps(long now) {
if (!hasSpeculativeMaps()) {
return;
}
//////// Populate allTips with all TaskInProgress
Set<TaskInProgress> allTips = new HashSet<TaskInProgress>();
// collection of node at max level in the cache structure
Collection<Node> nodesAtMaxLevel = jobtracker.getNodesAtMaxLevel();
// Add all tasks from max-level nodes breadth-wise
for (Node parent : nodesAtMaxLevel) {
Set<TaskInProgress> cache = runningMapCache.get(parent);
if (cache != null) {
allTips.addAll(cache);
}
}
// Add all non-local TIPs
allTips.addAll(nonLocalRunningMaps);
// update the progress rates of all the candidate tips ..
for (TaskInProgress tip: allTips) {
tip.updateProgressRate(now);
}
candidateSpeculativeMaps = findSpeculativeTaskCandidates(allTips);
}
private void refreshCandidateSpeculativeReduces(long now) {
if (!hasSpeculativeReduces()) {
return;
}
// update the progress rates of all the candidate tips ..
for (TaskInProgress tip: runningReduces) {
tip.updateProgressRate(now);
}
candidateSpeculativeReduces = findSpeculativeTaskCandidates(runningReduces);
}
public TaskID getTaskIdThatCausedFailure() {
return taskIdThatCausedFailure;
}
private synchronized void recordTaskIdThatCausedFailure(TaskID tid) {
// Only the first task is considered to have caused the failure
if (taskIdThatCausedFailure == null) {
taskIdThatCausedFailure = tid;
}
}
}