Package org.apache.hadoop.mapred

Source Code of org.apache.hadoop.mapred.TaskInProgress$FailedRanges$Divide

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;


import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapred.JobClient.RawSplit;
import org.apache.hadoop.mapred.SortedRanges.Range;
import org.apache.hadoop.mapred.TaskStatus.Phase;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.util.StringUtils;

/*************************************************************
* TaskInProgress maintains all the info needed for a
* Task in the lifetime of its owning Job.  A given Task
* might be speculatively executed or reexecuted, so we
* need a level of indirection above the running-id itself.
* <br>
* A given TaskInProgress contains multiple taskids,
* 0 or more of which might be executing at any one time.
* (That's what allows speculative execution.)  A taskid
* is now *never* recycled.  A TIP allocates enough taskids
* to account for all the speculation and failures it will
* ever have to handle.  Once those are up, the TIP is dead.
* **************************************************************
*/
class TaskInProgress {
  static final int MAX_TASK_EXECS = 1;
  // Default attempt limit; overwritten by setMaxTaskAttempts() with the map
  // or reduce limit read from the JobConf.
  int maxTaskAttempts = 4;   
  // Filled in by init() from the map or reduce speculative-lag setting.
  long speculativeLag;
  // Computed by init(): 1/(1000 * speculativeDuration), or -1.0 when the
  // configured speculative duration is <= 0.
  double maxProgressRateForSpeculation;
  private boolean speculativeForced = false;
  // Read in init() from "mapreduce.job.speculative.using.processing.rate".
  private boolean useProcessingRateForSpeculation = false;
  private static final int NUM_ATTEMPTS_PER_RESTART = 1000;

  public static final Log LOG = LogFactory.getLog(TaskInProgress.class);

  // Defines the TIP
  private String jobFile = null;
  // Assigned by the map-task constructor; isMapTask() tests it for null.
  private RawSplit rawSplit;
  // Assigned by the reduce-task constructor.
  private int numMaps;
  // Index of this TIP within the job (see idWithinJob()).
  private int partition;
  // Built in init() from the job id, the map/reduce flag and the partition.
  private TaskID id;
  private JobInProgressTraits job;
  private final int numSlotsRequired;

  // Status of the TIP
  private int successEventNumber = -1;
  private int numTaskFailures = 0;
  private int numKilledTasks = 0;
  private double progress = 0;
  private double progressRate;
  // Phase taken from the most recent status accepted by updateStatus() when
  // speculation statistics are being tracked.
  private Phase processingPhase;
  private ProcessingRates processingRates = new ProcessingRates(0, 0, 0, 0);
  private String state = "";
  private long startTime = 0;
  private long lastDispatchTime = 0; // most recent time task given to TT
  private long execStartTime = 0;
  private long execFinishTime = 0;
  // Incremented by completed(); decremented by incompleteSubTask() when the
  // attempt that completed a map is later lost.
  private int completes = 0;
  private boolean failed = false;
  private boolean killed = false;
  // Taken from SkipBadRecords in the constructors; a value > 0 enables the
  // record-skipping feature.
  private long maxSkipRecords = 0;
  private FailedRanges failedRanges = new FailedRanges();
  private volatile boolean skipping = false;
  private boolean jobCleanup = false;
  private boolean jobSetup = false;
  
  // The 'next' usable taskid of this tip
  int nextTaskId = 0;
   
  // The taskid that took this TIP to SUCCESS
  private TaskAttemptID successfulTaskId;

  // The first taskid of this tip
  private TaskAttemptID firstTaskId;
 
  // The taskid of speculative task
  private TaskAttemptID speculativeTaskId;
 
  // Map from task Id -> TaskTracker Id, contains tasks that are
  // currently running
  private TreeMap<TaskAttemptID, String> activeTasks = new TreeMap<TaskAttemptID, String>();
  // All attempt Ids of this TIP
  private TreeSet<TaskAttemptID> tasks = new TreeSet<TaskAttemptID>();
  private JobConf conf;
  // Diagnostic messages recorded per attempt via addDiagnosticInfo()
  private Map<TaskAttemptID,List<String>> taskDiagnosticData =
    new TreeMap<TaskAttemptID,List<String>>();
  /**
   * Map from taskId -> TaskStatus
   */
  TreeMap<TaskAttemptID,TaskStatus> taskStatuses =
    new TreeMap<TaskAttemptID,TaskStatus>();

  // Map from taskId -> TaskTracker Id,
  // contains cleanup attempts and where they ran, if any
  private TreeMap<TaskAttemptID, String> cleanupTasks =
    new TreeMap<TaskAttemptID, String>();

  // Host names on which an attempt of this TIP has failed
  private TreeSet<String> machinesWhereFailed = new TreeSet<String>();
  // Attempts for which shouldClose() has already answered "close"
  private TreeSet<TaskAttemptID> tasksReportedClosed = new TreeSet<TaskAttemptID>();
 
  //list of tasks to kill, <taskid> -> <shouldFail>
  private TreeMap<TaskAttemptID, Boolean> tasksToKill = new TreeMap<TaskAttemptID, Boolean>();
 
  //task to commit, <taskattemptid> 
  private TaskAttemptID taskToCommit;
 
  private volatile Counters counters = new Counters();
 
  // Dispatch time recorded per attempt by setDispatchTime()
  private HashMap<TaskAttemptID, Long> dispatchTimeMap =
    new HashMap<TaskAttemptID, Long>();
 
  // Whether to use the input record processing rate when speculating maps
  // based on the processing rate. Has no effect if speculating based on the
  // progress rate.
  public static final String USE_MAP_RECORDS_PROCESSING_RATE =
      "mapreduce.job.speculative.use.map.record.rate";
 
  /**
   * Private helper class to pass around / store processing rates more easily
   */
  private final static class ProcessingRates {
    private double mapRate = 0;
    private double copyRate = 0;
    private double sortRate = 0;
    private double reduceRate = 0;
   
    public ProcessingRates(double mapRate, double copyRate, double sortRate,
        double reduceRate) {
      this.mapRate = mapRate;
      this.copyRate = copyRate;
      this.sortRate = sortRate;
      this.reduceRate = reduceRate;
    }
   
    public ProcessingRates(ProcessingRates p) {
      this.mapRate = p.mapRate;
      this.copyRate = p.copyRate;
      this.sortRate = p.sortRate;
      this.reduceRate = p.reduceRate;
    }
   
    public double getRate(Phase p) {
      if (p == Phase.MAP) {
        return this.mapRate;
      } else if (p == Phase.SHUFFLE) {
        return this.copyRate;
      } else if (p == Phase.SORT) {
        return this.sortRate;
      } else if (p == Phase.REDUCE) {
        return this.reduceRate;
      } else {
        throw new RuntimeException("Invalid phase " + p);
      }
    }
  }
 
  /**
   * Constructor for MapTask
   */
  public TaskInProgress(JobID jobid, String jobFile,
                        RawSplit rawSplit,
                        JobConf conf,
                        JobInProgressTraits job, int partition,
                        int numSlotsRequired) {
    this.jobFile = jobFile;
    this.rawSplit = rawSplit;
    this.job = job;
    this.conf = conf;
    this.partition = partition;
    this.maxSkipRecords = SkipBadRecords.getMapperMaxSkipRecords(conf);
    this.numSlotsRequired = numSlotsRequired;
    setMaxTaskAttempts();
    init(jobid);
  }
       
  /**
   * Constructor for ReduceTask
   */
  public TaskInProgress(JobID jobid, String jobFile,
                        int numMaps,
                        int partition, JobConf conf,
                        JobInProgressTraits job, int numSlotsRequired) {
    this.jobFile = jobFile;
    this.numMaps = numMaps;
    this.partition = partition;
    this.job = job;
    this.conf = conf;
    this.maxSkipRecords = SkipBadRecords.getReducerMaxSkipGroups(conf);
    this.numSlotsRequired = numSlotsRequired;
    setMaxTaskAttempts();
    init(jobid);
  }
 
  /**
   * Set the max number of attempts before we declare a TIP as "failed"
   */
  private void setMaxTaskAttempts() {
    if (isMapTask()) {
      this.maxTaskAttempts = conf.getMaxMapAttempts();
    } else {
      this.maxTaskAttempts = conf.getMaxReduceAttempts();
    }
  }
   
  /**
   * Return the index of the tip within the job, so
   * "task_200707121733_1313_0002_m_012345" would return 12345;
   * @return int the tip index
   */
  public int idWithinJob() {
    return partition;
  }   

  public boolean isJobCleanupTask() {
   return jobCleanup;
  }
 
  public void setJobCleanupTask() {
    jobCleanup = true;
  }

  public boolean isJobSetupTask() {
    return jobSetup;
  }
   
  public void setJobSetupTask() {
    jobSetup = true;
  }

  public boolean isOnlyCommitPending() {
    for (TaskStatus t : taskStatuses.values()) {
      if (t.getRunState() == TaskStatus.State.COMMIT_PENDING) {
        return true;
      }
    }
    return false;
  }
  public boolean isCommitPending(TaskAttemptID taskId) {
    TaskStatus t = taskStatuses.get(taskId);
    if (t == null) {
      return false;
    }
    return t.getRunState() ==  TaskStatus.State.COMMIT_PENDING;
  }
 
  /**
   * @return true if using processing rate to determine whether the task should
   * be speculated
   */
  public boolean isUsingProcessingRateForSpeculation() {
    return useProcessingRateForSpeculation;
  }
 
  /**
   * Initialization common to Map and Reduce
   */
  void init(JobID jobId) {
    this.startTime = JobTracker.getClock().getTime();
    this.id = new TaskID(jobId, isMapTask(), partition);
    this.skipping = startSkipping();
    long speculativeDuration;
    if (isMapTask()) {
      this.speculativeLag = conf.getMapSpeculativeLag();
      speculativeDuration = conf.getMapSpeculativeDuration();
    } else {
      this.speculativeLag = conf.getReduceSpeculativeLag();
      speculativeDuration = conf.getReduceSpeculativeDuration();
    }

    // speculate only if 1/(1000 * progress_rate) > speculativeDuration
    // ie. :
    // speculate only if progress_rate < 1/(1000 * speculativeDuration)

    if (speculativeDuration > 0) {
      this.maxProgressRateForSpeculation = 1.0/(1000.0*speculativeDuration);
    } else {
      // disable this check for durations <= 0
      this.maxProgressRateForSpeculation = -1.0;
    }
   
    this.useProcessingRateForSpeculation =
        conf.getBoolean("mapreduce.job.speculative.using.processing.rate",
            false);
  }

  ////////////////////////////////////
  // Accessors, info, profiles, etc.
  ////////////////////////////////////

 
  /**
   * Return the dispatch time
   */
  public long getDispatchTime(TaskAttemptID taskid){
    Long l = dispatchTimeMap.get(taskid);
    if (l != null) {
      return l.longValue();
    }
    return 0;
  }

  public long getLastDispatchTime(){
    return this.lastDispatchTime;
  }
 
  /**
   * Set the dispatch time
   */
  public void setDispatchTime(TaskAttemptID taskid, long disTime){
    dispatchTimeMap.put(taskid, disTime);
    this.lastDispatchTime = disTime;
  }
  /**
   * Return the start time
   */
  public long getStartTime() {
    return startTime;
  }
 
  /**
   * Return the exec start time
   */
  public long getExecStartTime() {
    return execStartTime;
  }
 
  /**
   * Set the exec start time
   */
  public void setExecStartTime(long startTime) {
    execStartTime = startTime;
  }
 
  /**
   * Return the exec finish time
   */
  public long getExecFinishTime() {
    return execFinishTime;
  }

  /**
   * Set the exec finish time
   */
  public void setExecFinishTime(long finishTime) {
    execFinishTime = finishTime;
    JobHistory.Task.logUpdates(id, execFinishTime); // log the update
  }
 
  /**
   * Return the parent job
   */
  public JobInProgressTraits getJob() {
    return job;
  }
  /**
   * Return an ID for this task, not its component taskid-threads
   */
  public TaskID getTIPId() {
    return this.id;
  }
  /**
   * Whether this is a map task
   */
  public boolean isMapTask() {
    return rawSplit != null;
  }
   
  /**
   * Returns the type of the {@link TaskAttemptID} passed.
   * The type of an attempt is determined by the nature of the task and not its
   * id.
   * For example,
   * - Attempt 'attempt_123_01_m_01_0' might be a job-setup task even though it
   *   has a _m_ in its id. Hence the task type of this attempt is JOB_SETUP
   *   instead of MAP.
   * - Similarly reduce attempt 'attempt_123_01_r_01_0' might have failed and is
   *   now supposed to do the task-level cleanup. In such a case this attempt
   *   will be of type TASK_CLEANUP instead of REDUCE.
   */
  TaskType getAttemptType (TaskAttemptID id) {
    if (isCleanupAttempt(id)) {
      return TaskType.TASK_CLEANUP;
    } else if (isJobSetupTask()) {
      return TaskType.JOB_SETUP;
    } else if (isJobCleanupTask()) {
      return TaskType.JOB_CLEANUP;
    } else if (isMapTask()) {
      return TaskType.MAP;
    } else {
      return TaskType.REDUCE;
    }
  }
 
  /**
   * Is the Task associated with taskid is the first attempt of the tip?
   * @param taskId
   * @return Returns true if the Task is the first attempt of the tip
   */ 
  public boolean isFirstAttempt(TaskAttemptID taskId) {
    return firstTaskId == null ? false : firstTaskId.equals(taskId);
  }

  /**
   * Is the Task associated with taskid is the speculative attempt of the tip?
   * @param taskId
   * @return Returns true if the Task is the speculative attempt of the tip
   */ 
  public boolean isSpeculativeAttempt(TaskAttemptID taskId) {
    return speculativeTaskId == null ? false :
              speculativeTaskId.equals(taskId);
  }
 
  /**
   * Is this tip currently running any tasks?
   * @return true if any tasks are running
   */
  public boolean isRunning() {
    return !activeTasks.isEmpty();
  }

  /**
   * Is this attempt currently running ?
   * @param  taskId task attempt id.
   * @return true if attempt taskId is running
   */
  boolean isAttemptRunning(TaskAttemptID taskId) {
    return activeTasks.containsKey(taskId);
  }
   
  TaskAttemptID getSuccessfulTaskid() {
    return successfulTaskId;
  }
 
  private void setSuccessfulTaskid(TaskAttemptID successfulTaskId) {
    this.successfulTaskId = successfulTaskId;
  }
 
  private void resetSuccessfulTaskid() {
    this.successfulTaskId = null;
  }
 
  /**
   * Is this tip complete?
   *
   * @return <code>true</code> if the tip is complete, else <code>false</code>
   */
  public synchronized boolean isComplete() {
    return (completes > 0);
  }

  /**
   * Is the given taskid the one that took this tip to completion?
   *
   * @param taskid taskid of attempt to check for completion
   * @return <code>true</code> if taskid is complete, else <code>false</code>
   */
  public boolean isComplete(TaskAttemptID taskid) {
    return ((completes > 0)
            && taskid.equals(getSuccessfulTaskid()));
  }

  /**
   * Is the tip a failure?
   *
   * @return <code>true</code> if tip has failed, else <code>false</code>
   */
  public boolean isFailed() {
    return failed;
  }

  /**
   * Number of times the TaskInProgress has failed.
   */
  public int numTaskFailures() {
    return numTaskFailures;
  }

  /**
   * Number of times the TaskInProgress has been killed by the framework.
   */
  public int numKilledTasks() {
    return numKilledTasks;
  }

  /**
   * Get the overall progress (from 0 to 1.0) for this TIP
   */
  public double getProgress() {
    return progress;
  }

  /**
   * Get the last known progress rate for this task
   */
  public double getProgressRate() {
    return progressRate;
  }
  /**
   * Get the processing rate for this task (e.g. bytes/ms in reduce)
   */
  public double getProcessingRate(TaskStatus.Phase phase) {
    // we don't have processing rate information for the starting and cleaning
    // up phase
    if (phase != TaskStatus.Phase.MAP &&
        phase != TaskStatus.Phase.SHUFFLE &&
        phase != TaskStatus.Phase.SORT &&
        phase != TaskStatus.Phase.REDUCE) {
      return 0;
    }
    return processingRates.getRate(getProcessingPhase());
  }
  /**
   * Get the phase of processing
   */
  public Phase getProcessingPhase() {
    return processingPhase;
  }
 
  /**
   * Get the task's counters
   */
  public Counters getCounters() {
    return counters;
  }

  /**
   * Returns whether a component task-thread should be
   * closed because the containing JobInProgress has completed
   * or the task is killed by the user
   */
  public boolean shouldClose(TaskAttemptID taskid) {
    /**
     * If the task hasn't been closed yet, and it belongs to a completed
     * TaskInProgress close it.
     *
     * However, for completed map tasks we do not close the task which
     * actually was the one responsible for _completing_ the TaskInProgress.
     */

    if (tasksReportedClosed.contains(taskid)) {
      if (tasksToKill.keySet().contains(taskid))
        return true;
      else
        return false;
    }

    boolean close = false;
    TaskStatus ts = taskStatuses.get(taskid);

    if ((ts != null) &&
        ((this.failed) ||
        ((job.getStatus().getRunState() != JobStatus.RUNNING &&
         (job.getStatus().getRunState() != JobStatus.PREP))))) {
      tasksReportedClosed.add(taskid);
      close = true;
    } else if ((completes > 0) && // isComplete() is synchronized!
               !(isMapTask() && !jobSetup &&
                 !jobCleanup && isComplete(taskid))) {
      tasksReportedClosed.add(taskid);
      close = true;
    } else if (isCommitPending(taskid) && !shouldCommit(taskid)) {
      tasksReportedClosed.add(taskid);
      close = true;
    } else {
      close = tasksToKill.keySet().contains(taskid);
    }  
    return close;
  }

  /**
   * Commit this task attempt for the tip.
   * @param taskid
   */
  public void doCommit(TaskAttemptID taskid) {
    taskToCommit = taskid;
  }

  /**
   * Returns whether the task attempt should be committed or not
   */
  public boolean shouldCommit(TaskAttemptID taskid) {
    return !isComplete() && isCommitPending(taskid) &&
           taskToCommit.equals(taskid);
  }

  /**
   * Creates a "status report" for this task.  Includes the
   * task ID and overall status, plus reports for all the
   * component task-threads that have ever been started.
   */
  synchronized TaskReport generateSingleReport() {
    ArrayList<String> diagnostics = new ArrayList<String>();
    for (List<String> l : taskDiagnosticData.values()) {
      diagnostics.addAll(l);
    }
    TIPStatus currentStatus = null;
    if (isRunning() && !isComplete()) {
      currentStatus = TIPStatus.RUNNING;
    } else if (isComplete()) {
      currentStatus = TIPStatus.COMPLETE;
    } else if (wasKilled()) {
      currentStatus = TIPStatus.KILLED;
    } else if (isFailed()) {
      currentStatus = TIPStatus.FAILED;
    } else if (!(isComplete() || isRunning() || wasKilled())) {
      currentStatus = TIPStatus.PENDING;
    }
   
    TaskReport report = new TaskReport
      (getTIPId(), (float)progress, state,
       diagnostics.toArray(new String[diagnostics.size()]),
       currentStatus, execStartTime, execFinishTime, counters);
    if (currentStatus == TIPStatus.RUNNING) {
      report.setRunningTaskAttempts(activeTasks.keySet());
    } else if (currentStatus == TIPStatus.COMPLETE) {
      report.setSuccessfulAttempt(getSuccessfulTaskid());
    }
    return report;
  }

  /**
   * Get the diagnostic messages for a given task within this tip.
   *
   * @param taskId the id of the required task
   * @return the list of diagnostics for that task
   */
  synchronized List<String> getDiagnosticInfo(TaskAttemptID taskId) {
    return taskDiagnosticData.get(taskId);
  }
   
  ////////////////////////////////////////////////
  // Update methods, usually invoked by the owning
  // job.
  ////////////////////////////////////////////////
 
  /**
   * Save diagnostic information for a given task.
   *
   * @param taskId id of the task
   * @param diagInfo diagnostic information for the task
   */
  public void addDiagnosticInfo(TaskAttemptID taskId, String diagInfo) {
    List<String> diagHistory = taskDiagnosticData.get(taskId);
    if (diagHistory == null) {
      diagHistory = new ArrayList<String>();
      taskDiagnosticData.put(taskId, diagHistory);
    }
    diagHistory.add(diagInfo);
  }
 
  /**
   * A status message from a client has arrived.
   * It updates the status of a single component-thread-task,
   * which might result in an overall TaskInProgress status update.
   *
   * Diagnostics carried by the status are always logged and stored, and the
   * failed-range state is updated while skipping mode is on. The status
   * itself is rejected (nothing stored, progress not recomputed) when it is
   * a duplicate terminal update, when it would move a finished or
   * commit-pending attempt back to RUNNING/UNASSIGNED, or when the attempt
   * is already FAILED/KILLED.
   *
   * @param status the status reported for one attempt
   * @return has the task changed its state noticeably? True for an attempt
   *         with no previous status, otherwise true when the run state
   *         differs from the previously stored one; false for every
   *         rejected update.
   */
  synchronized boolean updateStatus(TaskStatus status) {
    TaskAttemptID taskid = status.getTaskID();
    String taskTracker = status.getTaskTracker();
    String diagInfo = status.getDiagnosticInfo();
    TaskStatus oldStatus = taskStatuses.get(taskid);
    boolean changed = true;
    // Keep any diagnostics, even if the status is rejected further down.
    if (diagInfo != null && diagInfo.length() > 0) {
      long runTime = status.getRunTime();
      LOG.info("Error from " + taskid + " on " + taskTracker + " runTime(msec) "
        + runTime + ": " + diagInfo);
      addDiagnosticInfo(taskid, diagInfo);
    }
   
    if(skipping) {
      failedRanges.updateState(status);
    }
   
    if (oldStatus != null) {
      TaskStatus.State oldState = oldStatus.getRunState();
      TaskStatus.State newState = status.getRunState();
         
      // We should never receive a duplicate success/failure/killed
      // status update for the same taskid! This is a safety check,
      // and is addressed better at the TaskTracker to ensure this.
      // @see {@link TaskTracker.transmitHeartbeat()}
      if ((newState != TaskStatus.State.RUNNING &&
           newState != TaskStatus.State.COMMIT_PENDING &&
           newState != TaskStatus.State.FAILED_UNCLEAN &&
           newState != TaskStatus.State.KILLED_UNCLEAN &&
           newState != TaskStatus.State.UNASSIGNED) &&
          (oldState == newState)) {
        LOG.warn("Recieved duplicate status update of '" + newState +
                 "' for '" + taskid + "' of TIP '" + getTIPId() + "'" +
                 "oldTT=" + oldStatus.getTaskTracker() +
                 " while newTT=" + status.getTaskTracker());
        return false;
      }

      // The task is not allowed to move from completed back to running.
      // We have seen out-of-order status messages moving tasks from complete
      // to running. This is a spot fix, but it should be addressed more
      // globally.
      if ((newState == TaskStatus.State.RUNNING ||
          newState == TaskStatus.State.UNASSIGNED) &&
          (oldState == TaskStatus.State.FAILED ||
           oldState == TaskStatus.State.KILLED ||
           oldState == TaskStatus.State.FAILED_UNCLEAN ||
           oldState == TaskStatus.State.KILLED_UNCLEAN ||
           oldState == TaskStatus.State.SUCCEEDED ||
           oldState == TaskStatus.State.COMMIT_PENDING)) {
        return false;
      }
     
      // Do not accept any status once the task is marked FAILED/KILLED.
      // This is to handle the case of the JobTracker timing out a task
      // due to launch delay, but the TT comes back with any state or
      // the TT got expired. The attempt is queued in tasksToKill with
      // shouldFail = true.
      if (oldState == TaskStatus.State.FAILED ||
          oldState == TaskStatus.State.KILLED) {
        tasksToKill.put(taskid, true);
        return false;   
      }
         
      changed = oldState != newState;
    }
    // if task is a cleanup attempt, do not replace the complete status,
    // update only specific fields.
    // For example, startTime should not be updated,
    // but finishTime has to be updated.
    if (!isCleanupAttempt(taskid)) {
      taskStatuses.put(taskid, status);
      // we don't want to include setup tasks in the task execution stats;
      // rates are only tracked when the job speculates this kind of task
      if (!isJobSetupTask() && !isJobCleanupTask() && ((isMapTask() && job.hasSpeculativeMaps()) ||
          (!isMapTask() && job.hasSpeculativeReduces()))) {
        processingPhase = status.getPhase();
        updateProgressRate(JobTracker.getClock().getTime());
        if (useProcessingRateForSpeculation) {
          updateProcessingRate(JobTracker.getClock().getTime());
        }
      }
    } else {
      // NOTE(review): this dereferences the stored status without a null
      // check -- presumably a cleanup attempt always has one; confirm.
      taskStatuses.get(taskid).statusUpdate(status.getRunState(),
        status.getProgress(), status.getStateString(), status.getPhase(),
        status.getFinishTime());
    }

    // Recompute progress
    recomputeProgress();
    return changed;
  }

  /**
   * Indicate that one of the taskids in this TaskInProgress
   * has failed.
   */
  public void incompleteSubTask(TaskAttemptID taskid,
                                JobStatus jobStatus) {
    //
    // Note the failure and its location
    //
    TaskStatus status = taskStatuses.get(taskid);
    String trackerName;
    String trackerHostName = null;
    TaskStatus.State taskState = TaskStatus.State.FAILED;
    if (status != null) {
      trackerName = status.getTaskTracker();
      trackerHostName =
        JobInProgressTraits.convertTrackerNameToHostName(trackerName);
      // Check if the user manually KILLED/FAILED this task-attempt...
      Boolean shouldFail = tasksToKill.remove(taskid);
      if (shouldFail != null) {
        if (status.getRunState() == TaskStatus.State.FAILED ||
            status.getRunState() == TaskStatus.State.KILLED) {
          taskState = (shouldFail) ? TaskStatus.State.FAILED :
                                     TaskStatus.State.KILLED;
        } else {
          taskState = (shouldFail) ? TaskStatus.State.FAILED_UNCLEAN :
                                     TaskStatus.State.KILLED_UNCLEAN;
         
        }
        status.setRunState(taskState);
        addDiagnosticInfo(taskid, "Task has been " + taskState + " by the user" );
      }
      taskState = status.getRunState();
      if (taskState != TaskStatus.State.FAILED &&
          taskState != TaskStatus.State.KILLED &&
          taskState != TaskStatus.State.FAILED_UNCLEAN &&
          taskState != TaskStatus.State.KILLED_UNCLEAN) {
        LOG.info("Task '" + taskid + "' running on '" + trackerName +
                "' in state: '" + taskState + "' being failed!");
        status.setRunState(TaskStatus.State.FAILED);
        taskState = TaskStatus.State.FAILED;
      }

      // tasktracker went down and failed time was not reported.
      if (0 == status.getFinishTime()){
        status.setFinishTime(JobTracker.getClock().getTime());
      }
    }

    this.activeTasks.remove(taskid);
   
    // Since we do not fail completed reduces (whose outputs go to hdfs), we
    // should note this failure only for completed maps, only if this taskid;
    // completed this map. however if the job is done, there is no need to
    // manipulate completed maps
    if (this.isMapTask() && !jobSetup && !jobCleanup && isComplete(taskid) &&
        jobStatus.getRunState() != JobStatus.SUCCEEDED) {
      this.completes--;
     
      // Reset the successfulTaskId since we don't have a SUCCESSFUL task now
      resetSuccessfulTaskid();
    }

    // Note that there can be failures of tasks that are hosted on a machine
    // that has not yet registered with restarted jobtracker
    // recalculate the counts only if its a genuine failure
    if (tasks.contains(taskid)) {
      if (taskState == TaskStatus.State.FAILED) {
        numTaskFailures++;
        machinesWhereFailed.add(trackerHostName);
        if(maxSkipRecords>0) {
          //skipping feature enabled
          LOG.debug("TaskInProgress adding" + status.getNextRecordRange());
          failedRanges.add(status.getNextRecordRange());
          skipping = startSkipping();
        }

      } else if (taskState == TaskStatus.State.KILLED) {
        numKilledTasks++;
      }
    }

    if (numTaskFailures >= maxTaskAttempts) {
      LOG.info("TaskInProgress " + getTIPId() + " has failed " + numTaskFailures + " times.");
      kill();
    }
  }
 
  /**
   * Get whether to start skipping mode.
   */
  private boolean startSkipping() {
    if(maxSkipRecords>0 &&
        numTaskFailures>=SkipBadRecords.getAttemptsToStartSkipping(conf)) {
      return true;
    }
    return false;
  }

  /**
   * Finalize the <b>completed</b> task; note that this might not be the first
   * task-attempt of the {@link TaskInProgress} and hence might be declared
   * {@link TaskStatus.State.SUCCEEDED} or {@link TaskStatus.State.KILLED}
   *
   * @param taskId id of the completed task-attempt
   * @param finalTaskState final {@link TaskStatus.State} of the task-attempt
   */
  private void completedTask(TaskAttemptID taskId, TaskStatus.State finalTaskState) {
    TaskStatus status = taskStatuses.get(taskId);
    status.setRunState(finalTaskState);
    activeTasks.remove(taskId);
  }
 
  /**
   * Indicate that one of the taskids in this already-completed
   * TaskInProgress has successfully completed; hence we mark this
   * taskid as {@link TaskStatus.State.KILLED}.
   */
  void alreadyCompletedTask(TaskAttemptID taskid) {
    // 'KILL' the task
    completedTask(taskid, TaskStatus.State.KILLED);
   
    // Note the reason for the task being 'KILLED'
    addDiagnosticInfo(taskid, "Already completed TIP");
   
    LOG.info("Already complete TIP " + getTIPId() +
             " has completed task " + taskid);
  }

  /**
   * Indicate that one of the taskids in this TaskInProgress
   * has successfully completed!
   */
  public void completed(TaskAttemptID taskid) {
    //
    // Record that this taskid is complete
    //
    completedTask(taskid, TaskStatus.State.SUCCEEDED);
       
    // Note the successful taskid
    setSuccessfulTaskid(taskid);
   
    //
    // Now that the TIP is complete, the other speculative
    // subtasks will be closed when the owning tasktracker
    // reports in and calls shouldClose() on this object.
    //

    this.completes++;
    this.execFinishTime = JobTracker.getClock().getTime();
    recomputeProgress();
   
  }

  /**
   * Get the split locations
   */
  public String[] getSplitLocations() {
    if (isMapTask() && !jobSetup && !jobCleanup) {
      return rawSplit.getLocations();
    }
    return new String[0];
  }
 
  /**
   * Get the Status of the tasks managed by this TIP
   */
  public TaskStatus[] getTaskStatuses() {
    return taskStatuses.values().toArray(new TaskStatus[taskStatuses.size()]);
  }

  /**
   * Get all the {@link TaskAttemptID}s in this {@link TaskInProgress}
   */
  TaskAttemptID[] getAllTaskAttemptIDs() {
    return tasks.toArray(new TaskAttemptID[tasks.size()]);
  }
 
  /**
   * Get the status of the specified task
   * @param taskid
   * @return
   */
  public TaskStatus getTaskStatus(TaskAttemptID taskid) {
    return taskStatuses.get(taskid);
  }
  /**
   * The TIP's been ordered kill()ed.
   */
  public void kill() {
    if (isComplete() || failed) {
      return;
    }
    this.failed = true;
    killed = true;
    this.execFinishTime = JobTracker.getClock().getTime();
    recomputeProgress();
  }

  /**
   * Was the task killed?
   * @return true if the task killed
   */
  public boolean wasKilled() {
    return killed;
  }
 
  /**
   * Kill the given task
   */
  boolean killTask(TaskAttemptID taskId, boolean shouldFail, String diagnosticInfo) {
    TaskStatus st = taskStatuses.get(taskId);
    if(st != null && (st.getRunState() == TaskStatus.State.RUNNING
        || st.getRunState() == TaskStatus.State.COMMIT_PENDING ||
        st.inTaskCleanupPhase() ||
        st.getRunState() == TaskStatus.State.UNASSIGNED)
        && tasksToKill.put(taskId, shouldFail) == null ) {
      addDiagnosticInfo(taskId, diagnosticInfo);
      LOG.info(diagnosticInfo);
      return true;
    }
    return false;
  }

  /**
   * This method is called whenever there's a status change
   * for one of the TIP's sub-tasks.  It recomputes the overall
   * progress for the TIP.  We examine all sub-tasks and find
   * the one that's most advanced (and non-failed).
   */
  void recomputeProgress() {
    if (isComplete()) {
      this.progress = 1;
      // update the counters and the state
      TaskStatus completedStatus = taskStatuses.get(getSuccessfulTaskid());
      this.counters = completedStatus.getCounters();
      this.state = completedStatus.getStateString();
    } else if (failed) {
      this.progress = 0;
      // reset the counters and the state
      this.state = "";
      this.counters = new Counters();
    } else {
      double bestProgress = 0;
      String bestState = "";
      Counters bestCounters = new Counters();
      for (Iterator<TaskAttemptID> it = taskStatuses.keySet().iterator(); it.hasNext();) {
        TaskAttemptID taskid = it.next();
        TaskStatus status = taskStatuses.get(taskid);
        if (status.getRunState() == TaskStatus.State.SUCCEEDED) {
          bestProgress = 1;
          bestState = status.getStateString();
          bestCounters = status.getCounters();
          break;
        } else if (status.getRunState() == TaskStatus.State.COMMIT_PENDING) {
          //for COMMIT_PENDING, we take the last state that we recorded
          //when the task was RUNNING
          bestProgress = this.progress;
          bestState = this.state;
          bestCounters = this.counters;
        } else if (status.getRunState() == TaskStatus.State.RUNNING) {
          if (status.getProgress() >= bestProgress) {
            bestProgress = status.getProgress();
            bestState = status.getStateString();
            if (status.getIncludeCounters()) {
              bestCounters = status.getCounters();
            } else {
              bestCounters = this.counters;
            }
          }
        }
      }
      this.progress = bestProgress;
      this.state = bestState;
      this.counters = bestCounters;
    }
  }

  /////////////////////////////////////////////////
  // "Action" methods that actually require the TIP
  // to do something.
  /////////////////////////////////////////////////

  /**
   * Return whether this TIP still needs to run
   */
  boolean isRunnable() {
    return !failed && (completes == 0);
  }

 

  /**
   * Can this task be speculated? This requires that it isn't done or almost
   * done and that it isn't already being speculatively executed.
   *
   * Added for use by queue scheduling algorithms.
   * @param currentTime
   */
  boolean canBeSpeculated(long currentTime) {
    if (skipping || !isRunnable() || !isRunning() ||
        completes != 0 || isOnlyCommitPending() ||
        activeTasks.size() > MAX_TASK_EXECS) {
      if (isMapTask() ? job.shouldLogCannotspeculativeMaps() :
            job.shouldLogCannotspeculativeReduces()) {
           LOG.info("Task "+ getTIPId() + " cannot be speculated because of "
               + "skipping = "+ skipping + " isRunnable() = "+ isRunnable()
               + " isRunning() = "+ isRunning() + " completes = " + completes
               + " isOnlyCommitPending() = "+ isOnlyCommitPending() 
               + " activetask-size = "+ activeTasks.size()
               + " MAX_TASK_EXECS = " + MAX_TASK_EXECS);
      }
      return false;
    }

    if (isSpeculativeForced()) {
      return true;
    }

    // no speculation for first few seconds
    if (currentTime - lastDispatchTime < speculativeLag) {
      if (isMapTask() ? job.shouldLogCannotspeculativeMaps() :
            job.shouldLogCannotspeculativeReduces()) {
           LOG.info("Task "+ getTIPId() + " cannot be speculated because of "
               + "no speculation for first few seconds");
      }
      return false;
    }

    // if the task is making progress fast enough to complete within
    // the acceptable duration allowed for each task - do not speculate
    if ((maxProgressRateForSpeculation > 0) &&
        (progressRate > maxProgressRateForSpeculation)) {
      if (isMapTask() ? job.shouldLogCannotspeculativeMaps() :
            job.shouldLogCannotspeculativeReduces()) {
           LOG.info("Task "+ getTIPId() +" cannot be speculated because "
               + "the task progress rate is fast enough to complete."
               + " maxProgressRateForSpeculation = "
               + maxProgressRateForSpeculation
               + " and progressRate = " + progressRate);
      }
      return false;
    }

    if (isMapTask() ? job.shouldSpeculateAllRemainingMaps() :
                      job.shouldSpeculateAllRemainingReduces()) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Speculate " + getTIPId() +
            " because the job is almost finished");
      }
      return true;
    }
    if (useProcessingRateForSpeculation) {
      return canBeSpeculatedUsingProcessingRate(currentTime);
    } else {
      return canBeSpeculatedUsingProgressRate(currentTime);
    }
  }
 
  boolean canBeSpeculatedUsingProgressRate(long currentTime) {
    DataStatistics taskStats = job.getRunningTaskStatistics(isMapTask());

    if (LOG.isDebugEnabled()) {
      LOG.debug("activeTasks.size(): " + activeTasks.size() + " "
          + activeTasks.firstKey() + " task's progressrate: " +
          progressRate +
          " taskStats : " + taskStats);
    }
   
    // Find if task should be speculated based on standard deviation
    // the max difference allowed between the tasks's progress rate
    // and the mean progress rate of sibling tasks.

    double maxDiff = (taskStats.std() == 0 ?
                       taskStats.mean()/3 :
                        job.getSlowTaskThreshold() * taskStats.std());

    // if stddev > mean - we are stuck. cap the max difference at a
    // more meaningful number.
    maxDiff = Math.min(maxDiff, taskStats.mean() * job.getStddevMeanRatioMax());
    boolean canBeSpeculated = (taskStats.mean() - progressRate > maxDiff);
   
    if (canBeSpeculated) {
      LOG.info("Task " + getTIPId() + " can be speculated with progressRate = "
          + progressRate + " and taskStats = " + taskStats);
    }else{
      if (isMapTask() ? job.shouldLogCannotspeculativeMaps() :
            job.shouldLogCannotspeculativeReduces()) {
           LOG.info("Task "+ getTIPId() + " cannot be speculated with progressRate = "
           + progressRate + " and taskStats = " + taskStats
           + " and maxDiff = "+ maxDiff);
      }
    }
    return canBeSpeculated;
  }

 
  /**
   * For the map task, using the bytes processed/sec as the processing rate
   * For the reduce task, using different rate for different phase:
   * copy: using the bytes copied/sec as the processing rate
   * sort: using the accumulated progress rate as the processing rate
   * reduce: using the the bytes processed/sec as the processing rate
   * @param currentTime
   * @return
   */
  boolean canBeSpeculatedUsingProcessingRate(long currentTime) {

    TaskStatus.Phase p = getProcessingPhase();
    // check if the task is on one of following four phases
    if ((p != TaskStatus.Phase.MAP) &&
        (p != TaskStatus.Phase.SHUFFLE) &&
        (p != TaskStatus.Phase.SORT) &&
        (p != TaskStatus.Phase.REDUCE)) {
      return false;
    }
   
    DataStatistics taskStats = job.getRunningTaskStatistics(p);
    if (LOG.isDebugEnabled()) {
      LOG.debug("TaskID: " + this.id + "processing phase is " + p +
          " and processing rate for this phase is " +
          getProcessingRate(p));
    }
    // Find if task should be speculated based on standard deviation
    // the max difference allowed between the tasks's progress rate
    // and the mean progress rate of sibling tasks.
   
    double maxDiff = (taskStats.std() == 0 ?
        taskStats.mean()/3 :
          job.getSlowTaskThreshold() * taskStats.std());
   
    // if stddev > mean - we are stuck. cap the max difference at a
    // more meaningful number.
    maxDiff = Math.min(maxDiff, taskStats.mean() * job.getStddevMeanRatioMax());

    return (taskStats.mean() - processingRates.getRate(p) > maxDiff);
  }
   
  /**
   * Return a Task that can be sent to a TaskTracker for execution.
   */
  public Task getTaskToRun(String taskTracker) {

    // Create the 'taskid'; do not count the 'killed' tasks against the job!
    TaskAttemptID taskid = null;
    if (nextTaskId < (MAX_TASK_EXECS + maxTaskAttempts + numKilledTasks)) {
      // Make sure that the attempts are unqiue across restarts
      int attemptId = job.getNumRestarts() * NUM_ATTEMPTS_PER_RESTART + nextTaskId;
      taskid = new TaskAttemptID( id, attemptId);
      ++nextTaskId;
    } else {
      LOG.warn("Exceeded limit of " + (MAX_TASK_EXECS + maxTaskAttempts) +
              " (plus " + numKilledTasks + " killed)"  +
              " attempts for the tip '" + getTIPId() + "'");
      return null;
    }
    //keep track of the last time we started an attempt at this TIP
    //used to calculate the progress rate of this TIP
    setDispatchTime(taskid, JobTracker.getClock().getTime());
    if (0 == execStartTime){
      // assume task starts running now
      execStartTime = JobTracker.getClock().getTime();
    }
    return addRunningTask(taskid, taskTracker);
  }
 
  public Task addRunningTask(TaskAttemptID taskid, String taskTracker) {
    return addRunningTask(taskid, taskTracker, false);
  }
 
  /**
   * Adds a previously running task to this tip. This is used in case of
   * jobtracker restarts.
   */
  public Task addRunningTask(TaskAttemptID taskid,
                             String taskTracker,
                             boolean taskCleanup) {
    // 1 slot is enough for taskCleanup task
    int numSlotsNeeded = taskCleanup ? 1 : numSlotsRequired;
    // create the task
    Task t = null;
    if (isMapTask()) {
      LOG.debug("attempt " + numTaskFailures + " sending skippedRecords "
          + failedRanges.getIndicesCount());
      String splitClass = null;
      BytesWritable split;
      if (!jobSetup && !jobCleanup) {
        splitClass = rawSplit.getClassName();
        split = rawSplit.getBytes();
      } else {
        split = new BytesWritable();
      }
      t = new MapTask(jobFile, taskid, partition, splitClass, split,
                      numSlotsNeeded, job.getUser());
    } else {
      t = new ReduceTask(jobFile, taskid, partition, numMaps,
                         numSlotsNeeded, job.getUser());
    }
    if (jobCleanup) {
      t.setJobCleanupTask();
    }
    if (jobSetup) {
      t.setJobSetupTask();
    }
    if (taskCleanup) {
      t.setTaskCleanupTask();
      t.setState(taskStatuses.get(taskid).getRunState());
      cleanupTasks.put(taskid, taskTracker);
    }
    t.setConf(conf);
    LOG.debug("Launching task with skipRanges:"+failedRanges.getSkipRanges());
    t.setSkipRanges(failedRanges.getSkipRanges());
    t.setSkipping(skipping);
    if(failedRanges.isTestAttempt()) {
      t.setWriteSkipRecs(false);
    }

    if (activeTasks.size() >= 1) {
      speculativeTaskId = taskid;
    } else {
      speculativeTaskId = null;
    }
    activeTasks.put(taskid, taskTracker);
    tasks.add(taskid);

    // Ask JobTracker to note that the task exists
    // jobtracker.createTaskEntry(taskid, taskTracker, this);

    /*
      // code to find call paths to createTaskEntry
      StackTraceElement[] stackTraceElements = Thread.currentThread().getStackTrace();
      boolean found = false;
      for (StackTraceElement s: stackTraceElements) {
      if (s.getMethodName().indexOf("heartbeat") != -1 ||
      s.getMethodName().indexOf("findTask") != -1 ||
      s.getMethodName().indexOf("createAndAddAttempt") != -1 ||
      s.getMethodName().indexOf("processTaskAttempt") != -1) {
      found = true;
      break;
      }
      }

      if (!found) {
      RuntimeException e = new RuntimeException ("calling addRunningTask from outside heartbeat");
      LOG.info(StringUtils.stringifyException(e));
      throw (e);
      }
    */

    // check and set the first attempt
    if (firstTaskId == null) {
      firstTaskId = taskid;
    }
    return t;
  }

  boolean isRunningTask(TaskAttemptID taskid) {
    TaskStatus status = taskStatuses.get(taskid);
    return status != null && status.getRunState() == TaskStatus.State.RUNNING;
  }
 
  boolean isCleanupAttempt(TaskAttemptID taskid) {
    return cleanupTasks.containsKey(taskid);
  }
 
  String machineWhereCleanupRan(TaskAttemptID taskid) {
    return cleanupTasks.get(taskid);
  }
 
  String machineWhereTaskRan(TaskAttemptID taskid) {
    return taskStatuses.get(taskid).getTaskTracker();
  }
   
  boolean wasKilled(TaskAttemptID taskid) {
    return tasksToKill.containsKey(taskid);
  }
 
  /**
   * Has this task already failed on this machine?
   * @param trackerHost The task tracker hostname
   * @return Has it failed?
   */
  public boolean hasFailedOnMachine(String trackerHost) {
    return machinesWhereFailed.contains(trackerHost);
  }
   
  /**
   * Was this task ever scheduled to run on this machine?
   * @param trackerHost The task tracker hostname
   * @param trackerName The tracker name
   * @return Was task scheduled on the tracker?
   */
  public boolean hasRunOnMachine(String trackerHost, String trackerName) {
    return this.activeTasks.values().contains(trackerName) ||
      hasFailedOnMachine(trackerHost);
  }
  /**
   * Get the number of machines where this task has failed.
   * @return the size of the failed machine set
   */
  public int getNumberOfFailedMachines() {
    return machinesWhereFailed.size();
  }
   
  /**
   * Get the id of this map or reduce task.
   * @return The index of this tip in the maps/reduces lists.
   */
  public int getIdWithinJob() {
    return partition;
  }
   
  /**
   * Set the event number that was raised for this tip
   */
  public void setSuccessEventNumber(int eventNumber) {
    successEventNumber = eventNumber;
  }
      
  /**
   * Get the event number that was raised for this tip
   */
  public int getSuccessEventNumber() {
    return successEventNumber;
  }
 
  /**
   * Gets the Node list of input split locations sorted in rack order.
   */
  public String getSplitNodes() {
    if (!isMapTask() || jobSetup || jobCleanup) {
      return "";
    }
    String[] nodes = rawSplit.getLocations();
    if (nodes == null || nodes.length == 0) {
      return "";
    }
    StringBuffer ret = new StringBuffer(nodes[0]);
    for(int i = 1; i < nodes.length;i++) {
      ret.append(",");
      ret.append(nodes[i]);
    }
    return ret.toString();
  }

  public long getMapInputSize() {
    if(isMapTask() && !jobSetup && !jobCleanup) {
      return rawSplit.getDataLength();
    } else {
      return 0;
    }
  }
 
  public void clearSplit() {
    rawSplit.clearBytes();
  }


  /**
   * update progress rate for a task
   *
   * The assumption is that the JIP lock is held entering this routine.
   * So it's left unsynchronized. Currently the only places it's called
   * from are TIP.updateStatus and JIP.refreshCandidate*
   */
  public void updateProgressRate(long currentTime) {

    double bestProgressRate = 0;

    for (TaskStatus ts : taskStatuses.values()){
      if (ts.getRunState() == TaskStatus.State.RUNNING  ||
          ts.getRunState() == TaskStatus.State.SUCCEEDED ||
          ts.getRunState() == TaskStatus.State.COMMIT_PENDING) {

        double tsProgressRate = ts.getProgress()/Math.max(1,
            currentTime - getDispatchTime(ts.getTaskID()));
        if (tsProgressRate > bestProgressRate){
          bestProgressRate = tsProgressRate;
        }
      }
    }

    DataStatistics taskStats = job.getRunningTaskStatistics(isMapTask());
    taskStats.updateStatistics(progressRate, bestProgressRate);

    progressRate = bestProgressRate;
  }

  /**
   * Update the processing rate for this task. (e.g. bytes/ms in reduce phase)
   * @param currentTime
   */
  public void updateProcessingRate(long currentTime) {

    double bestMapRate = processingRates.getRate(Phase.MAP);
    double bestShuffleRate = processingRates.getRate(Phase.SHUFFLE);
    double bestSortRate = processingRates.getRate(Phase.SORT);
    double bestReduceRate = processingRates.getRate(Phase.REDUCE);
    // Find the best processing rates. There could be a running task and the
    // speculated task. (should be verified)
    for (TaskStatus ts : taskStatuses.values()){
      // There could be failed/killed/etc tasks - filter those out as they could
      // have had a high processing rate that should no longer be considered.
      if (ts.getRunState() == TaskStatus.State.RUNNING  ||
          ts.getRunState() == TaskStatus.State.SUCCEEDED ||
          ts.getRunState() == TaskStatus.State.COMMIT_PENDING) {
        double mapRate = 0;
        // Since we are not sure if map byte processing rate, or the map
        // record processing rate is better for speculation, offer an option
        if (conf.getBoolean(USE_MAP_RECORDS_PROCESSING_RATE, false)) {
          mapRate = ts.getMapRecordProcessingRate(currentTime);
        } else {
          mapRate = ts.getMapByteProcessingRate(currentTime);
        }
        double shuffleRate = ts.getCopyProcessingRate(currentTime);
        double sortRate = ts.getSortProcessingRate(currentTime);
        double reduceRate = ts.getReduceProcessingRate(currentTime);
       
        if (mapRate > bestMapRate) {
          bestMapRate = mapRate;
        }
        if (shuffleRate > bestShuffleRate) {
          bestShuffleRate = shuffleRate;
        }
        if (sortRate > bestSortRate) {
          bestSortRate = sortRate;
        }
        if (reduceRate > bestReduceRate) {
          bestReduceRate = reduceRate;
        }
      }
    }
   
    ProcessingRates updatedRates = new ProcessingRates(bestMapRate,
        bestShuffleRate, bestSortRate, bestReduceRate);
   
    // Update the statistics for the job
    updateJobStats(Phase.MAP, processingRates, updatedRates);
    updateJobStats(Phase.SHUFFLE, processingRates, updatedRates);
    updateJobStats(Phase.SORT, processingRates, updatedRates);
    updateJobStats(Phase.REDUCE, processingRates, updatedRates);
   
    processingRates = updatedRates;
  }  
 
  /**
   * Helper function that updates the processing rates stats for this job. Only
   * updates the rate in the corresponding phase.
   * @param phase
   * @param oldRates
   * @param newRates
   */
  private void updateJobStats(Phase phase, ProcessingRates oldRates,
      ProcessingRates newRates) {
    DataStatistics stats = job.getRunningTaskStatistics(phase);
    stats.updateStatistics(oldRates.getRate(phase), newRates.getRate(phase));
  }
 
  /**
   * Convert a progress rate to the total duration projected by
   * that progress rate
   */
  private static long progressRateToTotalDuration(double rate) {
    if (rate == 0)
      return Long.MAX_VALUE;

    return (long)(1.0/rate);
  }

  /**
   * This class keeps the records to be skipped during further executions
   * based on failed records from all the previous attempts.
   * It also narrow down the skip records if it is more than the
   * acceptable value by dividing the failed range into half. In this case one
   * half is executed in the next attempt (test attempt).
   * In the test attempt, only the test range gets executed, others get skipped.
   * Based on the success/failure of the test attempt, the range is divided
   * further.
   */
  private class FailedRanges {
    private SortedRanges skipRanges = new SortedRanges();
    private Divide divide;
   
    synchronized SortedRanges getSkipRanges() {
      if(divide!=null) {
        return divide.skipRange;
      }
      return skipRanges;
    }
   
    synchronized boolean isTestAttempt() {
      return divide!=null;
    }
   
    synchronized long getIndicesCount() {
      if(isTestAttempt()) {
        return divide.skipRange.getIndicesCount();
      }
      return skipRanges.getIndicesCount();
    }
   
    synchronized void updateState(TaskStatus status){
      if (isTestAttempt() &&
          (status.getRunState() == TaskStatus.State.SUCCEEDED)) {
        divide.testPassed = true;
        //since it was the test attempt we need to set it to failed
        //as it worked only on the test range
        status.setRunState(TaskStatus.State.FAILED);
       
      }
    }
   
    synchronized void add(Range failedRange) {
      LOG.warn("FailedRange:"+ failedRange);
      if(divide!=null) {
        LOG.warn("FailedRange:"+ failedRange +"  test:"+divide.test +
            "  pass:"+divide.testPassed);
        if(divide.testPassed) {
          //test range passed
          //other range would be bad. test it
          failedRange = divide.other;
        }
        else {
          //test range failed
          //other range would be good.
          failedRange = divide.test;
        }
        //reset
        divide = null;
      }
     
      if(maxSkipRecords==0 || failedRange.getLength()<=maxSkipRecords) {
        skipRanges.add(failedRange);
      } else {
        //start dividing the range to narrow down the skipped
        //records until maxSkipRecords are met OR all attempts
        //get exhausted
        divide = new Divide(failedRange);
      }
    }
   
    class Divide {
      private final SortedRanges skipRange;
      private final Range test;
      private final Range other;
      private boolean testPassed;
      Divide(Range range){
        long half = range.getLength()/2;
        test = new Range(range.getStartIndex(), half);
        other = new Range(test.getEndIndex(), range.getLength()-half);
        //construct the skip range from the skipRanges
        skipRange = new SortedRanges();
        for(Range r : skipRanges.getRanges()) {
          skipRange.add(r);
        }
        skipRange.add(new Range(0,test.getStartIndex()));
        skipRange.add(new Range(test.getEndIndex(),
            (Long.MAX_VALUE-test.getEndIndex())));
      }
    }
   
  }

  TreeMap<TaskAttemptID, String> getActiveTasks() {
    return activeTasks;
  }

  TreeMap<TaskAttemptID, String> getActiveTasksCopy() {
    return new TreeMap<TaskAttemptID, String>(activeTasks);
  }

  int getNumSlotsRequired() {
    return numSlotsRequired;
  }

  /**
   * Force speculative execution if speculation is allowed in JobInProgress
   */
  public void setSpeculativeForced(boolean speculativeForced) {
    this.speculativeForced = speculativeForced;
  }

  /**
   * Is forced speculative execution enabled?
   */
  public boolean isSpeculativeForced() {
    return speculativeForced;
  }
}
TOP

Related Classes of org.apache.hadoop.mapred.TaskInProgress$FailedRanges$Divide

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.