/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.raid;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.lang.reflect.Constructor;
import java.net.InetSocketAddress;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.TreeMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.security.auth.login.LoginException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.tools.DFSck;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobInProgress;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.raid.BlockReconstructor.CorruptBlockReconstructor;
import org.apache.hadoop.raid.DistBlockIntegrityMonitor.Worker.LostFileInfo;
import org.apache.hadoop.raid.LogUtils.LOGRESULTS;
import org.apache.hadoop.raid.LogUtils.LOGTYPES;
import org.apache.hadoop.raid.RaidUtils.RaidInfo;
import org.apache.hadoop.raid.protocol.RaidProtocol;
import org.apache.hadoop.security.UnixUserGroupInformation;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.ToolRunner;

/**
 * Distributed block integrity monitor. Uses parity information to
 * reconstruct lost files.
 *
 * Configuration options:
 *
 * raid.blockfix.filespertask       - number of files to reconstruct in a single
 *                                    map reduce task (i.e., at one mapper node)
 *
 * raid.blockfix.fairscheduler.pool - the pool to use for MR jobs
 *
 * raid.blockfix.maxpendingjobs     - maximum number of MR jobs
*                                    running simultaneously
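 *
 * A minimal sketch (not part of the original source) of overriding these
 * options before constructing the monitor:
 * <pre>
 *   Configuration conf = new Configuration();
 *   conf.setLong("raid.blockfix.filespertask", 20L);    // files per map task
 *   conf.setLong("raid.blockfix.maxpendingjobs", 50L);  // cap on concurrent jobs
 *   BlockIntegrityMonitor monitor = new DistBlockIntegrityMonitor(conf); // throws Exception
 * </pre>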
*/
public class DistBlockIntegrityMonitor extends BlockIntegrityMonitor {
  public final static String[] BLOCKFIXER_MAPREDUCE_KEYS = {
    "mapred.job.tracker",
    "cm.server.address",
    "cm.server.http.address",
    "mapred.job.tracker.corona.proxyaddr",
    "corona.proxy.job.tracker.rpcaddr",
    "corona.system.dir",
    "mapred.temp.dir"
  };
  public final static String BLOCKFIXER = "blockfixer";

  private static final String IN_FILE_SUFFIX = ".in";
  private static final String PART_PREFIX = "part-";
  static final Pattern LIST_CORRUPT_FILE_PATTERN =
      Pattern.compile("blk_-*\\d+\\s+(.*)");
  static final Pattern LIST_DECOMMISSION_FILE_PATTERN =
      Pattern.compile("blk_-*\\d+\\s+(.*)"); // For now this is the same because of how dfsck generates output
  private static final String FILES_PER_TASK =
    "raid.blockfix.filespertask";
  public static final String MAX_PENDING_JOBS =
    "raid.blockfix.maxpendingjobs";
  private static final String HIGH_PRI_SCHEDULER_OPTION =    
    "raid.blockfix.highpri.scheduleroption";       
  private static final String LOW_PRI_SCHEDULER_OPTION =       
    "raid.blockfix.lowpri.scheduleroption";    
  private static final String LOWEST_PRI_SCHEDULER_OPTION =    
    "raid.blockfix.lowestpri.scheduleroption";
  private static final String MAX_FIX_TIME_FOR_FILE =
    "raid.blockfix.max.fix.time.for.file";
  private static final String LOST_FILES_LIMIT =
    "raid.blockfix.corruptfiles.limit";
  private static final String RAIDNODE_BLOCK_FIXER_SCAN_NUM_THREADS_KEY =
    "raid.block.fixer.scan.threads";
  private static final int DEFAULT_BLOCK_FIXER_SCAN_NUM_THREADS = 5;
  private int blockFixerScanThreads = DEFAULT_BLOCK_FIXER_SCAN_NUM_THREADS;
  // The directories checked by the corrupt file monitor, separated by commas
  public static final String RAIDNODE_BLOCK_FIX_SUBMISSION_INTERVAL_KEY =
    "raid.block.fix.submission.interval";
  private static final long DEFAULT_BLOCK_FIX_SUBMISSION_INTERVAL = 5 * 1000;
  public static final String RAIDNODE_BLOCK_FIX_SCAN_SUBMISSION_INTERVAL_KEY =
      "raid.block.fix.scan.submission.interval";
  private static final long DEFAULT_BLOCK_FIX_SCAN_SUBMISSION_INTERVAL = 5 * 1000;
  public static final String RAIDNODE_MAX_NUM_DETECTION_TIME_COLLECTED_KEY =
    "raid.max.num.detection.time.collected";
  public static final int DEFAULT_RAIDNODE_MAX_NUM_DETECTION_TIME_COLLECTED = 100;
  public enum UpdateNumFilesDropped {
    SET,
    ADD
  };

  // default number of files to reconstruct in a task
  private static final long DEFAULT_FILES_PER_TASK = 10L;

  private static final int TASKS_PER_JOB = 50;

  // default number of files to reconstruct simultaneously
  private static final long DEFAULT_MAX_PENDING_JOBS = 100L;

  private static final long DEFAULT_MAX_FIX_TIME_FOR_FILE =
    4 * 60 * 60 * 1000; // 4 hrs.

  private static final int DEFAULT_LOST_FILES_LIMIT = 200000;
  public static final String FAILED_FILE = "failed";
  public static final String SIMULATION_FAILED_FILE = "simulation_failed";
  protected static final Log LOG = LogFactory.getLog(DistBlockIntegrityMonitor.class);
 
  private static final String CORRUPT_FILE_DETECT_TIME = "corrupt_detect_time";

  // number of files to reconstruct in a task
  private long filesPerTask;

  // number of files to reconstruct simultaneously
  final private long maxPendingJobs;

  final private long maxFixTimeForFile;

  final private int lostFilesLimit;

  private static final SimpleDateFormat dateFormat =
    new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
 
  private Worker corruptionWorker = new CorruptionWorker();
  private Worker decommissioningWorker = new DecommissioningWorker();
  private Runnable corruptFileCounterWorker = new CorruptFileCounter();

  static enum RaidCounter {
    FILES_SUCCEEDED, FILES_FAILED, FILES_NOACTION,
    BLOCK_FIX_SIMULATION_FAILED, BLOCK_FIX_SIMULATION_SUCCEEDED,
    FILE_FIX_NUM_READBYTES_REMOTERACK
  }
 
  static enum CorruptFileStatus {
    POTENTIALLY_CORRUPT,
    RAID_UNRECOVERABLE,
    NOT_RAIDED_UNRECOVERABLE,
    NOT_EXIST,
    RECOVERABLE
  }
 
  static enum Priority {
    HIGH  (HIGH_PRI_SCHEDULER_OPTION,   2),
    LOW   (LOW_PRI_SCHEDULER_OPTION,    1),
    LOWEST(LOWEST_PRI_SCHEDULER_OPTION, 0);
   
    public final String configOption;
    private final int underlyingValue;
   
    private Priority(String s, int value) {
      configOption = s;
      underlyingValue = value;
    }
   
    public boolean higherThan (Priority other) {
      return (underlyingValue > other.underlyingValue);
    }
  }
 
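  /**
   * Holds a job's tracking URL and the time it was recorded, so that stale
   * entries can be purged from the tracking-URL map.
   */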
  static public class TrackingUrlInfo {
    String trackingUrl;
    long insertTime;
    public TrackingUrlInfo(String newUrl, long newTime) {
      trackingUrl = newUrl;
      insertTime = newTime;
    }
  }
 
  /**
   * Holds information about a failed file together with the failing task id
   */
  static public class FailedFileInfo {
    String taskId;
    LostFileInfo fileInfo;
    public FailedFileInfo(String newTaskId, LostFileInfo newFileInfo) {
      this.taskId = newTaskId;
      this.fileInfo = newFileInfo;
    }
  }

  public DistBlockIntegrityMonitor(Configuration conf) throws Exception {
    super(conf);
    filesPerTask = DistBlockIntegrityMonitor.getFilesPerTask(getConf());
    maxPendingJobs = DistBlockIntegrityMonitor.getMaxPendingJobs(getConf());
    maxFixTimeForFile = DistBlockIntegrityMonitor.getMaxFixTimeForFile(getConf());
    lostFilesLimit = DistBlockIntegrityMonitor.getLostFilesLimit(getConf());
  }
 
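  /**
   * For each base MapReduce key, overrides it with the value of the
   * corresponding "key.suffix" variant when one is set, so that block fixer
   * jobs can run with their own cluster settings. For example (hypothetical
   * value), with suffix "blockfixer", a configured
   * "mapred.job.tracker.blockfixer" is copied into "mapred.job.tracker"
   * before the job is submitted.
   */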
  public static void updateBlockFixerMapreduceConfigs(Configuration conf, String suffix) {
    for (String configKey: BLOCKFIXER_MAPREDUCE_KEYS) {
      String newKey = configKey + "." + suffix;
      String value = conf.get(newKey);
      if (value != null) {
        conf.set(configKey, value);
      }
    }
  }

  /**
   * determines how many files to reconstruct in a single task
   */
  protected static long getFilesPerTask(Configuration conf) {
    return conf.getLong(FILES_PER_TASK,
                        DEFAULT_FILES_PER_TASK);

  }
  /**
   * determines how many files to reconstruct simultaneously
   */
  protected static long getMaxPendingJobs(Configuration conf) {
    return conf.getLong(MAX_PENDING_JOBS,
                        DEFAULT_MAX_PENDING_JOBS);
  }

  protected static long getMaxFixTimeForFile(Configuration conf) {
    return conf.getLong(MAX_FIX_TIME_FOR_FILE,
                        DEFAULT_MAX_FIX_TIME_FOR_FILE);
  }

  protected static int getLostFilesLimit(Configuration conf) {
    return conf.getInt(LOST_FILES_LIMIT, DEFAULT_LOST_FILES_LIMIT);
  }
 
  // Returns the newly started job, or null if no job could be started
  public static Job startOneJob(Worker newWorker,
      Priority pri, Set<String> jobFiles, long detectTime,
      AtomicLong numFilesSubmitted, AtomicLong lastCheckingTime,
      long maxPendingJobs)
          throws IOException, InterruptedException, ClassNotFoundException {
    if (lastCheckingTime != null) {
      lastCheckingTime.set(System.currentTimeMillis());
    }
    String startTimeStr = dateFormat.format(new Date());
    String jobName = newWorker.JOB_NAME_PREFIX + "." + newWorker.jobCounter +
        "." + pri + "-pri" + "." + startTimeStr;
    Job job = null;
    synchronized(jobFiles) {
      if (jobFiles.size() == 0) {
        return null;
      }
      newWorker.jobCounter++;
     
      synchronized(newWorker.jobIndex) {
        if (newWorker.jobIndex.size() >= maxPendingJobs) {
          // full
          return null;
        }
        job = newWorker.startJob(jobName, jobFiles, pri, detectTime);
      }
      numFilesSubmitted.addAndGet(jobFiles.size());
      jobFiles.clear();
     
    }
    return job;
  }
 
  public abstract class Worker implements Runnable {

    protected Map<String, LostFileInfo> fileIndex = Collections.synchronizedMap(
      new HashMap<String, LostFileInfo>());
    protected Map<JobID, TrackingUrlInfo> idToTrackingUrlMap =
        Collections.synchronizedMap(new HashMap<JobID, TrackingUrlInfo>());
    protected Map<Job, List<LostFileInfo>> jobIndex =
      Collections.synchronizedMap(new HashMap<Job, List<LostFileInfo>>());
    protected Map<Job, List<FailedFileInfo>> failJobIndex =
        new HashMap<Job, List<FailedFileInfo>>();
    protected Map<Job, List<FailedFileInfo>> simFailJobIndex =
      new HashMap<Job, List<FailedFileInfo>>();

    private long jobCounter = 0;
    private AtomicInteger numJobsRunning = new AtomicInteger(0);
   
    protected AtomicLong numFilesDropped = new AtomicLong(0);
   
    volatile BlockIntegrityMonitor.Status lastStatus = null;
    AtomicLong recentNumFilesSucceeded = new AtomicLong();
    AtomicLong recentNumFilesFailed = new AtomicLong();
    AtomicLong recentSlotSeconds = new AtomicLong();
    AtomicLong recentNumBlockFixSimulationSucceeded = new AtomicLong();
    AtomicLong recentNumBlockFixSimulationFailed = new AtomicLong();
    AtomicLong recentNumReadBytesRemoteRack = new AtomicLong();
    Map<String, Long> recentLogMetrics =
        Collections.synchronizedMap(new HashMap<String, Long>());
   
    private static final int POOL_SIZE = 2;
    private final ExecutorService executor =
        Executors.newFixedThreadPool(POOL_SIZE);
    private static final int DEFAULT_CHECK_JOB_TIMEOUT_SEC = 600; //10 mins

    protected final Log LOG;
    protected final Class<? extends BlockReconstructor> RECONSTRUCTOR_CLASS;
    protected final String JOB_NAME_PREFIX;

    protected Worker(Log log,
        Class<? extends BlockReconstructor> rClass,
        String prefix) {

      this.LOG = log;
      this.RECONSTRUCTOR_CLASS = rClass;
      this.JOB_NAME_PREFIX = prefix;
      Path workingDir = new Path(prefix);
      try {
        FileSystem fs = workingDir.getFileSystem(getConf());
        // Clean existing working dir
        fs.delete(workingDir, true);
      } catch (IOException ioe) {
        LOG.warn("Get exception when cleaning " + workingDir, ioe);
      }
    }
   
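    /**
     * No-op by default; subclasses (e.g. CorruptionWorker) override this to
     * shut down their executors.
     */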
    public void shutdown() {
    }


    /**
     * runs the worker periodically
     */
    public void run() {
      try {
        while (running) {
          try {
            updateStatus();
            checkAndReconstructBlocks();
          } catch (InterruptedException ignore) {
            LOG.info("interrupted");
          } catch (Exception e) {
            // log exceptions and keep running
            LOG.error(StringUtils.stringifyException(e));
          } catch (Error e) {
            LOG.error(StringUtils.stringifyException(e));
            throw e;
          }
 
          try {
            Thread.sleep(blockCheckInterval);
          } catch (InterruptedException ignore) {
            LOG.info("interrupted");
          }
        }
      } finally {
        shutdown();
      }
    }

    /**
     * checks for lost blocks and reconstructs them (if any)
     */
    void checkAndReconstructBlocks() throws Exception {
      checkJobsWithTimeOut(DEFAULT_CHECK_JOB_TIMEOUT_SEC);
      int size = jobIndex.size();
      if (size >= maxPendingJobs) {
        LOG.info("Waiting for " + size + " pending jobs");
        return;
      }

      FileSystem fs = new Path("/").getFileSystem(getConf());
      Map<String, Integer> lostFiles = getLostFiles(fs);
      long detectTime = System.currentTimeMillis();
      computePrioritiesAndStartJobs(fs, lostFiles, detectTime);
    }

    /**
     * Handle a failed job.
     */
    private void failJob(Job job) {
      // assume no files have been reconstructed
      LOG.error("Job " + job.getID() + "(" + job.getJobName() +
      ") finished (failed)");
      // We do not change metrics here since we do not know for sure if file
      // reconstructing failed.
      for (LostFileInfo fileInfo: jobIndex.get(job)) {
        boolean failed = true;
        addToMap(job, job.getID().toString(), fileInfo, failJobIndex);
        fileInfo.finishJob(job.getJobName(), failed);
      }
      numJobsRunning.decrementAndGet();
    }
   
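    /**
     * Records a failed file against its job in the given index, creating the
     * file list on first use.
     */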
    private void addToMap(Job job, String taskId, LostFileInfo fileInfo,
        Map<Job, List<FailedFileInfo>> index) {
      List<FailedFileInfo> failFiles = null;
      if (!index.containsKey(job)) {
        failFiles = new ArrayList<FailedFileInfo>();
        index.put(job, failFiles);
      } else {
        failFiles = index.get(job);
      }
      failFiles.add(new FailedFileInfo(taskId, fileInfo));
    }

    /**
     * Handle a successful job.
     */
    private void succeedJob(Job job, long filesSucceeded, long filesFailed)
    throws IOException {
      String jobName = job.getJobName();
      LOG.info("Job " + job.getID() + "(" + jobName +
      ") finished (succeeded)");
      // we have to look at the output to check which files have failed
      HashMap<String, String> failedFiles = getFailedFiles(job);
      for (LostFileInfo fileInfo: jobIndex.get(job)) {
        String filePath = fileInfo.getFile().toString();
        String failedFilePath =
            DistBlockIntegrityMonitor.FAILED_FILE + "," +
            filePath;
        String simulatedFailedFilePath =
            DistBlockIntegrityMonitor.SIMULATION_FAILED_FILE + "," +
            filePath;
        if (failedFiles.containsKey(simulatedFailedFilePath)) {
          String taskId = failedFiles.get(simulatedFailedFilePath);
          addToMap(job, taskId, fileInfo, simFailJobIndex);
          LOG.error("Simulation failed file: " + fileInfo.getFile());
        }
        if (failedFiles.containsKey(failedFilePath)) {
          String taskId = failedFiles.get(failedFilePath);
          addToMap(job, taskId, fileInfo, failJobIndex);
          boolean failed = true;
          fileInfo.finishJob(jobName, failed);
        } else {
          // call succeed for files that have succeeded or for which no action
          // was taken
          boolean failed = false;
          fileInfo.finishJob(jobName, failed);
        }
      }
      // report succeeded files to metrics
      this.recentNumFilesSucceeded.addAndGet(filesSucceeded);
      this.recentNumFilesFailed.addAndGet(filesFailed);
      if (filesSucceeded > 0) {
        lastSuccessfulFixTime = System.currentTimeMillis();
      }
      numJobsRunning.decrementAndGet();
    }
   
    /**
     * Check the jobs with timeout
     */
    void checkJobsWithTimeOut(int timeoutSec)
        throws ExecutionException {
      Future<Boolean> future = executor.submit(new Callable<Boolean>() {
        @Override
        public Boolean call() throws Exception {
          checkJobs();
          return true;
        }
      });
      try {
        future.get(timeoutSec, TimeUnit.SECONDS);
      } catch (TimeoutException e) {
        // ignore this.
        LOG.warn("Timeout when checking jobs' status.");
      } catch (InterruptedException e) {
        // ignore this.
        LOG.warn("checkJobs function is interrupted.");
      }
      if (!future.isDone()) {
        future.cancel(true);
      }
    }

    /**
     * checks if jobs have completed and updates the job and file indexes;
     * failed files are recorded in failJobIndex for possible restarting
     */
    void checkJobs() throws IOException {
      List<Job> nonRunningJobs = new ArrayList<Job>();
      synchronized(jobIndex) {
        Iterator<Job> jobIter = jobIndex.keySet().iterator();
        while(jobIter.hasNext()) {
          Job job = jobIter.next();
 
          try {
            if (job.isComplete()) {
              Counters ctrs = job.getCounters();
              if (ctrs != null) {
                // If we got counters, perform extra validation.
                this.recentSlotSeconds.addAndGet(ctrs.findCounter(
                    JobInProgress.Counter.SLOTS_MILLIS_MAPS).getValue() / 1000);
               
                long filesSucceeded =
                    ctrs.findCounter(RaidCounter.FILES_SUCCEEDED) != null ?
                      ctrs.findCounter(RaidCounter.FILES_SUCCEEDED).getValue() : 0;
                long filesFailed =
                    ctrs.findCounter(RaidCounter.FILES_FAILED) != null ?
                      ctrs.findCounter(RaidCounter.FILES_FAILED).getValue() : 0;
                long filesNoAction =
                    ctrs.findCounter(RaidCounter.FILES_NOACTION) != null ?
                      ctrs.findCounter(RaidCounter.FILES_NOACTION).getValue() : 0;
                long blockFixSimulationFailed =
                    ctrs.findCounter(RaidCounter.BLOCK_FIX_SIMULATION_FAILED) != null?
                      ctrs.findCounter(RaidCounter.BLOCK_FIX_SIMULATION_FAILED).getValue() : 0;
                long blockFixSimulationSucceeded =
                    ctrs.findCounter(RaidCounter.BLOCK_FIX_SIMULATION_SUCCEEDED) != null?
                      ctrs.findCounter(RaidCounter.BLOCK_FIX_SIMULATION_SUCCEEDED).getValue() : 0;
                this.recentNumBlockFixSimulationFailed.addAndGet(blockFixSimulationFailed);
                this.recentNumBlockFixSimulationSucceeded.addAndGet(blockFixSimulationSucceeded);
                long fileFixNumReadBytesRemoteRack =
                    ctrs.findCounter(RaidCounter.FILE_FIX_NUM_READBYTES_REMOTERACK) != null ?
                      ctrs.findCounter(RaidCounter.FILE_FIX_NUM_READBYTES_REMOTERACK).getValue() : 0;
                this.recentNumReadBytesRemoteRack.addAndGet(fileFixNumReadBytesRemoteRack);
                CounterGroup counterGroup = ctrs.getGroup(LogUtils.LOG_COUNTER_GROUP_NAME);
                for (Counter ctr: counterGroup) {
                  Long curVal = ctr.getValue();
                  if (this.recentLogMetrics.containsKey(ctr.getName())) {
                    curVal += this.recentLogMetrics.get(ctr.getName());
                  }
                  this.recentLogMetrics.put(ctr.getName(), curVal);
                }
               
                int files = jobIndex.get(job).size();
               
                if (job.isSuccessful() &&
                    (filesSucceeded + filesFailed + filesNoAction ==
                      ((long) files))) {
                  // job has processed all files
                  succeedJob(job, filesSucceeded, filesFailed);
                } else {
                  failJob(job);
                }
              } else {
                long filesSucceeded = jobIndex.get(job).size();
                long filesFailed = 0;
                if (job.isSuccessful()) {
                  succeedJob(job, filesSucceeded, filesFailed);
                } else {
                  failJob(job);
                }
              }
              jobIter.remove();
              nonRunningJobs.add(job);
            } else {
              LOG.info("Job " + job.getID() + "(" + job.getJobName()
                  + " still running");
            }
          } catch (Exception e) {
            LOG.error(StringUtils.stringifyException(e));
            failJob(job);
            jobIter.remove();
            nonRunningJobs.add(job);
            try {
              job.killJob();
            } catch (Exception ee) {
              LOG.error(StringUtils.stringifyException(ee));
            }
          }
        }
      }
      purgeFileIndex();
      cleanupNonRunningJobs(nonRunningJobs);
    }

    /**
     * Delete (best-effort) the input and output directories of jobs.
     * @param nonRunningJobs
     */
    private void cleanupNonRunningJobs(List<Job> nonRunningJobs) {
      for (Job job: nonRunningJobs) {
        Path outDir = null;
        try {
          outDir = SequenceFileOutputFormat.getOutputPath(job);
          outDir.getFileSystem(getConf()).delete(outDir, true);
        } catch (IOException e) {
          LOG.warn("Could not delete output dir " + outDir, e);
        }
        Path[] inDir = null;
        try {
          // We only create one input directory.
          inDir = ReconstructionInputFormat.getInputPaths(job);
          inDir[0].getFileSystem(getConf()).delete(inDir[0], true);
        } catch (IOException e) {
          LOG.warn("Could not delete input dir " +
              (inDir == null ? null : inDir[0]), e);
        }
      }
    }


    /**
     * determines which files have failed for a given job
     */
    private HashMap<String, String> getFailedFiles(Job job) throws IOException {
      HashMap<String, String> failedFiles = new HashMap<String, String>();

      Path outDir = SequenceFileOutputFormat.getOutputPath(job);
      FileSystem fs  = outDir.getFileSystem(getConf());
      if (!fs.getFileStatus(outDir).isDir()) {
        throw new IOException(outDir.toString() + " is not a directory");
      }

      FileStatus[] files = fs.listStatus(outDir);

      for (FileStatus f: files) {
        Path fPath = f.getPath();
        if ((!f.isDir()) && (fPath.getName().startsWith(PART_PREFIX))) {
          LOG.info("opening " + fPath.toString());
          SequenceFile.Reader reader =
            new SequenceFile.Reader(fs, fPath, getConf());

          Text key = new Text();
          Text value = new Text();
          while (reader.next(key, value)) {
            if (LOG.isDebugEnabled()) {
              LOG.debug("key: " + key.toString() + " , value: " + value.toString());
            }
            failedFiles.put(key.toString(), value.toString());
          }
          reader.close();
        }
      }
      return failedFiles;
    }


    /**
     * purge expired files from the file index and stale entries from the
     * tracking-URL map
     */
    private void purgeFileIndex() {
      Iterator<String> fileIter = fileIndex.keySet().iterator();
      long now = System.currentTimeMillis();
      while(fileIter.hasNext()) {
        String file = fileIter.next();
        if (fileIndex.get(file).isTooOld(now)) {
          fileIter.remove();
        }
      }
      Iterator<TrackingUrlInfo> tuiIter =
          this.idToTrackingUrlMap.values().iterator();
      while (tuiIter.hasNext()) {
        TrackingUrlInfo tui = tuiIter.next();
        if (System.currentTimeMillis() - tui.insertTime > maxWindowTime) {
          tuiIter.remove();
        }
      }
    }

    // Start jobs for all the lost files.
    public void startJobs(Map<String, Priority> filePriorities, long detectTime)
    throws IOException, InterruptedException, ClassNotFoundException {
      AtomicLong numFilesSubmitted = new AtomicLong(0);
      for (Priority pri : Priority.values()) {
        Set<String> jobFiles = new HashSet<String>();
        for (Map.Entry<String, Priority> entry: filePriorities.entrySet()) {
          // Check if file priority matches the current round.
          if (entry.getValue() != pri) {
            continue;
          }
          jobFiles.add(entry.getKey());
          // Check if we have hit the threshold for number of files in a job.
          if (jobFiles.size() == filesPerTask * TASKS_PER_JOB) {
            boolean succeed = startOneJob(this, pri, jobFiles, detectTime,
                numFilesSubmitted, null, maxPendingJobs) != null;
            if (!succeed) {
              this.numFilesDropped.set(filePriorities.size() -
                  numFilesSubmitted.get());
              LOG.debug("Submitted a job with max number of files allowed. " +
                        "Num files dropped is " + this.numFilesDropped.get());
              return;
            }
          }
        }
        if (jobFiles.size() > 0) {
          boolean succeed = startOneJob(this, pri, jobFiles, detectTime,
              numFilesSubmitted, null, maxPendingJobs) != null;
          if (!succeed) {
            this.numFilesDropped.set(filePriorities.size() -
                numFilesSubmitted.get());
            LOG.debug("Submitted a job with max number of files allowed. " +
                      "Num files dropped is " + this.numFilesDropped.get());
            return;
          }
        }
      }
      this.numFilesDropped.set(filePriorities.size() - numFilesSubmitted.get());
    }

    /**
     * creates and submits a job, updates file index and job index
     */
    private Job startJob(String jobName, Set<String> lostFiles, Priority priority,
        long detectTime)
    throws IOException, InterruptedException, ClassNotFoundException {
      Path inDir = new Path(JOB_NAME_PREFIX + "/in/" + jobName);
      Path outDir = new Path(JOB_NAME_PREFIX + "/out/" + jobName);
      List<String> filesInJob = createInputFile(
          jobName, inDir, lostFiles);
      if (filesInJob.isEmpty()) return null;

      Configuration jobConf = new Configuration(getConf());
      DistBlockIntegrityMonitor.updateBlockFixerMapreduceConfigs(jobConf, BLOCKFIXER);
      RaidUtils.parseAndSetOptions(jobConf, priority.configOption);
      Job job = new Job(jobConf, jobName);
      job.getConfiguration().set(CORRUPT_FILE_DETECT_TIME, Long.toString(detectTime));
      configureJob(job, this.RECONSTRUCTOR_CLASS);
      job.setJarByClass(getClass());
      job.setMapperClass(ReconstructionMapper.class);
      job.setNumReduceTasks(0);
      job.setInputFormatClass(ReconstructionInputFormat.class);
      job.setOutputFormatClass(SequenceFileOutputFormat.class);
      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(Text.class);

      ReconstructionInputFormat.setInputPaths(job, inDir);
      SequenceFileOutputFormat.setOutputPath(job, outDir);
     

      submitJob(job, filesInJob, priority);
      List<LostFileInfo> fileInfos =
        updateFileIndex(jobName, filesInJob, priority);
      // The implementation of submitJob() need not update jobIndex.
      // So check if the job exists in jobIndex before updating jobInfos.
      if (jobIndex.containsKey(job)) {
        jobIndex.put(job, fileInfos);
      }
      numJobsRunning.incrementAndGet();
      return job;
    }

    void submitJob(Job job, List<String> filesInJob, Priority priority)
    throws IOException, InterruptedException, ClassNotFoundException {
      LOG.info("Submitting job");
      DistBlockIntegrityMonitor.this.submitJob(job, filesInJob, priority,
          this.jobIndex, this.idToTrackingUrlMap);
    }

    /**
     * inserts the new job into the file index and returns the corresponding
     * file infos, which the caller records in the job index
     */
    private List<LostFileInfo> updateFileIndex(
        String jobName, List<String> lostFiles, Priority priority) {
      List<LostFileInfo> fileInfos = new ArrayList<LostFileInfo>();

      for (String file: lostFiles) {
        LostFileInfo fileInfo = fileIndex.get(file);
        if (fileInfo != null) {
          fileInfo.addJob(jobName, priority);
        } else {
          fileInfo = new LostFileInfo(file, jobName, priority);
          fileIndex.put(file, fileInfo);
        }
        fileInfos.add(fileInfo);
      }
      return fileInfos;
    }

    /**
     * creates the input file (containing the names of the files to be
     * reconstructed); a sync point is written every filesPerTask entries so
     * that ReconstructionInputFormat can split the file at task boundaries
     */
    private List<String> createInputFile(String jobName, Path inDir,
        Set<String> lostFiles) throws IOException {
      Path file = new Path(inDir, jobName + IN_FILE_SUFFIX);
      FileSystem fs = file.getFileSystem(getConf());
      SequenceFile.Writer fileOut = SequenceFile.createWriter(fs, getConf(), file,
          LongWritable.class,
          Text.class);
      long index = 0L;

      List<String> filesAdded = new ArrayList<String>();
      for (String lostFileName: lostFiles) {
        fileOut.append(new LongWritable(index++), new Text(lostFileName));
        filesAdded.add(lostFileName);

        if (index % filesPerTask == 0) {
          fileOut.sync(); // create sync point to make sure we can split here
        }
      }

      fileOut.close();
      return filesAdded;
    }
 
    /**
     * Update {@link #lastStatus} so that it can be viewed from outside
     */
    protected void updateStatus() {
      int highPriorityFiles = 0;
      int lowPriorityFiles = 0;
      int lowestPriorityFiles = 0;
      List<JobStatus> jobs = new ArrayList<JobStatus>();
      List<JobStatus> failJobs = new ArrayList<JobStatus>();
      List<JobStatus> simFailJobs = new ArrayList<JobStatus>();
      List<String> highPriorityFileNames = new ArrayList<String>();
      for (Map.Entry<String, LostFileInfo> e : fileIndex.entrySet()) {
        String fileName = e.getKey();
        LostFileInfo fileInfo = e.getValue();
        Priority pri = fileInfo.getHighestPriority();
        if (pri == Priority.HIGH) {
          highPriorityFileNames.add(fileName);
          highPriorityFiles++;
        } else if (pri == Priority.LOW){
          lowPriorityFiles++;
        } else if (pri == Priority.LOWEST) {
          lowestPriorityFiles++;
        }
      }
      synchronized(jobIndex) {
        for (Job job : jobIndex.keySet()) {
          String url = job.getTrackingURL();
          String name = job.getJobName();
          JobID jobId = job.getID();
          jobs.add(new BlockIntegrityMonitor.JobStatus(jobId, name, url,
              jobIndex.get(job), null));
        }
        for (Job job : failJobIndex.keySet()) {
          String url = job.getTrackingURL();
          String name = job.getJobName();
          JobID jobId = job.getID();
          failJobs.add(new BlockIntegrityMonitor.JobStatus(jobId, name, url,
              null, failJobIndex.get(job)));
        }
        for (Job simJob : simFailJobIndex.keySet()) {
          String url = simJob.getTrackingURL();
          String name = simJob.getJobName();
          JobID jobId = simJob.getID();
          simFailJobs.add(new BlockIntegrityMonitor.JobStatus(jobId, name, url,
              null, simFailJobIndex.get(simJob)));
        }
      }
      lastStatus = new BlockIntegrityMonitor.Status(highPriorityFiles, lowPriorityFiles,
          lowestPriorityFiles, jobs, highPriorityFileNames, failJobs, simFailJobs);
      updateRaidNodeMetrics();
    }
   
    public Status getStatus() {
      return lastStatus;
    }

    abstract void computePrioritiesAndStartJobs(
        FileSystem fs, Map<String, Integer> lostFiles, long detectTime)
            throws IOException, InterruptedException, ClassNotFoundException;

    protected abstract Map<String, Integer> getLostFiles(FileSystem fs) throws IOException;

    protected abstract void updateRaidNodeMetrics();
   
    /**
     * holds information about a lost file that is being reconstructed
     */
    class LostFileInfo {

      private String file;
      private List<String> jobNames;  // Jobs reconstructing this file.
      private boolean done;
      private List<Priority> priorities;
      private long insertTime;

      public LostFileInfo(String file, String jobName, Priority priority) {
        this.file = file;
        this.jobNames = new ArrayList<String>();
        this.priorities = new ArrayList<Priority>();
        this.done = false;
        this.insertTime = System.currentTimeMillis();
        addJob(jobName, priority);
      }

      public boolean isTooOld(long now) {
        return now - insertTime > maxFixTimeForFile;
      }

      public boolean isDone() {
        return done;
      }

      public void addJob(String jobName, Priority priority) {
        this.jobNames.add(jobName);
        this.priorities.add(priority);
      }

      public Priority getHighestPriority() {
        Priority max = Priority.LOWEST;
        for (Priority p: priorities) {
          if (p.higherThan(max)) max = p;
        }
        return max;
      }

      public String getFile() {
        return file;
      }

      /**
       * Updates state with the completion of a job. If all jobs for this file
       * are done, the file index is updated.
       */
      public void finishJob(String jobName, boolean failed) {
        int idx = jobNames.indexOf(jobName);
        if (idx == -1) return;
        jobNames.remove(idx);
        priorities.remove(idx);
        LOG.info("reconstructing " + file +
            (failed ? " failed in " : " succeeded in ") +
            jobName);
        if (jobNames.isEmpty()) {
          // All jobs dealing with this file are done,
          // remove this file from the index
          LostFileInfo removed = fileIndex.remove(file);
          if (removed == null) {
            LOG.error("trying to remove file not in file index: " + file);
          }
          done = true;
        }
      }
    }

    public String getTrackingUrl(JobID jobId) {
      TrackingUrlInfo tui = this.idToTrackingUrlMap.get(jobId);
      if (tui == null) {
        return "";
      } else {
        return tui.trackingUrl;
      }
    }
  }
 
  /**
   * CorruptFileCounter is a periodically running daemon that keeps running
   * raidfsck to count the corrupt files under the directories defined by
   * RAIDNODE_CORRUPT_FILE_COUNTER_DIRECTORIES_KEY
   * @author weiyan
   */
  public class CorruptFileCounter implements Runnable {
    private long filesWithMissingBlksCnt = 0;
    private Map<String, long[]> numStrpWithMissingBlksMap = new HashMap<String, long[]>();
    private Object counterMapLock = new Object();
    private long numNonRaidedMissingBlocks = 0;

    public CorruptFileCounter() {
      for (Codec codec : Codec.getCodecs()) {
        this.numStrpWithMissingBlksMap.put(codec.id,
            new long[codec.stripeLength + codec.parityLength]);
      }
    }

    public void run() {
      RaidNodeMetrics.getInstance(RaidNodeMetrics.DEFAULT_NAMESPACE_ID)
        .initCorruptFilesMetrics(getConf());
      while (running) {
        TreeMap<String, Long> newUnRecoverableCounterMap = new TreeMap<String, Long>();
        Map<String, Long> newRecoverableCounterMap = new HashMap<String, Long>();
        long newfilesWithMissingBlksCnt = 0;
        String srcDir = "/";
        try {
          ByteArrayOutputStream bout = new ByteArrayOutputStream();
          PrintStream ps = new PrintStream(bout, true);
          RaidShell shell = new RaidShell(getConf(), ps);
          int res = ToolRunner.run(shell, new String[] { "-fsck", srcDir,
              "-count", "-retNumStrpsMissingBlks" });
          shell.close();
          ByteArrayInputStream bin = new ByteArrayInputStream(
              bout.toByteArray());
          BufferedReader reader = new BufferedReader(new InputStreamReader(bin));
          String line = reader.readLine();
          if (line == null) {
            throw new IOException("Raidfsck fails without output");
          }
          Long corruptCount = Long.parseLong(line);
          LOG.info("The number of corrupt files under " + srcDir + " is "
              + corruptCount);
          newUnRecoverableCounterMap.put(srcDir, corruptCount);
          line = reader.readLine();
          if (line == null) {
            throw new IOException("Raidfsck did not print number "
                + "of files with missing blocks");
          }

          // get files with Missing Blks
          // fsck with '-count' prints this number in line2
          long incfilesWithMissingBlks = Long.parseLong(line);
          LOG.info("The number of files with missing blocks under " + srcDir
              + " is " + incfilesWithMissingBlks);

          long numRecoverableFiles = incfilesWithMissingBlks - corruptCount;
          newRecoverableCounterMap.put(srcDir, numRecoverableFiles);
          approximateNumRecoverableFiles = numRecoverableFiles;

          // Add filesWithMissingBlks and numStrpWithMissingBlks only for "/"
          // dir to avoid duplicates
          Map<String, long[]> newNumStrpWithMissingBlksMap = new HashMap<String, long[]>();
          newfilesWithMissingBlksCnt += incfilesWithMissingBlks;
          // read the array for num stripes with missing blocks

          line = reader.readLine();
          if (line == null) {
            throw new IOException("Raidfsck did not print the number of "
                + "missing blocks in non raided files");
          }
          long numNonRaided = Long.parseLong(line);

          for (int i = 0; i < Codec.getCodecs().size(); i++) {
            line = reader.readLine();
            if (line == null) {
              throw new IOException("Raidfsck did not print the missing "
                  + "block info for codec at index " + i);
            }

            Codec codec = Codec.getCodec(line);
            long[] incNumStrpWithMissingBlks = new long[codec.stripeLength
                                                        + codec.parityLength];
            for (int j = 0; j < incNumStrpWithMissingBlks.length; j++) {
              line = reader.readLine();
              if (line == null) {
                throw new IOException("Raidfsck did not print the array "
                          + "for number stripes with missing blocks for index "
                          + j);
              }
              incNumStrpWithMissingBlks[j] = Long.parseLong(line);
              LOG.info("The number of stripes with missing blocks at index"
                        + j + "under" + srcDir + " is "
                        + incNumStrpWithMissingBlks[j]);
            }
            newNumStrpWithMissingBlksMap.put(codec.id, incNumStrpWithMissingBlks);
          }
          synchronized (counterMapLock) {
            this.numNonRaidedMissingBlocks = numNonRaided;
            for (String codeId : newNumStrpWithMissingBlksMap.keySet()) {
              numStrpWithMissingBlksMap.put(codeId,
              newNumStrpWithMissingBlksMap.get(codeId));
            }
          }
          reader.close();
          bin.close();
        } catch (Exception e) {
          LOG.error("Fail to count the corrupt files under " + srcDir, e);
        }
        synchronized (counterMapLock) {
          this.filesWithMissingBlksCnt = newfilesWithMissingBlksCnt;
        }
        updateRaidNodeMetrics();
        if (!running) {
          break;
        }
        try {
          Thread.sleep(corruptFileCountInterval);
        } catch (InterruptedException ignore) {
          LOG.info("interrupted");
        }
      }
    }

    public long getNumNonRaidedMissingBlks() {
      synchronized (counterMapLock) {
        return this.numNonRaidedMissingBlocks;
      }
    }

    public long getFilesWithMissingBlksCnt() {
      synchronized (counterMapLock) {
        return filesWithMissingBlksCnt;
      }
    }

    public long[] getNumStrpWithMissingBlksRS() {
      synchronized (counterMapLock) {
        return numStrpWithMissingBlksMap.get("rs");
      }
    }

    protected void updateRaidNodeMetrics() {
      RaidNodeMetrics rnm = RaidNodeMetrics
        .getInstance(RaidNodeMetrics.DEFAULT_NAMESPACE_ID);

      synchronized (counterMapLock) {
        rnm.numFilesWithMissingBlks.set(this.filesWithMissingBlksCnt);
        long[] numStrpWithMissingBlksRS = this.numStrpWithMissingBlksMap
          .get("rs");

        if (numStrpWithMissingBlksRS != null) {
          rnm.numStrpsOneMissingBlk.set(numStrpWithMissingBlksRS[0]);
          rnm.numStrpsTwoMissingBlk.set(numStrpWithMissingBlksRS[1]);
          rnm.numStrpsThreeMissingBlk.set(numStrpWithMissingBlksRS[2]);
          rnm.numStrpsFourMissingBlk.set(numStrpWithMissingBlksRS[3]);

          long tmp_sum = 0;
          for (int idx = 4; idx < numStrpWithMissingBlksRS.length; idx++) {
            tmp_sum += numStrpWithMissingBlksRS[idx];
          }
          rnm.numStrpsFiveMoreMissingBlk.set(tmp_sum);
        }
      }
    }

    public String getMissingBlksHtmlTable() {
      synchronized (counterMapLock) {
        return RaidUtils.getMissingBlksHtmlTable(
            this.numNonRaidedMissingBlocks, this.numStrpWithMissingBlksMap);
      }
    }
  }
 
  /**
   * Get the number of lost blocks per stripe in the source file.
   */
  private Map<Integer, Integer> getLostStripes(
              Configuration conf, FileStatus stat, FileSystem fs)
                  throws IOException {
    Map<Integer, Integer> lostStripes = new HashMap<Integer, Integer>();
    RaidInfo raidInfo = RaidUtils.getFileRaidInfo(stat, conf);
    if (raidInfo.codec == null) {
      // Can not find the parity file, the file is not raided.
      return lostStripes;
    }
    Codec codec = raidInfo.codec;
   
    if (codec.isDirRaid) {
      RaidUtils.collectDirectoryCorruptBlocksInStripe(conf,
          (DistributedFileSystem)fs, raidInfo,
          stat, lostStripes);
    } else {
      RaidUtils.collectFileCorruptBlocksInStripe((DistributedFileSystem)fs,
          raidInfo, stat, lostStripes);
    }
    return lostStripes;
  }
 
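  /**
   * Mutable record of a potentially corrupt file: its path, detection time,
   * current corrupt-block count, and scan status.
   */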
  public class CorruptFile {
    public String path;
    public long detectTime;
    public volatile int numCorrupt;
    public volatile CorruptFileStatus fileStatus;
    public volatile long lastSubmitTime;
    public CorruptFile(String newPath, int newNumCorrupt, long newDetectTime) {
      this.path = newPath;
      this.numCorrupt = newNumCorrupt;
      this.fileStatus = CorruptFileStatus.POTENTIALLY_CORRUPT;
      this.lastSubmitTime = System.currentTimeMillis();
      this.detectTime = newDetectTime;
    }
   
    public String toString() {
      return fileStatus.name();
    }
  }
 
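  /**
   * Per-directory scan state: the set of potentially corrupt files to scan
   * and a dedicated thread pool that runs the file checks.
   */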
  public class MonitorSet {
    public ConcurrentHashMap<String, CorruptFile> toScanFiles;
    public ExecutorService executor;
    public BlockingQueue<Runnable> scanningQueue;
    public MonitorSet(final String monitorDir) {
      this.scanningQueue = new LinkedBlockingQueue<Runnable>();
      ThreadFactory factory = new ThreadFactory() {
        final AtomicInteger numThreads = new AtomicInteger();
        public Thread newThread(Runnable r) {
          Thread t = new Thread(r);
          t.setName("BlockFix-Scanner-" + monitorDir + "-" +
            numThreads.getAndIncrement());
          return t;
        }
      };
      this.executor = new ThreadPoolExecutor(blockFixerScanThreads,
          blockFixerScanThreads, 0L, TimeUnit.MILLISECONDS, scanningQueue,
          factory);
      this.toScanFiles = new ConcurrentHashMap<String, CorruptFile>();
    }
  }

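  /**
   * Worker that finds files with corrupt blocks (via listCorruptFileBlocks),
   * prioritizes them, and submits MapReduce jobs that reconstruct them.
   */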
  public class CorruptionWorker extends Worker {
    public static final String RAIDNODE_JOB_SUBMIT_NUM_THREADS_KEY =
        "raid.job.submit.num.threads";
    public static final int RAIDNODE_JOB_SUBMIT_NUM_THREADS_DEFAULT = 5;
    public String[] corruptMonitorDirs = null;
    public HashMap<String, MonitorSet> monitorSets;
    public final String OTHERS = "others";
    public HashMap<Priority, HashSet<String>> jobFilesMap;
    public HashMap<Priority, AtomicLong> lastCheckingTimes;
    public AtomicLong numFilesSubmitted = new AtomicLong(0);
    public AtomicLong totalFilesToSubmit = new AtomicLong(0);
    private long blockFixSubmissionInterval =
        DEFAULT_BLOCK_FIX_SUBMISSION_INTERVAL;
    private long blockFixScanSubmissionInterval =
        DEFAULT_BLOCK_FIX_SCAN_SUBMISSION_INTERVAL;
    private int maxNumDetectionTime;
    // Ring buffer of up to maxNumDetectionTime recent detection-time samples
    private long[] detectionTimeCollection;
    private int currPos;
    private long totalDetectionTime;
    private long totalCollectionSize;
    private ExecutorService jobSubmitExecutor;
    private BlockingQueue<Runnable> jobSubmitQueue;
    private int jobSubmitThreads = RAIDNODE_JOB_SUBMIT_NUM_THREADS_DEFAULT;

    public CorruptionWorker() {
      super(LogFactory.getLog(CorruptionWorker.class),
          CorruptBlockReconstructor.class,
          "blockfixer");
      blockFixerScanThreads = getConf().getInt(
          RAIDNODE_BLOCK_FIXER_SCAN_NUM_THREADS_KEY,
          DEFAULT_BLOCK_FIXER_SCAN_NUM_THREADS);
      this.blockFixSubmissionInterval = getConf().getLong(
          RAIDNODE_BLOCK_FIX_SUBMISSION_INTERVAL_KEY,
          DEFAULT_BLOCK_FIX_SUBMISSION_INTERVAL);
      this.blockFixScanSubmissionInterval = getConf().getLong(
          RAIDNODE_BLOCK_FIX_SCAN_SUBMISSION_INTERVAL_KEY,
          DEFAULT_BLOCK_FIX_SCAN_SUBMISSION_INTERVAL);
      this.corruptMonitorDirs = DistBlockIntegrityMonitor.getCorruptMonitorDirs(
          getConf());
      this.monitorSets = new HashMap<String, MonitorSet>();
      for (String monitorDir : this.corruptMonitorDirs) {
        this.monitorSets.put(monitorDir, new MonitorSet(monitorDir));
      }
      this.monitorSets.put(OTHERS, new MonitorSet(OTHERS));
      this.jobFilesMap = new HashMap<Priority, HashSet<String>>();
      lastCheckingTimes = new HashMap<Priority, AtomicLong>();
      for (Priority priority: Priority.values()) {
        this.jobFilesMap.put(priority, new HashSet<String>());
        this.lastCheckingTimes.put(priority, new AtomicLong(System.currentTimeMillis()));
      }
      this.maxNumDetectionTime = getConf().getInt(
          RAIDNODE_MAX_NUM_DETECTION_TIME_COLLECTED_KEY,
          DEFAULT_RAIDNODE_MAX_NUM_DETECTION_TIME_COLLECTED);
      detectionTimeCollection = new long[maxNumDetectionTime];
      this.totalCollectionSize = 0;
      this.totalDetectionTime = 0;
      this.currPos = 0;
      this.jobSubmitThreads = getConf().getInt(RAIDNODE_JOB_SUBMIT_NUM_THREADS_KEY,
          RAIDNODE_JOB_SUBMIT_NUM_THREADS_DEFAULT);
      this.jobSubmitQueue = new LinkedBlockingQueue<Runnable>();
      ThreadFactory factory = new ThreadFactory() {
        final AtomicInteger numThreads = new AtomicInteger();
        public Thread newThread(Runnable r) {
          Thread t = new Thread(r);
          t.setName("BlockFix-Job-Submit-" + numThreads.getAndIncrement());
          return t;
        }
      };
      this.jobSubmitExecutor = new ThreadPoolExecutor(this.jobSubmitThreads,
          this.jobSubmitThreads, 0L, TimeUnit.MILLISECONDS, this.jobSubmitQueue,
          factory);
    }
   
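    /**
     * Records a detection-time sample in the fixed-size ring buffer;
     * totalDetectionTime tracks the sum of the samples currently held.
     */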
    public void putDetectionTime(long detectionTime) {
      synchronized(detectionTimeCollection) {
        long oldVal = detectionTimeCollection[currPos];
        detectionTimeCollection[currPos] = detectionTime;
        totalDetectionTime += detectionTime - oldVal;
        currPos++;
        if (currPos == maxNumDetectionTime) {
          currPos = 0;
        }
        if (totalCollectionSize < maxNumDetectionTime) {
          totalCollectionSize++;
        }
      }
    }
   
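    /**
     * Estimates scan throughput as samples per second of accumulated
     * detection time, scaled by the number of scanner threads.
     */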
    public double getNumDetectionsPerSec() {
      synchronized(detectionTimeCollection) {
        if (totalCollectionSize == 0) {
          return 0;
        } else {
          return ((double) totalCollectionSize) * 1000 / totalDetectionTime
              * blockFixerScanThreads;
        }
      }
    }

    @Override
    protected Map<String, Integer> getLostFiles(FileSystem fs) throws IOException {
      Map<String, Integer> lostFiles = new HashMap<String, Integer>();
      RemoteIterator<Path> cfb = fs.listCorruptFileBlocks(new Path("/"));
      while (cfb.hasNext()) {
        String lostFile = cfb.next().toString();
        Integer count = lostFiles.get(lostFile);
        if (count == null) {
          lostFiles.put(lostFile, 1);
        } else {
          lostFiles.put(lostFile, count+1);
        }
      }
      LOG.info("ListCorruptFileBlocks returned " + lostFiles.size() + " files");
      RaidUtils.filterTrash(getConf(), lostFiles.keySet().iterator());
      LOG.info("getLostFiles returning " + lostFiles.size() + " files");

      return lostFiles;
    }
   
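    /**
     * Adds a corrupt file to the new scan set and schedules a check: new or
     * changed files are checked immediately, while unchanged files are
     * rechecked only after blockFixScanSubmissionInterval has elapsed.
     */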
    public void addToScanSet(String p, int numCorrupt, String monitorDir,
        ConcurrentHashMap<String, CorruptFile> newScanSet, FileSystem fs,
        long detectTime)
            throws IOException {
      CorruptFile cf = new CorruptFile(p, numCorrupt, detectTime);
      MonitorSet monitorSet = monitorSets.get(monitorDir);
      CorruptFile oldCf = monitorSet.toScanFiles.get(p);
      FileCheckRunnable fcr = new FileCheckRunnable(cf, monitorSet, fs,
          detectTime, this);
      if (oldCf == null) {
        newScanSet.put(p, cf);
        monitorSet.toScanFiles.put(p,  cf);
        // Check the file
        cf.lastSubmitTime = System.currentTimeMillis();
        monitorSet.executor.submit(fcr);
      } else {
        if (oldCf.numCorrupt == numCorrupt) {
          newScanSet.put(p, oldCf);
          if (System.currentTimeMillis() - oldCf.lastSubmitTime >
              this.blockFixScanSubmissionInterval) {
            // if a block hasn't been checked for a while, check it again.
            oldCf.lastSubmitTime = System.currentTimeMillis();
            monitorSet.executor.submit(fcr);
          }
        } else {
          cf.detectTime = oldCf.detectTime;
          newScanSet.put(p, cf);
          cf.lastSubmitTime = System.currentTimeMillis();
          monitorSet.executor.submit(fcr);
        }
      }
    }
   
    /**
     * In JobSubmitRunnable, a mapreduce job that fixes the files in
     * tmpJobFiles is created and submitted to the mapreduce cluster.
     * If that fails, numFilesDropped is updated and the files in tmpJobFiles
     * are moved back to the original jobFiles so that they can be fixed by
     * the next job.
     */
    public class JobSubmitRunnable implements Runnable {
      private final Priority priority;
      private final HashSet<String> tmpJobFiles;
      private final HashSet<String> jobFiles;
      private final long detectTime;
      private final AtomicLong lastCheckingTime;
      private final UpdateNumFilesDropped type;
      public JobSubmitRunnable(Priority newPriority, HashSet<String> tmpJobFiles,
          HashSet<String> originalJobFiles, long newDetectTime,
          AtomicLong newLastCheckingTime, UpdateNumFilesDropped newType) {
        this.priority = newPriority;
        this.tmpJobFiles = tmpJobFiles;
        this.jobFiles = originalJobFiles;
        this.detectTime = newDetectTime;
        this.lastCheckingTime = newLastCheckingTime;
        this.type = newType;
      }
     
      public void run() {
        boolean succeed = false;
        try {
          succeed = startOneJob(CorruptionWorker.this, priority, tmpJobFiles, detectTime,
              numFilesSubmitted, lastCheckingTime, maxPendingJobs) != null;
        } catch (Throwable ex) {
          LOG.error("Get Error in blockSubmitRunnable", ex);
        } finally {
          if (!succeed) {
            if (type == UpdateNumFilesDropped.SET) {
              numFilesDropped.set(tmpJobFiles.size());
            } else if (type == UpdateNumFilesDropped.ADD) {
              numFilesDropped.addAndGet(tmpJobFiles.size());
            } else {
              LOG.error("Hit an unexpected type:" + type.name());
            }
            // add back to original job files
            synchronized(jobFiles) {
              this.jobFiles.addAll(tmpJobFiles);
            }
          }
        }
      }
    }
   
    // Returns the time spent, in milliseconds
    public long addToJobFilesMap(
        HashMap<Priority, HashSet<String>> jobFilesMap,
        Priority priority, String path, long detectTime)
            throws IOException, InterruptedException, ClassNotFoundException {
      long startTime = System.currentTimeMillis();
      HashSet<String> jobFiles = jobFilesMap.get(priority);
     
      synchronized(jobFiles) {
        if (!jobFiles.add(path)) {
          return System.currentTimeMillis() - startTime;
        }
        totalFilesToSubmit.incrementAndGet();
        // Check if we have hit the threshold for number of files in a job.
     
        AtomicLong lastCheckingTime = lastCheckingTimes.get(priority);
        if ((jobFiles.size() >= filesPerTask * TASKS_PER_JOB)) {
          // Collect enough files
          this.asyncSubmitJob(jobFiles, priority, detectTime,
              UpdateNumFilesDropped.ADD);
        } else if (System.currentTimeMillis() - lastCheckingTime.get()
            > this.blockFixSubmissionInterval && jobFiles.size() > 0) {
          // Wait enough time
          this.asyncSubmitJob(jobFiles, priority, detectTime,
              UpdateNumFilesDropped.SET);
        }
      }
      return System.currentTimeMillis() - startTime;
    }
   
    @Override
    public void shutdown() {
      for (MonitorSet ms : monitorSets.values()) {
        ms.executor.shutdownNow();
      }
      this.jobSubmitExecutor.shutdownNow();
    }
   
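    /**
     * Returns, for each monitored directory, the number of scanned files in
     * each CorruptFileStatus.
     */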
    public Map<String, Map<CorruptFileStatus, Long>> getCounterMap() {
      TreeMap<String, Map<CorruptFileStatus, Long>> results =
          new TreeMap<String, Map<CorruptFileStatus, Long>>();
      for (String monitorDir: monitorSets.keySet()) {
        MonitorSet ms = monitorSets.get(monitorDir);
        HashMap<CorruptFileStatus, Long> counters =
            new HashMap<CorruptFileStatus, Long>();
        for (CorruptFileStatus cfs: CorruptFileStatus.values()) {
          counters.put(cfs, 0L);
        }
        for (CorruptFile cf: ms.toScanFiles.values()) {
          Long counter = counters.get(cf.fileStatus);
          if (counter == null) {
            counter = 0L;
          }
          counters.put(cf.fileStatus, counter + 1);
        }
        results.put(monitorDir, counters);
      }
      return results;
    }
   
    public ArrayList<CorruptFile> getCorruptFileList(String monitorDir,
        CorruptFileStatus cfs) {
      ArrayList<CorruptFile> corruptFiles = new ArrayList<CorruptFile>();
      MonitorSet ms = monitorSets.get(monitorDir);
      if (ms == null) {
        return corruptFiles;
      }
      for (CorruptFile cf: ms.toScanFiles.values()) {
        if (cf.fileStatus == cfs) {
          corruptFiles.add(cf);
        }
      }
      return corruptFiles;
    }
   
    public Map<String, Map<CorruptFileStatus, Long>> getCorruptFilesCounterMap() {
      return this.getCounterMap();
    }
   
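    /**
     * Checks one corrupt file in the background: verifies it still exists
     * and is still tracked, decides a fix priority based on replication and
     * parity information, queues it for job submission, and records whether
     * the file is recoverable.
     */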
    public class FileCheckRunnable implements Runnable {
      CorruptFile corruptFile;
      MonitorSet monitorSet;
      FileSystem fs;
      CorruptionWorker worker;
      long detectTime;
     
      public FileCheckRunnable(CorruptFile newCorruptFile,
          MonitorSet newMonitorSet,
          FileSystem newFs, long newDetectTime, CorruptionWorker newWorker) {
        corruptFile = newCorruptFile;
        monitorSet = newMonitorSet;
        fs = newFs;
        detectTime = newDetectTime;
        worker = newWorker;
      }
     
      public void run() {
        long startTime = System.currentTimeMillis();
        try {
          if (corruptFile.numCorrupt <= 0) {
            // Not corrupt
            return;
          }
          ConcurrentHashMap<String, CorruptFile> toScanFiles =
              monitorSet.toScanFiles;
          // toScanFiles could be switched before the task gets executed
          CorruptFile cf = toScanFiles.get(corruptFile.path);
          if (cf == null || cf.numCorrupt != corruptFile.numCorrupt) {
            // File no longer tracked, or the corruption count doesn't match
            return;
          }
          FileStatus stat = null;
          try {
            stat = fs.getFileStatus(new Path(corruptFile.path));
          } catch (FileNotFoundException fnfe) {
            cf.fileStatus = CorruptFileStatus.NOT_EXIST;
            return;
          }
          Codec codec = BlockIntegrityMonitor.isParityFile(corruptFile.path);
          long addJobTime = 0;
          if (codec == null) {
            if (stat.getReplication() >= notRaidedReplication) {
              cf.fileStatus = CorruptFileStatus.NOT_RAIDED_UNRECOVERABLE;
              return;
            }
            if (BlockIntegrityMonitor.doesParityDirExist(fs, corruptFile.path)) {
              Priority priority = Priority.LOW;
              if (stat.getReplication() > 1) {
                // If we have a missing block when replication > 1, it is high pri.
                priority = Priority.HIGH;
              } else {
                // Replication == 1. Assume Reed Solomon parity exists.
                // If we have more than one missing block when replication == 1, then
                // high pri.
                priority = (corruptFile.numCorrupt > 1) ? Priority.HIGH : Priority.LOW;
              }
              LostFileInfo fileInfo = fileIndex.get(corruptFile.path);
              if (fileInfo == null || priority.higherThan(
                  fileInfo.getHighestPriority())) {
                addJobTime = addToJobFilesMap(jobFilesMap, priority,
                    corruptFile.path, detectTime);
              }
            }
          } else {
            // For parity files, we use the total corrupt-block count for now.
            Priority priority = (corruptFile.numCorrupt > 1 || codec.parityLength == 1)
                ? Priority.HIGH : Priority.LOW;
            LostFileInfo fileInfo = fileIndex.get(corruptFile.path);
            if (fileInfo == null || priority.higherThan(
                fileInfo.getHighestPriority())) {
              addJobTime = addToJobFilesMap(jobFilesMap, priority, corruptFile.path,
                  detectTime);
            }
          }
          boolean isFileCorrupt = RaidShell.isFileCorrupt((DistributedFileSystem)fs,
              stat, false, getConf(), null, null);
          if (isFileCorrupt) {
            cf.fileStatus = CorruptFileStatus.RAID_UNRECOVERABLE;
          } else {
            cf.fileStatus = CorruptFileStatus.RECOVERABLE;
          }
          long elapseTime = System.currentTimeMillis() - startTime - addJobTime;
          worker.putDetectionTime(elapseTime);
        } catch (Exception e) {
          LOG.error("Get Exception ", e);
        }
      }
    }
   
    /**
     * Acquires a lock on jobFiles, copies its contents into a temporary set,
     * clears jobFiles, and submits a JobSubmitRunnable to the thread pool so
     * that the MapReduce job is submitted in the background.
     * There is no need to wait for the job submission to finish.
     */
    void asyncSubmitJob(HashSet<String> jobFiles, Priority pri,
        long detectTime, UpdateNumFilesDropped type) throws IOException {
      synchronized(jobFiles) {
        if (jobFiles.isEmpty()) {
          return;
        }
        HashSet<String> tmpJobFiles = new HashSet<String>(jobFiles);
        jobFiles.clear();
        JobSubmitRunnable jsr = new JobSubmitRunnable(pri, tmpJobFiles,
            jobFiles, detectTime, lastCheckingTimes.get(pri), type);
        this.jobSubmitExecutor.submit(jsr);
      }
    }

    @Override
    // Compute a priority for each lost file and start jobs; higher priority
    // indicates greater urgency.
    void computePrioritiesAndStartJobs(
        FileSystem fs, Map<String, Integer> corruptFiles, long detectTime)
            throws IOException, InterruptedException, ClassNotFoundException {

      HashMap<String, ConcurrentHashMap<String, CorruptFile>>
          newToScanSet = new HashMap<String, ConcurrentHashMap<String,
          CorruptFile>>();
      // Include "others"
      for (String monitorDir: this.monitorSets.keySet()) {
        newToScanSet.put(monitorDir, new ConcurrentHashMap<String,
            CorruptFile>());
      }
      numFilesSubmitted.set(0);
      totalFilesToSubmit.set(0);
      for (Map.Entry<String, Integer> entry : corruptFiles.entrySet()) {
        String p = entry.getKey();
        int numCorrupt = entry.getValue();
        // Filter through monitor dirs
        boolean match = false;
        for (String monitorDir: this.corruptMonitorDirs) {
          if (p.startsWith(monitorDir)) {
            match = true;
            addToScanSet(p, numCorrupt, monitorDir, newToScanSet.get(monitorDir),
                fs, detectTime);
          }
        }
        if (!match) {
          addToScanSet(p, numCorrupt, OTHERS, newToScanSet.get(OTHERS), fs,
              detectTime);
        }
      }
      // switch to new toScanSet
      for (String monitorDir : this.monitorSets.keySet()) {
        MonitorSet ms = this.monitorSets.get(monitorDir);
        ms.toScanFiles = newToScanSet.get(monitorDir);
      }
      for (Priority pri : Priority.values()) {
        HashSet<String> jobFiles = jobFilesMap.get(pri);
        if (System.currentTimeMillis() - lastCheckingTimes.get(pri).get()
            > this.blockFixSubmissionInterval && jobFiles.size() > 0) {
          this.asyncSubmitJob(jobFiles, pri, detectTime,
              UpdateNumFilesDropped.SET);
        }
      }
    }

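    /**
     * Publishes the worker's current state to RaidNodeMetrics, flushes the
     * recently accumulated success/failure counters, and then resets them.
     */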
    @Override
    protected void updateRaidNodeMetrics() {
      RaidNodeMetrics rnm = RaidNodeMetrics.getInstance(
          RaidNodeMetrics.DEFAULT_NAMESPACE_ID);
     
      rnm.corruptFilesHighPri.set(lastStatus.highPriorityFiles);
      rnm.corruptFilesLowPri.set(lastStatus.lowPriorityFiles);
      rnm.numFilesToFix.set(this.fileIndex.size());
      rnm.numFilesToFixDropped.set(this.numFilesDropped.get());
     
      // Flush statistics out to the RaidNode
      incrFilesFixed(this.recentNumFilesSucceeded.get());
      incrFileFixFailures(this.recentNumFilesFailed.get());
      incrNumBlockFixSimulationFailures(this.recentNumBlockFixSimulationFailed.get());
      incrNumBlockFixSimulationSuccess(this.recentNumBlockFixSimulationSucceeded.get());
      incrFileFixReadBytesRemoteRack(this.recentNumReadBytesRemoteRack.get());
      LogUtils.incrLogMetrics(this.recentLogMetrics);

      rnm.blockFixSlotSeconds.inc(this.recentSlotSeconds.get());
      this.recentNumFilesSucceeded.set(0);
      this.recentNumFilesFailed.set(0);
      this.recentSlotSeconds.set(0);
      this.recentNumBlockFixSimulationFailed.set(0);
      this.recentNumBlockFixSimulationSucceeded.set(0);
      this.recentNumReadBytesRemoteRack.set(0);
      this.recentLogMetrics.clear();
     
      Map<String, Map<CorruptFileStatus, Long>> corruptFilesCounterMap =
          this.getCounterMap();
      if (rnm.corruptFiles == null) {
        return;
      }
      for (String dir: this.corruptMonitorDirs) {
        if (corruptFilesCounterMap.containsKey(dir) &&
            rnm.corruptFiles.containsKey(dir)) {
          Map<CorruptFileStatus, Long> maps = corruptFilesCounterMap.get(dir);
          Long raidUnrecoverable = maps.get(CorruptFileStatus.RAID_UNRECOVERABLE);
          Long notRaidUnrecoverable = maps.get(
              CorruptFileStatus.NOT_RAIDED_UNRECOVERABLE);
          if (raidUnrecoverable == null) {
            raidUnrecoverable = 0L;
          }
          if (notRaidUnrecoverable == null) {
            notRaidUnrecoverable = 0L;
          }
          rnm.corruptFiles.get(dir).set(raidUnrecoverable + notRaidUnrecoverable);
        } else {
          rnm.corruptFiles.get(dir).set(-1L);
        }
      }
    }
  }

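  /**
   * Worker that finds files with blocks on decommissioning datanodes and
   * schedules MapReduce jobs to copy those blocks elsewhere.
   */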
  public class DecommissioningWorker extends Worker {

    DecommissioningWorker() {
      super(LogFactory.getLog(DecommissioningWorker.class),
          BlockReconstructor.DecommissioningBlockReconstructor.class,
          "blockcopier");
    }


    /**
     * Gets a list of decommissioning files from the namenode
     * and filters out files that are currently being regenerated or
     * that were recently regenerated.
     */
    @Override
    protected Map<String, Integer> getLostFiles(FileSystem fs) throws IOException {
      return DistBlockIntegrityMonitor.this.getLostFiles(LIST_DECOMMISSION_FILE_PATTERN,
          new String[]{"-list-corruptfileblocks",
          "-list-decommissioningblocks",
          "-limit",
          Integer.toString(lostFilesLimit)});
    }

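    /**
     * Assigns each decommissioning file a copy priority (LOW or LOWEST) and
     * starts jobs for files that are not already being handled at an equal
     * or higher priority.
     */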
    @Override
    void computePrioritiesAndStartJobs(
        FileSystem fs, Map<String, Integer> decommissioningFiles, long detectTime)
            throws IOException, InterruptedException, ClassNotFoundException {

      Map<String, Priority> fileToPriority =
          new HashMap<String, Priority>(decommissioningFiles.size());

      for (String file : decommissioningFiles.keySet()) {

        // Files with more blocks being decommissioned than the RS parity
        // length get a bump; otherwise, copying jobs have the lowest priority.
        Priority priority = ((decommissioningFiles.get(file)
            > Codec.getCodec("rs").parityLength) ?
            Priority.LOW : Priority.LOWEST);

        LostFileInfo fileInfo = fileIndex.get(file);
        if (fileInfo == null || priority.higherThan(fileInfo.getHighestPriority())) {
          fileToPriority.put(file, priority);
        }
      }
      LOG.info("Found " + fileToPriority.size() + " new lost files");

      startJobs(fileToPriority, detectTime);
    }

    @Override
    protected void updateRaidNodeMetrics() {
      RaidNodeMetrics rnm = RaidNodeMetrics.getInstance(
          RaidNodeMetrics.DEFAULT_NAMESPACE_ID);
      // For this worker, the generic high/low status slots carry the
      // LOW/LOWEST copy priorities.
      rnm.decomFilesLowPri.set(lastStatus.highPriorityFiles);
      rnm.decomFilesLowestPri.set(lastStatus.lowPriorityFiles);
      rnm.numFilesToCopy.set(fileIndex.size());

      incrFilesCopied(recentNumFilesSucceeded.get());
      incrFileCopyFailures(recentNumFilesFailed.get());
      incrNumBlockFixSimulationFailures(this.recentNumBlockFixSimulationFailed.get());
      incrNumBlockFixSimulationSuccess(this.recentNumBlockFixSimulationSucceeded.get());
      LogUtils.incrLogMetrics(this.recentLogMetrics);
     
      rnm.blockCopySlotSeconds.inc(recentSlotSeconds.get());

      // Reset temporary values now that they've been flushed
      recentNumFilesSucceeded.set(0);
      recentNumFilesFailed.set(0);
      recentSlotSeconds.set(0);
      recentNumBlockFixSimulationFailed.set(0);
      recentNumBlockFixSimulationSucceeded.set(0);
      recentLogMetrics.clear();
    }

  }


  // ---- Methods which can be overridden by tests ----

  /**
   * Gets a list of lost files from the name node via DFSck
   *
   * @param pattern A pattern matching a single file in DFSck's output
   * @param dfsckArgs Arguments to pass to DFSck
   * @return A map of lost files' filenames to num lost blocks for that file
   */
  protected Map<String, Integer> getLostFiles(
      Pattern pattern, String[] dfsckArgs) throws IOException {

    Map<String, Integer> lostFiles = new HashMap<String, Integer>();
    BufferedReader reader = getLostFileReader(dfsckArgs);
    String line = reader.readLine(); // skip the header line
    while ((line = reader.readLine()) != null) {
      Matcher m = pattern.matcher(line);
      if (!m.find()) {
        continue;
      }
     
      String fileName = m.group(1).trim();
      Integer numLost = lostFiles.get(fileName);
      numLost = numLost == null ? 0 : numLost;
      numLost += 1;
      lostFiles.put(fileName, numLost);
    }
    LOG.info("FSCK returned " + lostFiles.size() + " files with args " +
        Arrays.toString(dfsckArgs));
    RaidUtils.filterTrash(getConf(), lostFiles.keySet().iterator());
    LOG.info("getLostFiles returning " + lostFiles.size() + " files with args " +
        Arrays.toString(dfsckArgs));
    return lostFiles;
  }

  private BufferedReader getLostFileReader(String[] dfsckArgs)
      throws IOException {

    ByteArrayOutputStream bout = new ByteArrayOutputStream();
    PrintStream ps = new PrintStream(bout, true);
    DFSck dfsck = new DFSck(getConf(), ps);
    try {
      dfsck.run(dfsckArgs);
    } catch (Exception e) {
      throw new IOException(e);
    }
    ByteArrayInputStream bin = new ByteArrayInputStream(bout.toByteArray());
    return new BufferedReader(new InputStreamReader(bin));
  }

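  /**
   * Configures a reconstruction job to run as the RAID user and records
   * which BlockReconstructor implementation the mappers should instantiate.
   */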
  public void configureJob(Job job,
      Class<? extends BlockReconstructor> reconstructorClass) {

    ((JobConf)job.getConfiguration()).setUser(RaidNode.JOBUSER);
    ((JobConf)job.getConfiguration()).setClass(
        ReconstructionMapper.RECONSTRUCTOR_CLASS_TAG,
        reconstructorClass,
        BlockReconstructor.class);
  }

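  /**
   * Submits the given job and registers it in the job index and the
   * job-ID-to-tracking-URL map.
   */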
  void submitJob(Job job, List<String> filesInJob, Priority priority,
      Map<Job, List<LostFileInfo>> jobIndex, Map<JobID, TrackingUrlInfo>
      idToTrackingUrlMap) throws IOException, InterruptedException,
          ClassNotFoundException {
    job.submit();
    LOG.info("Job " + job.getID() + "(" + job.getJobName() +
        ") started");
    jobIndex.put(job, null);
    idToTrackingUrlMap.put(job.getID(),
        new TrackingUrlInfo(job.getTrackingURL(), System.currentTimeMillis()));
  }

  /**
   * returns the number of map reduce jobs running
   */
  public int jobsRunning() {
    return (corruptionWorker.numJobsRunning.get()
        + decommissioningWorker.numJobsRunning.get());
  }

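  /**
   * Input format that groups the file names listed in the job's
   * sequence-file input into splits of at most filesPerTask entries, so
   * each map task reconstructs a bounded batch of files.
   */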
  static class ReconstructionInputFormat
  extends SequenceFileInputFormat<LongWritable, Text> {

    protected static final Log LOG =
        LogFactory.getLog(ReconstructionMapper.class);

    /**
     * Splits the input files into tasks, each handled by a single node.
     * To do this, we have to read the input files and create a split for
     * every fixed number of items in each sequence file.
     */
    @Override
    public List<InputSplit> getSplits(JobContext job)
        throws IOException {
      long filesPerTask = DistBlockIntegrityMonitor.getFilesPerTask(job.getConfiguration());

      Path[] inPaths = getInputPaths(job);

      List<InputSplit> splits = new ArrayList<InputSplit>();

      long fileCounter = 0;

      for (Path inPath: inPaths) {

        FileSystem fs = inPath.getFileSystem(job.getConfiguration());     

        if (!fs.getFileStatus(inPath).isDir()) {
          throw new IOException(inPath.toString() + " is not a directory");
        }

        FileStatus[] inFiles = fs.listStatus(inPath);

        for (FileStatus inFileStatus: inFiles) {
          Path inFile = inFileStatus.getPath();

          if (!inFileStatus.isDir() &&
              (inFile.getName().equals(job.getJobName() + IN_FILE_SUFFIX))) {

            fileCounter++;
            SequenceFile.Reader inFileReader =
                new SequenceFile.Reader(fs, inFile, job.getConfiguration());

            long startPos = inFileReader.getPosition();
            long counter = 0;

            // create an input split every filesPerTask items in the sequence
            LongWritable key = new LongWritable();
            Text value = new Text();
            try {
              while (inFileReader.next(key, value)) {
                if (counter % filesPerTask == filesPerTask - 1L) {
                  splits.add(new FileSplit(inFile, startPos,
                      inFileReader.getPosition() -
                      startPos,
                      null));
                  startPos = inFileReader.getPosition();
                }
                counter++;
              }

              // create input split for remaining items if necessary
              // this includes the case where no splits were created by the loop
              if (startPos != inFileReader.getPosition()) {
                splits.add(new FileSplit(inFile, startPos,
                    inFileReader.getPosition() - startPos,
                    null));
              }
            } finally {
              inFileReader.close();
            }
          }
        }
      }

      LOG.info("created " + splits.size() + " input splits from " +
          fileCounter + " files");

      return splits;
    }

    /**
     * Indicates that the input files can be split.
     */
    @Override
    public boolean isSplitable(JobContext job, Path file) {
      return true;
    }
  }

  /**
   * Mapper for reconstructing stripes with lost blocks
   */
  static class ReconstructionMapper
  extends Mapper<LongWritable, Text, Text, Text> {

    protected static final Log LOG =
        LogFactory.getLog(ReconstructionMapper.class);

    public static final String RECONSTRUCTOR_CLASS_TAG =
        "hdfs.blockintegrity.reconstructor";
    private BlockReconstructor reconstructor;
    public RaidProtocol raidnode;
    private UnixUserGroupInformation ugi;
    RaidProtocol rpcRaidnode;
    private long detectTimeInput;
    private String taskId;

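    /**
     * Logs in and opens an RPC proxy to the RaidNode so the mapper can
     * report recovery times back to it.
     */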
    void initializeRpc(Configuration conf, InetSocketAddress address) throws IOException {
      try {
        this.ugi = UnixUserGroupInformation.login(conf, true);
      } catch (LoginException e) {
        throw new IOException(e);
      }

      this.rpcRaidnode = RaidShell.createRPCRaidnode(address, conf, ugi);
      this.raidnode = RaidShell.createRaidnode(rpcRaidnode);
    }

    @Override
    protected void setup(Context context)
        throws IOException, InterruptedException {

      super.setup(context);

      Configuration conf = context.getConfiguration();
      taskId = conf.get("mapred.task.id");
      Codec.initializeCodecs(conf);
      initializeRpc(conf, RaidNode.getAddress(conf));

      Class<? extends BlockReconstructor> reconstructorClass =
          context.getConfiguration().getClass(RECONSTRUCTOR_CLASS_TAG,
                                            null,
                                            BlockReconstructor.class);
     
      if (reconstructorClass == null) {
        LOG.error("No class supplied for reconstructor " +
                "(prop " + RECONSTRUCTOR_CLASS_TAG + ")");
        context.progress();
        return;
      }

      // Dynamically instantiate the reconstructor from the configured class
      try {
        Constructor<? extends BlockReconstructor> ctor =
            reconstructorClass.getConstructor(new Class[]{Configuration.class});

        reconstructor = ctor.newInstance(conf);

      } catch (Exception ex) {
        throw new IOException("Could not instantiate a block reconstructor " +
                          "based on class " + reconstructorClass, ex);
      }
     
      detectTimeInput = Long.parseLong(conf.get("corrupt_detect_time"));
    }
   
    @Override
    protected void cleanup(Context context) throws IOException,
        InterruptedException {
      RPC.stopProxy(rpcRaidnode);
    }

    /**
     * Reconstructs a single file with lost blocks and reports the outcome
     * through counters and the RaidNode RPC.
     */
    @Override
    public void map(LongWritable key, Text fileText, Context context)
      throws IOException, InterruptedException {
      long sTime = System.currentTimeMillis();
      String fileStr = fileText.toString();
      Path file = new Path(fileStr);
      String prefix = "[" + fileStr + "]      ";
      LOG.info("");
      LOG.info(prefix + "============================= BEGIN =============================");
      LOG.info(prefix + "Reconstruct File: " + fileStr);
      LOG.info(prefix + "Block Missing Detection Time: " +
          dateFormat.format(detectTimeInput));
      long waitTime = sTime - detectTimeInput;
      LOG.info(prefix + "Scheduling Time: " + (waitTime/1000) + " seconds");
      FileSystem fs = file.getFileSystem(context.getConfiguration());
      LogUtils.logWaitTimeMetrics(waitTime, getMaxPendingJobs(
          context.getConfiguration()),
          getFilesPerTask(context.getConfiguration()),
          LOGTYPES.FILE_FIX_WAITTIME,
          fs,
          context);
      long recoveryTime = -1;

      try {
        boolean reconstructed = reconstructor.reconstructFile(file, context);
        if (reconstructed) {
          recoveryTime = System.currentTimeMillis() - detectTimeInput;
          context.getCounter(RaidCounter.FILES_SUCCEEDED).increment(1L);
          LogUtils.logRaidReconstructionMetrics(LOGRESULTS.SUCCESS, 0, null,
              file, -1, LOGTYPES.OFFLINE_RECONSTRUCTION_FILE,
              fs, null, context, recoveryTime);
          LOG.info(prefix + "File Reconstruction Time: " +
              ((System.currentTimeMillis() - sTime)/1000) + " seconds");
          LOG.info(prefix + "Total Recovery Time: " +
              (recoveryTime/1000) + " seconds");
        } else {
          LOG.info(prefix + "File has already been fixed, No action");
          context.getCounter(RaidCounter.FILES_NOACTION).increment(1L);
        }
      } catch (Throwable e) {
        LOG.error(prefix + "Reconstructing file " + file + " failed", e);
        LogUtils.logRaidReconstructionMetrics(LOGRESULTS.FAILURE, 0, null,
            file, -1, LOGTYPES.OFFLINE_RECONSTRUCTION_FILE,
            fs, e, context, -1);
        recoveryTime = Integer.MAX_VALUE; // sentinel reported to the RaidNode on failure
        // report file as failed
        context.getCounter(RaidCounter.FILES_FAILED).increment(1L);
        String outkey = DistBlockIntegrityMonitor.FAILED_FILE + "," + fileStr;
        context.write(new Text(outkey), new Text(taskId));
      } finally {
        if (recoveryTime > 0) {
          // Send recoveryTime to raidnode
          try {
            raidnode.sendRecoveryTime(fileStr, recoveryTime, taskId);
          } catch (Exception e) {
            LOG.error(prefix + "Failed to send recovery time ", e);
          }
        }
        LOG.info(prefix + "============================= END =============================");
        LOG.info("");
      }
      context.progress();
    }
  }

  /**
   * Get the status of the entire block integrity monitor.
   * The status returned represents the aggregation of the statuses of all the
   * integrity monitor's components.
   *
   * @return The status of the block integrity monitor
   */
  @Override
  public BlockIntegrityMonitor.Status getAggregateStatus() {
    Status fixer = corruptionWorker.getStatus();
    Status copier = decommissioningWorker.getStatus();

    List<JobStatus> jobs = new ArrayList<JobStatus>();
    List<JobStatus> simFailedJobs = new ArrayList<JobStatus>();
    List<JobStatus> failedJobs = new ArrayList<JobStatus>();
    List<String> highPriFileNames = new ArrayList<String>();
    int numHighPriFiles = 0;
    int numLowPriFiles = 0;
    int numLowestPriFiles = 0;
    if (fixer != null) {
      jobs.addAll(fixer.jobs);
      simFailedJobs.addAll(fixer.simFailJobs);
      failedJobs.addAll(fixer.failJobs);
      if (fixer.highPriorityFileNames != null) {
        highPriFileNames.addAll(fixer.highPriorityFileNames);
      }
      numHighPriFiles += fixer.highPriorityFiles;
      numLowPriFiles += fixer.lowPriorityFiles;
      numLowestPriFiles += fixer.lowestPriorityFiles;
    }
    if (copier != null) {
      jobs.addAll(copier.jobs);
      simFailedJobs.addAll(copier.simFailJobs);
      failedJobs.addAll(copier.failJobs);
      if (copier.highPriorityFileNames != null) {
        highPriFileNames.addAll(copier.highPriorityFileNames);
      }
      numHighPriFiles += copier.highPriorityFiles;
      numLowPriFiles += copier.lowPriorityFiles;
      numLowestPriFiles += copier.lowestPriorityFiles;
    }

    return new Status(numHighPriFiles, numLowPriFiles, numLowestPriFiles,
                      jobs, highPriFileNames,failedJobs, simFailedJobs);
  }
 
  public Worker getCorruptionMonitor() {
    return this.corruptionWorker;
  }

  @Override
  public Worker getDecommissioningMonitor() {
    return this.decommissioningWorker;
  }
 
  @Override
  public Runnable getCorruptFileCounter() {
    return this.corruptFileCounterWorker;
  }
}