new HashMap<String, List<MapOutputLocation>>();
int totalFailures = 0;
int numInFlight = 0, numCopied = 0;
long bytesTransferred = 0;
DecimalFormat mbpsFormat = new DecimalFormat("0.00");
final Progress copyPhase =
reduceTask.getProgress().phase();
LocalFSMerger localFSMergerThread = null;
InMemFSMergeThread inMemFSMergeThread = null;
for (int i = 0; i < numMaps; i++) {
copyPhase.addPhase(); // add sub-phase per file
}
copiers = new ArrayList<MapOutputCopier>(numCopiers);
Reporter reporter = getReporter(umbilical);
// start all the copying threads
for (int i=0; i < numCopiers; i++) {
MapOutputCopier copier = new MapOutputCopier(conf, reporter);
copiers.add(copier);
copier.start();
}
//start the on-disk-merge thread
localFSMergerThread = new LocalFSMerger((LocalFileSystem)localFileSys);
//start the in memory merger thread
inMemFSMergeThread = new InMemFSMergeThread();
localFSMergerThread.start();
inMemFSMergeThread.start();
// start the clock for bandwidth measurement
long startTime = System.currentTimeMillis();
long currentTime = startTime;
long lastProgressTime = startTime;
long lastOutputTime = 0;
IntWritable fromEventId = new IntWritable(0);
//List of unique hosts containing map outputs
List<String> hostList = new ArrayList<String>();
// loop until we get all required outputs
while (copiedMapOutputs.size() < numMaps && mergeThrowable == null) {
currentTime = System.currentTimeMillis();
boolean logNow = false;
if (currentTime - lastOutputTime > MIN_LOG_TIME) {
lastOutputTime = currentTime;
logNow = true;
}
if (logNow) {
LOG.info(reduceTask.getTaskID() + " Need another "
+ (numMaps - copiedMapOutputs.size()) + " map output(s) "
+ "where " + numInFlight + " is already in progress");
}
try {
// Put the hash entries for the failed fetches.
Iterator<MapOutputLocation> locItr = retryFetches.iterator();
while (locItr.hasNext()) {
MapOutputLocation loc = locItr.next();
List<MapOutputLocation> locList =
mapLocations.get(loc.getHost());
if (locList == null) {
locList = new LinkedList<MapOutputLocation>();
mapLocations.put(loc.getHost(), locList);
hostList.add(loc.getHost());
}
//Add to the beginning of the list so that this map is
//tried again before the others and we can hasten the
//re-execution of this map should there be a problem
locList.add(0, loc);
}
// The call to getMapCompletionEvents will update fromEventId to the
// value to be used for the next call to getMapCompletionEvents
int currentNumObsoleteMapIds = obsoleteMapIds.size();
int numNewOutputs = getMapCompletionEvents(fromEventId,
mapLocations,
hostList);
if (numNewOutputs > 0 || logNow) {
LOG.info(reduceTask.getTaskID() + ": " +
"Got " + numNewOutputs +
" new map-outputs");
}
int numNewObsoleteMaps = obsoleteMapIds.size()-currentNumObsoleteMapIds;
if (numNewObsoleteMaps > 0) {
LOG.info(reduceTask.getTaskID() + ": " +
"Got " + numNewObsoleteMaps +
" obsolete map-outputs from tasktracker ");
}
if (retryFetches.size() > 0) {
LOG.info(reduceTask.getTaskID() + ": " +
"Got " + retryFetches.size() +
" map-outputs from previous failures");
}
// clear the "failed" fetches hashmap
retryFetches.clear();
}
catch (IOException ie) {
LOG.warn(reduceTask.getTaskID() +
" Problem locating map outputs: " +
StringUtils.stringifyException(ie));
}
// now walk through the cache and schedule what we can
int numScheduled = 0;
int numDups = 0;
synchronized (scheduledCopies) {
// Randomize the map output locations to prevent
// all reduce-tasks swamping the same tasktracker
Collections.shuffle(hostList, this.random);
Iterator<String> hostsItr = hostList.iterator();
while (hostsItr.hasNext()) {
String host = hostsItr.next();
List<MapOutputLocation> knownOutputsByLoc =
mapLocations.get(host);
//Identify duplicate hosts here
if (uniqueHosts.contains(host)) {
numDups += knownOutputsByLoc.size() -1;
continue;
}
Long penaltyEnd = penaltyBox.get(host);
boolean penalized = false;
if (penaltyEnd != null) {
if (currentTime < penaltyEnd.longValue()) {
penalized = true;
} else {
penaltyBox.remove(host);
}
}
if (penalized)
continue;
Iterator<MapOutputLocation> locItr =
knownOutputsByLoc.iterator();
while (locItr.hasNext()) {
MapOutputLocation loc = locItr.next();
// Do not schedule fetches from OBSOLETE maps
if (obsoleteMapIds.contains(loc.getTaskAttemptId())) {
locItr.remove();
continue;
}
uniqueHosts.add(host);
scheduledCopies.add(loc);
locItr.remove(); // remove from knownOutputs
numInFlight++; numScheduled++;
break; //we have a map from this host
}
if (knownOutputsByLoc.size() == 0) {
mapLocations.remove(host);
hostsItr.remove();
}
}
scheduledCopies.notifyAll();
}
if (numScheduled > 0 || logNow) {
LOG.info(reduceTask.getTaskID() + " Scheduled " + numScheduled +
" outputs (" + penaltyBox.size() +
" slow hosts and" + numDups + " dup hosts)");
}
if (penaltyBox.size() > 0 && logNow) {
LOG.info("Penalized(slow) Hosts: ");
for (String host : penaltyBox.keySet()) {
LOG.info(host + " Will be considered after: " +
((penaltyBox.get(host) - currentTime)/1000) + " seconds.");
}
}
// if we have no copies in flight and we can't schedule anything
// new, just wait for a bit
try {
if (numInFlight == 0 && numScheduled == 0) {
// we should indicate progress as we don't want TT to think
// we're stuck and kill us
reporter.progress();
Thread.sleep(5000);
}
} catch (InterruptedException e) { } // IGNORE
while (numInFlight > 0 && mergeThrowable == null) {
LOG.debug(reduceTask.getTaskID() + " numInFlight = " +
numInFlight);
//the call to getCopyResult will either
//1) return immediately with a null or a valid CopyResult object,
// or
//2) if the numInFlight is above maxInFlight, return with a
// CopyResult object after getting a notification from a
// fetcher thread,
//So, when getCopyResult returns null, we can be sure that
//we aren't busy enough and we should go and get more mapcompletion
//events from the tasktracker
CopyResult cr = getCopyResult(numInFlight);
if (cr == null) {
break;
}
if (cr.getSuccess()) { // a successful copy
numCopied++;
lastProgressTime = System.currentTimeMillis();
bytesTransferred += cr.getSize();
long secsSinceStart =
(System.currentTimeMillis()-startTime)/1000+1;
float mbs = ((float)bytesTransferred)/(1024*1024);
float transferRate = mbs/secsSinceStart;
copyPhase.startNextPhase();
copyPhase.setStatus("copy (" + numCopied + " of " + numMaps
+ " at " +
mbpsFormat.format(transferRate) + " MB/s)");
// Note successful fetch for this mapId to invalidate
// (possibly) old fetch-failures