public boolean fetchOutputs() throws IOException {
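// Bookkeeping for the shuffle: total fetch failures, copy results
// received, copies currently in flight, and copies completed.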
int totalFailures = 0;
int totalCopyResultsReceived = 0;
int numInFlight = 0, numCopied = 0;
DecimalFormat mbpsFormat = new DecimalFormat("0.00");
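// The copy phase of the reduce task's progress, subdivided below
// into one sub-phase per map output.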
final Progress copyPhase =
reduceTask.getProgress().phase();
LocalFSMerger localFSMergerThread = null;
InMemFSMergeThread inMemFSMergeThread = null;
GetMapEventsThread getMapEventsThread = null;
for (int i = 0; i < numMaps; i++) {
copyPhase.addPhase(); // add a sub-phase per map output
}
copiers = new ArrayList<MapOutputCopier>(numCopiers);
// start all the copying threads
for (int i = 0; i < numCopiers; i++) {
MapOutputCopier copier = new MapOutputCopier(conf, reporter);
copiers.add(copier);
copier.start();
// register the copier to jmxThreadInfoTracker
jmxThreadInfoTracker.registerThreadToTask(
"REDUCE_COPY_TASK", copier.getId());
}
// start the on-disk merge thread
localFSMergerThread = new LocalFSMerger((LocalFileSystem)localFileSys);
// start the in-memory merge thread
inMemFSMergeThread = new InMemFSMergeThread();
localFSMergerThread.start();
inMemFSMergeThread.start();
jmxThreadInfoTracker.registerThreadToTask(
"REDUCE_COPY_TASK", localFSMergerThread.getId());
jmxThreadInfoTracker.registerThreadToTask(
"REDUCE_COPY_TASK", inMemFSMergeThread.getId());
// start the map events thread
getMapEventsThread = new GetMapEventsThread();
getMapEventsThread.start();
jmxThreadInfoTracker.registerThreadToTask(
"REDUCE_COPY_TASK", getMapEventsThread.getId());
// start the clock for bandwidth measurement
long startTime = System.currentTimeMillis();
long currentTime = startTime;
long lastProgressTime = startTime;
long lastOutputTime = 0;
// loop until we get all required outputs
while (getNumMapsCopyCompleted() < numMaps && mergeThrowable == null) {
currentTime = System.currentTimeMillis();
boolean logNow = false;
if (currentTime - lastOutputTime > MIN_LOG_TIME) {
lastOutputTime = currentTime;
logNow = true;
}
if (logNow) {
LOG.info(
reduceTask.getTaskID() + " Need another " +
(numMaps - getNumMapsCopyCompleted()) + " map output(s) " +
"out of " + numMaps + " total; " + numInFlight +
" already in progress");
}
// Re-queue the locations of failed fetches so they are retried.
Iterator<MapOutputLocation> locItr = retryFetches.iterator();
while (locItr.hasNext()) {
MapOutputLocation loc = locItr.next();
loc.reset();
List<MapOutputLocation> locList =
mapLocations.get(loc.getHost());
// Check if the list exists. The map-output-location mapping is
// cleared when the jobtracker restarts and then rebuilt from
// scratch, so we continue in the hope of finding some locations
// in the rebuilt mapping.
if (locList != null) {
// Add to the beginning of the list so that this map is
// tried again before the others, hastening the re-execution
// of this map should there be a problem.
locList.add(0, loc);
}
}
if (retryFetches.size() > 0) {
LOG.info(reduceTask.getTaskID() + ": " +
"Got " + retryFetches.size() +
" map-outputs from previous failures");
}
// clear the collection of failed fetches
retryFetches.clear();
// now walk through the cache and schedule what we can
int numScheduled = 0;
int numHostDups = 0;
int numDups = 0;
synchronized (scheduledCopies) {
// Map from http task tracker address to list of output locations
// (the http address is unique, even if there are multiple task
// trackers on the same machine)
Map<String, List<MapOutputLocation>> chosenLocationMap =
new HashMap<String, List<MapOutputLocation>>();
// Randomize the map output locations to prevent
// all reduce-tasks swamping the same tasktracker
List<String> hostList = new ArrayList<String>();
hostList.addAll(mapLocations.keySet());
Collections.shuffle(hostList, this.random);
Iterator<String> hostsItr = hostList.iterator();
while (hostsItr.hasNext()) {
String host = hostsItr.next();
LOG.debug("fetchOutputs: Looking at host " + host + ", " +
"total " + mapLocations.keySet().size());
List<MapOutputLocation> knownOutputsByLoc =
mapLocations.get(host);
// Check if the list exists. The map-output-location mapping is
// cleared when the jobtracker restarts and then rebuilt from
// scratch, so we continue in the hope of finding some locations
// in the rebuilt mapping and adding them for fetching.
if (knownOutputsByLoc == null || knownOutputsByLoc.size() == 0) {
continue;
}
// Identify duplicate hosts here
if (uniqueHosts.contains(host)) {
LOG.debug("fetchOutputs: Duplicate " + host +
", numDups= " + numDups);
numDups += knownOutputsByLoc.size();
++numHostDups;
continue;
}
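// Skip hosts still serving out a penalty (penalized as slow);
// re-admit a host once its penalty has expired.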
Long penaltyEnd = penaltyBox.get(host);
boolean penalized = false;
if (penaltyEnd != null) {
if (currentTime < penaltyEnd.longValue()) {
penalized = true;
} else {
penaltyBox.remove(host);
}
}
if (penalized)
continue;
synchronized (knownOutputsByLoc) {
locItr = knownOutputsByLoc.iterator();
while (locItr.hasNext()) {
MapOutputLocation loc = locItr.next();
// Do not schedule fetches from OBSOLETE maps
if (obsoleteMapIds.contains(loc.getTaskAttemptId())) {
locItr.remove();
continue;
}
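// Mark this host as visited in this scheduling round and group
// the location under its unique http task tracker address.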
uniqueHosts.add(host);
List<MapOutputLocation> locationList =
chosenLocationMap.get(loc.getHttpTaskTracker());
if (locationList == null) {
locationList = new ArrayList<MapOutputLocation>();
chosenLocationMap.put(loc.getHttpTaskTracker(),
locationList);
}
locationList.add(loc);
LOG.info("fetchOutputs: Scheduling location " + loc);
locItr.remove(); // remove from knownOutputs
numInFlight++; numScheduled++;
//
// Commenting out this break allows fetching all the shards at
// once from a host, instead of fetching one at a time.
// See MAPREDUCE-318.
//
// break; // we have a map from this host
//
}
}
// Add the HostMapOutputLocations to scheduled copies in chunks
// of maxMapOutputsPerFetch
List<HostMapOutputLocations> tmpScheduledCopies =
new ArrayList<HostMapOutputLocations>();
for (Map.Entry<String, List<MapOutputLocation>> entry :
chosenLocationMap.entrySet()) {
final List<MapOutputLocation> outputList = entry.getValue();
int remaining = outputList.size();
int index = 0;
while (remaining >= maxMapOutputsPerFetch) {
tmpScheduledCopies.add(
new HostMapOutputLocations(entry.getKey(),
new ArrayList<MapOutputLocation>(
outputList.subList(
index, index + maxMapOutputsPerFetch))));
index += maxMapOutputsPerFetch;
remaining -= maxMapOutputsPerFetch;
}
if (remaining > 0) {
tmpScheduledCopies.add(
new HostMapOutputLocations(entry.getKey(),
new ArrayList<MapOutputLocation>(
outputList.subList(index, index + remaining))));
}
}
chosenLocationMap.clear();
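// Randomize the chunk order so the copier threads do not all
// start with the same tasktracker.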
Collections.shuffle(tmpScheduledCopies, this.random);
scheduledCopies.addAll(tmpScheduledCopies);
}
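// Wake up copier threads waiting for work on scheduledCopies.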
scheduledCopies.notifyAll();
}
if (numScheduled > 0 || logNow) {
LOG.info(reduceTask.getTaskID() + " Scheduled " + numScheduled +
" outputs (" + penaltyBox.size() +
" slow hosts, " + numDups + " duplicate outputs across " +
numHostDups + " duplicate hosts)");
}
if (penaltyBox.size() > 0 && logNow) {
LOG.info("Penalized(slow) Hosts: ");
for (String host : penaltyBox.keySet()) {
LOG.info(host + " Will be considered after: " +
((penaltyBox.get(host) - currentTime)/1000) + " seconds.");
}
}
// if we have no copies in flight and we can't schedule anything
// new, just wait for a bit
try {
if (numInFlight == 0 && numScheduled == 0) {
// we should indicate progress as we don't want TT to think
// we're stuck and kill us
reporter.progress();
synchronized (mapLocations) {
mapLocations.wait(5000);
}
}
} catch (InterruptedException e) { } // IGNORE
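// Drain available copy results, updating progress for each
// successful fetch.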
while (numInFlight > 0 && mergeThrowable == null) {
LOG.debug(reduceTask.getTaskID() + " numInFlight = " +
numInFlight);
// The call to getCopyResult will either
// 1) return immediately with a null or a valid CopyResult object, or
// 2) if numInFlight is above maxInFlight, return with a
// CopyResult object after getting a notification from a
// fetcher thread.
// So when getCopyResult returns null, we can be sure that we
// aren't busy enough and should go get more map-completion
// events from the tasktracker.
CopyResult cr = getCopyResult(numInFlight);
if (cr == null) {
break;
}
LOG.info("Got new copy result - " + (++totalCopyResultsReceived)
+ " " + cr);
if (cr.getSuccess()) { // a successful copy
numCopied++;
lastProgressTime = System.currentTimeMillis();
reduceShuffleBytes.increment(cr.getSize());
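// Compute the aggregate transfer rate since the shuffle started;
// the +1 guards against division by zero in the first second.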
long secsSinceStart =
(System.currentTimeMillis()-startTime)/1000+1;
float mbs = ((float)reduceShuffleBytes.getCounter())/(1024*1024);
float transferRate = mbs/secsSinceStart;
copyPhase.startNextPhase();
copyPhase.setStatus("copy (" + numCopied + " of " + numMaps
+ " at " +
mbpsFormat.format(transferRate) + " MB/s)");
// Note successful fetch for this mapId to invalidate
// (possibly) old fetch-failures