// adequate memory to keep all segments in memory.
Path outputPath = mapOutputFile.getInputFileForWrite(
srcTaskIdentifier.getInputIdentifier().getInputIndex(),
mergeOutputSize).suffix(Constants.MERGED_OUTPUT_PREFIX);
Writer writer = null;
try {
writer =
new Writer(conf, rfs, outputPath,
(Class)ConfigUtils.getIntermediateInputKeyClass(conf),
(Class)ConfigUtils.getIntermediateInputValueClass(conf),
codec, null, null);
TezRawKeyValueIterator rIter = null;
LOG.info("Initiating in-memory merge with " + noInMemorySegments +
" segments...");
// Nothing is actually materialized to disk by this merge - that is controlled by
// passing a sort-factor equal to the number of segments (inMemorySegments.size()).
rIter = TezMerger.merge(conf, rfs,
(Class)ConfigUtils.getIntermediateInputKeyClass(conf),
(Class)ConfigUtils.getIntermediateInputValueClass(conf),
inMemorySegments, inMemorySegments.size(),
new Path(inputContext.getUniqueIdentifier()),
(RawComparator)ConfigUtils.getIntermediateInputKeyComparator(conf),
nullProgressable, spilledRecordsCounter, null, additionalBytesRead, null);
// spilledRecordsCounter is tracking the number of keys that will be
// read from each of the segments being merged - which is essentially
// what will be written to disk.
if (null == combiner) {
TezMerger.writeFile(rIter, writer, nullProgressable, TezJobConfig.DEFAULT_RECORDS_BEFORE_PROGRESS);
} else {
// TODO Counters for Combine
runCombineProcessor(rIter, writer);
}
writer.close();
additionalBytesWritten.increment(writer.getCompressedLength());
writer = null;
LOG.info(inputContext.getUniqueIdentifier() +
" Merge of the " + noInMemorySegments +
" files in-memory complete." +
" Local file is " + outputPath + " of size " +
localFS.getFileStatus(outputPath).getLen());
} catch (IOException e) {
//make sure that we delete the on-disk file at outputPath that the
//Writer above was opened on, so a failed merge leaves nothing behind
localFS.delete(outputPath, true);
throw e;
} finally {
if (writer != null) {
writer.close();
}
}
// Note the output of the merge
closeOnDiskFile(outputPath);