if (numSpills == 0) {
  // Nothing was spilled: write one empty segment per partition into the
  // final output stream, and one index record for each of them.
  for (int i = 0; i < partitions; i++) {
    segmentStart = finalOut.getPos();
    Writer writer = SequenceFile.createWriter(job, finalOut,
        job.getMapOutputKeyClass(),
        job.getMapOutputValueClass(),
        compressionType, codec);
    // Close the writer before measuring the segment so that anything it
    // emits while closing is counted in the recorded length.
    writer.close();
    // Index record: segment offset followed by segment length, both longs.
    finalIndexOut.writeLong(segmentStart);
    finalIndexOut.writeLong(finalOut.getPos() - segmentStart);
  }
  finalOut.close();
  finalIndexOut.close();
  return;
}
{
//create a sorter object as we need access to the SegmentDescriptor
//class and merge methods
Sorter sorter = new Sorter(localFs, job.getOutputKeyComparator(), valClass, job);
sorter.setProgressable(reporter);
for (int parts = 0; parts < partitions; parts++){
// Collect, from every spill, the segment that belongs to partition 'parts'.
List<SegmentDescriptor> segmentList =
    new ArrayList<SegmentDescriptor>(numSpills);
for (int i = 0; i < numSpills; i++) {
  long segmentOffset;
  long segmentLength;
  FSDataInputStream indexIn = localFs.open(indexFileName[i]);
  try {
    // Each index record is two longs (offset, length) = 16 bytes, so the
    // record for this partition starts at byte parts * 16.
    indexIn.seek(parts * 16L);
    segmentOffset = indexIn.readLong();
    segmentLength = indexIn.readLong();
  } finally {
    // Release the index stream even if the seek or one of the reads fails.
    indexIn.close();
  }
  SegmentDescriptor s = sorter.new SegmentDescriptor(segmentOffset,
      segmentLength, filename[i]);
  // NOTE(review): presumably keeps the merge from deleting the spill file,
  // which other partitions still need -- confirm against Sorter.
  s.preserveInput(true);
  // NOTE(review): presumably enables sync-marker checks while reading this
  // segment -- confirm against SegmentDescriptor.
  s.doSync();
  segmentList.add(i, s);
}
segmentStart = finalOut.getPos();
RawKeyValueIterator kvIter = sorter.merge(segmentList, new Path(getTaskId()));
SequenceFile.Writer writer = SequenceFile.createWriter(job, finalOut,
job.getMapOutputKeyClass(), job.getMapOutputValueClass(),
compressionType, codec);
sorter.writeFile(kvIter, writer);
//close the file - required esp. for block compression to ensure
//partition data don't span partition boundaries
writer.close();
//when we write the offset/length to the final index file, we write
//longs for both. This helps us to reliably seek directly to the
//offset/length for a partition when we start serving the byte-ranges
//to the reduces. We probably waste some space in the file by doing
//this as opposed to writing VLong but it helps us later on.