final long minSize = getMinSplitSize(conf);
final long EPSILON = (long) (minSize * (SPLIT_SLOP - 1));
long goalSize = totalBytes / n;
long batchSize = 0;
BlockDistribution bd = new BlockDistribution();;
RawComparable prevKey = null;
long minStepSize = -1;
FSDataInputStream nextFsdis = null;
TFile.Reader nextReader = null;
for (int i = 0; i < paths.length; ++i) {
FileStatus fstatus = tfileStatus[i];
long blkSize = fstatus.getBlockSize();
long fileLen = fstatus.getLen();
long stepSize = Math.max(minSize,
(goalSize < blkSize) ? goalSize : blkSize);
if (minStepSize== -1 || minStepSize > stepSize)
minStepSize = stepSize;
// adjust the block size by the scaling factor
blkSize /= nTables;
stepSize = Math.max(minSize,
(goalSize < blkSize) ? goalSize : blkSize);
FSDataInputStream fsdis = null;
TFile.Reader reader = null;
long remainLen = fileLen;
try {
if (nextReader == null)
{
fsdis = fs.open(paths[i]);
reader = new TFile.Reader(fsdis, fileLen, conf);
} else {
fsdis = nextFsdis;
reader = nextReader;
}
BlockLocation[] locations =
fs.getFileBlockLocations(fstatus, 0, fileLen);
if (locations.length == 0) {
throw new AssertionError(
"getFileBlockLocations returns 0 location");
}
Arrays.sort(locations, new Comparator<BlockLocation>() {
/**
 * Orders two block locations by ascending start offset.
 *
 * <p>The offsets are compared directly instead of taking the sign of their
 * difference, so the result cannot be distorted by {@code long} overflow.
 *
 * @param o1 the first block location
 * @param o2 the second block location
 * @return {@code -1} if {@code o1} starts before {@code o2}, {@code 1} if it
 *         starts after, and {@code 0} if both report the same offset
 */
@Override
public int compare(BlockLocation o1, BlockLocation o2) {
  final long firstOffset = o1.getOffset();
  final long secondOffset = o2.getOffset();
  if (firstOffset < secondOffset) {
    return -1;
  }
  if (firstOffset > secondOffset) {
    return 1;
  }
  return 0;
}
});
long[] startOffsets = new long[locations.length];
for (int ii = 0; ii < locations.length; ii++)
startOffsets[ii] = locations[ii].getOffset();
boolean done = false;
while ((remainLen > 0) && !done) {
long splitBytes =
remainLen > stepSize ? stepSize : remainLen;
long offsetBegin = fileLen - remainLen;
long offsetEnd = offsetBegin + splitBytes;
int indexBegin = getStartBlockIndex(startOffsets, offsetBegin);
int indexEnd = getEndBlockIndex(startOffsets, offsetEnd);
BlockLocation firstBlock = locations[indexBegin];
BlockLocation lastBlock = locations[indexEnd-1];
long lastBlockOffsetBegin = lastBlock.getOffset();
long lastBlockOffsetEnd =
lastBlockOffsetBegin + lastBlock.getLength();
if ((firstBlock.getOffset() > offsetBegin)
|| (lastBlockOffsetEnd < offsetEnd)) {
throw new AssertionError(
"Block locations returned by getFileBlockLocations do not cover requested range");
}
// Adjust offsets
if ((offsetEnd > lastBlockOffsetBegin)
&& (offsetEnd - lastBlockOffsetBegin < EPSILON)) {
// the split includes a bit of the next block, remove it.
if (offsetEnd != fileLen)
{
// only if this is not the last chunk
offsetEnd = lastBlockOffsetBegin;
splitBytes = offsetEnd - offsetBegin;
indexEnd--;
}
}
else if ((lastBlockOffsetEnd > offsetEnd)
&& (lastBlockOffsetEnd - offsetEnd < EPSILON)) {
// the split includes almost the whole block, fill it.
offsetEnd = lastBlockOffsetEnd;
splitBytes = offsetEnd - offsetBegin;
}
RawComparable key = reader.getKeyNear(offsetEnd);
if (key == null) {
offsetEnd = fileLen;
splitBytes = offsetEnd - offsetBegin;
if (i < paths.length-1)
{