// Resolve the file behind this split and record its length, as reported by
// the file system, in fileSize.
Path file = fileSplit.getPath();
fileSize = file.getFileSystem(context.getConfiguration())
.getFileStatus(file).getLen();
// Read the LZO block index for the file. Everything below works from the
// index, so an empty index is reported as an IOException.
LzoIndex lzoBlockIndex = LzoIndex.readIndex(
file.getFileSystem(context.getConfiguration()), file);
if (lzoBlockIndex.isEmpty()) {
throw new IOException("No LZO index file exists for the input file "
+ file.toString() + " cannot index the input file");
}
// Remember the index position of the last block in the file.
int num_lzo_blocks = lzoBlockIndex.getNumberOfBlocks();
lastLZOBlockStartOffset = lzoBlockIndex.getPosition(num_lzo_blocks - 1);
// Log the split boundaries and the block count, prefixed with the task
// attempt id.
LOG.info(context.getTaskAttemptID() + " splitStart= " + splitStart
+ " splitEnd=" + splitEnd + " splitLength=" + splitLength);
LOG.info(context.getTaskAttemptID() + ":total LZOblocks in this file: "
+ num_lzo_blocks);
// Scan the index once to find the range of block offsets this mapper is
// responsible for:
//   startPos = index of the first block whose offset is >= splitStart
//   endPos   = index of the first block whose offset is >= splitEnd
// Both stay 0 when no block qualifies; the found* flags record whether a
// match was actually seen.
// NOTE(review): when no block offset is >= splitStart, startPos remains 0 --
// confirm that callers never produce such a split.
int startPos = 0;
int endPos = 0;
boolean foundStartPos = false;
boolean foundEndPos = false;
// The scan ends as soon as both boundaries are known or the index is exhausted.
for (int blockIdx = 0;
    blockIdx < num_lzo_blocks && !(foundStartPos && foundEndPos);
    blockIdx++) {
  final long offset = lzoBlockIndex.getPosition(blockIdx);
  if (!foundStartPos && offset >= splitStart) {
    startPos = blockIdx;
    foundStartPos = true;
  }
  if (!foundEndPos && offset >= splitEnd) {
    endPos = blockIdx;
    foundEndPos = true;
  }
}
// Decide how many index entries this split covers (totalLZOBlocks) and
// whether it has to be handled as the last split of the file. After this
// statement foundEndPos doubles as that marker: false means "last split".
if (!foundEndPos) {
// No block starts at or after splitEnd, so the range runs to the last
// indexed block.
endPos = num_lzo_blocks - 1;
totalLZOBlocks = endPos - startPos + 1;
// the last split: the offsets from startPos to the end of the index are
// copied, and the end of the file is added as the final entry of lzoOffsets
} else {
// Move endPos one block further when a later block exists.
if (endPos < num_lzo_blocks - 1)
endPos++;
if (endPos == num_lzo_blocks - 1) // treat as if it's the last split;
{
totalLZOBlocks = endPos - startPos + 1;
// Clear the flag so that the last-split path is taken when lzoOffsets is
// filled.
foundEndPos = false;
} else
// No "+ 1" here: the copy for this case runs from startPos through endPos
// inclusive, which fills all totalLZOBlocks + 1 slots of lzoOffsets.
totalLZOBlocks = endPos - startPos;
}
// Special treatment for the first LZO block offset: with the current LZO
// index implementation, 0 has to be used for the first LZO block of any
// LZO-compressed file, although the actual start offset of that block is
// not 0.
// Later we may consider changing the LZO-related package so that all LZO
// block start offsets are treated the same way.
//
// lzoOffsets holds totalLZOBlocks + 1 entries, all of which are filled below.
lzoOffsets = new long[totalLZOBlocks + 1];
if (foundEndPos) {
// Not the last split: every entry comes from the index, positions
// startPos through startPos + totalLZOBlocks inclusive.
for (int i = 0; i <= totalLZOBlocks; i++)
lzoOffsets[i] = lzoBlockIndex.getPosition(i + startPos);
} else {
// treat the last InputSplit differently: the first totalLZOBlocks entries
// come from the index and the final entry is the file size
if (LOG.isDebugEnabled()) {
LOG.debug("read the last lzo block offset, add the file end offset to the last element in the index array");
}
for (int i = 0; i < totalLZOBlocks; i++)
lzoOffsets[i] = lzoBlockIndex.getPosition(i + startPos);
lzoOffsets[totalLZOBlocks] = fileSize;
}
if (splitStart == 0) {
lzoOffsets[0] = 0;