// Seeking past EOF is unsafe for the input stream, so clamp the position to
// the file size, emit the final split, and stop.
if (currentPosition >= fileSize) {
currentPosition = fileSize;
endOfFile = true;
final FileSplit fileSplit = new FileSplit(fileName, splitStart, currentPosition - splitStart, new String[] {});
splitsList.add(fileSplit);
break;
}
// Every time we seek to the new approximate split point,
// we need to create a new CSVLineReader around the stream.
inputStream.seek(currentPosition);
final CSVLineReader csvLineReader = new CSVLineReader(inputStream, this.bufferSize, this.inputFileEncoding,
this.openQuoteChar, this.closeQuoteChar, this.escapeChar);
// This line is potentially garbage because we most likely just sought to
// the middle of a line. Read the rest of the line and leave it for the
// previous split. Then reset the multi-line CSV record boolean, because
// the partial line will have a very high chance of falsely triggering the
// class wide multi-line logic.
currentPosition += csvLineReader.readFileLine(new Text());
csvLineReader.resetMultiLine();
// Now, we may still be in the middle of a multi-line CSV record.
currentPosition += csvLineReader.readFileLine(new Text());
// If we are, read until we are not.
while (csvLineReader.isInMultiLine()) {
final int bytesRead = csvLineReader.readFileLine(new Text());
// A non-positive byte count means we hit the end of the file; stop scanning.
if (bytesRead <= 0) {
break;
}
currentPosition += bytesRead;
}
// We're out of the multi-line CSV record, so it's safe to end the
// previous split.
splitsList.add(new FileSplit(fileName, splitStart, currentPosition - splitStart, new String[] {}));
}
return splitsList;
}