long end = start + length;
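// Open the file and let Hadoop resolve a compression codec from the
// file name suffix (e.g. ".gz"); null means the file is uncompressed.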
FileSystem fs = file.getFileSystem(conf);
FSDataInputStream filestream = fs.open(file);
CompressionCodec codec = (new CompressionCodecFactory(conf)).getCodec(file);
LineReader filereader;
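// Keep a Seekable handle on the raw stream so the byte position can be
// queried even when reading through a decompressor.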
Seekable fileseeker = filestream;
// Hadoop 1.0 does not support custom record delimiters, so we
// support only the default one.
// We might add another "else if" case for SplittableCompressionCodec once
// we drop support for Hadoop 1.0.
if (codec == null) {
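// Uncompressed input: seek directly to the start of this split.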
filestream.seek(start);
filereader = new LineReader(filestream);
} else {
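// Compressed input: read through the decompressor. Positions are
// tracked on the underlying raw stream via fileseeker.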
filereader = new LineReader(codec.createInputStream(filestream,
codec.createDecompressor()), conf);
}
if (start != 0) {
// Always throw away the first record, because the previous split
// already read one extra line past its end.
start += filereader.readLine(new Text(), 0);
}
LOG.info("Start position: " + start);
long next = start;
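// Read until we pass the end of the split: the record straddling the
// boundary belongs to this split, which is why the next split throws
// away its first line.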
while (next <= end) {
Text line = new Text();
int size = filereader.readLine(line, Integer.MAX_VALUE);
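// readLine returns the number of bytes consumed, or 0 at end of file.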
if (size == 0) {
break;
}
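// Advance the position: by bytes consumed for plain text, or by the
// offset in the underlying compressed stream when a codec is in use.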
if (codec == null) {
next += size;
} else {
next = fileseeker.getPos();
}
rowRead++;
dataWriter.writeCsvRecord(line.toString());
}
LOG.info("Extracting ended on position: " + fileseeker.getPos());
filestream.close();
}