// Scan every input file line by line. Each line that fully matches
// LINE_PATTERN is parsed into one PeInputSplit and appended to splitList;
// lines that do not match are ignored.
for (FileStatus file : listStatus(job)) {
  Path path = file.getPath();
  FileSystem fs = path.getFileSystem(job.getConfiguration());
  FSDataInputStream fileIn = fs.open(path);
  LineReader in = new LineReader(fileIn, job.getConfiguration());
  try {
    while (true) {
      Text lineText = new Text();
      // Stop reading this file once readLine reports nothing was consumed.
      if (in.readLine(lineText) <= 0) {
        break;
      }
      // Pattern.matcher never returns null, so only the match result is checked.
      Matcher m = LINE_PATTERN.matcher(lineText.toString());
      if (!m.matches()) {
        continue;
      }
      int startRow = Integer.parseInt(m.group(1));
      int rows = Integer.parseInt(m.group(2));
      int totalRows = Integer.parseInt(m.group(3));
      float sampleRate = Float.parseFloat(m.group(4));
      int clients = Integer.parseInt(m.group(5));
      boolean flushCommits = Boolean.parseBoolean(m.group(6));
      boolean writeToWAL = Boolean.parseBoolean(m.group(7));
      boolean reportLatency = Boolean.parseBoolean(m.group(8));
      // Skip building the message when debug output is disabled.
      if (LOG.isDebugEnabled()) {
        LOG.debug("split["+ splitList.size() + "] " +
            " startRow=" + startRow +
            " rows=" + rows +
            " totalRows=" + totalRows +
            " sampleRate=" + sampleRate +
            " clients=" + clients +
            " flushCommits=" + flushCommits +
            " writeToWAL=" + writeToWAL +
            " reportLatency=" + reportLatency);
      }
      PeInputSplit newSplit =
          new PeInputSplit(startRow, rows, totalRows, sampleRate, clients,
              flushCommits, writeToWAL, reportLatency);
      splitList.add(newSplit);
    }
  } finally {
    // Close the reader even when reading or number parsing throws, so the
    // opened stream is not leaked on the error path.
    in.close();
  }
}
LOG.info("Total # of splits: " + splitList.size());
return splitList;
}