* @return the list of SOBI blocks parsed from the given file
* @throws IOException if file cannot be opened for reading.
*/
public List<SOBIBlock> getBlocks(File sobiFile) throws IOException
{
SOBIBlock block = null;
List<SOBIBlock> blocks = new ArrayList<SOBIBlock>();
List<String> lines = FileUtils.readLines(sobiFile);
lines.add(""); // Add a trailing line to end the last block and remove edge cases
for(int lineNum = 0; lineNum < lines.size(); lineNum++) {
// Replace NULL bytes with spaces to properly format lines.
String line = lines.get(lineNum).replace('\0', ' ');
// Source file is not assumed to be 100% SOBI so we filter out other lines
Matcher headerMatcher = SOBIBlock.blockPattern.matcher(line);
if (headerMatcher.find()) {
if (block == null) {
// No active block with a new matching line: create new block
block = new SOBIBlock(sobiFile, lineNum, line);
}
else if (block.getHeader().equals(headerMatcher.group()) && block.isMultiline()) {
// active multi-line block with a new matching line: extend block
block.extend(line);
}
else {
// active block does not match new line or can't be extended: create new block
blocks.add(block);
SOBIBlock newBlock = new SOBIBlock(sobiFile, lineNum, line);
// Handle certain SOBI grouping edge cases.
if (newBlock.getBillHeader().equals(block.getBillHeader())) {
// The law code line can be omitted when blank but it always precedes the 'C' line
if (newBlock.getType() == 'C' && block.getType() != 'B') {
blocks.add(new SOBIBlock(sobiFile, lineNum, block.getBillHeader()+"B"));
}
}
// Start a new block
block = newBlock;