// Empty the carryover buffer; this pass may place at most one token into it
// (see the boundary handling below).
nextSentCarryover.clear();
// Becomes true once a token whose word is in sentDelims has been read.
boolean seenBoundary = false;
// Read tokens one at a time, collecting them into nextSent until the
// tokenizer is exhausted or the boundary logic below breaks out.
while (tokenizer.hasNext()) {
HasWord token = tokenizer.next();
// Optional word/tag splitting: when a splitTag function is configured, the
// raw word is passed through it and the first piece replaces the word.
if (splitTag != null) {
String[] toks = splitTag.apply(token.word());
// NOTE(review): assumes splitTag always returns at least one element;
// an empty array would throw here -- confirm against splitTag's contract.
token.setWord(toks[0]);
// The tag is set only when exactly two pieces came back and the token
// type can carry a tag; otherwise any extra pieces are ignored.
if(toks.length == 2 && token instanceof HasTag) {
//wsg2011: Some of the underlying tokenizers return old
//JavaNLP labels. We could convert to CoreLabel here, but
//we choose a conservative implementation....
((HasTag) token).setTag(toks[1]);
}
}
// Boundary detection. A delimiter token marks that a boundary has been seen
// and falls through to be collected below. After a boundary, the first token
// that is neither a delimiter nor in delimFollowers is stored in
// nextSentCarryover instead of nextSent, and the loop ends.
if (sentDelims.contains(token.word())) {
seenBoundary = true;
} else if (seenBoundary && !delimFollowers.contains(token.word())) {
nextSentCarryover.add(token);
break;
}
// Collect the token unless its word consists solely of whitespace
// (regex \s+). The commented-out alternative below is disabled code.
// NOTE(review): String.matches compiles the regex on every call; a
// precompiled Pattern would avoid that if this loop turns out to be hot.
if ( ! (token.word().matches("\\s+") //||
/*token.word().equals(PTBLexer.NEWLINE_TOKEN)*/)) {
nextSent.add(token);
}
// If there are no words that can follow a sentence delimiter,