// Leading or trailing whitespace will be trimmed later, when checking for
// end-of-line characters
// Build one candidate span per detected sentence break. Each span begins
// where the previous one ended (sentStart) and runs up to the break.
for (int sentIdx = 0; sentIdx < numSentences; sentIdx++) {
    // OpenNLP tools 1.5 returns Spans where 1.4 returned offsets; the
    // break value is shifted by b here.
    sentEnd = b + sentenceBreaks[sentIdx];
    potentialSentSpans[sentIdx] = new SentenceSpan(sentStart, sentEnd,
            text.substring(sentStart, sentEnd));
    // The next candidate picks up exactly where this one stopped.
    sentStart = sentEnd;
}
// If the detector didn't find any sentence-endings, or there was text
// after the last sentence-ending found, create one more candidate span
// from what's left (sentEnd up to e), as long as it's not all whitespace.
// Leading or trailing whitespace is not trimmed here; per the original
// note, that happens later when checking for end-of-line characters.
if (sentEnd < e) {
    String coveredText = text.substring(sentEnd, e);
    // Compare by content: the previous `trim() != ""` compared object
    // references, so it did not reliably detect an all-whitespace remainder.
    if (!coveredText.trim().isEmpty()) {
        potentialSentSpans[numSentences] = new SentenceSpan(sentEnd, e,
                coveredText);
        numSentences++;
    }
}
// Flatten the candidate spans into sentenceSpans. Unused (null) slots are
// skipped; every other candidate is handed to splitAtLineBreaksAndTrim,
// which, going by its name and the original note, splits the span at
// end-of-line characters and trims whitespace from the resulting pieces.
ArrayList<SentenceSpan> sentenceSpans = new ArrayList<SentenceSpan>(0);
for (SentenceSpan candidate : potentialSentSpans) {
    if (candidate == null) {
        continue;
    }
    // TODO Determine line break type
    sentenceSpans.addAll(candidate.splitAtLineBreaksAndTrim(NEWLINE));
}
// Add sentence annotations to the CAS. previousEnd remembers where the
// last indexed sentence stopped so that an overlapping span is reported
// and left out of the indexes.
int previousEnd = -1;
for (SentenceSpan sentSpan : sentenceSpans) {
    if (sentSpan.getStart() == sentSpan.getEnd()) {
        continue; // skip empty lines
    }
    Sentence sentAnnot = new Sentence(jcas);
    sentAnnot.setBegin(sentSpan.getStart());
    sentAnnot.setEnd(sentSpan.getEnd());
    if (sentAnnot.getBegin() < previousEnd) {
        // Starts before the previous sentence ended: log it, don't index it.
        logger.error("Skipping sentence from " + sentSpan.getStart()
                + " to " + sentSpan.getEnd());
        logger.error("Overlap with previous sentence that ended at "
                + previousEnd);
        continue;
    }
    sentAnnot.setSentenceNumber(sentenceCount);
    sentAnnot.addToIndexes();
    sentenceCount++;
    previousEnd = sentSpan.getEnd();
}