@Override
public void execute() throws ExecutionException {
boolean isSentenceSplitted = false;
// text doc annotations
AnnotationSet annotations;
if (annotationSetName != null && annotationSetName.length() > 0)
annotations = document.getAnnotations(annotationSetName);
else
annotations = document.getAnnotations();
// get the document text
String text = document.getContent().toString();
// run the sentence splitter to detect sentence positions
int[] spans = splitter.sentPosDetect(text);
// walk the detected sentence offsets and add a
// "Sentence" annotation for each one
int prevSpan = 0;
for (int i = 0; i < spans.length; i++) {
FeatureMap fm = Factory.newFeatureMap();
// source
fm.put("source", "openNLP");
// type (currently disabled)
// fm.put("type", "urn:lsid:ontotext.com:kim:iextraction:Sentence");
try {
// annotations.add(Long.valueOf(spans[i].getStart()),
// Long.valueOf(spans[i].getEnd()), "Sentence", fm);
// annotations.add(i == 0 ? Long.valueOf(prevSpan) : Long
// .valueOf(prevSpan + countSpaces(prevSpan - 1)),
// i == (spans.length - 1) ? Long.valueOf(spans[i]) : Long
// .valueOf(spans[i] - 1), "Sentence", fm);
int start = prevSpan;
int end = spans[i];
// remove leading spaces of a sentence
for (int j = start; j < end
&& Character.isWhitespace(text.charAt(j)); j++) {
start = j + 1;
}
// remove trailing spaces of a sentence
if (end > 1) {
for (int j = end; j > start
&& Character.isWhitespace(text.charAt(j - 1)); j--) {
end = j - 1;
}
}
annotations.add(Long.valueOf(start), Long.valueOf(end),
"Sentence", fm);
if(!isSentenceSplitted)
isSentenceSplitted = true;
} catch (InvalidOffsetException e) {
e.printStackTrace();
throw new RuntimeException(e);
}
prevSpan = spans[i];
}
if(!isSentenceSplitted){
FeatureMap fm = Factory.newFeatureMap();
// source
fm.put("source", "openNLP");
try {
annotations.add(new Long(0), new Long(text.length()),
"Sentence", fm);
} catch (InvalidOffsetException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}