for (String id : patterns.keySet()) {
Pattern p = patterns.get(id);
// System.out.println("Pattern" + p);
Matcher m = p.matcher(text);
while (m.find()) {
Segment segment = new Segment(jCas);
segment.setBegin(m.start());
segment.setEnd(m.end());
segment.setId(id);
sorted_segments.add(segment);
}
}
// If there are no segments, create a simple one that spans the
// entire doc
if (sorted_segments.size() <= 0) {
Segment header = new Segment(jCas);
header.setBegin(0);
header.setEnd(text.length());
header.setId(SIMPLE_SEGMENT);
sorted_segments.add(header);
}
// TODO: this sort is somewhat redundant, but the sections must be in
// sorted order to determine the end of each section, which is assumed
// to be the beginning of the next section
Collections.sort(sorted_segments, new Comparator<Segment>() {
/**
 * Orders two segments by ascending begin offset.
 *
 * @param s1 the first segment to compare
 * @param s2 the second segment to compare
 * @return a negative value, zero, or a positive value when {@code s1}
 *         begins before, at the same offset as, or after {@code s2}
 */
@Override
public int compare(Segment s1, Segment s2) {
    // Integer.compare cannot overflow, unlike subtracting the two offsets.
    return Integer.compare(s1.getBegin(), s2.getBegin());
}
});
int index = 0;
for (Segment s : sorted_segments) {
int prevEnd = s.getEnd();
int nextBegin = text.length();
if (index > 0) {
// not the first section; NOTE: the value read below is never used
sorted_segments.get(index - 1).getEnd();
}
if (index + 1 < sorted_segments.size()) {
// not the last section: this section ends where the next one begins
nextBegin = sorted_segments.get(index + 1).getBegin();
}
// Only create a segment if there is some text.
// Handle the case where it's an empty segment
if (nextBegin > prevEnd) {
Segment segment = new Segment(jCas);
segment.setBegin(prevEnd);
segment.setEnd(nextBegin);
segment.setId(s.getId());
segment.addToIndexes();
segment.setPreferredText(section_names.get(s.getId()));
index++;
}
// handle case where there is only a single SIMPLE_SEGMENT
else if (nextBegin == prevEnd && nextBegin > 0 && index == 0) {
Segment segment = new Segment(jCas);
segment.setBegin(0);
segment.setEnd(nextBegin);
segment.setId(s.getId());
segment.addToIndexes();
index++;
}
}
}
}