int startOffset = startOffset(range);
int endOffset = endOffset(range);
// Find all the offsets where element annotations start
List<Integer> sortedOffsets = new ArrayList<Integer>();
AnnotationSet elements = gate.Utils.getContainedAnnotations(inputAS, range,
type);
// System.out.println("Number of elements: "+elements.size());
Set<Integer> offsets = new HashSet<Integer>();
for (Annotation ann : elements) {
Integer off = ann.getStartNode().getOffset().intValue();
offsets.add(off);
}
// create the sorted list of offsets
sortedOffsets.addAll(offsets);
Collections.sort(sortedOffsets);
// System.out.println("Sorted Offsets: "+sortedOffsets);
Set<AnnotationChain> chains = new HashSet<AnnotationChain>();
// At each offset we either try to make the existing chains longer
// or we try to start new chains.
for (Integer offset : sortedOffsets) {
AnnotationSet offsetAnns = getAnnsStartingAt(elements, type, offset);
// first check all the chains we already have to see if we can
// add one or more of the annotations here:
// For each chain
// if the end of the chain is beyond the offset, ignore the chain
// find all annotations that match the chain value
// if there is exactly one annotation, add it to the chain,
// and mark the annotation as added
// otherwise, make as many total copies of the chain as there are
// annotations and add each annotation to its corresponding chain
// also mark as added
// start a new chain at each annotation that was not already added
// to a chain.
Set<Annotation> addedAnnotations = new HashSet<Annotation>();
for (AnnotationChain chain : chains) {
Annotation lastEl = chain.getLast();
// is the end of the last element not beyond the current offset?
// then the chain can potentially be made longer with annotations
// from this offset.
Object value = lastEl.getFeatures().get(feature);
if (endOffset(lastEl) <= offset) {
Set<Annotation> candidates = new HashSet<Annotation>();
for (Annotation offsetAnn : offsetAnns) {
Object annValue = offsetAnn.getFeatures().get(feature);
if (annValue == null) {
continue;
} // ignore anns with no value
if (annValue.equals(value)) {
candidates.add(offsetAnn);
}
}
if (candidates.isEmpty()) {
continue;
} // no candidates for this chain
if (candidates.size() == 1) {
// add the annotation to the current chain
Annotation a = candidates.iterator().next();
chain.addLast(a);
addedAnnotations.add(a);
} else {
// more than one candidate, we need to duplicate the current chain
System.err.println("Need to duplicate chain, not yet implemented!");
Annotation a = candidates.iterator().next();
chain.addLast(a);
addedAnnotations.add(a);
}
} // offset compatible with current chain
} // for each chain
// Now that we have made all the chains longer where possible, try
// to start new chains here but only if the sequenceType does not ask
// for only sequences starting at the beginning
if (
// either we have requested all chains from any offset ...
(sequenceType == SequenceType.ALL || sequenceType == SequenceType.LONGEST)
// or we are at the starting offset anyway
|| (offset == startOffset)) {
for (Annotation offsetAnn : offsetAnns) {
if (!addedAnnotations.contains(offsetAnn)) {
AnnotationChain newChain = new AnnotationChain();
newChain.addLast(offsetAnn);
chains.add(newChain);
}
}
}
} // for offsets in sorted offsets
int longestLength = 0;
Iterator<AnnotationChain> chainIt = chains.iterator();
while (chainIt.hasNext()) {
AnnotationChain chain = chainIt.next();
// if we must cover the full range, remove all chains that do not
// end at the end of the range.
// Note that if we must start at the beginning, we already made
// sure that only chains starting at the beginning were generated
if (sequenceType == SequenceType.ALL_FULLRANGE) {
if (endOffset(chain.getLast()) != endOffset)
chainIt.remove();
} else {
// if this is an acceptable chain, find the longest one
int length = chain.getLength();
if (length > longestLength) {
longestLength = length;
}
}
}
// if we just want the longest chains, iterate again and remove the
// non-longest ones
if (sequenceType == SequenceType.LONGEST
|| sequenceType == SequenceType.LONGEST_FROMSTART) {
chainIt = chains.iterator();
while (chainIt.hasNext()) {
AnnotationChain chain = chainIt.next();
if (chain.getLength() < longestLength) {
chainIt.remove();
}
}
}
// now we are left with all chains we want to return
Set<AnnotationSet> returnSet = new HashSet<AnnotationSet>();
for (AnnotationChain chain : chains) {
AnnotationSet aset = new ImmutableAnnotationSetImpl(range.getDocument(),
chain.getChain()) {
private static final long serialVersionUID = -6703131102439043539L;
};
returnSet.add(aset);
}