throw new ExecutionException(
"No document to process!"
);
}
AnnotationSet inputAS = null;
if(inputAnnotationSet == null ||
inputAnnotationSet.equals("")) inputAS = theDocument.getAnnotations();
else inputAS = theDocument.getAnnotations(inputAnnotationSet);
outputAS = null;
if(outputAnnotationSet == null ||
outputAnnotationSet.equals("")) outputAS = theDocument.getAnnotations();
else outputAS = theDocument.getAnnotations(outputAnnotationSet);
AnnotationSet processAnns = null;
if(wordAnnotationType == null || wordAnnotationType.isEmpty()) {
throw new GateRuntimeException("Word annotation type must not be empty!");
}
if(spaceAnnotationType == null || spaceAnnotationType.isEmpty()) {
throw new GateRuntimeException("Space annotation type must not be empty!");
}
Set<String> typeSet = new HashSet<String>();
typeSet.add(wordAnnotationType);
typeSet.add(spaceAnnotationType);
processAnns = inputAS.get(typeSet);
AnnotationSet containingAnns = null;
if(containingAnnotationType == null || containingAnnotationType.isEmpty()) {
// leave the containingAnns null to indicate we do not use containing annotations
} else {
containingAnns = inputAS.get(containingAnnotationType);
//System.out.println("DEBUG: got containing annots: "+containingAnns.size()+" type is "+containingAnnotationType);
}
AnnotationSet splitAnns = null;
if(splitAnnotationType == null || splitAnnotationType.isEmpty()) {
// leave the splitAnns null to indicate we do not use split annotations
} else {
splitAnns = inputAS.get(splitAnnotationType);
//System.out.println("DEBUG: got split annots: "+splitAnns.size()+" type is "+splitAnnotationType);
if(splitAnns.size() == 0) {
splitAnns = null;
}
}
fireStatusChanged("Performing look-up in " + theDocument.getName() + "...");
long endOffset = theDocument.getContent().size();
// now split the document into chunks if necessary:
// = for each containing annotation we create a chunk,
// = each split annotation forces the end of a chunk
// Each chunk is represented by an instance of Chunk
if(containingAnns == null) {
if(splitAnns != null) { // we need to do some additional chunking
List<Annotation> splitAnnsList = Utils.inDocumentOrder(splitAnns);
long lastOffset = 0;
for(Annotation splitAnn : splitAnnsList) {
long splitOffset = splitAnn.getStartNode().getOffset();
if(splitOffset > lastOffset) {
doAnnotateChunk(Chunk.makeChunk(
document,lastOffset,splitOffset,!caseSensitive,
processAnns,wordAnnotationType,textFeature,spaceAnnotationType,
matchAtWordStartOnly,matchAtWordEndOnly,matchStartFeature,matchEndFeature,matchTypeFeature));
}
lastOffset = splitOffset;
} // for
// anything left?
if(lastOffset < endOffset) {
doAnnotateChunk(Chunk.makeChunk(document,lastOffset,endOffset,!caseSensitive,
processAnns,wordAnnotationType,textFeature,spaceAnnotationType,
matchAtWordStartOnly,matchAtWordEndOnly,matchStartFeature,matchEndFeature,matchTypeFeature));
}
} else {
// create a chunk from the whole document
doAnnotateChunk(Chunk.makeChunk(document,0,endOffset,!caseSensitive,
processAnns,wordAnnotationType,textFeature,spaceAnnotationType,
matchAtWordStartOnly,matchAtWordEndOnly,matchStartFeature,matchEndFeature,matchTypeFeature));
}
} else {
for(Annotation containingAnn : containingAnns) {
//System.out.println("processing containing annot "+containingAnn);
// if we do have split annotations and we have split annotations within the range
// of this containing annotation, we need to do further chunking
if(splitAnns != null) {
AnnotationSet containedSplits = Utils.getContainedAnnotations(splitAnns, containingAnn);
if(containedSplits.size() > 0) {
// we need to split
List<Annotation> splitAnnsList = Utils.inDocumentOrder(containedSplits);
long lastOffset = containingAnn.getStartNode().getOffset();