/*
* To change this template, choose Tools | Templates and open the template in
* the editor.
*/
package at.ofai.gate.japeutils;
import gate.Annotation;
import gate.AnnotationSet;
import gate.Controller;
import gate.Factory;
import gate.FeatureMap;
import gate.Utils;
import gate.annotation.AnnotationSetImpl;
import gate.annotation.ImmutableAnnotationSetImpl;
import gate.jape.ActionContext;
import gate.util.GateRuntimeException;
import gate.util.InvalidOffsetException;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.Deque;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* A collection of static methods that make common tasks in a JAPE Java RHS
* easier or less verbose.
* <p>
* NOTE: the set of methods and their parameters are not stable yet and may
* change in the future.
*
* @author Johann Petrak
*/
public class JapeUtils {
/**
 * Revision identifier of this class.
 * NOTE(review): the value looks like an expanded version-control keyword;
 * confirm whether it is still maintained automatically.
 */
public static final String version = "$Revision: 62 $";
/**
 * Create a shallow copy of a feature map.
 * <p>
 * A fresh map is obtained from the {@link Factory} and all entries of the
 * given map are put into it; the values themselves are shared, not cloned.
 *
 * @param otherFeatureMap
 *          the feature map whose entries should be copied
 * @return a new FeatureMap holding the same entries as otherFeatureMap
 */
public static FeatureMap copyFeatureMap(FeatureMap otherFeatureMap) {
  final FeatureMap copy = Factory.newFeatureMap();
  copy.putAll(otherFeatureMap);
  return copy;
}
/**
 * Create a shallow copy of the feature map of an annotation.
 * <p>
 * The returned map is a new object, the feature values are shared with the
 * annotation and not cloned.
 *
 * @param ann
 *          the annotation whose features should be copied
 * @return a new FeatureMap holding the same entries as the annotation's
 *         feature map
 */
public static FeatureMap copyFeatureMap(Annotation ann) {
  final FeatureMap copy = Factory.newFeatureMap();
  final FeatureMap original = ann.getFeatures();
  copy.putAll(original);
  return copy;
}
/**
 * Copy one feature value between two matched parts of a rule.
 * <p>
 * The value is read from the longest annotation of type fromType in the
 * binding fromBindings and stored, under the same feature name, in the
 * longest annotation of type toType in the binding toBindings. If the source
 * annotation does not have the feature, null is stored.
 *
 * @param bindings
 *          the map of binding names to matched annotation sets
 * @param fromBindings
 *          name of the binding to read from
 * @param fromType
 *          annotation type to look for in the source binding
 * @param toBindings
 *          name of the binding to write to
 * @param toType
 *          annotation type to look for in the target binding
 * @param featurename
 *          name of the feature to copy
 * @throws GateRuntimeException
 *           if a binding does not exist or contains no annotation of the
 *           requested type
 */
public static void copyFeature(Map<String, AnnotationSet> bindings,
    String fromBindings, String fromType, String toBindings, String toType,
    String featurename) {
  // Resolve both candidate sets first (target before source) so that lookup
  // errors are reported in the same order as before.
  final AnnotationSet targetCandidates =
      getAnnsForType(getBindings(bindings, toBindings), toType);
  final AnnotationSet sourceCandidates =
      getAnnsForType(getBindings(bindings, fromBindings), fromType);
  final Annotation target = getLongestAnn(targetCandidates);
  final Annotation source = getLongestAnn(sourceCandidates);
  final FeatureMap targetFeatures = target.getFeatures();
  final Object value = source.getFeatures().get(featurename);
  targetFeatures.put(featurename, value);
}
/**
 * Copy one feature value from a matched part of a rule to an existing
 * annotation.
 * <p>
 * The value is read from the longest annotation of type fromType in the
 * binding fromBindings and stored, under the same feature name, in toAnn. If
 * the source annotation does not have the feature, null is stored.
 *
 * @param bindings
 *          the map of binding names to matched annotation sets
 * @param fromBindings
 *          name of the binding to read from
 * @param fromType
 *          annotation type to look for in the source binding
 * @param toAnn
 *          the annotation that receives the feature value
 * @param featurename
 *          name of the feature to copy
 * @throws GateRuntimeException
 *           if the binding does not exist or contains no annotation of the
 *           requested type
 */
public static void copyFeature(Map<String, AnnotationSet> bindings,
    String fromBindings, String fromType, Annotation toAnn, String featurename) {
  final AnnotationSet sourceCandidates =
      getAnnsForType(getBindings(bindings, fromBindings), fromType);
  final Annotation source = getLongestAnn(sourceCandidates);
  final FeatureMap targetFeatures = toAnn.getFeatures();
  final Object value = source.getFeatures().get(featurename);
  targetFeatures.put(featurename, value);
}
/**
 * Look up the annotation set of a named binding, failing loudly if the
 * binding is missing instead of returning null.
 *
 * @param bindings
 *          the map of binding names to matched annotation sets
 * @param bindingName
 *          the name of the binding to retrieve
 * @return an immutable annotation set with the annotations of the binding
 * @throws GateRuntimeException
 *           if there is no binding with the given name
 */
public static AnnotationSet getBindings(Map<String, AnnotationSet> bindings,
    String bindingName) {
  final AnnotationSet bound = bindings.get(bindingName);
  if (bound != null) {
    return new ImmutableAnnotationSetImpl(bound.getDocument(), bound) {
      private static final long serialVersionUID = -6703131102439043539L;
    };
  }
  throw new GateRuntimeException("No bindings named " + bindingName);
}
/**
 * Select the annotations of one type from a set, throwing an exception when
 * the type lookup yields null.
 * <p>
 * TODO: this should probably get renamed -- in general we should distinguish
 * between methods that throw an exception and those that just return an
 * empty set (and we should never use null). A better name would be e.g.
 * getAnnsForTypeOrError.
 *
 * @param annset
 *          the annotation set to select from
 * @param typeName
 *          the annotation type to select
 * @return an immutable annotation set with the annotations of that type
 * @throws GateRuntimeException
 *           if the lookup for the type returns null
 */
public static AnnotationSet getAnnsForType(AnnotationSet annset,
    String typeName) {
  final AnnotationSet ofType = annset.get(typeName);
  if (ofType != null) {
    return new ImmutableAnnotationSetImpl(annset.getDocument(), ofType) {
      private static final long serialVersionUID = -6703131102439043539L;
    };
  }
  throw new GateRuntimeException("Got a null set for type " + typeName);
}
/**
 * Select the annotations whose type is one of the given types.
 *
 * @param set
 *          the annotation set to select from
 * @param types
 *          the annotation types to accept
 * @return whatever the set returns for a lookup with the collected type names
 */
public static AnnotationSet getAnnsForTypes(AnnotationSet set, String... types) {
  final Set<String> wanted = new HashSet<String>();
  Collections.addAll(wanted, types);
  return set.get(wanted);
}
/**
 * Return the annotations from the given set that start exactly at the given
 * offset.
 *
 * @param annset
 *          the annotation set to select from
 * @param offset
 *          the start offset the annotations must have
 * @return an immutable annotation set of the annotations starting at offset
 */
public static AnnotationSet getAnnsStartingAt(AnnotationSet annset, int offset) {
  // Pre-select the annotations overlapping the one-character window at the
  // offset (the set may be able to answer this from an index); the worker
  // then keeps only those that really start there.
  final Long from = Long.valueOf(offset);
  final Long to = Long.valueOf(offset + 1L);
  final AnnotationSet candidates = annset.get(from, to);
  return getAnnsStartingAtWorker(candidates, offset);
}
/**
 * Return the annotations of the given type from the given set that start
 * exactly at the given offset.
 *
 * @param annset
 *          the annotation set to select from
 * @param type
 *          the annotation type to select
 * @param offset
 *          the start offset the annotations must have
 * @return an immutable annotation set of the matching annotations
 */
public static AnnotationSet getAnnsStartingAt(AnnotationSet annset,
    String type, int offset) {
  final Long from = Long.valueOf(offset);
  final Long to = Long.valueOf(offset + 1L);
  final AnnotationSet candidates = annset.get(type, from, to);
  return getAnnsStartingAtWorker(candidates, offset);
}
/**
 * Return the annotations from the given set that start at the same offset as
 * the given annotation.
 *
 * @param annset
 *          the annotation set to select from
 * @param ann
 *          the annotation whose start offset is used
 * @return an immutable annotation set of the matching annotations
 */
public static AnnotationSet getAnnsStartingAt(AnnotationSet annset,
    Annotation ann) {
  final int start = startOffset(ann);
  final AnnotationSet candidates =
      annset.get(Long.valueOf(start), Long.valueOf(start + 1L));
  return getAnnsStartingAtWorker(candidates, start);
}
/**
 * Return the annotations of the given type from the given set that start at
 * the same offset as the given annotation.
 *
 * @param annset
 *          the annotation set to select from
 * @param type
 *          the annotation type to select
 * @param ann
 *          the annotation whose start offset is used
 * @return an immutable annotation set of the matching annotations
 */
public static AnnotationSet getAnnsStartingAt(AnnotationSet annset,
    String type, Annotation ann) {
  final int start = startOffset(ann);
  final AnnotationSet candidates =
      annset.get(type, Long.valueOf(start), Long.valueOf(start + 1L));
  return getAnnsStartingAtWorker(candidates, start);
}
/**
 * Return the annotations from the source set that start at the same offset
 * as the given reference annotation set.
 * <p>
 * The candidates are pre-selected at the start offset of the reference set
 * (not of the source set), so the result is also correct when the two sets
 * start at different offsets.
 *
 * @param annset
 *          the source annotation set to select from
 * @param set
 *          the reference set whose start offset is used
 * @return an immutable annotation set of the matching annotations
 */
public static AnnotationSet getAnnsStartingAt(AnnotationSet annset,
    AnnotationSet set) {
  final int start = startOffset(set);
  final AnnotationSet candidates =
      annset.get(Long.valueOf(start), Long.valueOf(start + 1L));
  return getAnnsStartingAtWorker(candidates, start);
}
/**
 * Return the annotations of the given type from the source set that start at
 * the same offset as the given reference annotation set.
 * <p>
 * The candidates are pre-selected at the start offset of the reference set
 * (not of the source set), so the result is also correct when the two sets
 * start at different offsets.
 *
 * @param annset
 *          the source annotation set to select from
 * @param type
 *          the annotation type to select
 * @param set
 *          the reference set whose start offset is used
 * @return an immutable annotation set of the matching annotations
 */
public static AnnotationSet getAnnsStartingAt(AnnotationSet annset,
    String type, AnnotationSet set) {
  final int start = startOffset(set);
  final AnnotationSet candidates =
      annset.get(type, Long.valueOf(start), Long.valueOf(start + 1L));
  return getAnnsStartingAtWorker(candidates, start);
}
/**
 * Keep only those annotations of the given (pre-selected) set whose start
 * offset equals the given offset.
 *
 * @param annset
 *          the candidate annotations
 * @param offset
 *          the required start offset
 * @return an immutable annotation set of the candidates starting at offset
 */
private static AnnotationSet getAnnsStartingAtWorker(AnnotationSet annset,
    int offset) {
  final List<Annotation> matching = new ArrayList<Annotation>();
  final Iterator<Annotation> candidates = annset.iterator();
  while (candidates.hasNext()) {
    final Annotation candidate = candidates.next();
    if (startOffset(candidate) != offset) {
      continue;
    }
    matching.add(candidate);
  }
  return new ImmutableAnnotationSetImpl(annset.getDocument(), matching) {
    private static final long serialVersionUID = -6703131102439043539L;
  };
}
/**
 * Return the annotations from inputSet that start exactly one offset position
 * after the start of the given reference annotation set.
 * <p>
 * The candidates are pre-selected at that target offset; previously the
 * window was taken at the start of inputSet itself, which made the result
 * empty unless both offsets happened to coincide.
 * <p>
 * NOTE(review): the target offset is derived from the start (not the end) of
 * afterThis, exactly as before; confirm that this is the intended meaning of
 * "starting after".
 *
 * @param inputSet
 *          the annotation set to select from
 * @param type
 *          the annotation type to select, or null to accept any type
 * @param afterThis
 *          the reference set whose start offset determines the target offset
 * @return an immutable annotation set of the matching annotations
 */
public static AnnotationSet getAnnsStartingAfter(AnnotationSet inputSet,
    String type, AnnotationSet afterThis) {
  final int offset = startOffset(afterThis) + 1;
  final Long from = Long.valueOf(offset);
  final Long to = Long.valueOf(offset + 1L);
  // A null type keeps the old "no type restriction" behaviour.
  final AnnotationSet candidates =
      (type == null) ? inputSet.get(from, to) : inputSet.get(type, from, to);
  return getAnnsStartingAtWorker(candidates, offset);
}
/**
 * Return the annotations from inputSet that start exactly one offset position
 * after the start of the given reference annotation.
 * <p>
 * The candidates are pre-selected at that target offset; previously the
 * window was taken at the start of inputSet itself, which made the result
 * empty unless both offsets happened to coincide.
 * <p>
 * NOTE(review): the target offset is derived from the start (not the end) of
 * afterThis, exactly as before; confirm that this is the intended meaning of
 * "starting after".
 *
 * @param inputSet
 *          the annotation set to select from
 * @param type
 *          the annotation type to select, or null to accept any type
 * @param afterThis
 *          the reference annotation whose start offset determines the target
 *          offset
 * @return an immutable annotation set of the matching annotations
 */
public static AnnotationSet getAnnsStartingAfter(AnnotationSet inputSet,
    String type, Annotation afterThis) {
  final int offset = startOffset(afterThis) + 1;
  final Long from = Long.valueOf(offset);
  final Long to = Long.valueOf(offset + 1L);
  // A null type keeps the old "no type restriction" behaviour.
  final AnnotationSet candidates =
      (type == null) ? inputSet.get(from, to) : inputSet.get(type, from, to);
  return getAnnsStartingAtWorker(candidates, offset);
}
/**
 * Return the annotations from inputSet that end exactly one offset position
 * before the start of the given reference annotation set.
 * <p>
 * The candidates are pre-selected at that target offset; previously the
 * window was taken at the end of inputSet itself, independent of the target
 * offset.
 * <p>
 * NOTE(review): the target offset is "start of beforeThis minus one", exactly
 * as before; confirm that this is the intended meaning of "ending before".
 *
 * @param inputSet
 *          the annotation set to select from
 * @param type
 *          the annotation type to select, or null to accept any type
 * @param beforeThis
 *          the reference set whose start offset determines the target offset
 * @return an immutable annotation set of the matching annotations
 */
public static AnnotationSet getAnnsEndingBefore(AnnotationSet inputSet,
    String type, AnnotationSet beforeThis) {
  final int offset = startOffset(beforeThis) - 1;
  final Long from = Long.valueOf(offset - 1L);
  final Long to = Long.valueOf(offset);
  // A null type keeps the old "no type restriction" behaviour.
  final AnnotationSet candidates =
      (type == null) ? inputSet.get(from, to) : inputSet.get(type, from, to);
  return getAnnsEndingAtWorker(candidates, offset);
}
/**
 * Return the annotations from inputSet that end exactly one offset position
 * before the start of the given reference annotation.
 * <p>
 * The candidates are pre-selected at that target offset; previously the
 * window was taken at the end of inputSet itself, independent of the target
 * offset.
 * <p>
 * NOTE(review): the target offset is "start of beforeThis minus one", exactly
 * as before; confirm that this is the intended meaning of "ending before".
 *
 * @param inputSet
 *          the annotation set to select from
 * @param type
 *          the annotation type to select, or null to accept any type
 * @param beforeThis
 *          the reference annotation whose start offset determines the target
 *          offset
 * @return an immutable annotation set of the matching annotations
 */
public static AnnotationSet getAnnsEndingBefore(AnnotationSet inputSet,
    String type, Annotation beforeThis) {
  final int offset = startOffset(beforeThis) - 1;
  final Long from = Long.valueOf(offset - 1L);
  final Long to = Long.valueOf(offset);
  // A null type keeps the old "no type restriction" behaviour.
  final AnnotationSet candidates =
      (type == null) ? inputSet.get(from, to) : inputSet.get(type, from, to);
  return getAnnsEndingAtWorker(candidates, offset);
}
/**
 * Return the annotations from the given set that end exactly at the given
 * offset.
 *
 * @param annset
 *          the annotation set to select from
 * @param offset
 *          the end offset the annotations must have
 * @return an immutable annotation set of the annotations ending at offset
 */
public static AnnotationSet getAnnsEndingAt(AnnotationSet annset, int offset) {
  // Pre-select the annotations overlapping the one-character window that
  // ends at the requested offset (the set may be able to answer this from an
  // index). The window must be derived from the requested offset, not from
  // the end of annset, otherwise annotations ending elsewhere are never found.
  final Long from = Long.valueOf(offset - 1L);
  final Long to = Long.valueOf(offset);
  final AnnotationSet candidates = annset.get(from, to);
  return getAnnsEndingAtWorker(candidates, offset);
}
/**
 * Return the annotations of the given type from the given set that end
 * exactly at the given offset.
 *
 * @param annset
 *          the annotation set to select from
 * @param type
 *          the annotation type to select
 * @param offset
 *          the end offset the annotations must have
 * @return an immutable annotation set of the matching annotations
 */
public static AnnotationSet getAnnsEndingAt(AnnotationSet annset,
    String type, int offset) {
  // The pre-selection window is derived from the requested offset, not from
  // the end of annset, so annotations ending anywhere in the set can be found.
  final Long from = Long.valueOf(offset - 1L);
  final Long to = Long.valueOf(offset);
  final AnnotationSet candidates = annset.get(type, from, to);
  return getAnnsEndingAtWorker(candidates, offset);
}
/**
 * Return the annotations from the given set that end at the same offset as
 * the given annotation.
 *
 * @param annset
 *          the annotation set to select from
 * @param ann
 *          the annotation whose end offset is used
 * @return an immutable annotation set of the matching annotations
 */
public static AnnotationSet getAnnsEndingAt(AnnotationSet annset,
    Annotation ann) {
  // Pre-select at the end offset of ann (not at the end of annset); the set
  // may be able to answer this from an index.
  final int end = endOffset(ann);
  final AnnotationSet candidates =
      annset.get(Long.valueOf(end - 1L), Long.valueOf(end));
  return getAnnsEndingAtWorker(candidates, end);
}
/**
 * Return the annotations of the given type from the given set that end at
 * the same offset as the given annotation.
 *
 * @param annset
 *          the annotation set to select from
 * @param type
 *          the annotation type to select
 * @param ann
 *          the annotation whose end offset is used
 * @return an immutable annotation set of the matching annotations
 */
public static AnnotationSet getAnnsEndingAt(AnnotationSet annset,
    String type, Annotation ann) {
  // Pre-select at the end offset of ann (not at the end of annset); the set
  // may be able to answer this from an index.
  final int end = endOffset(ann);
  final AnnotationSet candidates =
      annset.get(type, Long.valueOf(end - 1L), Long.valueOf(end));
  return getAnnsEndingAtWorker(candidates, end);
}
/**
 * Return the annotations from the source set that end at the same offset as
 * the given reference annotation set.
 *
 * @param annset
 *          the source annotation set to select from
 * @param set
 *          the reference set whose end offset is used
 * @return an immutable annotation set of the matching annotations
 */
public static AnnotationSet getAnnsEndingAt(AnnotationSet annset,
    AnnotationSet set) {
  // Pre-select at the end offset of the reference set (not at the end of the
  // source set); the set may be able to answer this from an index.
  final int end = endOffset(set);
  final AnnotationSet candidates =
      annset.get(Long.valueOf(end - 1L), Long.valueOf(end));
  return getAnnsEndingAtWorker(candidates, end);
}
/**
 * Return the annotations of the given type from the source set that end at
 * the same offset as the given reference annotation set.
 *
 * @param annset
 *          the source annotation set to select from
 * @param type
 *          the annotation type to select
 * @param set
 *          the reference set whose end offset is used
 * @return an immutable annotation set of the matching annotations
 */
public static AnnotationSet getAnnsEndingAt(AnnotationSet annset,
    String type, AnnotationSet set) {
  // Pre-select at the end offset of the reference set (not at the end of the
  // source set); the set may be able to answer this from an index.
  final int end = endOffset(set);
  final AnnotationSet candidates =
      annset.get(type, Long.valueOf(end - 1L), Long.valueOf(end));
  return getAnnsEndingAtWorker(candidates, end);
}
/**
 * Keep only those annotations of the given (pre-selected) set whose end
 * offset equals the given offset.
 *
 * @param annset
 *          the candidate annotations
 * @param offset
 *          the required end offset
 * @return an immutable annotation set of the candidates ending at offset
 */
private static AnnotationSet getAnnsEndingAtWorker(AnnotationSet annset,
    int offset) {
  final List<Annotation> matching = new ArrayList<Annotation>();
  final Iterator<Annotation> candidates = annset.iterator();
  while (candidates.hasNext()) {
    final Annotation candidate = candidates.next();
    if (endOffset(candidate) != offset) {
      continue;
    }
    matching.add(candidate);
  }
  return new ImmutableAnnotationSetImpl(annset.getDocument(), matching) {
    private static final long serialVersionUID = -6703131102439043539L;
  };
}
/**
 * Return the annotations from the source set that have exactly the same
 * start and end offset as the given annotation, regardless of their type.
 *
 * @param source
 *          the annotation set to select from
 * @param ann
 *          the annotation that defines the span
 * @return the coextensive annotations found in source
 */
public static AnnotationSet getAnnsCoextensive(AnnotationSet source,
    Annotation ann) {
  final int from = startOffset(ann);
  final int to = endOffset(ann);
  return getAnnsCoextensiveWorker(source, null, from, to);
}
/**
 * Return the annotations of the given type from the source set that have
 * exactly the same start and end offset as the given annotation.
 *
 * @param source
 *          the annotation set to select from
 * @param type
 *          the annotation type to select
 * @param ann
 *          the annotation that defines the span
 * @return the coextensive annotations of that type found in source
 */
public static AnnotationSet getAnnsCoextensive(AnnotationSet source,
    String type, Annotation ann) {
  final int from = startOffset(ann);
  final int to = endOffset(ann);
  return getAnnsCoextensiveWorker(source, type, from, to);
}
/**
 * Return the annotations from the source set that have exactly the same
 * span as the given annotation set (from its first to its last node),
 * regardless of their type.
 *
 * @param source
 *          the annotation set to select from
 * @param anns
 *          the annotation set that defines the span
 * @return the coextensive annotations found in source
 */
public static AnnotationSet getAnnsCoextensive(AnnotationSet source,
    AnnotationSet anns) {
  final int from = startOffset(anns);
  final int to = endOffset(anns);
  return getAnnsCoextensiveWorker(source, null, from, to);
}
/**
 * Return the annotations of the given type from the source set that have
 * exactly the same span as the given annotation set (from its first to its
 * last node).
 *
 * @param source
 *          the annotation set to select from
 * @param type
 *          the annotation type to select
 * @param anns
 *          the annotation set that defines the span
 * @return the coextensive annotations of that type found in source
 */
public static AnnotationSet getAnnsCoextensive(AnnotationSet source,
    String type, AnnotationSet anns) {
  final int from = startOffset(anns);
  final int to = endOffset(anns);
  return getAnnsCoextensiveWorker(source, type, from, to);
}
/**
 * Return the annotations from the source set that start exactly at start and
 * end exactly at end, optionally restricted to one annotation type.
 * <p>
 * If the source is an {@link AnnotationSetImpl}, its getStrict method is
 * used and the result is narrowed by type when a type is given. For any
 * other implementation the contained annotations are retrieved and filtered
 * here, and the result is wrapped in an immutable set.
 *
 * @param source
 *          the annotation set to select from
 * @param type
 *          the annotation type to select, or null to accept any type
 * @param start
 *          the required start offset
 * @param end
 *          the required end offset
 * @return the annotations with exactly the given span (and type, if given)
 */
public static AnnotationSet getAnnsCoextensiveWorker(AnnotationSet source,
    String type, int start, int end) {
  final Long from = Long.valueOf(start);
  final Long to = Long.valueOf(end);
  if (!(source instanceof AnnotationSetImpl)) {
    // Generic fallback: everything contained in the range, filtered manually.
    final AnnotationSet contained = source.getContained(from, to);
    final List<Annotation> exact = new ArrayList<Annotation>();
    for (Annotation candidate : contained) {
      final boolean sameSpan =
          startOffset(candidate) == start && endOffset(candidate) == end;
      if (sameSpan && (type == null || candidate.getType().equals(type))) {
        exact.add(candidate);
      }
    }
    return new ImmutableAnnotationSetImpl(contained.getDocument(), exact) {
      private static final long serialVersionUID = -6703131102439043539L;
    };
  }
  final AnnotationSet strict = ((AnnotationSetImpl) source).getStrict(from, to);
  return (type == null) ? strict : strict.get(type);
}
/**
 * Return the start offset of an annotation as an int.
 *
 * @param ann
 *          the annotation
 * @return the offset of the annotation's start node, converted to int
 */
public static int startOffset(Annotation ann) {
  final Long offset = ann.getStartNode().getOffset();
  return offset.intValue();
}
/**
 * Return the start offset of an annotation as a Long.
 *
 * @param ann
 *          the annotation
 * @return the offset of the annotation's start node
 */
public static Long startOffsetLong(Annotation ann) {
  final Long offset = ann.getStartNode().getOffset();
  return offset;
}
/**
 * Return the start offset of an annotation set as an int.
 *
 * @param annset
 *          the annotation set
 * @return the offset of the set's first node, converted to int
 */
public static int startOffset(AnnotationSet annset) {
  final Long offset = annset.firstNode().getOffset();
  return offset.intValue();
}
/**
 * Return the start offset of an annotation set as a Long.
 *
 * @param annset
 *          the annotation set
 * @return the offset of the set's first node
 */
public static Long startOffsetLong(AnnotationSet annset) {
  final Long offset = annset.firstNode().getOffset();
  return offset;
}
/**
 * Return the end offset of an annotation as an int.
 *
 * @param ann
 *          the annotation
 * @return the offset of the annotation's end node, converted to int
 */
public static int endOffset(Annotation ann) {
  final Long offset = ann.getEndNode().getOffset();
  return offset.intValue();
}
/**
 * Return the end offset of an annotation as a Long.
 *
 * @param ann
 *          the annotation
 * @return the offset of the annotation's end node
 */
public static Long endOffsetLong(Annotation ann) {
  final Long offset = ann.getEndNode().getOffset();
  return offset;
}
/**
 * Return the end offset of an annotation set as an int.
 *
 * @param annset
 *          the annotation set
 * @return the offset of the set's last node, converted to int
 */
public static int endOffset(AnnotationSet annset) {
  final Long offset = annset.lastNode().getOffset();
  return offset.intValue();
}
/**
 * Return the end offset of an annotation set as a Long.
 *
 * @param annset
 *          the annotation set
 * @return the offset of the set's last node
 */
public static Long endOffsetLong(AnnotationSet annset) {
  final Long offset = annset.lastNode().getOffset();
  return offset;
}
/**
 * Return the length of the span covered by an annotation set, i.e. the
 * distance between its end offset and its start offset.
 *
 * @param set
 *          the annotation set
 * @return end offset minus start offset of the set
 */
public static int length(AnnotationSet set) {
  final int last = endOffset(set);
  final int first = startOffset(set);
  return last - first;
}
/**
 * Return the length of an annotation, i.e. the distance between its end
 * offset and its start offset.
 *
 * @param ann
 *          the annotation
 * @return end offset minus start offset of the annotation
 */
public static int length(Annotation ann) {
  final int last = endOffset(ann);
  final int first = startOffset(ann);
  return last - first;
}
/**
 * Return the longest annotation from a set. If several annotations share the
 * maximum length, the first one encountered while iterating is returned.
 *
 * @param annset
 *          the annotation set to search, must not be empty
 * @return the longest annotation
 * @throws GateRuntimeException
 *           if the set is empty
 */
public static Annotation getLongestAnn(AnnotationSet annset) {
  if (annset.size() == 0) {
    throw new GateRuntimeException("Annotation set is empty");
  }
  Annotation longest = null;
  for (Annotation candidate : annset) {
    // Only a strictly longer candidate replaces the current best.
    if (longest == null || Utils.length(candidate) > Utils.length(longest)) {
      longest = candidate;
    }
  }
  return longest;
}
/**
 * Return the leftmost annotation of a set, i.e. the one with the smallest
 * start offset. If several annotations start there, the longest of them is
 * returned; if there are several of those, an unspecified one.
 *
 * @param annset
 *          the annotation set to search, must not be empty
 * @return the longest among the leftmost annotations
 * @throws GateRuntimeException
 *           if the set is empty
 */
public static Annotation getLongestLeftmostAnn(AnnotationSet annset) {
  if (annset.size() == 0) {
    throw new GateRuntimeException("Annotation set is empty");
  }
  // Pass 1: find the smallest start offset and remember one annotation there.
  int leftmost = Integer.MAX_VALUE;
  Annotation best = null;
  for (Annotation candidate : annset) {
    final int start = startOffset(candidate);
    if (start < leftmost) {
      leftmost = start;
      best = candidate;
    }
  }
  // Pass 2: among the annotations at that offset prefer the longest one.
  for (Annotation candidate : annset) {
    if (startOffset(candidate) == leftmost
        && Utils.length(candidate) > Utils.length(best)) {
      best = candidate;
    }
  }
  return best;
}
/**
 * Return the rightmost annotation of a set, i.e. the one with the largest
 * start offset. If several annotations start there, the longest of them is
 * returned; if there are several of those, an unspecified one.
 * <p>
 * This also works when all annotations start at offset 0; previously that
 * case left the intermediate result unset and caused a
 * NullPointerException.
 *
 * @param annset
 *          the annotation set to search, must not be empty
 * @return the longest among the rightmost annotations
 * @throws GateRuntimeException
 *           if the set is empty
 */
public static Annotation getLongestRightmostAnn(AnnotationSet annset) {
  if (annset.size() == 0) {
    throw new GateRuntimeException("Annotation set is empty");
  }
  // Pass 1: find the largest start offset and remember one annotation there.
  // The first annotation is always accepted, so no sentinel offset is needed.
  Annotation best = null;
  int rightmost = 0;
  for (Annotation candidate : annset) {
    final int start = startOffset(candidate);
    if (best == null || start > rightmost) {
      rightmost = start;
      best = candidate;
    }
  }
  // Pass 2: among the annotations at that offset prefer the longest one.
  for (Annotation candidate : annset) {
    if (startOffset(candidate) == rightmost
        && Utils.length(candidate) > Utils.length(best)) {
      best = candidate;
    }
  }
  return best;
}
/**
 * Return all annotations of a set that start at the smallest start offset
 * occurring in the set.
 *
 * @param annset
 *          the annotation set to search, must not be empty
 * @return an immutable annotation set of the leftmost annotations
 * @throws GateRuntimeException
 *           if the set is empty
 */
public static AnnotationSet getLeftmostAnns(AnnotationSet annset) {
  if (annset.size() == 0) {
    throw new GateRuntimeException("Annotation set is empty");
  }
  // Determine the smallest start offset ...
  int leftmost = Integer.MAX_VALUE;
  for (Annotation candidate : annset) {
    leftmost = Math.min(leftmost, startOffset(candidate));
  }
  // ... and collect every annotation starting there.
  final List<Annotation> selected = new LinkedList<Annotation>();
  for (Annotation candidate : annset) {
    if (startOffset(candidate) == leftmost) {
      selected.add(candidate);
    }
  }
  return new ImmutableAnnotationSetImpl(annset.getDocument(), selected) {
    private static final long serialVersionUID = -6703131102439043539L;
  };
}
/**
 * Return all annotations of a set that start at the largest start offset
 * occurring in the set.
 *
 * @param annset
 *          the annotation set to search, must not be empty
 * @return an immutable annotation set of the rightmost annotations
 * @throws GateRuntimeException
 *           if the set is empty
 */
public static AnnotationSet getRightmostAnns(AnnotationSet annset) {
  if (annset.size() == 0) {
    throw new GateRuntimeException("Annotation set is empty");
  }
  // Determine the largest start offset ...
  int rightmost = 0;
  for (Annotation candidate : annset) {
    rightmost = Math.max(rightmost, startOffset(candidate));
  }
  // ... and collect every annotation starting there.
  final List<Annotation> selected = new LinkedList<Annotation>();
  for (Annotation candidate : annset) {
    if (startOffset(candidate) == rightmost) {
      selected.add(candidate);
    }
  }
  return new ImmutableAnnotationSetImpl(annset.getDocument(), selected) {
    private static final long serialVersionUID = -6703131102439043539L;
  };
}
/**
 * Return all annotations of a set that have the maximum length occurring in
 * the set.
 *
 * @param annset
 *          the annotation set to search, must not be empty
 * @return an immutable annotation set of the longest annotations
 * @throws GateRuntimeException
 *           if the set is empty
 */
public static AnnotationSet getLongestAnns(AnnotationSet annset) {
  if (annset.size() == 0) {
    throw new GateRuntimeException("Annotation set is empty");
  }
  // Determine the maximum length ...
  int longest = 0;
  for (Annotation candidate : annset) {
    longest = Math.max(longest, Utils.length(candidate));
  }
  // ... and collect every annotation of that length.
  final List<Annotation> selected = new LinkedList<Annotation>();
  for (Annotation candidate : annset) {
    if (Utils.length(candidate) == longest) {
      selected.add(candidate);
    }
  }
  return new ImmutableAnnotationSetImpl(annset.getDocument(), selected) {
    private static final long serialVersionUID = -6703131102439043539L;
  };
}
/**
 * Return all annotations of a set that have the minimum length occurring in
 * the set (this may even be annotations of zero length).
 *
 * @param annset
 *          the annotation set to search, must not be empty
 * @return an immutable annotation set of the shortest annotations
 * @throws GateRuntimeException
 *           if the set is empty
 */
public static AnnotationSet getShortestAnns(AnnotationSet annset) {
  if (annset.size() == 0) {
    throw new GateRuntimeException("Annotation set is empty");
  }
  // Determine the minimum length ...
  int shortest = Integer.MAX_VALUE;
  for (Annotation candidate : annset) {
    shortest = Math.min(shortest, Utils.length(candidate));
  }
  // ... and collect every annotation of that length.
  final List<Annotation> selected = new LinkedList<Annotation>();
  for (Annotation candidate : annset) {
    if (Utils.length(candidate) == shortest) {
      selected.add(candidate);
    }
  }
  return new ImmutableAnnotationSetImpl(annset.getDocument(), selected) {
    private static final long serialVersionUID = -6703131102439043539L;
  };
}
/**
 * Return all annotations of a set whose length is strictly greater than the
 * given length.
 *
 * @param annset
 *          the annotation set to filter, must not be empty
 * @param longerthanthis
 *          the length that must be exceeded
 * @return an immutable annotation set of the annotations longer than the
 *         given length (possibly empty)
 * @throws GateRuntimeException
 *           if the input set is empty
 */
public static AnnotationSet getAnnsLongerThan(AnnotationSet annset,
    int longerthanthis) {
  if (annset.size() == 0) {
    throw new GateRuntimeException("Annotation set is empty");
  }
  final List<Annotation> selected = new LinkedList<Annotation>();
  final Iterator<Annotation> candidates = annset.iterator();
  while (candidates.hasNext()) {
    final Annotation candidate = candidates.next();
    if (Utils.length(candidate) > longerthanthis) {
      selected.add(candidate);
    }
  }
  return new ImmutableAnnotationSetImpl(annset.getDocument(), selected) {
    private static final long serialVersionUID = -6703131102439043539L;
  };
}
/**
 * Return all annotations of a set whose length is strictly smaller than the
 * given length.
 *
 * @param annset
 *          the annotation set to filter, must not be empty
 * @param shorterthanthis
 *          the length that must not be reached
 * @return an immutable annotation set of the annotations shorter than the
 *         given length (possibly empty)
 * @throws GateRuntimeException
 *           if the input set is empty
 */
public static AnnotationSet getAnnsShorterThan(AnnotationSet annset,
    int shorterthanthis) {
  if (annset.size() == 0) {
    throw new GateRuntimeException("Annotation set is empty");
  }
  final List<Annotation> selected = new LinkedList<Annotation>();
  final Iterator<Annotation> candidates = annset.iterator();
  while (candidates.hasNext()) {
    final Annotation candidate = candidates.next();
    if (Utils.length(candidate) < shorterthanthis) {
      selected.add(candidate);
    }
  }
  return new ImmutableAnnotationSetImpl(annset.getDocument(), selected) {
    private static final long serialVersionUID = -6703131102439043539L;
  };
}
/**
 * Return the single annotation a set is expected to contain. This is useful
 * when a binding is expected to contain exactly one interesting annotation.
 *
 * @param annset
 *          the annotation set that must contain exactly one annotation
 * @return the only annotation in the set
 * @throws GateRuntimeException
 *           if the set does not contain exactly one annotation
 */
public static Annotation getOnlyAnn(AnnotationSet annset) {
  final int count = annset.size();
  if (count == 1) {
    return annset.iterator().next();
  }
  throw new GateRuntimeException(
      "Annotation set does not contain exactly 1 annotation but " + count);
}
/**
 * Add a new annotation to outSet that covers the same region as spanSet and
 * has the given type and feature map. The annotation is created from the
 * start and end offsets of spanSet.
 *
 * @param outSet
 *          the annotation set the new annotation is added to
 * @param spanSet
 *          the annotation set that defines the region to cover
 * @param type
 *          the type of the new annotation
 * @param fm
 *          the feature map of the new annotation (used as is, not copied)
 * @throws GateRuntimeException
 *           if the offsets are rejected by outSet
 */
public static void addAnn(AnnotationSet outSet, AnnotationSet spanSet,
    String type, FeatureMap fm) {
  final Long from = startOffsetLong(spanSet);
  final Long to = endOffsetLong(spanSet);
  try {
    outSet.add(from, to, type, fm);
  } catch (InvalidOffsetException ex) {
    throw new GateRuntimeException("Offset error adding new annotation: ", ex);
  }
}
/**
 * Add a new annotation to outSet that covers the given offset range and has
 * the given type and feature map.
 *
 * @param outSet
 *          the annotation set the new annotation is added to
 * @param startOffset
 *          the start offset of the new annotation
 * @param endOffset
 *          the end offset of the new annotation
 * @param type
 *          the type of the new annotation
 * @param fm
 *          the feature map of the new annotation (used as is, not copied)
 * @throws GateRuntimeException
 *           if the offsets are rejected by outSet
 */
public static void addAnn(AnnotationSet outSet, int startOffset, int endOffset,
    String type, FeatureMap fm) {
  final Long from = Long.valueOf(startOffset);
  final Long to = Long.valueOf(endOffset);
  try {
    outSet.add(from, to, type, fm);
  } catch (InvalidOffsetException ex) {
    throw new GateRuntimeException("Offset error adding new annotation: ", ex);
  }
}
/**
 * Add a new annotation to outSet that covers the same region as spanSet, has
 * the given type and carries a shallow copy of the features of another
 * annotation. In other words: copy features to a new annotation with a new
 * type and span.
 *
 * @param outSet
 *          the annotation set the new annotation is added to
 * @param spanSet
 *          the annotation set that defines the region to cover
 * @param type
 *          the type of the new annotation
 * @param copyFeaturesFrom
 *          the annotation whose features are copied
 * @throws GateRuntimeException
 *           if the offsets are rejected by outSet
 */
public static void addAnn(AnnotationSet outSet, AnnotationSet spanSet,
    String type, Annotation copyFeaturesFrom) {
  final FeatureMap features = copyFeatureMap(copyFeaturesFrom);
  final Long from = startOffsetLong(spanSet);
  final Long to = endOffsetLong(spanSet);
  try {
    outSet.add(from, to, type, features);
  } catch (InvalidOffsetException ex) {
    throw new GateRuntimeException("Offset error adding new annotation: ", ex);
  }
}
/**
 * Add a new annotation to outSet that covers the same region as spanAnn and
 * has the given type and feature map. The annotation is created from the
 * start and end offsets of spanAnn.
 *
 * @param outSet
 *          the annotation set the new annotation is added to
 * @param spanAnn
 *          the annotation that defines the region to cover
 * @param type
 *          the type of the new annotation
 * @param fm
 *          the feature map of the new annotation (used as is, not copied)
 * @throws GateRuntimeException
 *           if the offsets are rejected by outSet
 */
public static void addAnn(AnnotationSet outSet, Annotation spanAnn,
    String type, FeatureMap fm) {
  final Long from = startOffsetLong(spanAnn);
  final Long to = endOffsetLong(spanAnn);
  try {
    outSet.add(from, to, type, fm);
  } catch (InvalidOffsetException ex) {
    throw new GateRuntimeException("Offset error adding new annotation: ", ex);
  }
}
/**
 * Create a copy of an annotation in outSet: the new annotation has the same
 * extent and type as ann and a shallow copy of its feature map (the feature
 * values are shared, not cloned).
 *
 * @param outSet
 *          the annotation set the copy is added to
 * @param ann
 *          the annotation to copy
 * @throws GateRuntimeException
 *           if the offsets are rejected by outSet
 */
public static void addAnn(AnnotationSet outSet, Annotation ann) {
  final FeatureMap features = copyFeatureMap(ann);
  final Long from = startOffsetLong(ann);
  final Long to = endOffsetLong(ann);
  try {
    outSet.add(from, to, ann.getType(), features);
  } catch (InvalidOffsetException ex) {
    throw new GateRuntimeException("Offset error adding new annotation: ", ex);
  }
}
/**
 * Copy all annotations from inSet to outSet, optionally renaming them.
 * <p>
 * If typeName is null the annotations keep their original type. Otherwise
 * the copies get the type typeName and an additional feature
 * "original_ann_type" holding the original type name.
 * <p>
 * Every copy gets its own feature map (a shallow copy of the original one),
 * so the annotations in inSet are not modified.
 *
 * @param outSet
 *          the annotation set the copies are added to
 * @param inSet
 *          the annotation set whose annotations are copied
 * @param typeName
 *          the type for the new annotations, or null to keep the original
 *          types
 * @throws GateRuntimeException
 *           if the offsets of an annotation are rejected by outSet
 */
public static void addAnns(AnnotationSet outSet, AnnotationSet inSet,
    String typeName) {
  // Iterate over a snapshot so that adding to outSet cannot disturb the
  // iteration when both arguments refer to the same set.
  final List<Annotation> originals = new ArrayList<Annotation>(inSet);
  for (Annotation ann : originals) {
    if (typeName == null) {
      addAnn(outSet, ann);
    } else {
      // Work on a copy: the marker feature must not leak into the original
      // annotation and the new annotation must not share its feature map.
      final FeatureMap features = copyFeatureMap(ann);
      features.put("original_ann_type", ann.getType());
      addAnn(outSet, ann, typeName, features);
    }
  }
}
/**
 * Copy all annotations from inSet to outSet, optionally renaming them and
 * adding extra features.
 * <p>
 * If typeName is null the annotations keep their original type. The feature
 * map of each new annotation is a new map object holding the features of the
 * original annotation (the values are not deep copies). If featureMap is not
 * null, all its entries are put into each of the new maps as well, possibly
 * overwriting copied features. The annotations in inSet are not modified.
 *
 * @param outSet
 *          the annotation set where to add the new annotations
 * @param inSet
 *          the annotation set from which to copy all annotations
 * @param typeName
 *          the type name to use for the new annotations, or null if their
 *          existing type should be used
 * @param featureMap
 *          features that should be added to (or overwrite features in) the
 *          feature maps of all newly created annotations, or null if the
 *          existing features should be copied unchanged
 * @throws GateRuntimeException
 *           if the offsets of an annotation are rejected by outSet
 */
public static void addAnns(AnnotationSet outSet, AnnotationSet inSet,
    String typeName, FeatureMap featureMap) {
  // Iterate over a snapshot so that adding to outSet cannot disturb the
  // iteration when both arguments refer to the same set.
  final List<Annotation> originals = new ArrayList<Annotation>(inSet);
  for (Annotation ann : originals) {
    // A fresh map per annotation, as documented: neither the original
    // annotation nor the other copies may see the added features.
    final FeatureMap features = copyFeatureMap(ann);
    if (featureMap != null) {
      features.putAll(featureMap);
    }
    final String type = (typeName == null) ? ann.getType() : typeName;
    addAnn(outSet, ann, type, features);
  }
}
// MANAGE A LOG STREAM
/**
 * Retrieve the default log print stream that is stored with the controller
 * of the action context. The stream is looked up under the controller
 * feature "$$logstream".
 *
 * @param ctx
 *          the action context of the running JAPE rule
 * @return the stored print stream
 * @throws GateRuntimeException
 *           if there is no controller or no print stream is stored
 */
public static PrintStream getLogStream(ActionContext ctx) {
  final String noNameAddition = "";
  return getLogStream(ctx, noNameAddition);
}
/**
 * Retrieve a named log print stream that is stored with the controller of
 * the action context. The stream is looked up under the controller feature
 * "$$logstream", followed by "_" and the name addition if one is given.
 *
 * @param ctx
 *          the action context of the running JAPE rule
 * @param nameaddition
 *          distinguishes several log streams; null or empty selects the
 *          default stream
 * @return the stored print stream
 * @throws GateRuntimeException
 *           if there is no controller or no print stream is stored
 */
public static PrintStream getLogStream(ActionContext ctx, String nameaddition) {
  final boolean hasAddition = nameaddition != null && !nameaddition.isEmpty();
  final String key = "$$logstream" + (hasAddition ? "_" + nameaddition : "");
  final Controller controller = ctx.getController();
  if (controller == null) {
    throw new GateRuntimeException(
        "Controller is null - cannot retrieve the ActionContext PrintStream");
  }
  final PrintStream stream = (PrintStream) controller.getFeatures().get(key);
  if (stream != null) {
    return stream;
  }
  throw new GateRuntimeException("No log outstream found!");
}
/**
 * Open the default log stream for the action context; equivalent to calling
 * the two-argument variant with an empty name addition.
 *
 * @param ctx
 *          the action context of the running JAPE rule
 * @return the newly opened print stream
 */
public static PrintStream openLogStream(ActionContext ctx) {
  final String noNameAddition = "";
  return openLogStream(ctx, noNameAddition);
}
/**
 * Open a new log file and store its print stream with the controller of the
 * action context.
 * <p>
 * The file is created in the directory returned by {@link #getDefaultDir()}
 * and is named "jape_", followed by the PR name, the optional name addition
 * and a time stamp, with the extension ".log". The stream is stored under
 * the controller feature "$$logstream", followed by "_" and the name addition
 * if one is given.
 *
 * @param ctx
 *          the action context of the running JAPE rule
 * @param nameaddition
 *          distinguishes several log streams; null or empty selects the
 *          default stream
 * @return the newly opened print stream
 * @throws GateRuntimeException
 *           if there is no controller or the file cannot be opened
 */
public static PrintStream openLogStream(ActionContext ctx, String nameaddition) {
  final boolean hasAddition = nameaddition != null && !nameaddition.isEmpty();
  final String suffix = hasAddition ? "_" + nameaddition : "";
  // Check for the controller before touching the file system: without it the
  // stream could not be stored and would be left open and unreachable.
  final Controller controller = ctx.getController();
  if (controller == null) {
    throw new GateRuntimeException(
        "Controller is null - cannot store the ActionContext PrintStream");
  }
  final DateFormat dateFormat = new SimpleDateFormat("yyyyMMdd_HHmmss");
  final String name = "jape_" + ctx.getPRName() + suffix + "_"
      + dateFormat.format(new Date()) + ".log";
  final File outfile = new File(getDefaultDir(), name);
  System.out.println("Writing log to " + outfile.getAbsolutePath());
  final FileOutputStream fos;
  try {
    fos = new FileOutputStream(outfile);
  } catch (Exception ex) {
    ctx.endPhase();
    throw new GateRuntimeException("Exception trying to open File stream to "
        + outfile, ex);
  }
  final PrintStream log;
  try {
    log = new PrintStream(fos);
  } catch (Exception ex) {
    ctx.endPhase();
    try {
      fos.close();
    } catch (IOException ignored) {
      // Best effort only: the original failure is what gets reported.
    }
    throw new GateRuntimeException(
        "Exception trying to open PrintStream for " + outfile, ex);
  }
  controller.getFeatures().put("$$logstream" + suffix, log);
  return log;
}
/**
 * Close the default log stream of the action context; equivalent to calling
 * the two-argument variant with an empty name addition.
 *
 * @param ctx
 *          the action context of the running JAPE rule
 */
public static void closeLogStream(ActionContext ctx) {
  final String noNameAddition = "";
  closeLogStream(ctx, noNameAddition);
}
/**
 * Close a named log stream and remove it from the features of the
 * controller.
 *
 * @param ctx
 *          the action context of the running JAPE rule
 * @param na
 *          the name addition of the stream to close; null or empty selects
 *          the default stream
 * @throws GateRuntimeException
 *           if there is no controller or no such stream is stored
 */
public static void closeLogStream(ActionContext ctx, String na) {
  final PrintStream log = getLogStream(ctx, na);
  if (log != null) {
    log.close();
  }
  // TODO: do we have to remember and close the fos too?
  final boolean hasAddition = na != null && !na.isEmpty();
  final String key = "$$logstream" + (hasAddition ? "_" + na : "");
  ctx.getController().getFeatures().remove(key);
}
/**
 * Store the given print stream as the default log stream of the action
 * context; equivalent to calling the three-argument variant with an empty
 * name addition.
 *
 * @param ctx
 *          the action context of the running JAPE rule
 * @param ps
 *          the print stream to store
 */
public static void setLogStream(ActionContext ctx, PrintStream ps) {
  final String noNameAddition = "";
  setLogStream(ctx, ps, noNameAddition);
}
/**
 * Store the given print stream as a named log stream with the controller of
 * the action context. The stream is stored under the controller feature
 * "$$logstream", followed by "_" and the name addition if one is given.
 *
 * @param ctx
 *          the action context of the running JAPE rule
 * @param ps
 *          the print stream to store
 * @param nameaddition
 *          distinguishes several log streams; null or empty selects the
 *          default stream
 * @throws GateRuntimeException
 *           if there is no controller
 */
public static void setLogStream(ActionContext ctx, PrintStream ps,
    String nameaddition) {
  final boolean hasAddition = nameaddition != null && !nameaddition.isEmpty();
  final String key = "$$logstream" + (hasAddition ? "_" + nameaddition : "");
  final Controller controller = ctx.getController();
  if (controller == null) {
    throw new GateRuntimeException(
        "Controller is null - cannot set the ActionContext PrintStream");
  }
  controller.getFeatures().put(key, ps);
}
/**
 * Remove the default log stream from the controller of the action context
 * without closing it; equivalent to calling the two-argument variant with an
 * empty name addition.
 *
 * @param ctx
 *          the action context of the running JAPE rule
 */
public static void clearLogStream(ActionContext ctx) {
  final String noNameAddition = "";
  clearLogStream(ctx, noNameAddition);
}
/**
 * Remove a named log stream from the features of the controller of the
 * action context. The stream itself is not closed by this method.
 *
 * @param ctx
 *          the action context of the running JAPE rule
 * @param nameaddition
 *          distinguishes several log streams; null or empty selects the
 *          default stream
 * @throws GateRuntimeException
 *           if there is no controller
 */
public static void clearLogStream(ActionContext ctx, String nameaddition) {
  final boolean hasAddition = nameaddition != null && !nameaddition.isEmpty();
  final String key = "$$logstream" + (hasAddition ? "_" + nameaddition : "");
  final Controller controller = ctx.getController();
  if (controller == null) {
    // The message names the operation that actually failed (it used to say
    // "set", copied from setLogStream).
    throw new GateRuntimeException(
        "Controller is null - cannot clear the ActionContext PrintStream");
  }
  controller.getFeatures().remove(key);
}
/**
 * Return a default directory for the currently running application.
 * <p>
 * This is the directory named by the system property
 * "gate.user.filechooser.defaultdir" if that property is set, otherwise the
 * directory named by the system property "user.home".
 *
 * @return the default directory as a File
 */
public static File getDefaultDir() {
  final String configured =
      System.getProperty("gate.user.filechooser.defaultdir");
  final String path =
      (configured != null) ? configured : System.getProperty("user.home");
  return new File(path);
}
/**
 * Build a "virtual" string for the range covered by an annotation set.
 * <p>
 * All annotations of the given type from inputAS that are contained in the
 * range are visited in document order and the values of the given feature
 * are concatenated, separated by a single space.
 *
 * @param inputAS
 *          the annotation set to take the annotations from
 * @param range
 *          the annotation set that defines the range
 * @param type
 *          the type of the annotations to use
 * @param feature
 *          the name of the feature whose values are concatenated
 * @return the concatenated feature values, or an empty string if the range
 *         contains no annotation of the given type
 * @throws GateRuntimeException
 *           if one of the contained annotations does not have the feature
 */
public static String getVirtualStringFor(AnnotationSet inputAS,
    AnnotationSet range, String type, String feature) {
  final AnnotationSet contained =
      gate.Utils.getContainedAnnotations(inputAS, range, type);
  final List<Annotation> ordered = gate.Utils.inDocumentOrder(contained);
  final StringBuilder result = new StringBuilder();
  // Empty before the first value, a single space before every later one.
  String separator = "";
  for (Annotation ann : ordered) {
    final String value = (String) ann.getFeatures().get(feature);
    if (value == null) {
      throw new GateRuntimeException("No feature " + feature
          + " for a contained annotation of type " + type);
    }
    result.append(separator).append(value);
    separator = " ";
  }
  return result.toString();
}
/**
 * Find the longest chain of annotations of the given type within the range
 * where the value of the given feature is the same for all annotations of
 * the chain.
 * <p>
 * The length of a chain is measured in offsets: the end offset of its last
 * element minus the start offset of its first element. A chain must consist
 * of at least two annotations, and an annotation that has no value for the
 * feature cannot start a chain. If several chains have the same length, the
 * one found first (starting at the smallest offset) is kept.
 * <p>
 * NOTE: if several matching annotations start at the offset where a chain
 * can be continued, only one of them (whichever the set iterator returns
 * first) is used, so the result is not guaranteed to be the overall longest
 * possible chain.
 *
 * @param inputAS the annotation set from which the chain elements are taken
 * @param range the annotation set whose extent defines the range to search
 * @param type the type of the annotations that can be chain elements
 * @param feature the name of the feature whose value must match
 * @return an immutable annotation set with the elements of the longest chain;
 *         this is never null, if no chain is found the set is empty
 */
public static AnnotationSet getLongestMatchingSequence(AnnotationSet inputAS,
    AnnotationSet range, String type, String feature) {
  List<Annotation> ret = new ArrayList<Annotation>();
  int endOffset = endOffset(range);
  AnnotationSet elements = gate.Utils.getContainedAnnotations(inputAS, range,
      type);
  // Collect the distinct offsets at which candidate elements start ...
  Set<Integer> offsets = new HashSet<Integer>();
  for (Annotation ann : elements) {
    Integer off = ann.getStartNode().getOffset().intValue();
    offsets.add(off);
  }
  // ... and create the sorted list of offsets
  List<Integer> sortedOffsets = new ArrayList<Integer>(offsets);
  Collections.sort(sortedOffsets);
  int curlength = 0;
  // At each offset where we have potential chain elements, try
  // to start a chain there.
  for (Integer offset : sortedOffsets) {
    // We may already have a chain from some smaller offset, so check first
    // if the longest chain from this offset can ever be longer than what
    // we already have ...
    if (endOffset - offset <= curlength) {
      continue;
    }
    // Now, start building the longest chain starting at offset ...
    // First, get all the annotations at this offset
    AnnotationSet offsetAnns = getAnnsStartingAt(elements, type, offset);
    // For each annotation, try to find a chain with equal feature values.
    // We only form a chain if there is at least one more annotation
    // (i.e. the minimum number of annotations in the chain is 2)
    for (Annotation ann : offsetAnns) {
      Object val = ann.getFeatures().get(feature);
      if (val == null) {
        // If the candidate for the first element does not have
        // the desired feature set, just ignore it.
        continue;
      }
      // Make a feature map that contains the feature=value constraint
      // which is needed to check the other elements of the chain.
      FeatureMap constraint = Factory.newFeatureMap();
      constraint.put(feature, val);
      List<Annotation> chain = new ArrayList<Annotation>();
      // add the candidate head to this chain candidate
      chain.add(ann);
      // stays 0 for as long as the chain consists of the head only
      int chainlength = 0;
      // curann is the annotation we process in a loop to find
      // something to append
      Annotation curann = ann;
      while (true) {
        // Find the next known offset that is a) not before the end of
        // curann and b) strictly behind the start of curann. Condition b)
        // only matters for zero-length annotations: without it the search
        // would return the start offset of curann itself, and the same
        // annotation could be appended over and over again without end.
        int curstart = curann.getStartNode().getOffset().intValue();
        int curend = curann.getEndNode().getOffset().intValue();
        int nextOffset = offset;
        for (int o : sortedOffsets) {
          if (o >= curend && o > curstart) {
            nextOffset = o;
            break;
          }
        }
        // if we are still at the old offset, no other offset with elements
        // was found, end the loop
        if (nextOffset == offset) {
          break;
        }
        // Get all the elements at the next offset
        AnnotationSet curset = getAnnsStartingAt(elements, nextOffset);
        // And filter out just those we want: same type and feature value
        curset = curset.get(type, constraint);
        // if nothing remains, end the loop
        if (curset == null || curset.size() == 0) {
          break;
        }
        // we just pick an arbitrary element here to add to the chain,
        // but instead we should use ALL elements to continue.
        // This means that for each chain we already have, we would create
        // as many new chains as we find candidates here.
        curann = curset.iterator().next();
        chain.add(curann);
        chainlength = endOffset(curann) - offset;
      }
      // if the new chain is longer than the best one so far, keep it
      if (chainlength > curlength) {
        ret = chain;
        curlength = chainlength;
      }
    }
  }
  // The anonymous subclass is only used to get access to the constructor.
  return new ImmutableAnnotationSetImpl(range.getDocument(), ret) {
    private static final long serialVersionUID = -6703131102439043539L;
  };
}
/**
* Find sequences (chains) of annotations of the given type inside the range
* where the value of a specific feature matches between all the elements of
* the sequence.
* <p>
* The start offsets of the candidate annotations are processed in ascending
* order. At each offset, every existing chain whose last element ends at or
* before that offset is extended with an annotation starting there whose
* feature value equals the value of the last element of the chain. After
* that, a new chain is started at every annotation at that offset which was
* not added to an existing chain, but only if the sequence type is ALL or
* LONGEST or if the offset is the start offset of the range.
* <p>
* The length of a chain is the end offset of its last element minus the
* start offset of its first element. A chain may consist of a single
* annotation, and the same annotation may end up in more than one chain.
* An annotation without a value for the feature can start a chain but is
* never appended to one.
* <p>
* NOTE: if more than one annotation could be appended to a chain, the chain
* should be duplicated for each candidate. This is not yet implemented: a
* message is written to System.err and only one of the candidates
* (whichever the iterator returns first) is appended.
* <p>
* The method returns a set of annotation sets, where each annotation set
* contains the element annotations of one sequence.
*
* @param inputAS the annotation set from which the element annotations are taken
* @param range the annotation set whose extent defines the range to search
* @param type the type of the element annotations
* @param feature the name of the feature whose value must match
* @param sequenceType which of the chains to build and to return
* @return a set of immutable annotation sets, one for each chain that remains
* after filtering according to the sequence type
*/
public static Set<AnnotationSet> getMatchingSequences(AnnotationSet inputAS,
AnnotationSet range, String type, String feature,
SequenceType sequenceType) {
// Helper class for finding chains. This class represents a chain of
// annotations and stores the chain itself and the current length of the
// chain. It does not override equals/hashCode, so it can be modified
// while it is a member of a HashSet.
class AnnotationChain {
private Deque<Annotation> chain = new LinkedList<Annotation>();
private int length = 0;
private int startOffset;
public int getLength() {
return length;
}
public void addLast(Annotation ann) {
// the first annotation that gets added defines the start of the chain
if (chain.isEmpty()) {
startOffset = startOffset(ann);
}
// the length is the span from the chain start to the end of the new element
length = endOffset(ann) - startOffset;
chain.addLast(ann);
}
public Annotation getLast() {
return chain.getLast();
}
public Deque<Annotation> getChain() {
return chain;
}
}
int startOffset = startOffset(range);
int endOffset = endOffset(range);
// Find all the offsets where element annotations start
List<Integer> sortedOffsets = new ArrayList<Integer>();
AnnotationSet elements = gate.Utils.getContainedAnnotations(inputAS, range,
type);
// System.out.println("Number of elements: "+elements.size());
Set<Integer> offsets = new HashSet<Integer>();
for (Annotation ann : elements) {
Integer off = ann.getStartNode().getOffset().intValue();
offsets.add(off);
}
// create the sorted list of offsets
sortedOffsets.addAll(offsets);
Collections.sort(sortedOffsets);
// System.out.println("Sorted Offsets: "+sortedOffsets);
Set<AnnotationChain> chains = new HashSet<AnnotationChain>();
// At each offset we either try to make the existing chains longer
// or we try to start new chains.
for (Integer offset : sortedOffsets) {
AnnotationSet offsetAnns = getAnnsStartingAt(elements, type, offset);
// first check all the chains we already have to see if we can
// add one or more of the annotations here:
// For each chain
// if the end of the chain is beyond the offset, ignore the chain
// find all annotations that match the chain value
// if there is exactly one annotation, add it to the chain,
// also mark it as added
// otherwise, make as many total copies of the chain as there are
// annotations and add each annotation to its corresponding chain
// also mark as added (NOTE: the copying is not implemented yet)
// start a new chain at each annotation that was not already added
// to a chain.
Set<Annotation> addedAnnotations = new HashSet<Annotation>();
for (AnnotationChain chain : chains) {
Annotation lastEl = chain.getLast();
// is the end of the last element not beyond the current offset?
// then the chain can potentially be made longer with annotations
// from this offset.
Object value = lastEl.getFeatures().get(feature);
if (endOffset(lastEl) <= offset) {
Set<Annotation> candidates = new HashSet<Annotation>();
for (Annotation offsetAnn : offsetAnns) {
Object annValue = offsetAnn.getFeatures().get(feature);
if (annValue == null) {
continue;
} // ignore anns with no value
if (annValue.equals(value)) {
candidates.add(offsetAnn);
}
}
if (candidates.isEmpty()) {
continue;
} // no candidates for this chain
if (candidates.size() == 1) {
// add the annotation to the current chain
Annotation a = candidates.iterator().next();
chain.addLast(a);
addedAnnotations.add(a);
} else {
// more than one candidate: we would need to duplicate the current
// chain, but for now just one of the candidates is appended
System.err.println("Need to duplicate chain, not yet implemented!");
Annotation a = candidates.iterator().next();
chain.addLast(a);
addedAnnotations.add(a);
}
} // offset compatible with current chain
} // for each chain
// Now that we have made all the chains longer where possible, try
// to start new chains here but only if the sequenceType does not ask
// for only sequences starting at the beginning
if (
// either we have requested all chains from any offset ...
(sequenceType == SequenceType.ALL || sequenceType == SequenceType.LONGEST)
// or we are at the starting offset anyways
|| (offset == startOffset)) {
for (Annotation offsetAnn : offsetAnns) {
if (!addedAnnotations.contains(offsetAnn)) {
AnnotationChain newChain = new AnnotationChain();
newChain.addLast(offsetAnn);
chains.add(newChain);
}
}
}
} // for offsets in sorted offsets
int longestLength = 0;
Iterator<AnnotationChain> chainIt = chains.iterator();
while (chainIt.hasNext()) {
AnnotationChain chain = chainIt.next();
// if we must cover the full range, remove all chains that do not
// end at the end of the range.
// Note that if we must start at the beginning, we already made
// sure that only chains starting at the beginning were generated
if (sequenceType == SequenceType.ALL_FULLRANGE) {
if (endOffset(chain.getLast()) != endOffset)
chainIt.remove();
} else {
// if this is an acceptable chain, find the longest one
int length = chain.getLength();
if (length > longestLength) {
longestLength = length;
}
}
}
// if we just want the longest chains, iterate again and remove the
// non-longest ones (several chains remain if they share the longest length)
if (sequenceType == SequenceType.LONGEST
|| sequenceType == SequenceType.LONGEST_FROMSTART) {
chainIt = chains.iterator();
while (chainIt.hasNext()) {
AnnotationChain chain = chainIt.next();
if (chain.getLength() < longestLength) {
chainIt.remove();
}
}
}
// now we are left with all chains we want to return
Set<AnnotationSet> returnSet = new HashSet<AnnotationSet>();
for (AnnotationChain chain : chains) {
AnnotationSet aset = new ImmutableAnnotationSetImpl(range.getDocument(),
chain.getChain()) {
private static final long serialVersionUID = -6703131102439043539L;
};
returnSet.add(aset);
}
return returnSet;
}
/**
 * Selects which chains {@code getMatchingSequences} builds and returns.
 */
public static enum SequenceType {
  /** Chains may start at any offset; all chains are returned. */
  ALL,
  /** Chains may start at any offset; only the chains with the longest length are returned. */
  LONGEST,
  /**
   * Chains are only started at the start offset of the range, and only
   * chains that end at the end offset of the range are returned.
   */
  ALL_FULLRANGE,
  /** Chains are only started at the start offset of the range; all of them are returned. */
  ALL_FROMSTART,
  /**
   * Chains are only started at the start offset of the range; only the
   * chains with the longest length are returned.
   */
  LONGEST_FROMSTART
}
/**
 * Check if two annotation sets are coextensive, i.e. span the same offset range.
 *
 * @param annset1 the first annotation set
 * @param annset2 the second annotation set
 * @return true if both the start offsets and the end offsets of the two
 *         sets are equal, false otherwise
 */
public static boolean coextensive(AnnotationSet annset1, AnnotationSet annset2) {
  if (startOffset(annset1) != startOffset(annset2)) {
    return false;
  }
  return endOffset(annset1) == endOffset(annset2);
}
/**
 * Check if two annotations are coextensive, i.e. span the same offset range.
 * This simply delegates to {@code ann1.coextensive(ann2)}.
 *
 * @param ann1 the first annotation
 * @param ann2 the second annotation
 * @return whatever {@code ann1.coextensive(ann2)} returns
 */
public static boolean coextensive(Annotation ann1, Annotation ann2) {
  final boolean sameSpan = ann1.coextensive(ann2);
  return sameSpan;
}
/**
 * Check if an annotation set and an annotation are coextensive, i.e. span
 * the same offset range.
 *
 * @param annset the annotation set
 * @param ann the annotation
 * @return true if both the start offsets and the end offsets of the
 *         annotation and the annotation set are equal, false otherwise
 */
public static boolean coextensive(AnnotationSet annset, Annotation ann) {
  if (startOffset(ann) != startOffset(annset)) {
    return false;
  }
  return endOffset(ann) == endOffset(annset);
}
/**
 * Check if an annotation and an annotation set are coextensive, i.e. span
 * the same offset range.
 *
 * @param ann the annotation
 * @param annset the annotation set
 * @return true if both the start offsets and the end offsets of the
 *         annotation and the annotation set are equal, false otherwise
 */
public static boolean coextensive(Annotation ann, AnnotationSet annset) {
  if (startOffset(ann) != startOffset(annset)) {
    return false;
  }
  return endOffset(ann) == endOffset(annset);
}
/**
 * Return the runtime parameters that the given action context provides.
 * This only works if the context is an {@code ExtendedActionContext}
 * (i.e. when called in the context of JapeExtended or JapePlusExtended).
 *
 * @param ctx the action context of the JAPE rule
 * @return the feature map returned by the extended context's getParameters()
 * @throws GateRuntimeException if ctx is not an ExtendedActionContext
 */
public static FeatureMap getParameters(ActionContext ctx) {
  if (!(ctx instanceof ExtendedActionContext)) {
    throw new GateRuntimeException("ActionContext is not an ExtendedActionContext");
  }
  ExtendedActionContext extended = (ExtendedActionContext) ctx;
  return extended.getParameters();
}
/**
 * Return the value of the runtime parameter with the given name, or null.
 * This only works if the context is an {@code ExtendedActionContext}
 * (i.e. when the JAPE file is used by the JapeExtended PR or the
 * JapePlusExtended PR); for any other context an exception is thrown.
 *
 * @param ctx the action context of the JAPE rule
 * @param name the name of the parameter
 * @return the value stored for the name in the parameter feature map, or
 *         null if there is no such entry or if the context provides no
 *         parameter feature map at all
 * @throws GateRuntimeException if ctx is not an ExtendedActionContext
 */
public static Object getParmValue(ActionContext ctx, String name) {
  if (!(ctx instanceof ExtendedActionContext)) {
    throw new GateRuntimeException("ActionContext is not an ExtendedActionContext");
  }
  FeatureMap parameters = ((ExtendedActionContext) ctx).getParameters();
  return (parameters == null) ? null : parameters.get(name);
}
/**
 * A method that is supposed to simplify writing to an output file using
 * a print stream. This method returns the stream that is already open for
 * the given outFilePath, or opens a new one if there is none yet.
 * Internally, the open stream is saved in the controller's feature map using
 * the key {@code "$$fileprintstream:" + outFilePath}.
 * <p>
 * The file is created in (or relative to) the directory returned by
 * {@code getDefaultDir()}. An existing file is overwritten when the stream
 * is opened, but not on subsequent calls that return the cached stream.
 * <p>
 * NOTE: be sure to close the stream using closeFilePrintStream(ctx, outFilePath).
 * This also removes the stream handle from the controller's feature map,
 * which is important to happen before the pipeline is saved because a
 * stream handle cannot be serialized!
 *
 * @param ctx the action context of the JAPE rule
 * @param outFilePath the path of the output file
 * @return the print stream for the file, never null
 * @throws GateRuntimeException if the file or the stream cannot be opened;
 *         ctx.endPhase() is called before the exception is thrown
 */
public static PrintStream getOrOpenFilePrintStream(ActionContext ctx, String outFilePath) {
  final String key = "$$fileprintstream:" + outFilePath;
  FeatureMap controllerFeatures = ctx.getController().getFeatures();
  // Reuse the stream if it was already opened: opening the file again would
  // truncate what has been written so far and leak the previous stream.
  Object cached = controllerFeatures.get(key);
  if (cached instanceof PrintStream) {
    return (PrintStream) cached;
  }
  File outfile = new File(getDefaultDir(), outFilePath);
  FileOutputStream fos;
  try {
    fos = new FileOutputStream(outfile);
  } catch (Exception ex) {
    ctx.endPhase();
    throw new GateRuntimeException("Exception trying to open File stream to "
        + outfile, ex);
  }
  PrintStream ps;
  try {
    ps = new PrintStream(fos);
  } catch (Exception ex) {
    ctx.endPhase();
    try {
      fos.close();
    } catch (IOException ignored) {
      // nothing more we can do, the original exception is reported below
    }
    throw new GateRuntimeException(
        "Exception trying to open PrintStream for " + outfile, ex);
  }
  controllerFeatures.put(key, ps);
  return ps;
}
/**
 * Close the print stream that was opened for the given outFilePath with
 * getOrOpenFilePrintStream and remove its handle from the controller's
 * feature map. If no stream is stored for the path, nothing is done.
 *
 * @param ctx the action context of the JAPE rule
 * @param outFilePath the path of the output file, exactly as it was passed
 *        to getOrOpenFilePrintStream
 */
public static void closeFilePrintStream(ActionContext ctx, String outFilePath) {
  final String key = "$$fileprintstream:" + outFilePath;
  FeatureMap controllerFeatures = ctx.getController().getFeatures();
  Object stored = controllerFeatures.get(key);
  if (!(stored instanceof PrintStream)) {
    return;
  }
  try {
    ((PrintStream) stored).close();
  } catch (Exception ignored) {
    // closing is best effort, there is nothing useful a JAPE rule could do here
  } finally {
    // Always drop the handle: a closed stream must not be handed out again
    // and a stream handle in the feature map prevents saving the pipeline.
    controllerFeatures.remove(key);
  }
}
/**
 * Remove annotations from the whereFrom set which are fully contained
 * inside the extent of the covering set. The offsets of the covering set
 * and the type are passed on to the offset-based variant of this method.
 *
 * @param whereFrom Annotation set from which to remove the annotations
 * @param covering Annotation set that indicates the range
 * @param type Annotation type of the annotations to remove
 */
public static void removeContainedAnnotations(AnnotationSet whereFrom,
    AnnotationSet covering, String type) {
  int rangeStart = startOffset(covering);
  int rangeEnd = endOffset(covering);
  removeContainedAnnotations(whereFrom, rangeStart, rangeEnd, type);
}
/**
 * Remove annotations from the whereFrom set which are fully contained
 * inside the extent of the covering annotation. The offsets of the covering
 * annotation and the type are passed on to the offset-based variant of this
 * method.
 *
 * @param whereFrom Annotation set from which to remove the annotations
 * @param covering Annotation that indicates the range
 * @param type Annotation type of the annotations to remove
 */
public static void removeContainedAnnotations(AnnotationSet whereFrom,
    Annotation covering, String type) {
  int rangeStart = startOffset(covering);
  int rangeEnd = endOffset(covering);
  removeContainedAnnotations(whereFrom, rangeStart, rangeEnd, type);
}
/**
 * Remove all the annotations of the given annotation type from the whereFrom
 * set which are fully contained inside the extent of the covering range.
 * <p>
 * If the type is null or the empty string, the contained annotations of all
 * types are removed.
 *
 * @param whereFrom Annotation set from which to remove the annotations
 * @param fromOffset Start offset of covering range
 * @param toOffset End offset of covering range
 * @param type Annotation type of the annotations to remove, or null/empty
 *        to remove the contained annotations of any type
 */
public static void removeContainedAnnotations(AnnotationSet whereFrom,
    int fromOffset, int toOffset, String type) {
  AnnotationSet whatToRemove =
      whereFrom.getContained((long) fromOffset, (long) toOffset);
  // Restrict the removal to the requested type: without this filter every
  // contained annotation would be removed, whatever its type.
  if (whatToRemove != null && type != null && !type.isEmpty()) {
    whatToRemove = whatToRemove.get(type);
  }
  if (whatToRemove == null || whatToRemove.isEmpty()) {
    return;
  }
  whereFrom.removeAll(whatToRemove);
}
/**
 * Look up a String-valued feature in a feature map and fall back to an
 * alternate value if the feature map has no value for it (the entry is null).
 *
 * @param fm the feature map to look the feature up in
 * @param featureName the name of the feature
 * @param elseValue the value to return if the feature map has no value
 * @return the feature value if it is not null, otherwise elseValue
 * @throws ClassCastException if the value in the feature map is not a String
 */
public static String getStringFeatureOrElse(
    FeatureMap fm, String featureName, String elseValue) {
  String stored = (String) fm.get(featureName);
  return (stored != null) ? stored : elseValue;
}
/**
 * Add something of the given type to a list that is expected to be
 * the value of the given feature. If the feature has no value yet, a new
 * list is created. In both cases the list is (re-)stored in the feature map
 * after the element has been added.
 *
 * @param <T> the type of the element to add
 * @param fm the feature map that contains the list-valued feature
 * @param featureName the name of the list-valued feature
 * @param what the element to append to the list
 * @throws ClassCastException if the feature has a value that is not a List
 */
public static <T> void addToFeatureList(FeatureMap fm, String featureName, T what) {
  Object current = fm.get(featureName);
  @SuppressWarnings("unchecked")
  List<T> values = (current == null) ? new ArrayList<T>() : (List<T>) current;
  values.add(what);
  fm.put(featureName, values);
}
/**
 * Add something of the given type to a set that is expected to be
 * the value of the given feature. If the feature has no value yet, a new
 * set is created. In both cases the set is (re-)stored in the feature map
 * after the element has been added.
 *
 * @param <T> the type of the element to add
 * @param fm the feature map that contains the set-valued feature
 * @param featureName the name of the set-valued feature
 * @param what the element to add to the set
 * @throws ClassCastException if the feature has a value that is not a Set
 */
public static <T> void addToFeatureSet(FeatureMap fm, String featureName, T what) {
  Object current = fm.get(featureName);
  @SuppressWarnings("unchecked")
  Set<T> values = (current == null) ? new HashSet<T>() : (Set<T>) current;
  values.add(what);
  fm.put(featureName, values);
}
/**
 * Interpret an object as a boolean value. Every object counts as true,
 * except for null and for objects whose toString method returns the empty
 * string, "0" or "false". Note: the string is not trimmed and the comparison
 * is case sensitive, so a value of e.g. " 0" or "False" is interpreted as true!
 *
 * @param theObject the object to interpret, may be null
 * @return the interpretation of the object as a boolean value
 */
public static boolean isTrue(Object theObject) {
  if (theObject == null) {
    return false;
  }
  String text = theObject.toString();
  boolean looksFalse =
      text.isEmpty() || "0".equals(text) || "false".equals(text);
  return !looksFalse;
}
/**
 * Check if two feature maps contain the same value for the given key.
 * The values are considered identical if both are missing (null), or if
 * the value from the first map equals() the value from the second map.
 *
 * @param fm1 the first feature map
 * @param fm2 the second feature map
 * @param key the key to look up in both feature maps
 * @return true if the two values are identical in the sense described above
 */
public static boolean isEqual(FeatureMap fm1, FeatureMap fm2, String key) {
  Object left = fm1.get(key);
  Object right = fm2.get(key);
  if (left == null) {
    return right == null;
  }
  return left.equals(right);
}
}