package edu.stanford.nlp.time;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.time.TimeAnnotations;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.CoreMap;
import java.util.function.Function;
import edu.stanford.nlp.util.Iterables;
public class TimexTreeAnnotator implements Annotator {
public static enum MatchType {ExactMatch, SmallestEnclosing}
private MatchType matchType;
public TimexTreeAnnotator(MatchType matchType) {
this.matchType = matchType;
}
public void annotate(Annotation document) {
for (CoreMap sentence: document.get(CoreAnnotations.SentencesAnnotation.class)) {
final List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
tree.indexSpans(0);
// add a tree to each timex annotation
for (CoreMap timexAnn: sentence.get(TimeAnnotations.TimexAnnotations.class)) {
Tree subtree;
final int timexBegin = beginOffset(timexAnn);
final int timexEnd = endOffset(timexAnn);
Iterable<Tree> possibleMatches;
switch (this.matchType) {
// only use trees that match exactly
case ExactMatch:
possibleMatches = Iterables.filter(tree, tree1 -> {
int treeBegin = beginOffset(tree, tokens);
int treeEnd = endOffset(tree, tokens);
return treeBegin == timexBegin && timexEnd == treeEnd;
});
Iterator<Tree> treeIter = possibleMatches.iterator();
subtree = treeIter.hasNext() ? treeIter.next() : null;
break;
// select the smallest enclosing tree
case SmallestEnclosing:
possibleMatches = Iterables.filter(tree, tree1 -> {
int treeBegin = beginOffset(tree, tokens);
int treeEnd = endOffset(tree, tokens);
return treeBegin <= timexBegin && timexEnd <= treeEnd;
});
List<Tree> sortedMatches = CollectionUtils.toList(possibleMatches);
Collections.sort(sortedMatches, (tree1, tree2) -> {
Integer width1 = endOffset(tree1, tokens) - beginOffset(tree1, tokens);
Integer width2 = endOffset(tree2, tokens) - endOffset(tree2, tokens);
return width1.compareTo(width2);
});
subtree = sortedMatches.get(0);
break;
// more cases could go here if they're added
default:
throw new RuntimeException("unexpected match type");
}
// add the subtree to the time annotation
if (subtree != null) {
timexAnn.set(TreeCoreAnnotations.TreeAnnotation.class, subtree);
}
}
}
}
private static int beginOffset(Tree tree, List<CoreLabel> tokens) {
CoreMap label = (CoreMap)tree.label();
int beginToken = label.get(CoreAnnotations.BeginIndexAnnotation.class);
return beginOffset(tokens.get(beginToken));
}
private static int endOffset(Tree tree, List<CoreLabel> tokens) {
CoreMap label = (CoreMap)tree.label();
int endToken = label.get(CoreAnnotations.EndIndexAnnotation.class);
if (endToken > tokens.size()) {
String msg = "no token %d in tree:\n%s\ntokens:\n%s";
throw new RuntimeException(String.format(msg, endToken - 1, tree, tokens));
}
return endOffset(tokens.get(endToken - 1));
}
private static int beginOffset(CoreMap map) {
return map.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
}
private static int endOffset(CoreMap map) {
return map.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
}
@Override
public Set<Requirement> requires() {
return TOKENIZE_AND_SSPLIT;
}
@Override
public Set<Requirement> requirementsSatisfied() {
// TODO: not sure what goes here
return Collections.emptySet();
}
}