Package edu.stanford.nlp.time

Source Code of edu.stanford.nlp.time.TimexTreeAnnotator

package edu.stanford.nlp.time;

import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.time.TimeAnnotations;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.CoreMap;
import java.util.function.Function;
import edu.stanford.nlp.util.Iterables;

public class TimexTreeAnnotator implements Annotator {
 
  public static enum MatchType {ExactMatch, SmallestEnclosing}
 
  private MatchType matchType;
 
  public TimexTreeAnnotator(MatchType matchType) {
    this.matchType = matchType;
  }
 
  public void annotate(Annotation document) {
    for (CoreMap sentence: document.get(CoreAnnotations.SentencesAnnotation.class)) {
      final List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
      Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
      tree.indexSpans(0);
     
      // add a tree to each timex annotation
      for (CoreMap timexAnn: sentence.get(TimeAnnotations.TimexAnnotations.class)) {
        Tree subtree;
        final int timexBegin = beginOffset(timexAnn);
        final int timexEnd = endOffset(timexAnn);
        Iterable<Tree> possibleMatches;
        switch (this.matchType) {
         
          // only use trees that match exactly
        case ExactMatch:
          possibleMatches = Iterables.filter(tree, tree1 -> {
            int treeBegin = beginOffset(tree, tokens);
            int treeEnd = endOffset(tree, tokens);
            return treeBegin == timexBegin && timexEnd == treeEnd;
          });
          Iterator<Tree> treeIter = possibleMatches.iterator();
          subtree = treeIter.hasNext() ? treeIter.next() : null;
          break;
         
          // select the smallest enclosing tree
        case SmallestEnclosing:
          possibleMatches = Iterables.filter(tree, tree1 -> {
            int treeBegin = beginOffset(tree, tokens);
            int treeEnd = endOffset(tree, tokens);
            return treeBegin <= timexBegin && timexEnd <= treeEnd;
          });
          List<Tree> sortedMatches = CollectionUtils.toList(possibleMatches);
          Collections.sort(sortedMatches, (tree1, tree2) -> {
            Integer width1 = endOffset(tree1, tokens) - beginOffset(tree1, tokens);
            Integer width2 = endOffset(tree2, tokens) - endOffset(tree2, tokens);
            return width1.compareTo(width2);
          });
          subtree = sortedMatches.get(0);
          break;
         
          // more cases could go here if they're added
        default:
          throw new RuntimeException("unexpected match type");
        }
   
        // add the subtree to the time annotation
        if (subtree != null) {
          timexAnn.set(TreeCoreAnnotations.TreeAnnotation.class, subtree);
        }
      }
    }
  }
 
  private static int beginOffset(Tree tree, List<CoreLabel> tokens) {
    CoreMap label = (CoreMap)tree.label();
    int beginToken = label.get(CoreAnnotations.BeginIndexAnnotation.class);
    return beginOffset(tokens.get(beginToken));
  }
 
  private static int endOffset(Tree tree, List<CoreLabel> tokens) {
    CoreMap label = (CoreMap)tree.label();
    int endToken = label.get(CoreAnnotations.EndIndexAnnotation.class);
    if (endToken > tokens.size()) {
      String msg = "no token %d in tree:\n%s\ntokens:\n%s";
      throw new RuntimeException(String.format(msg, endToken - 1, tree, tokens));
    }
    return endOffset(tokens.get(endToken - 1));
  }
 
  private static int beginOffset(CoreMap map) {
    return map.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
  }
 
  private static int endOffset(CoreMap map) {
    return map.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
  }

  @Override
  public Set<Requirement> requires() {
    return TOKENIZE_AND_SSPLIT;
  }

  @Override
  public Set<Requirement> requirementsSatisfied() {
    // TODO: not sure what goes here
    return Collections.emptySet();
  }
}
TOP

Related Classes of edu.stanford.nlp.time.TimexTreeAnnotator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.