Source Code of edu.stanford.nlp.pipeline.BinarizerAnnotator

package edu.stanford.nlp.pipeline;


import java.util.Collections;
import java.util.Properties;
import java.util.Set;


import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.parser.lexparser.TreebankLangParserParams;
import edu.stanford.nlp.parser.lexparser.TreeBinarizer;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.Trees;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.ReflectionLoading;


/**
 * This annotator takes unbinarized trees (from the parser annotator
 * or elsewhere) and binarizes them in the attachment.
 * <br>
 * Note that this functionality is also built in to the
 * ParserAnnotator.  However, this can be used in situations where the
 * trees come from somewhere other than the parser.  Conversely, the
 * ParserAnnotator may have more options for the binarizer which are
 * not implemented here.
 *
 * @author John Bauer
 */
public class BinarizerAnnotator implements Annotator {
  private static final String DEFAULT_TLPP_CLASS = "edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams";
  
  final TreeBinarizer binarizer;
  final String tlppClass;


  public BinarizerAnnotator(String annotatorName, Properties props) {
    this.tlppClass = props.getProperty(annotatorName + ".tlppClass", DEFAULT_TLPP_CLASS);
    TreebankLangParserParams tlpp = ReflectionLoading.loadByReflection(tlppClass);
    this.binarizer = TreeBinarizer.simpleTreeBinarizer(tlpp.headFinder(), tlpp.treebankLanguagePack());
  }


  public static String signature(String annotatorName, Properties props) {
    String tlppClass = props.getProperty(annotatorName + ".tlppClass", DEFAULT_TLPP_CLASS);
    return tlppClass;
  }


  @Override
  public void annotate(Annotation annotation) {
    if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
      for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        doOneSentence(sentence);
      }
    } else {
      throw new RuntimeException("unable to find sentences in: " + annotation);
    }
  }


  private void doOneSentence(CoreMap sentence) {
    Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
    Tree binarized;
    if (isBinarized(tree)) {
      binarized = tree;
    } else {
      binarized = binarizer.transformTree(tree);
    }
    Trees.convertToCoreLabels(binarized);
    sentence.set(TreeCoreAnnotations.BinarizedTreeAnnotation.class, binarized);
  }  


  /**
   * Recursively check that a tree is not already binarized.
   */
  private boolean isBinarized(Tree tree) {
    if (tree.isLeaf()) {
      return true;
    }


    if (tree.children().length > 2) {
      return false;
    }


    for (Tree child : tree.children()) {
      if (!isBinarized(child)) {
        return false;
      }
    }


    return true;
  }


  @Override
  public Set<Requirement> requires() {
    return Collections.singleton(PARSE_REQUIREMENT);
  }


  @Override
  public Set<Requirement> requirementsSatisfied() {
    return Collections.singleton(BINARIZED_TREES_REQUIREMENT);
  }
}
Source Code of edu.stanford.nlp.pipeline.BinarizerAnnotator

Related Classes of edu.stanford.nlp.pipeline.BinarizerAnnotator