Package edu.stanford.nlp.pipeline

Source Code of edu.stanford.nlp.pipeline.BinarizerAnnotator

package edu.stanford.nlp.pipeline;

import java.util.Collections;
import java.util.Properties;
import java.util.Set;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.parser.lexparser.TreebankLangParserParams;
import edu.stanford.nlp.parser.lexparser.TreeBinarizer;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.Trees;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.ReflectionLoading;

/**
* This annotator takes unbinarized trees (from the parser annotator
* or elsewhere), binarizes them, and stores the result on each sentence
* under BinarizedTreeAnnotation.
* <br>
* Note that this functionality is also built in to the
* ParserAnnotator.  However, this can be used in situations where the
* trees come from somewhere other than the parser.  Conversely, the
* ParserAnnotator may have more options for the binarizer which are
* not implemented here.
*
* @author John Bauer
*/
public class BinarizerAnnotator implements Annotator {
  private static final String DEFAULT_TLPP_CLASS = "edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams";
 
  final TreeBinarizer binarizer;
  final String tlppClass;

  public BinarizerAnnotator(String annotatorName, Properties props) {
    this.tlppClass = props.getProperty(annotatorName + ".tlppClass", DEFAULT_TLPP_CLASS);
    TreebankLangParserParams tlpp = ReflectionLoading.loadByReflection(tlppClass);
    this.binarizer = TreeBinarizer.simpleTreeBinarizer(tlpp.headFinder(), tlpp.treebankLanguagePack());
  }

  public static String signature(String annotatorName, Properties props) {
    String tlppClass = props.getProperty(annotatorName + ".tlppClass", DEFAULT_TLPP_CLASS);
    return tlppClass;
  }

  @Override
  public void annotate(Annotation annotation) {
    if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
      for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        doOneSentence(sentence);
      }
    } else {
      throw new RuntimeException("unable to find sentences in: " + annotation);
    }
  }

  private void doOneSentence(CoreMap sentence) {
    Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
    Tree binarized;
    if (isBinarized(tree)) {
      binarized = tree;
    } else {
      binarized = binarizer.transformTree(tree);
    }
    Trees.convertToCoreLabels(binarized);
    sentence.set(TreeCoreAnnotations.BinarizedTreeAnnotation.class, binarized);
 

  /**
   * Recursively check that a tree is not already binarized.
   */
  private boolean isBinarized(Tree tree) {
    if (tree.isLeaf()) {
      return true;
    }

    if (tree.children().length > 2) {
      return false;
    }

    for (Tree child : tree.children()) {
      if (!isBinarized(child)) {
        return false;
      }
    }

    return true;
  }

  @Override
  public Set<Requirement> requires() {
    return Collections.singleton(PARSE_REQUIREMENT);
  }

  @Override
  public Set<Requirement> requirementsSatisfied() {
    return Collections.singleton(BINARIZED_TREES_REQUIREMENT);
  }
}
TOP

Related Classes of edu.stanford.nlp.pipeline.BinarizerAnnotator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.