Package edu.stanford.nlp.trees.international.tuebadz

Source Code of edu.stanford.nlp.trees.international.tuebadz.TueBaDZPennTreeNormalizer

package edu.stanford.nlp.trees.international.tuebadz;

import java.util.ArrayList;
import java.util.List;

import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.TreeNormalizer;
import edu.stanford.nlp.trees.TreebankLanguagePack;


/**
* Tree normalizer for the TueBaDZ treebank.
*
* (An adaptation of Roger Levy's NegraPennTreeNormalizer.)
*
* @author Wolfgang Maier (wmaier@sfs.uni-tuebingen.de)
*/
public class TueBaDZPennTreeNormalizer extends TreeNormalizer {

  /** How to clean up node labels: 0 = do nothing, 1 = keep category and
   *  function, 2 = just category.
   */
  private final int nodeCleanup;
  private final String root;
  protected final TreebankLanguagePack tlp;
  private List<TreeNormalizer> tns = new ArrayList<TreeNormalizer>();

  public String rootSymbol() {
    return root;
  }

//  public TueBaDZPennTreeNormalizer() {
//    this(new TueBaDZLanguagePack(), 0);
//  }

  public TueBaDZPennTreeNormalizer(TreebankLanguagePack tlp, int nodeCleanup) {
    this.tlp = tlp;
    this.nodeCleanup = nodeCleanup;
    root = tlp.startSymbol();
  }
 
  public TueBaDZPennTreeNormalizer(TreebankLanguagePack tlp, int nodeCleanup, List<TreeNormalizer> tns) {
    this.tlp = tlp;
    this.nodeCleanup = nodeCleanup;
    root = tlp.startSymbol();
    this.tns.addAll(tns);
  }

  /**
   * Normalizes a leaf contents.
   * This implementation interns the leaf.
   */
  @Override
  public String normalizeTerminal(String leaf) {
    // We could unquote * and / with backslash \ in front of them
    return leaf.intern();
  }


  /**
   * Normalizes a nonterminal contents.
   * This implementation strips functional tags, etc. and interns the
   * nonterminal.
   */
  @Override
  public String normalizeNonterminal(String category) {
    return cleanUpLabel(category).intern();
  }

  /**
   * Remove things like hyphened functional tags and equals from the
   * end of a node label.
   */
  protected String cleanUpLabel(String label) {
    if (label == null) {
      return root;
    } else if (nodeCleanup == 1) {
      return tlp.categoryAndFunction(label);
    } else if (nodeCleanup == 2) {
      return tlp.basicCategory(label);
    } else {
      return label;
    }
  }


  /**
   * Normalize a whole tree.
   * TueBa-D/Z adaptation. Fixes trees with non-unary roots, does nothing else.
   */
  @Override
  public Tree normalizeWholeTree(Tree tree, TreeFactory tf) {
    if (tree.label().value().equals(root) && tree.children().length > 1) {
      Tree underRoot = tree.treeFactory().newTreeNode(root, tree.getChildrenAsList());
      tree.setChildren(new Tree[1]);
      tree.setChild(0, underRoot);

    }
    // we just want the non-unary root fixed.
    return tree;
  }

  private static final long serialVersionUID = 8009544230321390490L;

}
TOP

Related Classes of edu.stanford.nlp.trees.international.tuebadz.TueBaDZPennTreeNormalizer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.