// Source code of edu.stanford.nlp.trees.tregex.tsurgeon.AuxiliaryTree

package edu.stanford.nlp.trees.tregex.tsurgeon;


import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Pair;


import java.util.*;
import java.util.regex.Pattern;
import java.util.regex.Matcher;


/**
 * @author Roger Levy (rog@nlp.stanford.edu)
 */
class AuxiliaryTree {


  private final String originalTreeString;
  final Tree tree;
  Tree foot;
  private final IdentityHashMap<Tree,String> nodesToNames; // no one else should be able to get this one.
  private final Map<String,Tree> namesToNodes; // this one has a getter.




  public AuxiliaryTree(Tree tree, boolean mustHaveFoot) {
    originalTreeString = tree.toString();
    this.tree = tree;
    this.foot = findFootNode(tree);
    if (foot == null && mustHaveFoot) {
      throw new TsurgeonParseException("Error -- no foot node found for " + originalTreeString);
    }
    namesToNodes = Generics.newHashMap();
    nodesToNames = new IdentityHashMap<Tree,String>();
    initializeNamesNodesMaps(tree);
  }


  private AuxiliaryTree(Tree tree, Tree foot, Map<String, Tree> namesToNodes, String originalTreeString) {
    this.originalTreeString = originalTreeString;
    this.tree = tree;
    this.foot = foot;
    this.namesToNodes = namesToNodes;
    nodesToNames = null;
  }


  public Map<String, Tree> namesToNodes() {
    return namesToNodes;
  }


  @Override
  public String toString() {
    return originalTreeString;
  }


  /**
   * Copies the Auxiliary tree.  Also, puts the new names->nodes map in the TsurgeonMatcher that called copy.
   */
  public AuxiliaryTree copy(TsurgeonMatcher matcher) {
    Map<String,Tree> newNamesToNodes = Generics.newHashMap();
    Pair<Tree,Tree> result = copyHelper(tree,newNamesToNodes);
    //if(! result.first().dominates(result.second()))
      //System.err.println("Error -- aux tree copy doesn't dominate foot copy.");
    matcher.newNodeNames.putAll(newNamesToNodes);
    return new AuxiliaryTree(result.first(), result.second(), newNamesToNodes, originalTreeString);
  }


  // returns Pair<node,foot>
  private Pair<Tree,Tree> copyHelper(Tree node,Map<String,Tree> newNamesToNodes) {
    Tree clone;
    Tree newFoot = null;
    if (node.isLeaf()) {
      if (node == foot) { // found the foot node; pass it up.
        clone = node.treeFactory().newTreeNode(node.label(),new ArrayList<Tree>(0));
        newFoot = clone;
      } else {
        clone = node.treeFactory().newLeaf(node.label().labelFactory().newLabel(node.label()));
      }
    } else {
      List<Tree> newChildren = new ArrayList<Tree>(node.children().length);
      for (Tree child : node.children()) {
        Pair<Tree,Tree> newChild = copyHelper(child,newNamesToNodes);
        newChildren.add(newChild.first());
        if (newChild.second() != null) {
          if (newFoot != null) {
            System.err.println("Error -- two feet found when copying auxiliary tree " + tree.toString() + "; using last foot found.");
          }
          newFoot = newChild.second();
        }
      }
      clone = node.treeFactory().newTreeNode(node.label().labelFactory().newLabel(node.label()),newChildren);
    }


    if (nodesToNames.containsKey(node))
      newNamesToNodes.put(nodesToNames.get(node),clone);


    return new Pair<Tree,Tree>(clone,newFoot);
  }






  /***********************************************************/
  /* below here is init stuff for finding the foot node.     */
  /***********************************************************/




  private static final String footNodeCharacter = "@";
  private static final Pattern footNodeLabelPattern = Pattern.compile("^(.*)" + footNodeCharacter + '$');
  private static final Pattern escapedFootNodeCharacter = Pattern.compile('\\' + footNodeCharacter);


  /**
   * Returns the foot node of the adjunction tree, which is the terminal node
   * that ends in @.  In the process, turns the foot node into a TreeNode
   * (rather than a leaf), and destructively un-escapes all the escaped
   * instances of @ in the tree.  Note that final @ in a non-terminal node is
   * ignored, and left in.
   */
  private static Tree findFootNode(Tree t) {
    Tree footNode = findFootNodeHelper(t);
    Tree result = footNode;
    if (footNode != null) {
      Tree newFootNode = footNode.treeFactory().newTreeNode(footNode.label(), new ArrayList<Tree>());


      Tree parent = footNode.parent(t);
      if (parent != null) {
        int i = parent.objectIndexOf(footNode);
        parent.setChild(i, newFootNode);
      }


      result = newFootNode;
    }
    return result;
  }


  private static Tree findFootNodeHelper(Tree t) {
    Tree foundDtr = null;
    if (t.isLeaf()) {
      Matcher m = footNodeLabelPattern.matcher(t.label().value());
      if (m.matches()) {
        t.label().setValue(m.group(1));
        return t;
      } else {
        return null;
      }
    }
    for (Tree child : t.children()) {
      Tree thisFoundDtr = findFootNodeHelper(child);
      if (thisFoundDtr != null) {
        if (foundDtr != null) {
          throw new TsurgeonParseException("Error -- two foot nodes in subtree" + t.toString());
        } else {
          foundDtr = thisFoundDtr;
        }
      }
    }
    Matcher m = escapedFootNodeCharacter.matcher(t.label().value());
    t.label().setValue(m.replaceAll(footNodeCharacter));
    return foundDtr;
  }




  /***********************************************************
   * below here is init stuff for getting node -> names maps *
   ***********************************************************/


  // There are two ways in which you can can match the start of a name
  // expression.
  // The first is if you have any number of non-escaping characters
  // preceding an "=" and a name.  This is the ([^\\\\]*) part.
  // The second is if you have any number of any characters, followed
  // by a non-"\" character, as "\" is used to escape the "=".  After
  // that, any number of pairs of "\" are allowed, as we let "\" also
  // escape itself.  After that comes "=" and a name.
  static final Pattern namePattern = Pattern.compile("^((?:[^\\\\]*)|(?:(?:.*[^\\\\])?)(?:\\\\\\\\)*)=([^=]+)$");


  /**
   * Looks for new names, destructively strips them out.
   * Destructively unescapes escaped chars, including "=", as well.
   */
  private void initializeNamesNodesMaps(Tree t) {
    for (Tree node : t.subTreeList()) {
      Matcher m = namePattern.matcher(node.label().value());
      if (m.find()) {
        namesToNodes.put(m.group(2), node);
        nodesToNames.put(node, m.group(2));
        node.label().setValue(m.group(1));
      }
      node.label().setValue(unescape(node.label().value()));
    }
  }


  static String unescape(String input) {
    return input.replaceAll("\\\\(.)", "$1");
  }


}
// Source code of edu.stanford.nlp.trees.tregex.tsurgeon.AuxiliaryTree

// Related classes of edu.stanford.nlp.trees.tregex.tsurgeon.AuxiliaryTree