Package edu.stanford.nlp.trees.tregex

Source Code of edu.stanford.nlp.trees.tregex.TregexMatcher

// TregexMatcher
// Copyright (c) 2004-2007 The Board of Trustees of
// The Leland Stanford Junior University. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
//
//
// For more information, bug reports, fixes, contact:
//    Christopher Manning
//    Dept of Computer Science, Gates 1A
//    Stanford CA 94305-9010
//    USA
//    Support/Questions: parser-user@lists.stanford.edu
//    Licensing: parser-support@lists.stanford.edu
//    http://www-nlp.stanford.edu/software/tregex.shtml

package edu.stanford.nlp.trees.tregex;

import java.util.*;

import edu.stanford.nlp.trees.HasParent;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.Tree;

/**
* A TregexMatcher can be used to match a {@link TregexPattern} against a {@link edu.stanford.nlp.trees.Tree}.
* Usage should be similar to a {@link java.util.regex.Matcher}.
*
* @author Galen Andrew
*/
public abstract class TregexMatcher {

  final Tree root;
  Tree tree;
  IdentityHashMap<Tree, Tree> nodesToParents;
  final Map<String, Tree> namesToNodes;
  final VariableStrings variableStrings;

  // these things are used by "find"
  Iterator<Tree> findIterator;
  Tree findCurrent;

  final HeadFinder headFinder;

  TregexMatcher(Tree root, Tree tree, IdentityHashMap<Tree, Tree> nodesToParents, Map<String, Tree> namesToNodes, VariableStrings variableStrings, HeadFinder headFinder) {
    this.root = root;
    this.tree = tree;
    this.nodesToParents = nodesToParents;
    this.namesToNodes = namesToNodes;
    this.variableStrings = variableStrings;
    this.headFinder = headFinder;
  }

  public HeadFinder getHeadFinder() { return this.headFinder; }

  /**
   * Resets the matcher so that its search starts over.
   */
  public void reset() {
    findIterator = null;
    findCurrent = null;
    namesToNodes.clear();
    variableStrings.reset();
  }

  /**
   * Resets the matcher to start searching on the given tree for matching subexpressions.
   *
   * @param tree The tree to start searching on
   */
  void resetChildIter(Tree tree) {
    this.tree = tree;
    resetChildIter();
  }

  /**
   * Resets the matcher to restart search for matching subexpressions
   */
  void resetChildIter() {
  }

  /**
   * Does the pattern match the tree?  It's actually closer to java.util.regex's
   * "lookingAt" in that the root of the tree has to match the root of the pattern
   * but the whole tree does not have to be "accounted for".  Like with lookingAt
   * the beginning of the string has to match the pattern, but the whole string
   * doesn't have to be "accounted for".
   *
   * @return whether the tree matches the pattern
   */
  public abstract boolean matches();

  /** Rests the matcher and tests if it matches on the tree when rooted at <code>node</code>.
   *
   *  @param node The node where the match is checked
   *  @return whether the matcher matches at node
   */
  public boolean matchesAt(Tree node) {
    resetChildIter(node);
    return matches();
  }

  /**
   * Get the last matching tree -- that is, the tree node that matches the root node of the pattern.
   * Returns null if there has not been a match.
   *
   * @return last match
   */
  public abstract Tree getMatch();


  /**
   * Find the next match of the pattern on the tree
   *
   * @return whether there is a match somewhere in the tree
   */
  public boolean find() {
    if (findIterator == null) {
      findIterator = root.iterator();
    }
    if (findCurrent != null && matches()) {
      return true;
    }
    while (findIterator.hasNext()) {
      findCurrent = findIterator.next();
      resetChildIter(findCurrent);
      if (matches()) {
        return true;
      }
    }
    return false;
  }

  /**
   * Similar to {@code find()}, but matches only if {@code node} is
   * the root of the match.  All other matches are ignored.  If you
   * know you are looking for matches with a particular root, this is
   * much faster than iterating over all matches and taking only the
   * ones that work and faster than altering the tregex to match only
   * the correct node.
   * <br>
   * If called multiple times with the same node, this will return
   * subsequent matches in the same manner as find() returns
   * subsequent matches in the same tree.  If you want to call this using
   * the same TregexMatcher on more than one node, call reset() first;
   * otherwise, an AssertionError will be thrown.
   */
  public boolean findAt(Tree node) {
    if (findCurrent != null && findCurrent != node) {
      throw new AssertionError("Error: must call reset() before changing nodes for a call to findAt");
    }
    if (findCurrent != null) {
      return matches();
    }
    findCurrent = node;
    resetChildIter(findCurrent);
    return matches();
  }

  /**
   * Find the next match of the pattern on the tree such that the
   * matching node (that is, the tree node matching the root node of
   * the pattern) differs from the previous matching node.
   * @return true iff another matching node is found.
   */
  public boolean findNextMatchingNode() {
    Tree lastMatchingNode = getMatch();
    while(find()) {
      if(getMatch() != lastMatchingNode)
        return true;
    }
    return false;
  }

  /**
   * Returns the node labeled with <code>name</code> in the pattern.
   *
   * @param name the name of the node, specified in the pattern.
   * @return node labeled by the name
   */
  public Tree getNode(String name) {
    return namesToNodes.get(name);
  }

  public Set<String> getNodeNames() {
    return namesToNodes.keySet();
  }

  Tree getParent(Tree node) {
    if (node instanceof HasParent) {
      return node.parent();
    }
    if (nodesToParents == null) {
      nodesToParents = new IdentityHashMap<Tree, Tree>();
    }
    if (nodesToParents.size() == 0) {
      fillNodesToParents(root, null);
    }
    return nodesToParents.get(node);
  }

  private void fillNodesToParents(Tree node, Tree parent) {
    nodesToParents.put(node, parent);
    for (Tree child : node.children()) {
      fillNodesToParents(child, node);
    }
  }

  Tree getRoot() {
    return root;
  }

  /**
   * If there is a current match, and that match involves setting this
   * particular variable string, this returns that string.  Otherwise,
   * it returns null.
   */
  public String getVariableString(String var) {
    return variableStrings.getString(var);
  }
}
TOP

Related Classes of edu.stanford.nlp.trees.tregex.TregexMatcher

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.