Package edu.stanford.nlp.semgraph.semgrex.ssurgeon

Source Code of edu.stanford.nlp.semgraph.semgrex.ssurgeon.SsurgeonPattern

package edu.stanford.nlp.semgraph.semgrex.ssurgeon;

import java.io.*;
import java.util.*;

import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphFactory;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.semgrex.ssurgeon.pred.SsurgPred;
import edu.stanford.nlp.semgraph.semgrex.*;
import edu.stanford.nlp.util.Generics;

/**
* This represents a source pattern and a subsequent edit script, or a sequence
* of successive in-place edits to perform on a SemanticGraph. 
*
* Though the SemgrexMatcher resulting from the Semgrex match over the
* SemanticGraph is available to the edit, currently the nodes and edges to be affected
* should be named, in order for the edits to identify nodes easily.  See the constructor
* for each edit type for appropriate syntax.
*
* NOTE: the edits are currently destructive.  If you wish to preserve your graph, make a copy.
* @author yeh1
*
*/
public class SsurgeonPattern {
  protected String UID;
  protected String notes = "";
  protected List<SsurgeonEdit> editScript;
  protected SemgrexPattern semgrexPattern;
  protected SemanticGraph semgrexGraph = null; // Source graph semgrex pattern was derived from (used for pattern learning)
  protected SsurgPred predicateTest = null; // Predicate tests to apply, if non-null, must return true to execute.

  // NodeMap is used to maintain a list of named nodes outside of the set in the SemgrexMatcher.
  // Primarily for newly inserted nodes.
  private Map<String, IndexedWord> nodeMap = null;
 
  public SsurgeonPattern(String UID, SemgrexPattern pattern, List<SsurgeonEdit> editScript) {
    semgrexPattern = pattern;
    this.UID = UID;
    this.editScript = editScript;
  }

  public SsurgeonPattern(String UID, SemgrexPattern pattern) {
    this.UID = UID;
    this.semgrexPattern = pattern;
    this.editScript = new ArrayList<SsurgeonEdit>();
  }

  public SsurgeonPattern(String UID, SemgrexPattern pattern, SemanticGraph patternGraph) {
    this(UID, pattern);
    this.semgrexGraph = patternGraph;
  }

  public SsurgeonPattern(SemgrexPattern pattern, List<SsurgeonEdit> editScript) {
    this(pattern.toString(), pattern, editScript);
  }

  public SsurgeonPattern(SemgrexPattern pattern) {
    this(pattern.toString(), pattern);
  }

  public SsurgeonPattern(SemgrexPattern pattern, SemanticGraph patternGraph) {
    this(pattern);
    this.semgrexGraph = patternGraph;
  }

  public void setPredicate(SsurgPred predicateTest) {
    this.predicateTest = predicateTest;
  }

  public void addEdit(SsurgeonEdit newEdit) {
    newEdit.setOwningPattern(this);
    editScript.add(newEdit);
  }

  /**
   * Adds the node to the set of named nodes registered, using the given name.
   */
  public void addNamedNode(IndexedWord node, String name) {
    nodeMap.put(name, node);
  }
 
  public IndexedWord getNamedNode(String name) {
    return nodeMap.get(name);
  }
 
  @Override
  public String toString() {
    StringWriter buf = new StringWriter();
    buf.append("Semgrex Pattern: UID=");
    buf.write(getUID());
    buf.write("\nNotes: ");
    buf.write(getNotes());
    buf.write("\n");
    buf.append(semgrexPattern.toString());
    if (predicateTest != null) {
      buf.write("\nPredicate: ");
      buf.write(predicateTest.toString());
    }
    buf.append("\nEdit script:\n");
    for (SsurgeonEdit edit : editScript) {
      buf.append("\t");
      buf.append(edit.toString());
      buf.append("\n");
    }
    return buf.toString();
  }

  /**
   * Executes the given sequence of edits against the SemanticGraph.
   *
   *  NOTE: because the graph could be destructively modified, the matcher may be invalid, and
   *  thus the pattern will only be executed against the first match.  Repeat this routine on the returned
   *  SemanticGraph to reapply on other matches.
   * 
   *  TODO: create variant that returns set of expansions while matcher.find() returns true
   * @param sg SemanticGraph to operate over (NOT destroyed/modified).
   * @return True if a match was found and executed, otherwise false.
   */
  public Collection<SemanticGraph> execute(SemanticGraph sg) throws Exception {
    Collection<SemanticGraph> generated = new ArrayList<SemanticGraph>();
    SemgrexMatcher matcher = semgrexPattern.matcher(sg);
    nextMatch:
    while (matcher.find()) {
      // NOTE: Semgrex can match two named nodes to the same node.  In this case, we simply,
      // check the named nodes, and if there are any collisions, we throw out this match.
      Set<String> nodeNames = matcher.getNodeNames();
      Set<IndexedWord> seen = Generics.newHashSet();
      for (String name : nodeNames) {
        IndexedWord curr = matcher.getNode(name);
        if (seen.contains(curr))
          break nextMatch;
        seen.add(curr);
//        System.out.println("REDUNDANT NODES FOUDN IN SEMGREX MATCH");
      }
     
      // if we do have to test, assemble the tests and arguments based off of the current
      // match and test.  If false, continue, else execute as normal.
      if (predicateTest != null) {       
        if (!predicateTest.test(matcher))
          continue;
      }
//      SemanticGraph tgt = new SemanticGraph(sg);
      // Generate a new graph, since we don't want to mutilate the original graph.
      // We use the same nodes, since the matcher operates off of those.
      SemanticGraph tgt = SemanticGraphFactory.duplicateKeepNodes(sg);
      nodeMap = Generics.newHashMap();
      for (SsurgeonEdit edit : editScript) {     
        edit.evaluate(tgt, matcher);
      }
      generated.add(tgt);
    }
    return generated;
  }

  /**
   * Executes the Ssurgeon edit, but with the given Semgrex Pattern, instead of the one attached to this
   * pattern.
   *
   * NOTE: Predicate tests are still active here, and any named nodes required for evaluation must be
   * present.
   */
  public Collection<SemanticGraph> execute(SemanticGraph sg, SemgrexPattern overridePattern) throws Exception {
    SemgrexMatcher matcher = overridePattern.matcher(sg);
    Collection<SemanticGraph> generated = new ArrayList<SemanticGraph>();
    while (matcher.find()) {
      if (predicateTest != null) {       
        if (!predicateTest.test(matcher))
          continue;
      }
      // We reset the named node map with each edit set, since these edits
      // should exist in a separate graph for each unique Semgrex match.
      nodeMap = Generics.newHashMap();
      SemanticGraph tgt = new SemanticGraph(sg);
      for (SsurgeonEdit edit : editScript) {     
        edit.evaluate(tgt, matcher);
      }
      generated.add(tgt);
    }
    return generated;
  }


  public SemgrexPattern getSemgrexPattern() {
    return semgrexPattern;
  }

  /* ------
   * XML output and input
   * ------ */
  public static final String ELT_LIST_TAG = "ssurgeon-pattern-list";
  public static final String UID_ELEM_TAG = "uid";
  public static final String RESOURCE_TAG = "resource";
  public static final String SSURGEON_ELEM_TAG = "ssurgeon-pattern";
  public static final String SEMGREX_ELEM_TAG = "semgrex";
  public static final String SEMGREX_GRAPH_ELEM_TAG = "semgrex-graph";
  public static final String PREDICATE_TAG = "predicate";
  public static final String PREDICATE_AND_TAG = "and";
  public static final String PREDICATE_OR_TAG = "or";
  public static final String PRED_WORDLIST_TEST_TAG = "wordlist-test";
  public static final String PRED_ID_ATTR = "id";
  public static final String NOTES_ELEM_TAG = "notes";
  public static final String EDIT_LIST_ELEM_TAG = "edit-list";
  public static final String EDIT_ELEM_TAG = "edit";
  public static final String ORDINAL_ATTR = "ordinal";

  public List<SsurgeonEdit> getEditScript() {
    return editScript;
  }

  public SemanticGraph getSemgrexGraph() {
    return semgrexGraph;
  }

  public String getNotes() {
    return notes;
  }

  public void setNotes(String notes) {
    this.notes = notes;
  }

  public String getUID() {
    return UID;
  }

  public void setUID(String uid) {
    UID = uid;
  }

  /**
   * Simply reads the given Ssurgeon pattern from file (args[0]), parses it, and prints it out.
   * Use this for debugging the class and patterns.
   */
  public static void main(String[] args) {
    if (args.length == 0) {
      System.out.println("Usage: SsurgeonPattern FILEPATH [\"COMPACT_SEMANTIC_GRAPH\"], FILEPATH=path to ssurgeon pattern to parse and print., SENTENCE=test sentence (in quotes)");
      System.exit(-1);
    }

    File tgtFile = new File(args[0]);
    try {
      Ssurgeon.inst().initLog(new File("./ssurgeon.log"));
      Ssurgeon.inst().setLogPrefix("SsurgeonPattern test");
      List<SsurgeonPattern> patterns = Ssurgeon.inst().readFromFile(tgtFile);
      for (SsurgeonPattern pattern : patterns) {
        System.out.println("- - - - -");
        System.out.println(pattern);
      }
      if (args.length > 1) {
        for (int i=1; i<args.length;i++) {
          String text = args[i];
          SemanticGraph sg = SemanticGraph.valueOf(text);
          Collection<SemanticGraph> generated = Ssurgeon.inst().exhaustFromPatterns(patterns, sg);
          System.out.println("\n= = = = = = = = = =\nSrc text = "+text);
          System.out.println(sg.toCompactString());
          System.out.println("# generated  = "+generated.size());
          for (SemanticGraph genSg : generated) {
            System.out.println(genSg);
            System.out.println(". . . . .");
          }
        }
      }
    } catch (Exception e) {
      e.printStackTrace();
    }
  }

}
TOP

Related Classes of edu.stanford.nlp.semgraph.semgrex.ssurgeon.SsurgeonPattern

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.