package edu.stanford.nlp.semgraph.semgrex.ssurgeon;
import java.io.*;
import java.util.*;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphFactory;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.semgrex.ssurgeon.pred.SsurgPred;
import edu.stanford.nlp.semgraph.semgrex.*;
import edu.stanford.nlp.util.Generics;
/**
* This represents a source pattern and a subsequent edit script, or a sequence
* of successive in-place edits to perform on a SemanticGraph.
*
* Though the SemgrexMatcher resulting from the Semgrex match over the
* SemanticGraph is available to the edit, currently the nodes and edges to be affected
* should be named, in order for the edits to identify nodes easily. See the constructor
* for each edit type for appropriate syntax.
*
* NOTE: the edits are currently destructive. If you wish to preserve your graph, make a copy.
* @author yeh1
*
*/
public class SsurgeonPattern {
protected String UID;
protected String notes = "";
protected List<SsurgeonEdit> editScript;
protected SemgrexPattern semgrexPattern;
protected SemanticGraph semgrexGraph = null; // Source graph semgrex pattern was derived from (used for pattern learning)
protected SsurgPred predicateTest = null; // Predicate tests to apply, if non-null, must return true to execute.
// NodeMap is used to maintain a list of named nodes outside of the set in the SemgrexMatcher.
// Primarily for newly inserted nodes.
private Map<String, IndexedWord> nodeMap = null;
public SsurgeonPattern(String UID, SemgrexPattern pattern, List<SsurgeonEdit> editScript) {
semgrexPattern = pattern;
this.UID = UID;
this.editScript = editScript;
}
public SsurgeonPattern(String UID, SemgrexPattern pattern) {
this.UID = UID;
this.semgrexPattern = pattern;
this.editScript = new ArrayList<SsurgeonEdit>();
}
public SsurgeonPattern(String UID, SemgrexPattern pattern, SemanticGraph patternGraph) {
this(UID, pattern);
this.semgrexGraph = patternGraph;
}
public SsurgeonPattern(SemgrexPattern pattern, List<SsurgeonEdit> editScript) {
this(pattern.toString(), pattern, editScript);
}
public SsurgeonPattern(SemgrexPattern pattern) {
this(pattern.toString(), pattern);
}
public SsurgeonPattern(SemgrexPattern pattern, SemanticGraph patternGraph) {
this(pattern);
this.semgrexGraph = patternGraph;
}
public void setPredicate(SsurgPred predicateTest) {
this.predicateTest = predicateTest;
}
public void addEdit(SsurgeonEdit newEdit) {
newEdit.setOwningPattern(this);
editScript.add(newEdit);
}
/**
* Adds the node to the set of named nodes registered, using the given name.
*/
public void addNamedNode(IndexedWord node, String name) {
nodeMap.put(name, node);
}
public IndexedWord getNamedNode(String name) {
return nodeMap.get(name);
}
@Override
public String toString() {
StringWriter buf = new StringWriter();
buf.append("Semgrex Pattern: UID=");
buf.write(getUID());
buf.write("\nNotes: ");
buf.write(getNotes());
buf.write("\n");
buf.append(semgrexPattern.toString());
if (predicateTest != null) {
buf.write("\nPredicate: ");
buf.write(predicateTest.toString());
}
buf.append("\nEdit script:\n");
for (SsurgeonEdit edit : editScript) {
buf.append("\t");
buf.append(edit.toString());
buf.append("\n");
}
return buf.toString();
}
/**
* Executes the given sequence of edits against the SemanticGraph.
*
* NOTE: because the graph could be destructively modified, the matcher may be invalid, and
* thus the pattern will only be executed against the first match. Repeat this routine on the returned
* SemanticGraph to reapply on other matches.
*
* TODO: create variant that returns set of expansions while matcher.find() returns true
* @param sg SemanticGraph to operate over (NOT destroyed/modified).
* @return True if a match was found and executed, otherwise false.
*/
public Collection<SemanticGraph> execute(SemanticGraph sg) throws Exception {
Collection<SemanticGraph> generated = new ArrayList<SemanticGraph>();
SemgrexMatcher matcher = semgrexPattern.matcher(sg);
nextMatch:
while (matcher.find()) {
// NOTE: Semgrex can match two named nodes to the same node. In this case, we simply,
// check the named nodes, and if there are any collisions, we throw out this match.
Set<String> nodeNames = matcher.getNodeNames();
Set<IndexedWord> seen = Generics.newHashSet();
for (String name : nodeNames) {
IndexedWord curr = matcher.getNode(name);
if (seen.contains(curr))
break nextMatch;
seen.add(curr);
// System.out.println("REDUNDANT NODES FOUDN IN SEMGREX MATCH");
}
// if we do have to test, assemble the tests and arguments based off of the current
// match and test. If false, continue, else execute as normal.
if (predicateTest != null) {
if (!predicateTest.test(matcher))
continue;
}
// SemanticGraph tgt = new SemanticGraph(sg);
// Generate a new graph, since we don't want to mutilate the original graph.
// We use the same nodes, since the matcher operates off of those.
SemanticGraph tgt = SemanticGraphFactory.duplicateKeepNodes(sg);
nodeMap = Generics.newHashMap();
for (SsurgeonEdit edit : editScript) {
edit.evaluate(tgt, matcher);
}
generated.add(tgt);
}
return generated;
}
/**
* Executes the Ssurgeon edit, but with the given Semgrex Pattern, instead of the one attached to this
* pattern.
*
* NOTE: Predicate tests are still active here, and any named nodes required for evaluation must be
* present.
*/
public Collection<SemanticGraph> execute(SemanticGraph sg, SemgrexPattern overridePattern) throws Exception {
SemgrexMatcher matcher = overridePattern.matcher(sg);
Collection<SemanticGraph> generated = new ArrayList<SemanticGraph>();
while (matcher.find()) {
if (predicateTest != null) {
if (!predicateTest.test(matcher))
continue;
}
// We reset the named node map with each edit set, since these edits
// should exist in a separate graph for each unique Semgrex match.
nodeMap = Generics.newHashMap();
SemanticGraph tgt = new SemanticGraph(sg);
for (SsurgeonEdit edit : editScript) {
edit.evaluate(tgt, matcher);
}
generated.add(tgt);
}
return generated;
}
public SemgrexPattern getSemgrexPattern() {
return semgrexPattern;
}
/* ------
* XML output and input
* ------ */
public static final String ELT_LIST_TAG = "ssurgeon-pattern-list";
public static final String UID_ELEM_TAG = "uid";
public static final String RESOURCE_TAG = "resource";
public static final String SSURGEON_ELEM_TAG = "ssurgeon-pattern";
public static final String SEMGREX_ELEM_TAG = "semgrex";
public static final String SEMGREX_GRAPH_ELEM_TAG = "semgrex-graph";
public static final String PREDICATE_TAG = "predicate";
public static final String PREDICATE_AND_TAG = "and";
public static final String PREDICATE_OR_TAG = "or";
public static final String PRED_WORDLIST_TEST_TAG = "wordlist-test";
public static final String PRED_ID_ATTR = "id";
public static final String NOTES_ELEM_TAG = "notes";
public static final String EDIT_LIST_ELEM_TAG = "edit-list";
public static final String EDIT_ELEM_TAG = "edit";
public static final String ORDINAL_ATTR = "ordinal";
public List<SsurgeonEdit> getEditScript() {
return editScript;
}
public SemanticGraph getSemgrexGraph() {
return semgrexGraph;
}
public String getNotes() {
return notes;
}
public void setNotes(String notes) {
this.notes = notes;
}
public String getUID() {
return UID;
}
public void setUID(String uid) {
UID = uid;
}
/**
* Simply reads the given Ssurgeon pattern from file (args[0]), parses it, and prints it out.
* Use this for debugging the class and patterns.
*/
public static void main(String[] args) {
if (args.length == 0) {
System.out.println("Usage: SsurgeonPattern FILEPATH [\"COMPACT_SEMANTIC_GRAPH\"], FILEPATH=path to ssurgeon pattern to parse and print., SENTENCE=test sentence (in quotes)");
System.exit(-1);
}
File tgtFile = new File(args[0]);
try {
Ssurgeon.inst().initLog(new File("./ssurgeon.log"));
Ssurgeon.inst().setLogPrefix("SsurgeonPattern test");
List<SsurgeonPattern> patterns = Ssurgeon.inst().readFromFile(tgtFile);
for (SsurgeonPattern pattern : patterns) {
System.out.println("- - - - -");
System.out.println(pattern);
}
if (args.length > 1) {
for (int i=1; i<args.length;i++) {
String text = args[i];
SemanticGraph sg = SemanticGraph.valueOf(text);
Collection<SemanticGraph> generated = Ssurgeon.inst().exhaustFromPatterns(patterns, sg);
System.out.println("\n= = = = = = = = = =\nSrc text = "+text);
System.out.println(sg.toCompactString());
System.out.println("# generated = "+generated.size());
for (SemanticGraph genSg : generated) {
System.out.println(genSg);
System.out.println(". . . . .");
}
}
}
} catch (Exception e) {
e.printStackTrace();
}
}
}