Package edu.stanford.nlp.semgraph.semgrex

Source Code of edu.stanford.nlp.semgraph.semgrex.SemgrexTest

package edu.stanford.nlp.semgraph.semgrex;

import junit.framework.AssertionFailedError;
import junit.framework.TestCase;

import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.stats.IntCounter;
import edu.stanford.nlp.trees.EnglishGrammaticalRelations;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphFactory;


/**
* @author John Bauer
*/
public class SemgrexTest extends TestCase {

  public void testMatchAll() {
    SemanticGraph graph =
      SemanticGraph.valueOf("[ate subj:Bill dobj:[muffins nn:blueberry]]");
    Set<IndexedWord> words = graph.vertexSet();

    SemgrexPattern pattern = SemgrexPattern.compile("{}");
    SemgrexMatcher matcher = pattern.matcher(graph);
    String[] expectedMatches = {"ate", "Bill", "muffins", "blueberry"};
    for (int i = 0; i < expectedMatches.length; ++i) {
      assertTrue(matcher.findNextMatchingNode());
    }
    assertFalse(matcher.findNextMatchingNode());
  }

  public void testTest() {
    runTest("{}", "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "ate", "Bill", "muffins", "blueberry");

    try {
      runTest("{}", "[ate subj:Bill dobj:[muffins nn:blueberry]]",
              "ate", "Bill", "muffins", "foo");
      throw new RuntimeException();
    } catch (AssertionFailedError e) {
      // yay
    }

    try {
      runTest("{}", "[ate subj:Bill dobj:[muffins nn:blueberry]]",
              "ate", "Bill", "muffins");
      throw new RuntimeException();
    } catch (AssertionFailedError e) {
      // yay
    }

    try {
      runTest("{}", "[ate subj:Bill dobj:[muffins nn:blueberry]]",
              "ate", "Bill", "muffins", "blueberry", "blueberry");
      throw new RuntimeException();
    } catch (AssertionFailedError e) {
      // yay
    }
  }

  /**
   * This also tests negated node matches
   */
  public void testWordMatch() {
    runTest("{word:Bill}", "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "Bill");
    runTest("!{word:Bill}",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "ate", "muffins", "blueberry");
    runTest("!{word:Fred}",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "ate", "Bill", "muffins", "blueberry");
    runTest("!{word:ate}",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "Bill", "muffins", "blueberry");
    runTest("{word:/^(?!Bill).*$/}",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "ate", "muffins", "blueberry");
    runTest("{word:/^(?!Fred).*$/}",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "ate", "Bill", "muffins", "blueberry");
    runTest("{word:/^(?!ate).*$/}",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "Bill", "muffins", "blueberry");
    runTest("{word:muffins} >nn {word:blueberry}",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "muffins");
    runTest("{} << {word:ate}=a",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "Bill", "muffins", "blueberry");
    runTest("{} << !{word:ate}=a",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "blueberry");
    // blueberry should match twice because it has two ancestors
    runTest("{} << {}=a",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "Bill", "muffins", "blueberry", "blueberry");
  }

  public void testSimpleDependency() {
    // blueberry has two ancestors
    runTest("{} << {}", "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "Bill", "muffins", "blueberry", "blueberry");
    // ate has three descendants
    runTest("{} >> {}", "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "ate", "ate", "ate", "muffins");
    runTest("{} < {}", "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "Bill", "muffins", "blueberry");
    runTest("{} > {}", "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "ate", "ate", "muffins");
  }

  public void testNamedDependency() {
    runTest("{} << {word:ate}",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "Bill", "muffins", "blueberry");
    runTest("{} >> {word:blueberry}",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "ate", "muffins");
    runTest("{} >> {word:Bill}",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "ate");
    runTest("{} < {word:ate}",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "Bill", "muffins");
    runTest("{} > {word:blueberry}",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "muffins");
    runTest("{} > {word:muffins}",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "ate");
  }

  public void testNamedGovernor() {
    runTest("{word:blueberry} << {}",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "blueberry");
    runTest("{word:ate} << {}",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]");
    runTest("{word:blueberry} >> {}",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]");
    runTest("{word:muffins} >> {}",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "muffins");
    runTest("{word:Bill} >> {}",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]");
    runTest("{word:muffins} < {}",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "muffins");
    runTest("{word:muffins} > {}",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "muffins");
  }

  public void testTwoDependencies() {
    runTest("{} >> ({} >> {})",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "ate");
    runTest("{} >> {word:Bill} >> {word:muffins}",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "ate");
    runTest("{}=a >> {}=b >> {word:muffins}=c",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "ate", "ate", "ate");
    runTest("{}=a >> {word:Bill}=b >> {}=c",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "ate", "ate", "ate");
    runTest("{}=a >> {}=b >> {}=c",
            "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "ate", "ate", "ate", "ate", "ate",
            "ate", "ate", "ate", "ate", "muffins");
  }

  public void testRegex() {
    runTest("{word:/Bill/}", "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "Bill");

    runTest("{word:/ill/}", "[ate subj:Bill dobj:[muffins nn:blueberry]]");

    runTest("{word:/.*ill/}", "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "Bill");

    runTest("{word:/.*il/}", "[ate subj:Bill dobj:[muffins nn:blueberry]]");

    runTest("{word:/.*il.*/}", "[ate subj:Bill dobj:[muffins nn:blueberry]]",
            "Bill");
  }

  public void testReferencedRegex() {
    runTest("{word:/Bill/}", "[ate subj:Bill dobj:[bill det:the]]",
            "Bill");

    runTest("{word:/.*ill/}", "[ate subj:Bill dobj:[bill det:the]]",
            "Bill", "bill");

    runTest("{word:/[Bb]ill/}", "[ate subj:Bill dobj:[bill det:the]]",
            "Bill", "bill");

    // TODO: implement referencing regexes
  }

  public static SemanticGraph makeComplicatedGraph() {
    SemanticGraph graph = new SemanticGraph();
    String[] words = {"A", "B", "C", "D", "E", "F", "G", "H", "I", "J"};
    IndexedWord[] nodes = new IndexedWord[words.length];
    for (int i = 0; i < words.length; ++i) {
      IndexedWord word = new IndexedWord("test", 1, i + 1);
      word.setWord(words[i]);
      word.setValue(words[i]);
      nodes[i] = word;
      graph.addVertex(word);
    }
    graph.setRoot(nodes[0]);
    // this graph isn't supposed to make sense
    graph.addEdge(nodes[0], nodes[1],
                  EnglishGrammaticalRelations.MODIFIER, 1.0, false);
    graph.addEdge(nodes[0], nodes[2],
                  EnglishGrammaticalRelations.DIRECT_OBJECT, 1.0, false);
    graph.addEdge(nodes[0], nodes[3],
                  EnglishGrammaticalRelations.INDIRECT_OBJECT, 1.0, false);
    graph.addEdge(nodes[1], nodes[4],
                  EnglishGrammaticalRelations.MARKER, 1.0, false);
    graph.addEdge(nodes[2], nodes[4],
                  EnglishGrammaticalRelations.EXPLETIVE, 1.0, false);
    graph.addEdge(nodes[3], nodes[4],
                  EnglishGrammaticalRelations.ADJECTIVAL_COMPLEMENT, 1.0, false);
    graph.addEdge(nodes[4], nodes[5],
                  EnglishGrammaticalRelations.ADJECTIVAL_MODIFIER, 1.0, false);
    graph.addEdge(nodes[4], nodes[6],
                  EnglishGrammaticalRelations.ADVERBIAL_MODIFIER, 1.0, false);
    graph.addEdge(nodes[4], nodes[8],
                  EnglishGrammaticalRelations.MODIFIER, 1.0, false);
    graph.addEdge(nodes[5], nodes[7],
                  EnglishGrammaticalRelations.POSSESSION_MODIFIER, 1.0, false);
    graph.addEdge(nodes[6], nodes[7],
                  EnglishGrammaticalRelations.POSSESSIVE_MODIFIER, 1.0, false);
    graph.addEdge(nodes[7], nodes[8],
                  EnglishGrammaticalRelations.AGENT, 1.0, false);
    graph.addEdge(nodes[8], nodes[9],
                  EnglishGrammaticalRelations.DETERMINER, 1.0, false);

    return graph;
  }

  /**
   * Test that governors, dependents, ancestors, descendants are all
   * returned with multiplicity 1 if there are multiple paths to the
   * same node.
   */
  public void testComplicatedGraph() {
    SemanticGraph graph = makeComplicatedGraph();

    runTest("{} < {word:A}", graph,
            "B", "C", "D");

    runTest("{} > {word:E}", graph,
            "B", "C", "D");

    runTest("{} > {word:J}", graph,
            "I");

    runTest("{} < {word:E}", graph,
            "F", "G", "I");

    runTest("{} < {word:I}", graph,
            "J");

    runTest("{} << {word:A}", graph,
            "B", "C", "D", "E", "F", "G", "H", "I", "J");

    runTest("{} << {word:B}", graph,
            "E", "F", "G", "H", "I", "J");

    runTest("{} << {word:C}", graph,
            "E", "F", "G", "H", "I", "J");

    runTest("{} << {word:D}", graph,
            "E", "F", "G", "H", "I", "J");

    runTest("{} << {word:E}", graph,
            "F", "G", "H", "I", "J");

    runTest("{} << {word:F}", graph,
            "H", "I", "J");

    runTest("{} << {word:G}", graph,
            "H", "I", "J");

    runTest("{} << {word:H}", graph,
            "I", "J");

    runTest("{} << {word:I}", graph,
            "J");

    runTest("{} << {word:J}", graph);

    runTest("{} << {word:K}", graph);

    runTest("{} >> {word:A}", graph);

    runTest("{} >> {word:B}", graph, "A");

    runTest("{} >> {word:C}", graph, "A");

    runTest("{} >> {word:D}", graph, "A");

    runTest("{} >> {word:E}", graph,
            "A", "B", "C", "D");

    runTest("{} >> {word:F}", graph,
            "A", "B", "C", "D", "E");

    runTest("{} >> {word:G}", graph,
            "A", "B", "C", "D", "E");

    runTest("{} >> {word:H}", graph,
            "A", "B", "C", "D", "E", "F", "G");

    runTest("{} >> {word:I}", graph,
            "A", "B", "C", "D", "E", "F", "G", "H");

    runTest("{} >> {word:J}", graph,
            "A", "B", "C", "D", "E", "F", "G", "H", "I");

    runTest("{} >> {word:K}", graph);
  }

  public void testRelationType() {
    SemanticGraph graph = makeComplicatedGraph();
    runTest("{} <<mod {}", graph,
            "B", "E", "F", "G", "H", "I", "I", "J", "J");

    runTest("{} >>det {}", graph,
            "A", "B", "C", "D", "E", "F", "G", "H", "I");

    runTest("{} >>det {word:J}", graph,
            "A", "B", "C", "D", "E", "F", "G", "H", "I");
  }

  public void testExactDepthRelations() {
    SemanticGraph graph = makeComplicatedGraph();
    runTest("{} 2,3<< {word:A}", graph, "E", "F", "G", "I");

    runTest("{} 2,2<< {word:A}", graph, "E");

    runTest("{} 1,2<< {word:A}", graph, "B", "C", "D", "E");

    runTest("{} 0,2<< {word:A}", graph, "B", "C", "D", "E");

    runTest("{} 0,10<< {word:A}", graph,
            "B", "C", "D", "E", "F", "G", "H", "I", "J");

    runTest("{} 0,10>> {word:J}", graph,
            "A", "B", "C", "D", "E", "F", "G", "H", "I");

    runTest("{} 2,3>> {word:J}", graph,
            "B", "C", "D", "E", "F", "G", "H");

    runTest("{} 2,2>> {word:J}", graph,
            "E", "H");

    // use this method to avoid the toString() test, since we expect it
    // to use 2,2>> instead of 2>>
    runTest(SemgrexPattern.compile("{} 2>> {word:J}"), graph,
            "E", "H");

    runTest("{} 1,2>> {word:J}", graph,
            "E", "H", "I");
  }

  /**
   * Tests that if there are different paths from A to I, those paths show up for exactly the right depths
   */
  public void testMultipleDepths() {
    SemanticGraph graph = makeComplicatedGraph();
    runTest("{} 3,3<< {word:A}", graph, "F", "G", "I");
    runTest("{} 4,4<< {word:A}", graph, "H", "J");
    runTest("{} 5,5<< {word:A}", graph, "I");
    runTest("{} 6,6<< {word:A}", graph, "J");
  }

  public void testNamedNode() {
    SemanticGraph graph = makeComplicatedGraph();

    runTest("{} >dobj ({} >expl {})", graph, "A");

    SemgrexPattern pattern =
      SemgrexPattern.compile("{} >dobj ({} >expl {}=foo)");
    SemgrexMatcher matcher = pattern.matcher(graph);
    assertTrue(matcher.find());
    assertEquals(1, matcher.getNodeNames().size());
    assertEquals("E", matcher.getNode("foo").toString());
    assertEquals("A", matcher.getMatch().toString());
    assertFalse(matcher.find());

    pattern = SemgrexPattern.compile("{} >dobj ({} >expl {}=foo) >mod {}");
    matcher = pattern.matcher(graph);
    assertTrue(matcher.find());
    assertEquals(1, matcher.getNodeNames().size());
    assertEquals("E", matcher.getNode("foo").toString());
    assertEquals("A", matcher.getMatch().toString());
    assertFalse(matcher.find());

    pattern =
      SemgrexPattern.compile("{} >dobj ({} >expl {}=foo) >mod ({} >mark {})");
    matcher = pattern.matcher(graph);
    assertTrue(matcher.find());
    assertEquals(1, matcher.getNodeNames().size());
    assertEquals("E", matcher.getNode("foo").toString());
    assertEquals("A", matcher.getMatch().toString());
    assertFalse(matcher.find());

    pattern =
      SemgrexPattern.compile("{} >dobj ({} >expl {}=foo) >mod ({} > {})");
    matcher = pattern.matcher(graph);
    assertTrue(matcher.find());
    assertEquals(1, matcher.getNodeNames().size());
    assertEquals("E", matcher.getNode("foo").toString());
    assertEquals("A", matcher.getMatch().toString());
    assertFalse(matcher.find());

    pattern =
      SemgrexPattern.compile("{} >dobj ({} >expl {}=foo) >mod ({} > {}=foo)");
    matcher = pattern.matcher(graph);
    assertTrue(matcher.find());
    assertEquals(1, matcher.getNodeNames().size());
    assertEquals("E", matcher.getNode("foo").toString());
    assertEquals("A", matcher.getMatch().toString());
    assertFalse(matcher.find());

    pattern =
      SemgrexPattern.compile("{} >dobj ({} >expl {}=foo) >mod ({}=foo > {})");
    matcher = pattern.matcher(graph);
    assertFalse(matcher.find());
  }

  public void testPartition() {
    SemanticGraph graph = makeComplicatedGraph();

    runTest("{}=a >> {word:E}", graph, "A", "B", "C", "D");
    runTest("{}=a >> {word:E} : {}=a >> {word:B}", graph, "A");
  }

  public void testEqualsRelation() {
    SemanticGraph graph = SemanticGraph.valueOf("[ate subj:Bill dobj:[muffins nn:blueberry]]");
    SemgrexPattern pattern = SemgrexPattern.compile("{} >> ({}=a == {}=b)");
    SemgrexMatcher matcher = pattern.matcher(graph);
    assertTrue(matcher.find());
    assertEquals(2, matcher.getNodeNames().size());
    assertEquals("ate", matcher.getMatch().toString());
    assertEquals("Bill", matcher.getNode("a").toString());
    assertEquals("Bill", matcher.getNode("b").toString());

    assertTrue(matcher.find());
    assertEquals(2, matcher.getNodeNames().size());
    assertEquals("ate", matcher.getMatch().toString());
    assertEquals("muffins", matcher.getNode("a").toString());
    assertEquals("muffins", matcher.getNode("b").toString());

    assertTrue(matcher.find());
    assertEquals(2, matcher.getNodeNames().size());
    assertEquals("ate", matcher.getMatch().toString());
    assertEquals("blueberry", matcher.getNode("a").toString());
    assertEquals("blueberry", matcher.getNode("b").toString());

    assertTrue(matcher.find());
    assertEquals(2, matcher.getNodeNames().size());
    assertEquals("muffins", matcher.getMatch().toString());
    assertEquals("blueberry", matcher.getNode("a").toString());
    assertEquals("blueberry", matcher.getNode("b").toString());

    assertFalse(matcher.find());

    // This split pattern should also work
    pattern = SemgrexPattern.compile("{} >> {}=a >> {}=b : {}=a == {}=b");
    matcher = pattern.matcher(graph);
    assertTrue(matcher.find());
    assertEquals(2, matcher.getNodeNames().size());
    assertEquals("ate", matcher.getMatch().toString());
    assertEquals("Bill", matcher.getNode("a").toString());
    assertEquals("Bill", matcher.getNode("b").toString());

    assertTrue(matcher.find());
    assertEquals(2, matcher.getNodeNames().size());
    assertEquals("ate", matcher.getMatch().toString());
    assertEquals("muffins", matcher.getNode("a").toString());
    assertEquals("muffins", matcher.getNode("b").toString());

    assertTrue(matcher.find());
    assertEquals(2, matcher.getNodeNames().size());
    assertEquals("ate", matcher.getMatch().toString());
    assertEquals("blueberry", matcher.getNode("a").toString());
    assertEquals("blueberry", matcher.getNode("b").toString());

    assertTrue(matcher.find());
    assertEquals(2, matcher.getNodeNames().size());
    assertEquals("muffins", matcher.getMatch().toString());
    assertEquals("blueberry", matcher.getNode("a").toString());
    assertEquals("blueberry", matcher.getNode("b").toString());

    assertFalse(matcher.find());
  }

  /**
   * In this test, the graph should find matches with pairs of nodes
   * which are different from each other.  Since "muffins" only has
   * one dependent, there should not be any matches with "muffins" as
   * the head, for example.
   */
  public void testNotEquals() {
    SemanticGraph graph = SemanticGraph.valueOf("[ate subj:Bill dobj:[muffins nn:blueberry]]");

    SemgrexPattern pattern = SemgrexPattern.compile("{} >> {}=a >> {}=b : {}=a !== {}=b");
    SemgrexMatcher matcher = pattern.matcher(graph);
    assertTrue(matcher.find());
    assertEquals(2, matcher.getNodeNames().size());
    assertEquals("ate", matcher.getMatch().toString());
    assertEquals("Bill", matcher.getNode("a").toString());
    assertEquals("muffins", matcher.getNode("b").toString());

    assertTrue(matcher.find());
    assertEquals(2, matcher.getNodeNames().size());
    assertEquals("ate", matcher.getMatch().toString());
    assertEquals("Bill", matcher.getNode("a").toString());
    assertEquals("blueberry", matcher.getNode("b").toString());

    assertTrue(matcher.find());
    assertEquals(2, matcher.getNodeNames().size());
    assertEquals("ate", matcher.getMatch().toString());
    assertEquals("muffins", matcher.getNode("a").toString());
    assertEquals("Bill", matcher.getNode("b").toString());

    assertTrue(matcher.find());
    assertEquals(2, matcher.getNodeNames().size());
    assertEquals("ate", matcher.getMatch().toString());
    assertEquals("muffins", matcher.getNode("a").toString());
    assertEquals("blueberry", matcher.getNode("b").toString());

    assertTrue(matcher.find());
    assertEquals(2, matcher.getNodeNames().size());
    assertEquals("ate", matcher.getMatch().toString());
    assertEquals("blueberry", matcher.getNode("a").toString());
    assertEquals("Bill", matcher.getNode("b").toString());

    assertTrue(matcher.find());
    assertEquals(2, matcher.getNodeNames().size());
    assertEquals("ate", matcher.getMatch().toString());
    assertEquals("blueberry", matcher.getNode("a").toString());
    assertEquals("muffins", matcher.getNode("b").toString());

    assertFalse(matcher.find());

    // same as the first test, essentially, but with a more compact expression
    pattern = SemgrexPattern.compile("{} >> {}=a >> ({}=b !== {}=a)");
    matcher = pattern.matcher(graph);
    assertTrue(matcher.find());
    assertEquals(2, matcher.getNodeNames().size());
    assertEquals("ate", matcher.getMatch().toString());
    assertEquals("Bill", matcher.getNode("a").toString());
    assertEquals("muffins", matcher.getNode("b").toString());

    assertTrue(matcher.find());
    assertEquals(2, matcher.getNodeNames().size());
    assertEquals("ate", matcher.getMatch().toString());
    assertEquals("Bill", matcher.getNode("a").toString());
    assertEquals("blueberry", matcher.getNode("b").toString());

    assertTrue(matcher.find());
    assertEquals(2, matcher.getNodeNames().size());
    assertEquals("ate", matcher.getMatch().toString());
    assertEquals("muffins", matcher.getNode("a").toString());
    assertEquals("Bill", matcher.getNode("b").toString());

    assertTrue(matcher.find());
    assertEquals(2, matcher.getNodeNames().size());
    assertEquals("ate", matcher.getMatch().toString());
    assertEquals("muffins", matcher.getNode("a").toString());
    assertEquals("blueberry", matcher.getNode("b").toString());

    assertTrue(matcher.find());
    assertEquals(2, matcher.getNodeNames().size());
    assertEquals("ate", matcher.getMatch().toString());
    assertEquals("blueberry", matcher.getNode("a").toString());
    assertEquals("Bill", matcher.getNode("b").toString());

    assertTrue(matcher.find());
    assertEquals(2, matcher.getNodeNames().size());
    assertEquals("ate", matcher.getMatch().toString());
    assertEquals("blueberry", matcher.getNode("a").toString());
    assertEquals("muffins", matcher.getNode("b").toString());

    assertFalse(matcher.find());
  }

  public void testInitialConditions() {
    SemanticGraph graph = makeComplicatedGraph();

    SemgrexPattern pattern =
      SemgrexPattern.compile("{}=a >> {}=b : {}=a >> {}=c");
    Map<String, IndexedWord> variables = new HashMap<String, IndexedWord>();
    variables.put("b", graph.getNodeByIndex(5));
    variables.put("c", graph.getNodeByIndex(2));
    SemgrexMatcher matcher = pattern.matcher(graph, variables);
    assertTrue(matcher.find());
    assertEquals(3, matcher.getNodeNames().size());
    assertEquals("A", matcher.getNode("a").toString());
    assertEquals("E", matcher.getNode("b").toString());
    assertEquals("B", matcher.getNode("c").toString());
    assertEquals("A", matcher.getMatch().toString());
    assertFalse(matcher.find());
  }

  /**
   * Test that a particular AnnotationLookup is honored
   */
  public void testIndex() {
    SemanticGraph graph = SemanticGraph.valueOf("[ate subj:Bill dobj:[muffins nn:blueberry]]");
    runTest("{idx:0}", graph, "ate");
    runTest("{idx:1}", graph, "Bill");
    runTest("{idx:2}", graph, "muffins");
    runTest("{idx:3}", graph, "blueberry");
    runTest("{idx:4}", graph);
  }

  public void testLemma() {
    SemanticGraph graph = SemanticGraph.valueOf("[ate subj:Bill dobj:[muffins nn:blueberry]]");
    for (IndexedWord word : graph.vertexSet()) {
      word.setLemma(word.word());
    }
    runTest("{lemma:ate}", graph, "ate");

    Tree tree = Tree.valueOf("(ROOT (S (NP (PRP I)) (VP (VBP love) (NP (DT the) (NN display))) (. .)))");
    graph = SemanticGraphFactory.generateCCProcessedDependencies(tree);
    for (IndexedWord word : graph.vertexSet()) {
      word.setLemma(word.word());
    }
    // This set of three tests also provides some coverage for a
    // bizarre error a user found where multiple copies of the same
    // IndexedWord were created
    runTest("{}=Obj <dobj {lemma:love}=Pred", graph, "display/NN");
    runTest("{}=Obj <dobj {}=Pred", graph, "display/NN");
    runTest("{lemma:love}=Pred >dobj {}=Obj ", graph, "love/VBP");
  }

  public void testNamedRelation() {
    SemanticGraph graph = SemanticGraph.valueOf("[ate subj:Bill dobj:[muffins nn:blueberry]]");
    SemgrexPattern pattern = SemgrexPattern.compile("{idx:0}=gov >>=foo {idx:3}=dep");
    SemgrexMatcher matcher = pattern.matcher(graph);
    assertTrue(matcher.find());
    assertEquals("ate", matcher.getNode("gov").toString());
    assertEquals("blueberry", matcher.getNode("dep").toString());
    assertEquals("nn", matcher.getRelnString("foo"));
    assertFalse(matcher.find());

    pattern = SemgrexPattern.compile("{idx:3}=dep <<=foo {idx:0}=gov");
    matcher = pattern.matcher(graph);
    assertTrue(matcher.find());
    assertEquals("ate", matcher.getNode("gov").toString());
    assertEquals("blueberry", matcher.getNode("dep").toString());
    assertEquals("dobj", matcher.getRelnString("foo"));
    assertFalse(matcher.find());

    pattern = SemgrexPattern.compile("{idx:3}=dep <=foo {idx:2}=gov");
    matcher = pattern.matcher(graph);
    assertTrue(matcher.find());
    assertEquals("muffins", matcher.getNode("gov").toString());
    assertEquals("blueberry", matcher.getNode("dep").toString());
    assertEquals("nn", matcher.getRelnString("foo"));
    assertFalse(matcher.find());

    pattern = SemgrexPattern.compile("{idx:2}=gov >=foo {idx:3}=dep");
    matcher = pattern.matcher(graph);
    assertTrue(matcher.find());
    assertEquals("muffins", matcher.getNode("gov").toString());
    assertEquals("blueberry", matcher.getNode("dep").toString());
    assertEquals("nn", matcher.getRelnString("foo"));
    assertFalse(matcher.find());
  }

  public static void outputResults(String pattern, String graph,
                                   String ... ignored) {
    outputResults(SemgrexPattern.compile(pattern),
                  SemanticGraph.valueOf(graph));
  }

  public static void outputResults(String pattern, SemanticGraph graph,
                                   String ... ignored) {
    outputResults(SemgrexPattern.compile(pattern), graph);
  }

  public static void outputResults(SemgrexPattern pattern, SemanticGraph graph,
                                   String ... ignored) {
    System.out.println("Matching pattern " + pattern + " to\n" + graph +
                       "  :" + (pattern.matcher(graph).matches() ?
                                "matches" : "doesn't match"));
    System.out.println();
    pattern.prettyPrint();
    System.out.println();
    SemgrexMatcher matcher = pattern.matcher(graph);
    while (matcher.find()) {
      System.out.println("  " + matcher.getMatch());
      Set<String> nodeNames = matcher.getNodeNames();
      if (nodeNames != null && nodeNames.size() > 0) {
        for (String name : nodeNames) {
          System.out.println("    " + name + ": " + matcher.getNode(name));
        }
      }

      Set<String> relNames = matcher.getRelationNames();
      if (relNames != null) {
        for (String name : relNames) {
          System.out.println("    " + name + ": " + matcher.getRelnString(name));
        }
      }
    }
  }

  public static void comparePatternToString(String pattern) {
    SemgrexPattern semgrex = SemgrexPattern.compile(pattern);
    String tostring = semgrex.toString();
    tostring = tostring.replaceAll(" +", " ");
    assertEquals(pattern.trim(), tostring.trim());
  }

  public static void runTest(String pattern, String graph,
                             String... expectedMatches) {
    comparePatternToString(pattern);
    runTest(SemgrexPattern.compile(pattern), SemanticGraph.valueOf(graph),
            expectedMatches);
  }

  public static void runTest(String pattern, SemanticGraph graph,
                             String... expectedMatches) {
    comparePatternToString(pattern);
    runTest(SemgrexPattern.compile(pattern), graph, expectedMatches);
  }

  public static void runTest(SemgrexPattern pattern, SemanticGraph graph,
                             String... expectedMatches) {
    // results are not in the order I would expect.  Using a counter
    // allows them to be in any order
    IntCounter<String> counts = new IntCounter<String>();
    for (int i = 0; i < expectedMatches.length; ++i) {
      counts.incrementCount(expectedMatches[i]);
    }
    IntCounter<String> originalCounts = new IntCounter<String>(counts);

    SemgrexMatcher matcher = pattern.matcher(graph);

    for (int i = 0; i < expectedMatches.length; ++i) {
      if (!matcher.find()) {
        throw new AssertionFailedError("Expected " + expectedMatches.length +
                                       " matches for pattern " + pattern +
                                       " on " + graph + ", only got " + i);
      }
      String match = matcher.getMatch().toString();
      if (!counts.containsKey(match)) {
        throw new AssertionFailedError("Unexpected match " + match +
                                       " for pattern " + pattern +
                                       " on " + graph);
      }
      counts.decrementCount(match);
      if (counts.getCount(match) < 0) {
        throw new AssertionFailedError("Found too many matches for " + match +
                                       " for pattern " + pattern +
                                       " on " + graph);
      }
    }
    if (matcher.findNextMatchingNode()) {
      throw new AssertionFailedError("Found more than " +
                                     expectedMatches.length +
                                     " matches for pattern " + pattern +
                                     " on " + graph + "... extra match is " +
                                     matcher.getMatch());
    }
  }

}
TOP

Related Classes of edu.stanford.nlp.semgraph.semgrex.SemgrexTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.