Examples of edu.stanford.nlp.trees.tregex.TregexPattern

edu.stanford.nlp.trees.tregex.TregexPattern
// Create a reusable pattern object
TregexPattern patternMW = TregexPattern.compile("/^MW/");
// Run the pattern on one particular tree
TregexMatcher matcher = patternMW.matcher(tree);
// Iterate over all of the subtrees that matched
while (matcher.findNextMatchingNode()) {
Tree match = matcher.getMatch();
// do whatever we want with the matched subtree
}
Current known bugs/shortcomings:
- Tregex does not support disjunctions at the root level. For example, the pattern A | B will not work.
- Using multiple variable strings in one regex may not work. For example, suppose you have two regex patterns, /(.*)/#1%foo and /(.*)/#1%bar. You might then want to write a single pattern that matches the concatenation of these patterns, /(.*)(.*)/#1%foo#2%bar, but that will not work.
@author Galen Andrew @author Roger Levy (rog@csli.stanford.edu) @author Anna Rafferty (filter mode) @author John Bauer (extensively tested and bugfixed)

    Macros.addAllMacros(tpc, getBufferedReader(macroStr));
    try {
      BufferedReader br = getBufferedReader(editStr);
      List<TsurgeonPattern> tsp = new ArrayList<TsurgeonPattern>();
      for (String line; (line = br.readLine()) != null; ) {
        TregexPattern matchPattern = tpc.compile(line);
        tsp.clear();
        if (DEBUG) System.err.println("Pattern is " + line + " [" + matchPattern + ']');
        while (continuing(line = br.readLine())) {
          TsurgeonPattern p = Tsurgeon.parseOperation(line);
          if (DEBUG) System.err.println("Operation is " + line + " [" + p + ']');

View Full Code Here

    try {
      BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
      TreeReaderFactory trf = new FrenchTreeReaderFactory();
      TreeReader tr = trf.newTreeReader(br);


      final TregexPattern pMWE = TregexPattern.compile("/^MW/");
      for(Tree t; (t = tr.readTree()) != null;) {
        //Count MWE statistics
        TregexMatcher m = pMWE.matcher(t);
        while(m.findNextMatchingNode()) {
          Tree match = m.getMatch();
          String label = match.value();
          List<CoreLabel> yield = match.taggedLabeledYield();
          StringBuilder termYield = new StringBuilder();

View Full Code Here

    List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
    Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
    tree.indexLeaves();
    SemanticGraph dependency = s.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);


    TregexPattern tgrepPattern = npOrPrpMentionPattern;
    TregexMatcher matcher = tgrepPattern.matcher(tree);
    while (matcher.find()) {
      Tree t = matcher.getMatch();
      List<Tree> mLeaves = t.getLeaves();
      int beginIdx = ((CoreLabel)mLeaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class)-1;
      int endIdx = ((CoreLabel)mLeaves.get(mLeaves.size()-1).label()).get(CoreAnnotations.IndexAnnotation.class);

View Full Code Here

  protected static void extractEnumerations(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
    List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
    Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
    SemanticGraph dependency = s.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);


    TregexPattern tgrepPattern = enumerationsMentionPattern;
    TregexMatcher matcher = tgrepPattern.matcher(tree);
    Map<IntPair, Tree> spanToMentionSubTree = Generics.newHashMap();
    while (matcher.find()) {
      matcher.getMatch();
      Tree m1 = matcher.getNode("m1");
      Tree m2 = matcher.getNode("m2");

View Full Code Here

  protected void compileAnnotations(HeadFinder hf) {
    TregexPatternCompiler compiler = new TregexPatternCompiler(hf);


    annotationPatterns.clear();
    for (Map.Entry<String, Pair<String, Function<TregexMatcher, String>>> annotation : annotations.entrySet()) {
      TregexPattern compiled;
      try {
        compiled = compiler.compile(annotation.getValue().first());
      } catch (TregexParseException e) {
        int nth = annotationPatterns.size() + 1;
        System.err.println("Parse exception on annotation pattern #" + nth + " initialization: " + e);

View Full Code Here

  }


  /** Find syntactic pattern in a sentence by tregex */
  private void findTreePattern(Tree tree, String tregex, Set<Pair<Integer, Integer>> foundPairs) {
    try {
      TregexPattern tgrepPattern = TregexPattern.compile(tregex);
      findTreePattern(tree, tgrepPattern, foundPairs);
    } catch (Exception e) {
      // shouldn't happen....
      throw new RuntimeException(e);
    }

View Full Code Here

      this.sourcePattern = null;
    }


    for (String pattern : targetPatterns) {
      try {
        TregexPattern p = tregexCompiler.compile(pattern);
        this.targetPatterns.add(p);
      } catch (edu.stanford.nlp.trees.tregex.TregexParseException pe) {
        throw new RuntimeException("Bad pattern: " + pattern, pe);
      }
    }

View Full Code Here

    try {
      BufferedReader br = new BufferedReader(new StringReader(editStr));
      List<TsurgeonPattern> tsp = new ArrayList<TsurgeonPattern>();
      while ((line = br.readLine()) != null) {
        if (DEBUG) System.err.print("Pattern is " + line);
        TregexPattern matchPattern = TregexPattern.compile(line);
        if (DEBUG) System.err.println(" [" + matchPattern + "]");
        tsp.clear();
        while (continuing(line = br.readLine())) {
          TsurgeonPattern p = Tsurgeon.parseOperation(line);
          if (DEBUG) System.err.println("Operation is " + line + " [" + p + "]");

View Full Code Here

    File f = new File(args[0]);
    try {
      //These bad trees in the Candito training set should be thrown out:
      //  (ROOT (SENT (" ") (. .)))
      //  (ROOT (SENT (. .)))
      TregexPattern pBadTree = TregexPattern.compile("@SENT <: @PUNC");
      TregexPattern pBadTree2 = TregexPattern.compile("@SENT <1 @PUNC <2 @PUNC !<3 __");
      
      BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(f), "UTF-8"));
      TreeReaderFactory trf = new FrenchTreeReaderFactory();
      TreeReader tr = trf.newTreeReader(br);
   
      int nTrees = 0;
      for(Tree t; (t = tr.readTree()) != null;nTrees++) {
        TregexMatcher m = pBadTree.matcher(t);
        TregexMatcher m2 = pBadTree2.matcher(t);
        if(m.find() || m2.find()) {
          System.err.println("Discarding tree: " + t.toString());
        } else {
          Tree fixedT = tt.transformTree(t);
          System.out.println(fixedT.toString());

View Full Code Here

    throws IOException
  {
    Queue<Integer> fSizeQueue = new LinkedList<Integer>(Arrays.asList(fSizes));
    Queue<String> fNameQueue = new LinkedList<String>(Arrays.asList(fNames));


    TregexPattern pBadTree = TregexPattern.compile("@SENT <: @PUNC");
    TregexPattern pBadTree2 = TregexPattern.compile("@SENT <1 @PUNC <2 @PUNC !<3 __");
    
    final TreeTransformer tt = new FTBCorrector();


    int size = fSizeQueue.remove();
    String filename = fNameQueue.remove();


    System.err.println("Outputing " + filename);


    PrintWriter writer =
      new PrintWriter(new BufferedWriter
                      (new OutputStreamWriter
                       (new FileOutputStream(filename), "UTF-8")));


    int outputCount = 0;
    for (String id : ids) {
      if (!treeMap.containsKey(id)) {
        System.err.println("Missing id: " + id);
        continue;
      }


      Tree tree = treeMap.get(id);
      TregexMatcher m = pBadTree.matcher(tree);
      TregexMatcher m2 = pBadTree2.matcher(tree);
      if(m.find() || m2.find()) {
        System.err.println("Discarding tree: " + tree.toString());
        continue;
      }

View Full Code Here

0 1

TOP

Related Classes of edu.stanford.nlp.trees.tregex.TregexPattern

edu.stanford.nlp.dcoref.Mention

edu.stanford.nlp.dcoref.MentionExtractor

edu.stanford.nlp.dcoref.RuleBasedCorefMentionFinder

edu.stanford.nlp.international.arabic.pipeline.ATBCorrector

edu.stanford.nlp.international.arabic.pipeline.MWETreeVisitor

edu.stanford.nlp.international.french.pipeline.FTBCorrector

edu.stanford.nlp.international.french.scripts.MWEFrequencyDist

edu.stanford.nlp.international.french.scripts.SplitCanditoTrees

edu.stanford.nlp.ling.StringLabelFactory

edu.stanford.nlp.parser.lexparser.TregexPoweredTreebankParserParams

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.

Examples of edu.stanford.nlp.trees.tregex.TregexPattern

Current known bugs/shortcomings:

Related Classes of edu.stanford.nlp.trees.tregex.TregexPattern