Package edu.stanford.nlp.trees.tregex

Examples of edu.stanford.nlp.trees.tregex.TregexPattern$TRegexTreeReaderFactory


    Macros.addAllMacros(tpc, getBufferedReader(macroStr));
    try {
      BufferedReader br = getBufferedReader(editStr);
      List<TsurgeonPattern> tsp = new ArrayList<TsurgeonPattern>();
      for (String line; (line = br.readLine()) != null; ) {
        TregexPattern matchPattern = tpc.compile(line);
        tsp.clear();
        if (DEBUG) System.err.println("Pattern is " + line + " [" + matchPattern + ']');
        while (continuing(line = br.readLine())) {
          TsurgeonPattern p = Tsurgeon.parseOperation(line);
          if (DEBUG) System.err.println("Operation is " + line + " [" + p + ']');
View Full Code Here


    try {
      BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
      TreeReaderFactory trf = new FrenchTreeReaderFactory();
      TreeReader tr = trf.newTreeReader(br);

      final TregexPattern pMWE = TregexPattern.compile("/^MW/");
      for(Tree t; (t = tr.readTree()) != null;) {
        //Count MWE statistics
        TregexMatcher m = pMWE.matcher(t);
        while(m.findNextMatchingNode()) {
          Tree match = m.getMatch();
          String label = match.value();
          List<CoreLabel> yield = match.taggedLabeledYield();
          StringBuilder termYield = new StringBuilder();
View Full Code Here

    List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
    Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
    tree.indexLeaves();
    SemanticGraph dependency = s.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);

    TregexPattern tgrepPattern = npOrPrpMentionPattern;
    TregexMatcher matcher = tgrepPattern.matcher(tree);
    while (matcher.find()) {
      Tree t = matcher.getMatch();
      List<Tree> mLeaves = t.getLeaves();
      int beginIdx = ((CoreLabel)mLeaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class)-1;
      int endIdx = ((CoreLabel)mLeaves.get(mLeaves.size()-1).label()).get(CoreAnnotations.IndexAnnotation.class);
View Full Code Here

  protected static void extractEnumerations(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
    List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
    Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
    SemanticGraph dependency = s.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);

    TregexPattern tgrepPattern = enumerationsMentionPattern;
    TregexMatcher matcher = tgrepPattern.matcher(tree);
    Map<IntPair, Tree> spanToMentionSubTree = Generics.newHashMap();
    while (matcher.find()) {
      matcher.getMatch();
      Tree m1 = matcher.getNode("m1");
      Tree m2 = matcher.getNode("m2");
View Full Code Here

  protected void compileAnnotations(HeadFinder hf) {
    TregexPatternCompiler compiler = new TregexPatternCompiler(hf);

    annotationPatterns.clear();
    for (Map.Entry<String, Pair<String, Function<TregexMatcher, String>>> annotation : annotations.entrySet()) {
      TregexPattern compiled;
      try {
        compiled = compiler.compile(annotation.getValue().first());
      } catch (TregexParseException e) {
        int nth = annotationPatterns.size() + 1;
        System.err.println("Parse exception on annotation pattern #" + nth + " initialization: " + e);
View Full Code Here

  }

  /** Find syntactic pattern in a sentence by tregex */
  private void findTreePattern(Tree tree, String tregex, Set<Pair<Integer, Integer>> foundPairs) {
    try {
      TregexPattern tgrepPattern = TregexPattern.compile(tregex);
      findTreePattern(tree, tgrepPattern, foundPairs);
    } catch (Exception e) {
      // shouldn't happen....
      throw new RuntimeException(e);
    }
View Full Code Here

      this.sourcePattern = null;
    }

    for (String pattern : targetPatterns) {
      try {
        TregexPattern p = tregexCompiler.compile(pattern);
        this.targetPatterns.add(p);
      } catch (edu.stanford.nlp.trees.tregex.TregexParseException pe) {
        throw new RuntimeException("Bad pattern: " + pattern, pe);
      }
    }
View Full Code Here

    try {
      BufferedReader br = new BufferedReader(new StringReader(editStr));
      List<TsurgeonPattern> tsp = new ArrayList<TsurgeonPattern>();
      while ((line = br.readLine()) != null) {
        if (DEBUG) System.err.print("Pattern is " + line);
        TregexPattern matchPattern = TregexPattern.compile(line);
        if (DEBUG) System.err.println(" [" + matchPattern + "]");
        tsp.clear();
        while (continuing(line = br.readLine())) {
          TsurgeonPattern p = Tsurgeon.parseOperation(line);
          if (DEBUG) System.err.println("Operation is " + line + " [" + p + "]");
View Full Code Here

    File f = new File(args[0]);
    try {
      //These bad trees in the Candito training set should be thrown out:
      //  (ROOT (SENT (" ") (. .)))
      //  (ROOT (SENT (. .)))
      TregexPattern pBadTree = TregexPattern.compile("@SENT <: @PUNC");
      TregexPattern pBadTree2 = TregexPattern.compile("@SENT <1 @PUNC <2 @PUNC !<3 __");
     
      BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(f), "UTF-8"));
      TreeReaderFactory trf = new FrenchTreeReaderFactory();
      TreeReader tr = trf.newTreeReader(br);
  
      int nTrees = 0;
      for(Tree t; (t = tr.readTree()) != null;nTrees++) {
        TregexMatcher m = pBadTree.matcher(t);
        TregexMatcher m2 = pBadTree2.matcher(t);
        if(m.find() || m2.find()) {
          System.err.println("Discarding tree: " + t.toString());
        } else {
          Tree fixedT = tt.transformTree(t);
          System.out.println(fixedT.toString());
View Full Code Here

    throws IOException
  {
    Queue<Integer> fSizeQueue = new LinkedList<Integer>(Arrays.asList(fSizes));
    Queue<String> fNameQueue = new LinkedList<String>(Arrays.asList(fNames));

    TregexPattern pBadTree = TregexPattern.compile("@SENT <: @PUNC");
    TregexPattern pBadTree2 = TregexPattern.compile("@SENT <1 @PUNC <2 @PUNC !<3 __");
   
    final TreeTransformer tt = new FTBCorrector();

    int size = fSizeQueue.remove();
    String filename = fNameQueue.remove();

    System.err.println("Outputing " + filename);

    PrintWriter writer =
      new PrintWriter(new BufferedWriter
                      (new OutputStreamWriter
                       (new FileOutputStream(filename), "UTF-8")));

    int outputCount = 0;
    for (String id : ids) {
      if (!treeMap.containsKey(id)) {
        System.err.println("Missing id: " + id);
        continue;
      }

      Tree tree = treeMap.get(id);
      TregexMatcher m = pBadTree.matcher(tree);
      TregexMatcher m2 = pBadTree2.matcher(tree);
      if(m.find() || m2.find()) {
        System.err.println("Discarding tree: " + tree.toString());
        continue;
      }
     
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.trees.tregex.TregexPattern$TRegexTreeReaderFactory

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.