Package edu.stanford.nlp.international.french.pipeline

Examples of edu.stanford.nlp.international.french.pipeline.FTBCorrector


    Queue<String> fNameQueue = new LinkedList<String>(Arrays.asList(fNames));

    TregexPattern pBadTree = TregexPattern.compile("@SENT <: @PUNC");
    TregexPattern pBadTree2 = TregexPattern.compile("@SENT <1 @PUNC <2 @PUNC !<3 __");
   
    final TreeTransformer tt = new FTBCorrector();

    int size = fSizeQueue.remove();
    String filename = fNameQueue.remove();

    System.err.println("Outputing " + filename);

    PrintWriter writer =
      new PrintWriter(new BufferedWriter
                      (new OutputStreamWriter
                       (new FileOutputStream(filename), "UTF-8")));

    int outputCount = 0;
    for (String id : ids) {
      if (!treeMap.containsKey(id)) {
        System.err.println("Missing id: " + id);
        continue;
      }

      Tree tree = treeMap.get(id);
      TregexMatcher m = pBadTree.matcher(tree);
      TregexMatcher m2 = pBadTree2.matcher(tree);
      if(m.find() || m2.find()) {
        System.err.println("Discarding tree: " + tree.toString());
        continue;
      }
     
      // Punctuation normalization, etc.
      Tree backupCopy = tree.deepCopy();
      tree = tt.transformTree(tree);
      if (tree.firstChild().children().length == 0) {
        // Some trees have only punctuation. Tregex will mangle these. Don't throw those away.
        System.err.println("Saving tree: " + tree.toString());
        System.err.println("Backup: " + backupCopy.toString());
        tree = backupCopy;
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.international.french.pipeline.FTBCorrector

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.