Package edu.stanford.nlp.trees

Examples of edu.stanford.nlp.trees.TreeReader


    TreeReaderFactory trf = new FrenchXMLTreeReaderFactory(false);
    int totalTrees = 0;
    Set<String> morphAnalyses = Generics.newHashSet();
    try {
      // For each file in fileList: open it as UTF-8, read every tree, echo the
      // tree to stdout and collect the leaves' original text in morphAnalyses.
      for(File file : fileList) {
        TreeReader tr = trf.newTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF-8")));

        Tree t;
        int numTrees;
        // File name up to (not including) its last '.'.
        // NOTE(review): for a name without '.', lastIndexOf returns -1 and
        // substring(0, -1) throws — confirm inputs always carry an extension.
        String canonicalFileName = file.getName().substring(0, file.getName().lastIndexOf('.'));

        // readTree() returning null ends the loop; numTrees counts trees read from this file.
        for(numTrees = 0; (t = tr.readTree()) != null; numTrees++) {
          // The root label is cast to CoreLabel to fetch the sentence ID annotation.
          String ftbID = ((CoreLabel) t.label()).get(CoreAnnotations.SentenceIDAnnotation.class);
          // Output line: "<file name without extension>-<sentence id>", a tab, then the tree.
          System.out.printf("%s-%s\t%s%n",canonicalFileName, ftbID, t.toString());
          List<Label> leaves = t.yield();
          for(Label label : leaves) {
            // Leaves that are not CoreLabels are silently skipped.
            if(label instanceof CoreLabel)
              morphAnalyses.add(((CoreLabel) label).originalText());
          }
        }

        tr.close();
        // Per-file count goes to stderr; the running total is kept in totalTrees.
        System.err.printf("%s: %d trees%n",file.getName(),numTrees);
        totalTrees += numTrees;
      }

//wsg2011: Print out the observed morphological analyses
View Full Code Here


      TregexPattern pBadTree = TregexPattern.compile("@SENT <: @PUNC");
      TregexPattern pBadTree2 = TregexPattern.compile("@SENT <1 @PUNC <2 @PUNC !<3 __");
     
      BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(f), "UTF-8"));
      TreeReaderFactory trf = new FrenchTreeReaderFactory();
      TreeReader tr = trf.newTreeReader(br);
  
      int nTrees = 0;
      // Read trees until readTree() returns null. Trees matching either
      // "bad tree" pattern are reported on stderr and not printed; all others
      // are transformed and printed to stdout.
      // nTrees is incremented on every iteration, so it counts discarded trees too.
      for(Tree t; (t = tr.readTree()) != null;nTrees++) {
        TregexMatcher m = pBadTree.matcher(t);
        TregexMatcher m2 = pBadTree2.matcher(t);
        // Short-circuit: m2.find() is only evaluated when m.find() is false.
        if(m.find() || m2.find()) {
          System.err.println("Discarding tree: " + t.toString());
        } else {
          // tt is declared outside this excerpt.
          Tree fixedT = tt.transformTree(t);
          System.out.println(fixedT.toString());
        }
      }
     
      tr.close();
     
      System.err.printf("Wrote %d trees%n",nTrees);
     
    } catch (UnsupportedEncodingException e) {
      e.printStackTrace();
View Full Code Here

    MultiWordTreeExpander expander = new MultiWordTreeExpander();

    try {
      BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
      TreeReaderFactory trf = new SpanishTreeReaderFactory();
      TreeReader tr = trf.newTreeReader(br);

      PrintWriter pw = new PrintWriter(new PrintStream(new FileOutputStream(new File(treeFile + ".fixed")),false,"UTF-8"));

      int nTrees = 0;
      // For every tree read: fix it in place, expand multiword phrases,
      // optionally normalize, then write one tree per line to pw.
      // nTrees counts the trees processed.
      for(Tree t; (t = tr.readTree()) != null;nTrees++) {
        // unigramTagger and retainNER come from outside this excerpt.
        traverseAndFix(t, null, unigramTagger, retainNER);

        // Now "decompress" further the expanded trees formed by
        // multiword token splitting
        // NOTE(review): tn is passed here before the null check below, so
        // expandPhrases presumably tolerates a null normalizer — confirm.
        t = expander.expandPhrases(t, tn, tf);

        // Normalization is skipped when no normalizer (tn) was supplied.
        if (tn != null)
          t = tn.normalizeWholeTree(t, tf);

        pw.println(t.toString());
      }

      pw.close();
      tr.close();

      System.out.println("Processed " +nTrees+ " trees");

    } catch (UnsupportedEncodingException e) {
      e.printStackTrace();
View Full Code Here

      new TwoDimensionalCounter<String,String>();

    try {
      BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
      TreeReaderFactory trf = new SpanishTreeReaderFactory();
      TreeReader tr = trf.newTreeReader(br);

      for(Tree t; (t = tr.readTree()) != null;) {
        updateTagger(unigramTagger, t);
      }
      tr.close(); //Closes the underlying reader

      System.out.println("Resolving DUMMY tags");
      resolveDummyTags(treeFile, unigramTagger, retainNER,
                       normalize ? new SpanishTreeNormalizer(true, false, false) : null);
View Full Code Here

    String treeFile = args[0];
   
    TreeReaderFactory trf = new FrenchTreeReaderFactory();
    try {
      TreeReader tr = trf.newTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8")));
 
      // For every tree read, print one "word lemma morph" line per leaf,
      // followed by a blank line after each tree.
      for (Tree tree1; (tree1 = tr.readTree()) != null;) {
        List<Label> pretermYield = tree1.preTerminalYield();
        List<Label> yield = tree1.yield();
        int yieldLen = yield.size();
        // pretermYield is indexed with the same i as yield below, i.e. the
        // code relies on both lists having the same length and order.
        for (int i = 0; i < yieldLen; ++i) {
          CoreLabel rawToken = (CoreLabel) yield.get(i);
          String word = rawToken.value();
          String morphStr = rawToken.originalText();
          // Split the leaf's original text into (lemma, morphological analysis).
          Pair<String,String> lemmaMorph = MorphoFeatureSpecification.splitMorphString(word, morphStr);
          String lemma = lemmaMorph.first();
          String morph = lemmaMorph.second();
          // Fall back to the preterminal's value when the analysis is
          // missing, empty, or the literal "XXX".
          if (morph == null || morph.equals("") || morph.equals("XXX")) {
            morph = ((CoreLabel) pretermYield.get(i)).value();
          }
          System.out.printf("%s %s %s%n", word, lemma, morph);
        }
        System.out.println();
      }
   
      tr.close();
     
    } catch (UnsupportedEncodingException e) {
      e.printStackTrace();
    } catch (FileNotFoundException e) {
      e.printStackTrace();
View Full Code Here

    String treeFile = args[0];
    String morfetteFile = args[1];
    TreeReaderFactory trf = new FrenchTreeReaderFactory();
    try {
      TreeReader tr = trf.newTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8")));
      Iterator<List<CoreLabel>> morfetteItr = new MorfetteFileIterator(morfetteFile);
      // Walk the tree file and the Morfette analyses in lockstep, rewriting
      // each leaf as word + MORPHO_MARK + lemma + LEMMA_MARK + tag, and print
      // the rewritten tree.
      // Note the evaluation order: a tree is read before hasNext() is checked,
      // so if the analyses run out first, that tree has already been consumed.
      for (Tree tree; (tree = tr.readTree()) != null && morfetteItr.hasNext();) {
        List<CoreLabel> analysis = morfetteItr.next();
        List<Label> yield = tree.yield();
        // Only checked when assertions are enabled (-ea); otherwise a shorter
        // analysis list surfaces as an exception from analysis.get(i) below.
        assert analysis.size() == yield.size();

        int yieldLen = yield.size();
        for (int i = 0; i < yieldLen; ++i) {
          CoreLabel tokenAnalysis = analysis.get(i);
          Label token = yield.get(i);
          // getLemma is defined outside this excerpt.
          String lemma = getLemma(token.value(), tokenAnalysis.lemma());
          String newLeaf = String.format("%s%s%s%s%s", token.value(),
              MorphoFeatureSpecification.MORPHO_MARK,
              lemma,
              MorphoFeatureSpecification.LEMMA_MARK,
              tokenAnalysis.tag());
          // The leaf label is mutated in place; the cast assumes it is a CoreLabel.
          ((CoreLabel) token).setValue(newLeaf);
        }
        System.out.println(tree.toString());
      }

      // Warn if either input still has data. This calls readTree() once more,
      // which consumes (and drops) a tree if one remains.
      if (tr.readTree() != null || morfetteItr.hasNext()) {
        System.err.println("WARNING: Uneven input files!");
      }

      tr.close();

    } catch (UnsupportedEncodingException e) {
      e.printStackTrace();
    } catch (FileNotFoundException e) {
      e.printStackTrace();
View Full Code Here

      System.exit(-1);
    }
   
    TreeReaderFactory trf = new ArabicTreeReaderFactory();
    try {
      TreeReader tr = trf.newTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(args[0]), "UTF-8")));
      TreeVisitor visitor = new MWETreeVisitorExternal();
     
      int treeId = 0;
      // Read trees until readTree() returns null, apply the visitor to each
      // and print the tree; treeId ends up as the number of trees read.
      for (Tree tree; (tree = tr.readTree()) != null; ++treeId) {
        if (tree.value().equals("ROOT")) {
          // Skip over the ROOT tag
          tree = tree.firstChild();
        }
        // The tree is printed after visitTree(), so any in-place change made
        // by the visitor shows up in the output.
        visitor.visitTree(tree);
        System.out.println(tree.toString());
      }
      tr.close();
     
      System.err.printf("Processed %d trees.%n", treeId);
   
    } catch (UnsupportedEncodingException e) {
      e.printStackTrace();
View Full Code Here

    String treeFile = args[0];

    try {
      BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
      TreeReaderFactory trf = new SpanishTreeReaderFactory();
      TreeReader tr = trf.newTreeReader(br);

      StringBuilder sb = new StringBuilder();
      String nl = System.getProperty("line.separator");

      Pattern nePattern = Pattern.compile("^grup\\.nom\\.");
      Pattern npPattern = Pattern.compile("^np0000.$");

      // For every tree read, append one "word<TAB>tag" line per preterminal
      // to sb, followed by a blank line after each tree.
      for (Tree tree; (tree = tr.readTree()) != null;) {
        // Iterates over the nodes of the tree; only preterminals are processed.
        for(Tree t : tree) {
          if(!t.isPreTerminal())
            continue;

          // 'O' (capital letter) falls through to the default switch case below.
          char type = 'O';
          // NOTE(review): ancestor(1, tree) is presumably t's parent (the
          // word's grandparent, hence the name) — confirm; if it can be null
          // for a preterminal root, the next line throws.
          Tree grandma = t.ancestor(1, tree);
          String grandmaValue = ((CoreLabel) grandma.label()).value();

          // grup.nom.x
          // The prefix "grup.nom." is 9 characters, so index 9 is the first
          // character after it.
          // NOTE(review): a value of exactly "grup.nom." would make charAt(9)
          // throw — confirm that cannot occur.
          if(nePattern.matcher(grandmaValue).find())
            type = grandmaValue.charAt(9);

          // else check the pos for np0000x or not
          // The pattern requires "np0000" plus exactly one more character;
          // index 6 is that last character.
          else {
            String pos = ((CoreLabel) t.label()).value();
            if(npPattern.matcher(pos).find())
              type = pos.charAt(6);
          }

          Tree wordNode = t.firstChild();
          String word = ((CoreLabel) wordNode.label()).value();
          sb.append(word).append("\t");
          // Map the type character to the emitted tag; anything unrecognized
          // (including the initial 'O') is emitted as "O".
          switch(type) {
          case 'p':
            sb.append("PERS");
            break;
          case 'l':
            sb.append("LUG");
            break;
          case 'o':
            sb.append("ORG");
            break;
          case '0':
            sb.append("OTROS");
            break;
          default:
            sb.append("O");
          }
          sb.append(nl);
        }
        sb.append(nl);
      }
      System.out.print(sb.toString());

      tr.close();
    } catch (UnsupportedEncodingException e) {
      e.printStackTrace();
    } catch (FileNotFoundException e) {
      e.printStackTrace();
    } catch (IOException e) {
View Full Code Here

    File f = new File(args[0]);
    try {

      BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(f), "UTF-8"));
      TreeReaderFactory trf = new ArabicTreeReaderFactory.ArabicRawTreeReaderFactory();
      TreeReader tr = trf.newTreeReader(br);

      int nTrees = 0;
      // Read trees until readTree() returns null, transform each one and
      // print it to stdout; nTrees counts the trees processed.
      for(Tree t; (t = tr.readTree()) != null;nTrees++) {
        // tt is declared outside this excerpt.
        Tree fixedT = tt.transformTree(t);
        System.out.println(fixedT.toString());
      }

      tr.close();

      System.err.printf("Wrote %d trees%n",nTrees);

    } catch (UnsupportedEncodingException e) {
      e.printStackTrace();
View Full Code Here

public class ThreadedParserSlowITest extends TestCase {
  public static List<Tree> readTrees(String filename, String encoding) {
    ArrayList<Tree> trees = new ArrayList<Tree>();
    try {
      TreeReaderFactory trf = new LabeledScoredTreeReaderFactory();
      TreeReader tr = trf.newTreeReader(new InputStreamReader(
                        new FileInputStream(filename), encoding));
      Tree next;
      while ((next = tr.readTree()) != null) {
        trees.add(next);
      }
      System.out.println("Read " + trees.size() + " trees from " + filename);
      return trees;
    } catch (IOException e) {
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.trees.TreeReader

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.