Package edu.stanford.nlp.trees

Examples of edu.stanford.nlp.trees.TreeReader


     * @see edu.jhu.hltcoe.sp.data.depparse.AgigaSentence#getStanfordContituencyTree()
     */
    public Tree getStanfordContituencyTree() {
        TreeFactory tf = new LabeledScoredTreeFactory();
        StringReader r = new StringReader(getParseText());
        TreeReader tr = new PennTreeReader(r, tf);
        try {
            return tr.readTree();
        } catch (IOException e) {
            throw new RuntimeException("Error: IOException should not be thrown by StringReader");
        }
    }
View Full Code Here


    Set<String> uniquePOSSequences = Generics.newHashSet();
   
    try {
      BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
      TreeReaderFactory trf = new FrenchTreeReaderFactory();
      TreeReader tr = trf.newTreeReader(br);

      final TregexPattern pMWE = TregexPattern.compile("/^MW/");
      for(Tree t; (t = tr.readTree()) != null;) {
        //Count MWE statistics
        TregexMatcher m = pMWE.matcher(t);
        while(m.findNextMatchingNode()) {
          Tree match = m.getMatch();
          String label = match.value();
          List<CoreLabel> yield = match.taggedLabeledYield();
          StringBuilder termYield = new StringBuilder();
          StringBuilder posYield = new StringBuilder();
          for(CoreLabel cl : yield) {
            termYield.append(cl.word()).append(" ");
            posYield.append(cl.tag()).append(" ");
          }
          mweLabelToString.incrementCount(label, termYield.toString().trim());
          uniquePOSSequences.add(posYield.toString().trim());
        }
      }
      tr.close(); //Closes the underlying reader
     
      System.out.printf("Type\t#Type\t#Single\t%%Single\t%%Total%n");
     
      double nMWEs = mweLabelToString.totalCount();
      int nAllSingletons = 0;
View Full Code Here

    String inputFile = args[0];
    File treeFile = new File(inputFile);
    try {
      TreeReaderFactory trf = new HebrewTreeReaderFactory();
      BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), tlp.getEncoding()));
      TreeReader tr = trf.newTreeReader(br);

      PrintWriter pwDev = new PrintWriter(new PrintStream(new FileOutputStream(inputFile + ".clean.dev"),false,tlp.getEncoding()));
      PrintWriter pwTrain = new PrintWriter(new PrintStream(new FileOutputStream(inputFile + ".clean.train"),false,tlp.getEncoding()));
      PrintWriter pwTest = new PrintWriter(new PrintStream(new FileOutputStream(inputFile + ".clean.test"),false,tlp.getEncoding()));

      int numTrees = 0;
      for(Tree t; ((t = tr.readTree()) != null); numTrees++) {
        if(numTrees < 483)
          pwDev.println(t.toString());
        else if(numTrees >= 483 && numTrees < 5724)
          pwTrain.println(t.toString());
        else
          pwTest.println(t.toString());
      }

      tr.close();
      pwDev.close();
      pwTrain.close();
      pwTest.close();

      System.err.printf("Processed %d trees.%n",numTrees);
View Full Code Here

    TreebankLanguagePack tlp = new HebrewTreebankLanguagePack();
    File treeFile = new File(args[0]);
    try {
      TreeReaderFactory trf = new HebrewTreeReaderFactory();
      BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), tlp.getEncoding()));
      TreeReader tr = trf.newTreeReader(br);

      int numTrees = 0;
      for(Tree t; ((t = tr.readTree()) != null); numTrees++)
        System.out.println(t.toString());

      tr.close();
      System.err.printf("Processed %d trees.%n",numTrees);

    } catch (UnsupportedEncodingException e) {
      e.printStackTrace();
    } catch (FileNotFoundException e) {
View Full Code Here

      TwoDimensionalCounter<String, String> unigramTagger) {

    try {
      BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
      TreeReaderFactory trf = new FrenchTreeReaderFactory();
      TreeReader tr = trf.newTreeReader(br);

      PrintWriter pw = new PrintWriter(new PrintStream(new FileOutputStream(new File(treeFile + ".fixed")),false,"UTF-8"));

      int nTrees = 0;
      for(Tree t; (t = tr.readTree()) != null;nTrees++) {
        traverseAndFix(t, pretermLabel, unigramTagger);
        pw.println(t.toString());
      }
      pw.close();
      tr.close();

      System.out.println("Processed " +nTrees+ " trees");

    } catch (UnsupportedEncodingException e) {
      e.printStackTrace();
View Full Code Here

      new TwoDimensionalCounter<String,String>();

    try {
      BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
      TreeReaderFactory trf = new FrenchTreeReaderFactory();
      TreeReader tr = trf.newTreeReader(br);

      for(Tree t; (t = tr.readTree()) != null;) {
        countMWEStatistics(t, unigramTagger,
                           labelPreterm, pretermLabel, labelTerm, termLabel);
      }
      tr.close(); //Closes the underlying reader

      System.out.println("Generating {MWE Type -> Terminal}");
      printCounter(labelTerm, "label_term.csv");

      System.out.println("Generating {Terminal -> MWE Type}");
View Full Code Here

    Tree t;
    for (File file : fileList) {
      Reader in =
        new BufferedReader(new InputStreamReader(new FileInputStream(file),
                                                 SpanishTreebankLanguagePack.STB_ENCODING));
      TreeReader tr = trf.newTreeReader(in);

      // Tree reading will implicitly perform tree normalization for us
      while ((t = tr.readTree()) != null) {
        // Update tagger with this tree
        List<CoreLabel> yield = t.taggedLabeledYield();
        for (CoreLabel leafLabel : yield) {
          if (leafLabel.tag().equals(SpanishTreeNormalizer.MW_TAG))
            continue;
View Full Code Here

    this.noNormalization = noNormalization;
    this.retainPPClr = retainPPClr;
  }

  public TreeReader newTreeReader(Reader in) {
    TreeReader tr = null;
    if(noNormalization) {
      tr = new PennTreeReader(in, new LabeledScoredTreeFactory(), new TreeNormalizer(), new ArabicTreebankTokenizer(in));
    } else
      tr = new PennTreeReader(in, new LabeledScoredTreeFactory(), new ArabicTreeNormalizer(retainNPTmp,retainPRD,changeNoLabels, retainNPSbj, retainPPClr), new ArabicTreebankTokenizer(in));
View Full Code Here

    for (final File file : fileList) {
      pool.execute(new Runnable() {
          public void run() {
            try {
              Reader in = new BufferedReader(new InputStreamReader(new FileInputStream(file), "ISO-8859-1"));
              TreeReader tr = trf.newTreeReader(file.getPath(), in);
              process(file, tr, posPattern, wordPattern, plainPrint);
              tr.close();
            } catch (FileNotFoundException e) {
              e.printStackTrace();
            } catch (IOException e) {
              e.printStackTrace();
            }
View Full Code Here

    TreebankLanguagePack tlp = new NegraPennLanguagePack();
    TreeReaderFactory trf = new NegraPennTreeReaderFactory(2,false,false,tlp);

    try {
      TreeReader tr = trf.newTreeReader(IOUtils.readerFromString(args[0], tlp.getEncoding()));

      for (Tree t; (t = tr.readTree()) != null; ) {
        t.pennPrint();
      }

      tr.close();

    } catch (UnsupportedEncodingException e) {
      e.printStackTrace();
    } catch (FileNotFoundException e) {
      e.printStackTrace();
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.trees.TreeReader

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.