Package com.clearnlp.constituent

Examples of com.clearnlp.constituent.CTReader


    fnew.close();
  }
 
  void wc(String inputFile)
  {
    CTReader reader = new CTReader(UTInput.createBufferedFileReader(inputFile));
    CTTree tree;
    int sc, wc;
   
    for (sc=0,wc=0; (tree = reader.nextTree()) != null; sc++)
      wc += tree.getTokens().size();
   
    System.out.println(sc+" "+wc);
  }
View Full Code Here


    System.out.println(sc+" "+wc);
  }
 
  void stripTrees(String[] args)
  {
    CTReader reader = new CTReader(UTInput.createBufferedFileReader(args[0]));
    PrintStream fout = UTOutput.createPrintBufferedFileStream(args[0]+".strip");
    Set<String> set = new HashSet<String>();
    String forms;
    CTTree tree;
    int i;
   
    for (i=0; (tree = reader.nextTree()) != null; i++)
    {
      forms = tree.toForms();
     
      if (!set.contains(forms))
      {
View Full Code Here

    System.out.println(i+" -> "+set.size());
  }
 
  void splitTrees(String[] args)
  {
    CTReader reader = new CTReader(UTInput.createBufferedFileReader(args[0]));
    PrintStream[] fout = new PrintStream[4];
    CTTree tree;
    int i, j;
   
    fout[0] = UTOutput.createPrintBufferedFileStream(args[0]+".trn.parse");
    fout[1] = UTOutput.createPrintBufferedFileStream(args[0]+".trn.raw");
    fout[2] = UTOutput.createPrintBufferedFileStream(args[0]+".tst.parse");
    fout[3] = UTOutput.createPrintBufferedFileStream(args[0]+".tst.raw");
   
    for (i=0; (tree = reader.nextTree()) != null; i++)
    {
      j = (i%6 == 0) ? 2 : 0;
     
      fout[j].println(tree.toString()+"\n");
      fout[j+1].println(tree.toForms());
View Full Code Here

    for (PrintStream f : fout)  f.close();
  }
 
  void printTreesForCKY(String[] args)
  {
    CTReader reader = new CTReader(UTInput.createBufferedFileReader(args[0]));
    PrintStream fout = UTOutput.createPrintBufferedFileStream(args[1]);
    CTTree tree;
    CTNode root;
    int count;
   
    while ((tree = reader.nextTree()) != null)
    {
      root = tree.getRoot();
     
      if (root.getChildrenSize() == 1)
      {
        count = stripPunct(tree);
       
        if (root.getChildrenSize() > 0 && tree.getTokens().size()-count >= 4 && !containsEmptyCategories(tree) && isCKYTree(root.getChild(0)))
          fout.println(tree+"\n");
      }
    }
   
    reader.close();
    fout.close();
  }
View Full Code Here

    return true;
  }
 
  void traverse(String inputFile)
  {
    CTReader reader = new CTReader(UTInput.createBufferedFileReader(inputFile));
    CTTree tree;
   
    while ((tree = reader.nextTree()) != null)
      traverseAux(tree.getRoot());
  }
View Full Code Here

    }
  }
 
  void checkConstituentTags(String[] args)
  {
    CTReader reader = new CTReader(UTInput.createBufferedFileReader(args[0]));
    Set<String> phrases = new TreeSet<String>();
    Set<String> tokens  = new TreeSet<String>();
    CTTree tree;
   
    while ((tree = reader.nextTree()) != null)
      checkConstituents(tree.getRoot(), phrases, tokens);
   
    reader.close();
   
    for (String s : phrasesSystem.out.println(s);
    System.out.println();
    for (String s : tokens)    System.out.println(s);
  }
View Full Code Here

  void cleanSejong(String[] args)
  {
    String[] ptbFiles = UTFile.getSortedFileList(args[0], "ptb");
    String[] rawFiles = UTFile.getSortedFileList(args[1], "raw");
   
    CTReader  pin = new CTReader();
    TOKReader tin = new TOKReader(0);
   
    int i, size = ptbFiles.length;
    List<String> tokens;
    CTTree tree;
   
    for (i=0; i<size; i++)
    {
      pin.open(UTInput.createBufferedFileReader(ptbFiles[i]));
      tin.open(UTInput.createBufferedFileReader(rawFiles[i]));
      System.out.println(rawFiles[i]);
     
      while ((tree = pin.nextTree()) != null)
      {
        tokens = tin.next();
       
        if (tree.getTokens().size() != tokens.size())
          System.out.println(UTArray.join(tokens, " "));
View Full Code Here

    }
  }
 
  void printTreebank(String[] args)
  {
    CTReader reader = new CTReader(UTInput.createBufferedFileReader(args[0]));
    PrintStream fout = UTOutput.createPrintBufferedFileStream(args[1]);
    CTTree tree;
   
    while ((tree = reader.nextTree()) != null)
      fout.println(tree.toString()+"\n");
   
    reader.close();
    fout.close();
  }
View Full Code Here

  {
    AbstractC2DConverter converter = new KaistC2DConverter(new HeadRuleMap(UTInput.createBufferedFileReader(args[0])));
    String[] inputFiles = UTFile.getSortedFileList(args[1], "ptb");
    String outputFile;
    PrintStream fout;
    CTReader reader;
    DEPTree dTree;
    CTTree cTree;
   
    for (String inputFile : inputFiles)
    {
      outputFile = UTFile.replaceExtension(inputFile, "dep");
      reader = new CTReader(UTInput.createBufferedFileReader(inputFile));
      fout = UTOutput.createPrintBufferedFileStream(outputFile);
      System.out.println(outputFile);
     
      while ((cTree = reader.nextTree()) != null)
      {
        dTree = converter.toDEPTree(cTree);
        fout.println(dTree.toStringDEP()+"\n");
      }
     
      reader.close();
      fout.close();
    }
  }
View Full Code Here

    }
  }
 
  void extractDEP(String[] args)
  {
    CTReader reader = new CTReader(UTInput.createBufferedFileReader(args[0]));
    Set<String> set = new HashSet<String>();
    Pattern delim = Pattern.compile("\\+");
    CTTree tree;
   
    while ((tree = reader.nextTree()) != null)
    {
      extractDEPAux(tree.getRoot(), set, delim);
    }

    List<String> list = new ArrayList<String>(set);
View Full Code Here

TOP

Related Classes of com.clearnlp.constituent.CTReader

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by Oracle Inc. Contact coftware#gmail.com.