Package com.clearnlp.constituent

Examples of com.clearnlp.constituent.CTReader


  {
    String MY_DIR = args[0];
    String O5_DIR = args[1];
   
    File root = new File(MY_DIR);
    CTReader myReader, o5Reader;
    String myPath, o5Path;
    CTTree myTree, o5Tree;
   
    for (File genre : root.listFiles())
    {
      if (!genre.isDirectory()) continue;
     
      for (File source : genre.listFiles())
      {
        if (!source.isDirectory()) continue;
       
        for (File section : source.listFiles())
        {
          if (!section.isDirectory())  continue;
         
          for (File myParse : section.listFiles(new FileExtFilter("parse")))
          {
            myPath = myParse.getPath();
            o5Path = O5_DIR + myPath.substring(MY_DIR.length());

            myReader = new CTReader(UTInput.createBufferedFileReader(myPath));
            o5Reader = new CTReader(UTInput.createBufferedFileReader(o5Path));
           
            while ((myTree = myReader.nextTree()) != null)
            {
              o5Tree = o5Reader.nextTree();
//             
              if (!myTree.compareBrackets(o5Tree))
              {
View Full Code Here


    }
  }
 
  void checkConstituentTags(String[] args)
  {
    CTReader reader = new CTReader(UTInput.createBufferedFileReader(args[0]));
    Set<String> phrases = new TreeSet<String>();
    Set<String> tokens  = new TreeSet<String>();
    CTTree tree;
   
    while ((tree = reader.nextTree()) != null)
      checkConstituents(tree.getRoot(), phrases, tokens);
   
    reader.close();
   
    for (String s : phrasesSystem.out.println(s);
    System.out.println();
    for (String s : tokens)    System.out.println(s);
  }
View Full Code Here

  void cleanSejong(String[] args)
  {
    String[] ptbFiles = UTFile.getSortedFileList(args[0], "ptb");
    String[] rawFiles = UTFile.getSortedFileList(args[1], "raw");
   
    CTReader  pin = new CTReader();
    TOKReader tin = new TOKReader(0);
   
    int i, size = ptbFiles.length;
    List<String> tokens;
    CTTree tree;
   
    for (i=0; i<size; i++)
    {
      pin.open(UTInput.createBufferedFileReader(ptbFiles[i]));
      tin.open(UTInput.createBufferedFileReader(rawFiles[i]));
      System.out.println(rawFiles[i]);
     
      while ((tree = pin.nextTree()) != null)
      {
        tokens = tin.next();
       
        if (tree.getTokens().size() != tokens.size())
          System.out.println(UTArray.join(tokens, " "));
View Full Code Here

    }
  }
 
  void printTreebank(String[] args)
  {
    CTReader reader = new CTReader(UTInput.createBufferedFileReader(args[0]));
    PrintStream fout = UTOutput.createPrintBufferedFileStream(args[1]);
    CTTree tree;
   
    while ((tree = reader.nextTree()) != null)
      fout.println(tree.toString()+"\n");
   
    reader.close();
    fout.close();
  }
View Full Code Here

  // NOTE(review): the method signature that owns this body is not part of this
  // excerpt; `args` is presumably the String[] parameter of that method.
  //
  // For every input file returned by UTFile.getSortedFileList(args[1], "ptb"),
  // reads its constituent trees, converts each one to a dependency tree with a
  // KaistC2DConverter built from the head rules read from args[0], and writes
  // the result to a sibling file whose extension is replaced by "dep".
  {
    // args[0]: head-rule file consumed by HeadRuleMap.
    AbstractC2DConverter converter = new KaistC2DConverter(new HeadRuleMap(UTInput.createBufferedFileReader(args[0])));
    // args[1]: passed to getSortedFileList together with "ptb" — presumably a
    // directory and a file extension; confirm against UTFile.
    String[] inputFiles = UTFile.getSortedFileList(args[1], "ptb");
    String outputFile;
    PrintStream fout;
    CTReader reader;
    DEPTree dTree;
    CTTree cTree;
   
    for (String inputFile : inputFiles)
    {
      // Output path is derived from the input path via replaceExtension(..., "dep").
      outputFile = UTFile.replaceExtension(inputFile, "dep");
      reader = new CTReader(UTInput.createBufferedFileReader(inputFile));
      fout = UTOutput.createPrintBufferedFileStream(outputFile);
      // Progress output: name of the file about to be written.
      System.out.println(outputFile);
     
      while ((cTree = reader.nextTree()) != null)
      {
        dTree = converter.toDEPTree(cTree);
        // The appended "\n" plus println leaves a blank line after each tree.
        fout.println(dTree.toStringDEP()+"\n");
      }
     
      // A fresh reader and stream are opened per file, so both are closed per file.
      reader.close();
      fout.close();
    }
  }
View Full Code Here

    }
  }
 
  void extractDEP(String[] args)
  {
    CTReader reader = new CTReader(UTInput.createBufferedFileReader(args[0]));
    Set<String> set = new HashSet<String>();
    Pattern delim = Pattern.compile("\\+");
    CTTree tree;
   
    while ((tree = reader.nextTree()) != null)
    {
      extractDEPAux(tree.getRoot(), set, delim);
    }

    List<String> list = new ArrayList<String>(set);
View Full Code Here

    }
  }
 
  void extractPos(String[] args)
  {
    CTReader reader = new CTReader(UTInput.createBufferedFileReader(args[0]));
    Set<String> set = new HashSet<String>();
    Pattern delim = Pattern.compile("\\+");
    CTTree tree;
   
    while ((tree = reader.nextTree()) != null)
    {
      for (CTNode node : tree.getTokens())
        for (String pos : delim.split(node.pTag))
          set.add(pos);
    }
View Full Code Here

    fnew.close();
  }
 
  void wc(String inputFile)
  {
    CTReader reader = new CTReader(UTInput.createBufferedFileReader(inputFile));
    CTTree tree;
    int sc, wc;
   
    for (sc=0,wc=0; (tree = reader.nextTree()) != null; sc++)
      wc += tree.getTokens().size();
   
    System.out.println(sc+" "+wc);
  }
View Full Code Here

    System.out.println(sc+" "+wc);
  }
 
  void stripTrees(String[] args)
  {
    CTReader reader = new CTReader(UTInput.createBufferedFileReader(args[0]));
    PrintStream fout = UTOutput.createPrintBufferedFileStream(args[0]+".strip");
    Set<String> set = new HashSet<String>();
    String forms;
    CTTree tree;
    int i;
   
    for (i=0; (tree = reader.nextTree()) != null; i++)
    {
      forms = tree.toForms();
     
      if (!set.contains(forms))
      {
View Full Code Here

    System.out.println(i+" -> "+set.size());
  }
 
  void splitTrees(String[] args)
  {
    CTReader reader = new CTReader(UTInput.createBufferedFileReader(args[0]));
    PrintStream[] fout = new PrintStream[4];
    CTTree tree;
    int i, j;
   
    fout[0] = UTOutput.createPrintBufferedFileStream(args[0]+".trn.parse");
    fout[1] = UTOutput.createPrintBufferedFileStream(args[0]+".trn.raw");
    fout[2] = UTOutput.createPrintBufferedFileStream(args[0]+".tst.parse");
    fout[3] = UTOutput.createPrintBufferedFileStream(args[0]+".tst.raw");
   
    for (i=0; (tree = reader.nextTree()) != null; i++)
    {
      j = (i%6 == 0) ? 2 : 0;
     
      fout[j.println(tree.toString()+"\n");
      fout[j+1].println(tree.toForms());
View Full Code Here

TOP

Related Classes of com.clearnlp.constituent.CTReader

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.