Package com.clearnlp.constituent

Examples of com.clearnlp.constituent.CTReader


    }
  }
 
  void extractPos(String[] args)
  {
    CTReader reader = new CTReader(UTInput.createBufferedFileReader(args[0]));
    Set<String> set = new HashSet<String>();
    Pattern delim = Pattern.compile("\\+");
    CTTree tree;
   
    while ((tree = reader.nextTree()) != null)
    {
      for (CTNode node : tree.getTokens())
        for (String pos : delim.split(node.pTag))
          set.add(pos);
    }
View Full Code Here


    fnew.close();
  }
 
  void wc(String inputFile)
  {
    CTReader reader = new CTReader(UTInput.createBufferedFileReader(inputFile));
    CTTree tree;
    int sc, wc;
   
    for (sc=0,wc=0; (tree = reader.nextTree()) != null; sc++)
      wc += tree.getTokens().size();
   
    System.out.println(sc+" "+wc);
  }
View Full Code Here

    System.out.println(sc+" "+wc);
  }
 
  void stripTrees(String[] args)
  {
    CTReader reader = new CTReader(UTInput.createBufferedFileReader(args[0]));
    PrintStream fout = UTOutput.createPrintBufferedFileStream(args[0]+".strip");
    Set<String> set = new HashSet<String>();
    String forms;
    CTTree tree;
    int i;
   
    for (i=0; (tree = reader.nextTree()) != null; i++)
    {
      forms = tree.toForms();
     
      if (!set.contains(forms))
      {
View Full Code Here

    System.out.println(i+" -> "+set.size());
  }
 
  void splitTrees(String[] args)
  {
    CTReader reader = new CTReader(UTInput.createBufferedFileReader(args[0]));
    PrintStream[] fout = new PrintStream[4];
    CTTree tree;
    int i, j;
   
    fout[0] = UTOutput.createPrintBufferedFileStream(args[0]+".trn.parse");
    fout[1] = UTOutput.createPrintBufferedFileStream(args[0]+".trn.raw");
    fout[2] = UTOutput.createPrintBufferedFileStream(args[0]+".tst.parse");
    fout[3] = UTOutput.createPrintBufferedFileStream(args[0]+".tst.raw");
   
    for (i=0; (tree = reader.nextTree()) != null; i++)
    {
      j = (i%6 == 0) ? 2 : 0;
     
      fout[j.println(tree.toString()+"\n");
      fout[j+1].println(tree.toForms());
View Full Code Here

    for (PrintStream f : foutf.close();
  }
 
  void printTreesForCKY(String[] args)
  {
    CTReader reader = new CTReader(UTInput.createBufferedFileReader(args[0]));
    PrintStream fout = UTOutput.createPrintBufferedFileStream(args[1]);
    CTTree tree;
    CTNode root;
    int count;
   
    while ((tree = reader.nextTree()) != null)
    {
      root = tree.getRoot();
     
      if (root.getChildrenSize() == 1)
      {
        count = stripPunct(tree);
       
        if (root.getChildrenSize() > 0 && tree.getTokens().size()-count >= 4 && !containsEmptyCategories(tree) && isCKYTree(root.getChild(0)))
          fout.println(tree+"\n");
      }
    }
   
    reader.close();
    fout.close();
  }
View Full Code Here

    return true;
  }
 
  void traverse(String inputFile)
  {
    CTReader reader = new CTReader(UTInput.createBufferedFileReader(inputFile));
    CTTree tree;
   
    while ((tree = reader.nextTree()) != null)
      traverseAux(tree.getRoot());
  }
View Full Code Here

   * @param outputDir the directory to save normalized tree files.
   * @param extension the tree file extension (e.g., {@code parse}).
   */
  public void run(String inputDir, String outputDir, String extension)
  {
    CTReader      reader;
    CTTree        tree;
    PrintStream   fout;
    StringBuilder build;
   
    File dir = new File(outputDir);
    if (!dir.exists())  dir.mkdirs();
   
    inputDir  += File.separator;
    outputDir += File.separator;
   
    for (String filename : new File(inputDir).list(new FileExtFilter(extension)))
    {
      reader = new CTReader(UTInput.createBufferedFileReader(inputDir + filename));
      fout   = UTOutput.createPrintBufferedFileStream(outputDir + filename);
     
      while ((tree = reader.nextTree()) != null)
      {
      /*  build = new StringBuilder();
       
        for (CTNode node : tree.getTokens())
        {
          build.append(" ");
          build.append(node.form);
          build.append("/");
          build.append(node.pTag);
        }
       
        fout.println(build.substring(1));*/
       
        for (CTNode node : tree.getTokens())
        {
          build = new StringBuilder();
         
          build.append(node.form);
          build.append(AbstractColumnReader.DELIM_COLUMN);
          build.append(node.pTag);
         
          fout.println(build.toString());
        }
       
        fout.println();
      }
     
      reader.close();
      fout.close();
    }
  }
View Full Code Here

   * @param inputFile the name of the file containing input trees.
   * @param outputFile the name of the file to contain output trees.
   */
  public void print(String inputFile, String outputFile)
  {
    CTReader    reader = new CTReader(UTInput.createBufferedFileReader(inputFile));
    PrintStream fout   = UTOutput.createPrintBufferedFileStream(outputFile);
    CTTree      tree;

    while ((tree = reader.nextTree()) != null)
      fout.println(tree.toStringLine());
   
    fout.close();
  }
View Full Code Here

    }
  }
 
  public void normalize(String inputFile, String outputFile)
  {
    CTReader reader = new CTReader(UTInput.createBufferedFileReader(inputFile));
    PrintStream fout = UTOutput.createPrintBufferedFileStream(outputFile);
    CTTree tree;
   
    while ((tree = reader.nextTree()) != null)
    {
      CTLib.normalizeIndices(tree);
      fout.println(tree.toString()+"\n");
    }
   
    reader.close();
    fout.close();
  }
View Full Code Here

    Set<String> ids = UTInput.getStringSet(new FileInputStream(args[0]));
    ObjectIntHashMap<String> scm = new ObjectIntHashMap<>();
    ObjectIntHashMap<String> wcm = new ObjectIntHashMap<>();
    Pattern p = Pattern.compile("/");
    String root = args[1];
    CTReader reader;
    String genre;
    CTTree tree;
    int sc, wc;
   
    for (String id : ids)
    {
      reader = new CTReader(UTInput.createBufferedFileReader(root+"/"+id+".parse"));
      genre  = p.split(id)[0];
      sc = wc = 0;
     
      while ((tree = reader.nextTree()) != null)
      {
        wc += tree.getTokens().size();
        sc ++;
      }
     
View Full Code Here

TOP

Related Classes of com.clearnlp.constituent.CTReader

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.