Package edu.ucla.sspace.tools

Source Code of edu.ucla.sspace.tools.PUkWacSentenceStripper

/*
* Copyright (c) 2010, Lawrence Livermore National Security, LLC. Produced at
* the Lawrence Livermore National Laboratory. Written by Keith Stevens,
* kstevens@cs.ucla.edu OCEC-10-073 All rights reserved.
*
* This file is part of the C-Cat package and is covered under the terms and
* conditions therein.
*
* The S-Space package is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation and distributed hereunder to you.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
* EXPRESS OR IMPLIED ARE MADE.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
* NO REPRESENTATIONS OR WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY
* PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
* WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
* RIGHTS.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/


package edu.ucla.sspace.tools;


import edu.ucla.sspace.dependency.DependencyExtractor;
import edu.ucla.sspace.dependency.CoNLLDependencyExtractor;
import edu.ucla.sspace.dependency.DependencyTreeNode;

import edu.ucla.sspace.text.Document;
import edu.ucla.sspace.text.UkWacDependencyFileIterator;

import java.util.Iterator;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintWriter;


/**
* @author Keith Stevens
*/
public class PUkWacSentenceStripper {

    public static void main(String[] args) throws IOException {
        Iterator<Document> ukWacIter = new UkWacDependencyFileIterator(args[0]);

        PrintWriter writer = null;
        try {
          writer = new PrintWriter(args[1]);
          StringBuilder builder = new StringBuilder();
          DependencyExtractor extractor = new CoNLLDependencyExtractor();
          while (ukWacIter.hasNext()) {
              BufferedReader doc = ukWacIter.next().reader();
              for (DependencyTreeNode[] tree = null;
                   (tree = extractor.readNextTree(doc)) != null; ) {
                  for (DependencyTreeNode node : tree)
                      builder.append(node.word()).append(" ");
              }
              writer.println(builder.toString());
              builder = new StringBuilder();
          }
        } finally {
          if (writer != null) writer.close();
        }
    }
}
TOP

Related Classes of edu.ucla.sspace.tools.PUkWacSentenceStripper

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.