Package opennlp.ccg

Source Code of opennlp.ccg.Parse

///////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2010 Michael White
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
//////////////////////////////////////////////////////////////////////////////

package opennlp.ccg;

import java.io.*;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;

import org.jdom.Document;
import org.jdom.Element;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;

import opennlp.ccg.grammar.Grammar;
import opennlp.ccg.hylo.HyloHelper;
import opennlp.ccg.hylo.Nominal;
import opennlp.ccg.lexicon.Tokenizer;
import opennlp.ccg.parse.ParseException;
import opennlp.ccg.parse.Parser;
import opennlp.ccg.parse.Supertagger;
import opennlp.ccg.parse.supertagger.WordAndPOSDictionaryLabellingStrategy;
import opennlp.ccg.synsem.Category;
import opennlp.ccg.synsem.LF;
import opennlp.ccg.synsem.Sign;
import opennlp.ccg.synsem.SignScorer;
import opennlp.ccg.test.RegressionInfo;
import opennlp.ccgbank.extract.Testbed;

/**
* Creates a testbed file by parsing a text file.
* Text is assumed to be tokenized, with one sentence per line.
*
* @author      Michael White
* @version     $Revision: 1.2 $, $Date: 2010/10/28 02:46:32 $
*/
public class Parse {

  public static void main(String[] args) throws IOException {
   
        String usage = "Usage: java opennlp.ccg.Parse \n" +
          "  (-g <grammarfile>) \n" +
          "  -parsescorer <scorerclass> \n" +
          "  (-supertagger <supertaggerclass> | -stconfig <configfile>) \n" +
          "  (-nbestListSize <nbestListSize>) \n" +
          "  <inputfile> <outputfile>";
       
        if (args.length == 0 || args[0].equals("-h")) {
            System.out.println(usage);
            System.exit(0);
        }
       
        // args
        String grammarfile = "grammar.xml";
        String inputfile = null;
        String outputfile = null;
        String parseScorerClass = null;
        String supertaggerClass = null, stconfig = null;
  int nbestListSize = 1;
        for (int i = 0; i < args.length; i++) {
            if (args[i].equals("-g")) { grammarfile = args[++i]; continue; }
            if (args[i].equals("-parsescorer")) { parseScorerClass = args[++i]; continue; }
            if (args[i].equals("-supertagger")) { supertaggerClass = args[++i]; continue; }
            if (args[i].equals("-stconfig")) { stconfig = args[++i]; continue; }
      if (args[i].equals("-nbestListSize")) { nbestListSize = Integer.parseInt(args[++i]); continue; }
            if (inputfile == null) { inputfile = args[i]; continue; }
            outputfile = args[i];
        }
  if (nbestListSize < 1) nbestListSize = 1;

        if (inputfile == null || outputfile == null ||
          parseScorerClass == null || (supertaggerClass == null && stconfig == null))
        {
            System.out.println(usage);
            System.exit(0);
        }
       
    // make test doc, sign map
    Document outDoc = new Document();
    Element outRoot = new Element("regression");
    outDoc.setRootElement(outRoot);
    Map<String,Sign> signMap = new HashMap<String,Sign>();

        // load grammar
        URL grammarURL = new File(grammarfile).toURI().toURL();
        System.out.println("Loading grammar from URL: " + grammarURL);
        Grammar grammar = new Grammar(grammarURL);
        Tokenizer tokenizer = grammar.lexicon.tokenizer;
        System.out.println();
       
        // set up parser
        Parser parser = new Parser(grammar);
        // instantiate scorer
        try {
            System.out.println("Instantiating parsing sign scorer from class: " + parseScorerClass);
            SignScorer parseScorer = (SignScorer) Class.forName(parseScorerClass).newInstance();
            parser.setSignScorer(parseScorer);
            System.out.println();
        } catch (Exception exc) {
            throw (RuntimeException) new RuntimeException().initCause(exc);
        }
        // instantiate supertagger
        try {
          Supertagger supertagger;
          if (supertaggerClass != null) {
                System.out.println("Instantiating supertagger from class: " + supertaggerClass);
                supertagger = (Supertagger) Class.forName(supertaggerClass).newInstance();
          }
          else {
            System.out.println("Instantiating supertagger from config file: " + stconfig);
            supertagger = WordAndPOSDictionaryLabellingStrategy.supertaggerFactory(stconfig);
          }
            parser.setSupertagger(supertagger);
            System.out.println();
        } catch (Exception exc) {
            throw (RuntimeException) new RuntimeException().initCause(exc);
        }
       
        // loop through input
        BufferedReader in = new BufferedReader(new FileReader(inputfile));
        String line;
        Map<String,String> predInfoMap = new HashMap<String,String>();
        System.out.println("Parsing " + inputfile);
        System.out.println();
        int count = 1;
        while ((line = in.readLine()) != null) {
          String id = "s" + count;
          try {
            // parse it
            System.out.println(line);
      parser.parse(line);
      int numParses = Math.min(nbestListSize, parser.getResult().size());
      for (int i=0; i < numParses; i++) {
          Sign thisParse = parser.getResult().get(i);
          // convert lf
          Category cat = thisParse.getCategory();
          LF convertedLF = null;
          String predInfo = null;
          if (cat.getLF() != null) {
        // convert LF
        LF flatLF = cat.getLF();
        cat = cat.copy();
        Nominal index = cat.getIndexNominal();
        convertedLF = HyloHelper.compactAndConvertNominals(flatLF, index, thisParse);
        // get pred info
        predInfoMap.clear();
        Testbed.extractPredInfo(flatLF, predInfoMap);
        predInfo = Testbed.getPredInfo(predInfoMap);
          }
          // add test item, sign
          Element item = RegressionInfo.makeTestItem(grammar, line, 1, convertedLF);
          String actualID = (nbestListSize == 1) ? id : id + "-" + (i+1);
          item.setAttribute("info", actualID);
          outRoot.addContent(item);
          signMap.put(actualID, thisParse);
          // Add parsed words as a separate LF element
          Element fullWordsElt = new Element("full-words");
          fullWordsElt.addContent(tokenizer.format(thisParse.getWords()));
          item.addContent(fullWordsElt);
          if (predInfo != null) {
        Element predInfoElt = new Element("pred-info");
        predInfoElt.setAttribute("data", predInfo);
        item.addContent(predInfoElt);
          }
      }
    } catch (ParseException e) {
        System.out.println("Unable to parse!");
        // add test item with zero parses
        Element item = RegressionInfo.makeTestItem(grammar, line, 0, null);
        item.setAttribute("info", id);
        outRoot.addContent(item);
    }
    count++;
        }
        System.out.println();
       
    // write test doc, saved signs
        System.out.println("Writing parses to " + outputfile);
    XMLOutputter outputter = new XMLOutputter(Format.getPrettyFormat());
    File regressionFile = new File(outputfile);
    outputter.output(outDoc, new FileOutputStream(regressionFile));
    RegressionInfo.writeSerFile(signMap, regressionFile);
        System.out.println();
   
        // done
        in.close();
        System.out.println("Done.");
  }
}
TOP

Related Classes of opennlp.ccg.Parse

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.