Package opennlp.ccgbank.extract

Source Code of opennlp.ccgbank.extract.MorphExtract

///////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
//////////////////////////////////////////////////////////////////////////////

// Program that reads the generated temp.xml file (from the configured temp directory) and produces a morph.xml file

package opennlp.ccgbank.extract;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;

import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.sax.SAXResult;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.sax.SAXTransformerFactory;

import opennlp.ccgbank.extract.ExtractGrammar.ExtractionProperties;

import org.apache.xml.serializer.OutputPropertiesFactory;
import org.apache.xml.serializer.Serializer;
import org.apache.xml.serializer.SerializerFactory;
import org.jdom.JDOMException;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLFilter;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

public class MorphExtract {

  public static void extractMorph(ExtractionProperties extractProps)
      throws TransformerException, TransformerConfigurationException,
      SAXException, IOException, JDOMException {

    System.out.println("Extracting morph:");
    System.out.println("Generating morph.xml");

    TransformerFactory tFactory = TransformerFactory.newInstance();

    File morphFile = new File(new File(extractProps.destDir), "morph.xml");
    File tempFile = new File(new File(extractProps.tempDir), "temp.xml");

    if (tFactory.getFeature(SAXSource.FEATURE)
        && tFactory.getFeature(SAXResult.FEATURE)) {

      SAXTransformerFactory saxTFactory = ((SAXTransformerFactory) tFactory);

      ArrayList<XMLFilter> filterChain = new ArrayList<XMLFilter>();
      ArrayList<String> xslChain = new ArrayList<String>();

      if (extractProps.macroSpecs.length() > 0) {

      }

      addTransforms(xslChain, extractProps.macroSpecs);

      for (String xslFile : xslChain)
        filterChain.add(saxTFactory.newXMLFilter(ExtractGrammar
            .getSource(xslFile)));
      // Create an XMLReader and set first xsl transform to that.
      XMLReader reader = XMLReaderFactory.createXMLReader();
      XMLFilter xmlFilter0 = filterChain.get(0);
      xmlFilter0.setParent(reader);

      //Create chain of xsl transforms
      // Create an XMLFilter for each stylesheet.
      for (int i = 1; i < filterChain.size(); i++) {
        XMLFilter xmlFilterPrev = filterChain.get(i - 1);
        XMLFilter xmlFilterCurr = filterChain.get(i);
        xmlFilterCurr.setParent(xmlFilterPrev);
      }

      XMLFilter xmlFilter = filterChain.get(filterChain.size() - 1);

      java.util.Properties xmlProps = OutputPropertiesFactory
          .getDefaultMethodProperties("xml");
      xmlProps.setProperty("indent", "yes");
      xmlProps.setProperty("standalone", "no");
      xmlProps.setProperty("{http://xml.apache.org/xalan}indent-amount",
          "2");
      Serializer serializer = SerializerFactory.getSerializer(xmlProps);
      serializer.setOutputStream(new FileOutputStream(morphFile));
      //XMLFilter xmlFilter = xmlFilter2;
      //XMLFilter xmlFilter = xmlFilter3;

      xmlFilter.setContentHandler(serializer.asContentHandler());
      xmlFilter.parse(new InputSource(tempFile.getPath()));
    }

    //Deleting the temporary lex file
    //tempFile.delete();
  }

  public static void addTransforms(ArrayList<String> xslChain, String macroSpecs) {

    xslChain.add("opennlp.ccgbank/transform/morphExtr.xsl");

    if (macroSpecs.length() == 0)
      xslChain.add("opennlp.ccgbank/transform/macroInsert.xsl");

    if (macroSpecs.contains("agr")) {
      System.out
          .println("Inserting a macro to check agreement in the copula");
      xslChain.add("opennlp.ccgbank/transform/agr-macroInsert.xsl");
    }

    if (macroSpecs.contains("anim")) {
      System.out
          .println("Inserting a macro to check animacy constraints");
      xslChain.add("opennlp.ccgbank/transform/anim-macroInsert.xsl");
    }
  }
}
TOP

Related Classes of opennlp.ccgbank.extract.MorphExtract

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.