Package opennlp.ccg.disjunctivizer

Source Code of opennlp.ccg.disjunctivizer.DisjunctivizerTest

package opennlp.ccg.disjunctivizer;

import static org.junit.Assert.*;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.util.LinkedHashSet;
import java.util.Properties;
import java.util.Set;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import opennlp.ccg.alignment.Alignment;
import opennlp.ccg.alignment.Alignments;
import opennlp.ccg.alignment.Mapping;
import opennlp.ccg.alignment.Phrase;
import opennlp.ccg.alignment.PhrasePosition;
import opennlp.ccg.alignment.Status;
import opennlp.ccg.hylo.graph.LFGraphFactory;
import opennlp.ccg.hylo.graph.LFBaseTest;

import org.apache.xml.serializer.OutputPropertiesFactory;
import org.apache.xml.serializer.Serializer;
import org.apache.xml.serializer.SerializerFactory;
import org.jdom.input.DOMBuilder;
import org.junit.Before;
import org.junit.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

public class DisjunctivizerTest extends LFBaseTest {

  static Properties OUTPUT_PROPERTIES = OutputPropertiesFactory.getDefaultMethodProperties("xml");
 
  static {
    OUTPUT_PROPERTIES.setProperty("indent", "yes");
    OUTPUT_PROPERTIES.setProperty("media-type", "text/xml");
    OUTPUT_PROPERTIES.setProperty(OutputPropertiesFactory.S_KEY_INDENT_AMOUNT, "2");
    OUTPUT_PROPERTIES.setProperty("{http\u003a//xml.apache.org/xalan}indent-amount", "2");
  }
 
  DocumentBuilder documentBuilder;
  DOMBuilder domBuilder;
  File alignmentsFile, paraphrasesFile, outputFile;
 
  @Before
  public void setUp() throws Exception {
    super.setUp();
   
    try {
      documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    }
    catch(ParserConfigurationException e) {
      throw new Exception("problem with parser configuration: " + e.getLocalizedMessage(), e);
    }
   
    domBuilder = new DOMBuilder();
   
    File testDir = new File(System.getProperty("user.dir"), "test");
   
    paraphrasesFile = new File(testDir, "paraphrases.xml");
    outputFile = new File(testDir, "output.xml");
  }
 
  @Test
  public void testDisjunctivizer() {
    try {
      new Disjunctivizer(null);
      fail("able to create disjunctivizer with null document");
    }
    catch(IllegalArgumentException expected) {
      // do nothing
    }
  }

  @Test
  public void testBuildDisjunctiveLF() throws Exception {
    Document paraphrases = documentBuilder.parse(paraphrasesFile);
   
    Serializer s = SerializerFactory.getSerializer(OUTPUT_PROPERTIES);
    s.setOutputFormat(OUTPUT_PROPERTIES);
    s.setWriter(new BufferedWriter(new FileWriter(outputFile)));
   
    Disjunctivizer disj = null;
    NodeList paras = paraphrases.getElementsByTagName("paraphrase");
    Document out = documentBuilder.newDocument();
    Element dlfsElement = out.createElement("dlfs");
    out.appendChild(dlfsElement);
   
    for(int i = 0; i < paras.getLength(); i++) {
      Element para = (Element)paras.item(i);
      Integer id = Integer.parseInt(para.getAttribute("id"));
     
      Element first = (Element)para.getElementsByTagName("first").item(0),
          second = (Element)para.getElementsByTagName("second").item(0);
     
      Set<Mapping> ms = new LinkedHashSet<Mapping>();
      NodeList als = para.getElementsByTagName("alignments");
      for(int j = 0; j < als.getLength(); j++) {
        Element al = (Element)als.item(j);
        if(al.getAttribute("source").equals("ANNOTATOR")) {
          NodeList as = al.getElementsByTagName("alignment");
          for(int k = 0; k < as.getLength(); k++) {
            Element a = (Element)as.item(k);
            ms.add(new Mapping(id, Integer.parseInt(a.getAttribute("first")),
                Integer.parseInt(a.getAttribute("second")),
                    Status.forAbbreviation(a.getAttribute("status"))));
          }
        }
      }
     
      Alignment a = new Alignment(new Phrase(id,
            Alignments.tokenize(first.getElementsByTagName("string").item(0).getTextContent())),
              new Phrase(id,
                Alignments.tokenize(second.getElementsByTagName("string").item(0).getTextContent())),
            ms);
     
      Element firstLF = (Element)first.getElementsByTagName("lf").item(0),
          secondLF = (Element)second.getElementsByTagName("lf").item(0);
     
      LFGraphDifference diff = (firstLF != null && secondLF != null)
          ? new LFGraphDifference(LFGraphFactory.newGraphFrom(firstLF),
              LFGraphFactory.newGraphFrom(secondLF), a)
          : null;
     
      for(PhrasePosition pos : PhrasePosition.values()) {
        Element str = out.createElement("string");
        str.setAttribute("number", Integer.toString(id));
        str.setAttribute("position", pos.name());
        str.setTextContent(Alignments.untokenize(a.get(pos)));
       
        dlfsElement.appendChild(str);
      }
     
      Element msEl = out.createElement("mappings");
      msEl.appendChild(out.createCDATASection(ms.toString()));
      dlfsElement.appendChild(msEl);
     
      if(diff == null) {
        dlfsElement.appendChild(out.createComment("missing LF!"));
      }
      else {
        if(disj == null) {
          disj = new Disjunctivizer(out);
        }
       
        Element dlf = disj.buildDisjunctiveLFFor(diff);
        dlfsElement.appendChild(dlf);
       
        assertEquals(dlf, disj.buildDisjunctiveLFFor(diff));
       
        dlfsElement.appendChild(disj.buildDisjunctiveLFFor(diff.reverse()));
       
        assertNotSame(dlf, disj.buildDisjunctiveLFFor(diff.reverse()));
      }
    }
   
    s.asDOMSerializer().serialize(out);
  }

}
TOP

Related Classes of opennlp.ccg.disjunctivizer.DisjunctivizerTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.