Package com.jpetrak.gate.stringannotation.extendedgazetteer2.tests

Source Code of com.jpetrak.gate.stringannotation.extendedgazetteer2.tests.Tests1

package com.jpetrak.gate.stringannotation.extendedgazetteer2.tests;

import static org.junit.Assert.*;
import gate.Annotation;
import gate.AnnotationSet;
import gate.Document;
import gate.Factory;
import gate.FeatureMap;
import gate.Gate;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.util.AnnotationDiffer;
import gate.util.GateException;

import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashSet;
import java.util.Iterator;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.WildcardFileFilter;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

import com.jpetrak.gate.stringannotation.extendedgazetteer2.ExtendedGazetteer2;
import com.jpetrak.gate.stringannotation.extendedgazetteer2.FeatureGazetteer;
import com.jpetrak.gate.stringannotation.extendedgazetteer2.GazStore;
import com.jpetrak.gate.stringannotation.extendedgazetteer2.Lookup;
import com.jpetrak.gate.stringannotation.extendedgazetteer2.State;
import com.jpetrak.gate.stringannotation.extendedgazetteer2.trie2.GazStoreTrie2;
import com.jpetrak.gate.stringannotation.extendedgazetteer2.trie3.GazStoreTrie3;
import com.jpetrak.gate.stringannotation.extendedgazetteer2.trie3.StoreArrayOfCharArrays;
import com.jpetrak.gate.stringannotation.extendedgazetteer2.trie3.StoreCharMapPhase1;
import com.jpetrak.gate.stringannotation.extendedgazetteer2.trie3.StoreStates;

public class Tests1 {

  private static boolean isInitialized = false;
  private static File testingDir;
  @BeforeClass
  public static void init() throws GateException, MalformedURLException {
    if(!isInitialized) {
      System.out.println("Tests1: Inititalizing ...");
      isInitialized = true;
      Gate.runInSandbox(true);
      Gate.init();
      File pluginHome = new File(".");
      System.out.println("Plugin home directory is "+pluginHome.getAbsolutePath());
      Gate.getCreoleRegister().registerDirectories(
              pluginHome.toURI().toURL());
      testingDir = new File(pluginHome,"tests");
      assertTrue(testingDir.exists());
      FileFilter fileFilter = new WildcardFileFilter("*.gazbin");
      File[] files = testingDir.listFiles(fileFilter);
      for(File file : files) {
        file.delete();
      }
    } else {
      System.out.println("Already initialized ...");
    }
  }
 
  @AfterClass
  public static void cleanup() throws Exception {
    System.out.println("Tests1: Cleaning up ...");
  }
 

  @Test
  public void testStore()  {
    StoreArrayOfCharArrays as = new StoreArrayOfCharArrays();
   
    int i;
    char[] r;
    i = as.addData("asdfjk".toCharArray());
    assertEquals(0,i);
    i = as.addData("qwertyqwerty".toCharArray());
    assertEquals(8,i);
    r = as.getData(0);
    assertEquals("asdfjk",new String(r));
    r = as.getData(8);
    assertEquals("qwertyqwerty",new String(r));
   
    // test the list methods
    i = as.addListData("l1:4567890".toCharArray());
    assertEquals(22,i);
    int size = as.getListSize(i);
    assertEquals(1,size);
    r = as.getListData(i, 0);
    assertEquals("l1:4567890",new String(r));
   
    i = as.addListData(i,"l2:4567890".toCharArray());
    assertEquals(22,i);
    size = as.getListSize(i);
    assertEquals(2,size);
    r = as.getListData(i,1);
    assertEquals("l2:4567890",new String(r));
       
    // check if the first element (element 0) is still the same!
    r = as.getListData(i,0);
    assertEquals("l1:4567890",new String(r));
   
    i = as.addListData(i,"l3:4567890".toCharArray());
    assertEquals(22,i);
    size = as.getListSize(i);
    assertEquals(3,size);
    r = as.getListData(i,2);
    assertEquals("l3:4567890",new String(r));
   
    i = as.addListData("another".toCharArray());
    size = as.getListSize(i);
    assertEquals(1,size);
    r = as.getListData(i,0);
    assertEquals("another",new String(r));

    char[] f1 = "fixed data 01".toCharArray();
    int i1 = as.addFixedLengthData(f1);
    int i2 = as.addData("last".toCharArray());
    r = as.getFixedLengthData(i1, f1.length);
    assertNotNull(r);
    assertEquals(r.length,f1.length);
    assertEquals(new String(f1),new String(r));
    r = as.getData(i2);
    assertNotNull(r);
    assertEquals("last",new String(r));
   
    i = as.addListData("element1".toCharArray());
    as.addListData(i,"element2".toCharArray());
    int j = as.findListData(i, "element1".toCharArray());
    assertEquals(0,j);
    j = as.findListData(i, "element2".toCharArray());
    assertEquals(1,j);
  }
 
 
  @Test
  public void testLookupTrie2() {
    GazStoreTrie2 gs = new GazStoreTrie2();
    gs.runImplementationTests();
  }
  @Test
  public void testLookupTrie3() {
    GazStoreTrie3 gs = new GazStoreTrie3();
    gs.runImplementationTests();
  }
 
  @Test
  public void testTrie2() {
    GazStore gs = new GazStoreTrie2();
    FeatureMap fm = Factory.newFeatureMap();
    fm.put("listFeature1","value1");
    fm.put("listFeature2","value2");
    int info1 = gs.addListInfo("Type1", "URL1", fm);
    String[] keyvals1 = new String[]{"key1","val1","key2","","k3","valuenumberthree"};
    /*
    State st1 = gs.addLookupOld("asdf", info1, keyvals1);
    Iterator<Lookup> it1 = gs.getLookups(st1);
    assertEquals(true,it1.hasNext());
    Lookup l1 = it1.next();
    */
  }
  @Test
  public void testTrie3() {
    GazStoreTrie3 gs = new GazStoreTrie3();
    FeatureMap fm = Factory.newFeatureMap();
    fm.put("listFeature1","value1");
    fm.put("listFeature2","value2");
    int info1 = gs.addListInfo("Type1", "URL1", fm);
    String[] keyvals1 = new String[]{"key1","val1","key2","","k3","valuenumberthree"};
   
    gs.addLookup("asdf", info1, keyvals1);
    gs.addLookup("asdf", info1, keyvals1);
    gs.addLookup("asdf", info1, keyvals1);
    // TODO: check we have only added the lookup once!
    Iterator<Lookup> it = gs.match("asdf");
    assertNotNull(it);
    int nrLookups = 0;
    while(it.hasNext()) {
      it.next();
      nrLookups++;
    }
    assertEquals(1,nrLookups);
  }
 
  // TODO: the tests for BE1 and BE2 cannot be the same as for BE3 any more because
  // BE3 does now check for duplicate identical lines within a list and BE1/2 do not.
  // Since the test gazetteer contains duplicate entries, only BE3 gives the correct numbers!
  /*
  @Test
  public void testGazetteerApplication1BE1()
      throws MalformedURLException, ResourceInstantiationException, ExecutionException {
    do_testGazetteerApplication1(1);
  }
  @Test
  public void testGazetteerApplication1BE2()
      throws MalformedURLException, ResourceInstantiationException, ExecutionException {
    do_testGazetteerApplication1(2);
  }
  */
  @Test
  public void testGazetteerApplication1BE3()
      throws MalformedURLException, ResourceInstantiationException, ExecutionException {
    do_testGazetteerApplication1(3);
  }
  public void do_testGazetteerApplication1(int backendNr) throws MalformedURLException, ResourceInstantiationException, ExecutionException {
    System.out.println("Running gazetteer application test 1 for backend "+backendNr);
    FeatureMap parms = Factory.newFeatureMap();
    File defFile = new File(testingDir,"extgaz2.def");
    URL gazURL = defFile.toURI().toURL();
    parms.put("configFileURL", gazURL);
    parms.put("backendNr",backendNr);
    ExtendedGazetteer2 eg = (ExtendedGazetteer2)Factory.createResource(
            "com.jpetrak.gate.stringannotation.extendedgazetteer2.ExtendedGazetteer2", parms);
    // load the document
    parms = Factory.newFeatureMap();
    File docFile = new File(testingDir,"extgaz2docprep.xml");
    parms.put("sourceUrl",docFile.toURI().toURL());
    Document doc = (Document)
         Factory.createResource("gate.corpora.DocumentImpl", parms);
    AnnotationSet lookups = doc.getAnnotations().get("OutType");
    assertEquals(0,lookups.size());
    // run the gazetteer on the document
    eg.setDocument(doc);
    eg.execute();
    // check if we got the correct annotations
    AnnotationSet tokens = doc.getAnnotations().get("Token");
    assertEquals(46,tokens.size());
    AnnotationSet sentences = doc.getAnnotations().get("Sentence");
    assertEquals(4,sentences.size());
    lookups = doc.getAnnotations().get("OutType");
    if(backendNr == 3) {
      assertEquals(12,lookups.size());
    } else {
      assertEquals(14,lookups.size());
    }
    int i = 1;
    FeatureMap fm;
    long from;
    long to;
    for(Annotation ann : gate.Utils.inDocumentOrder(lookups)) {
      //System.out.println("Annotation: "+ann);
      fm = ann.getFeatures();
      String inst = (String)fm.get("inst");
      String string = (String)fm.get("_string");
      from = ann.getStartNode().getOffset();
      to = ann.getEndNode().getOffset();
      if(i == 1) {
        assertEquals(8,from);
        assertEquals(12,to);
        assertEquals("i1",inst);
        assertEquals("some",string);
      } else if(i == 5) {
        assertEquals(26,from);
        assertEquals(34,to);
        assertEquals("i11",inst);
        assertEquals("word and",string);
      }
      i++;
    }
    doc.getAnnotations().removeAll(lookups);
    eg.setMatchAtWordStartOnly(false);
    eg.setMatchAtWordEndOnly(false);
    eg.execute();
    lookups = doc.getAnnotations().get("OutType");
    assertEquals(22,lookups.size());
    doc.getAnnotations().removeAll(lookups);
    eg.setLongestMatchOnly(false);
    eg.execute();
    lookups = doc.getAnnotations().get("OutType");
    assertEquals(26,lookups.size());
    System.out.println("Gazetteer application test 1 finished for backedn "+backendNr);
  }
 
  @Test
  public void testGazetteerApplication2BE1()
      throws ResourceInstantiationException, ExecutionException, IOException {
    do_testGazetteerApplication2(1);
  }
  @Test
  public void testGazetteerApplication2BE2()
      throws ResourceInstantiationException, ExecutionException, IOException {
    do_testGazetteerApplication2(2);
  }
  public void testGazetteerApplication2BE3()
      throws ResourceInstantiationException, ExecutionException, IOException {
    do_testGazetteerApplication2(3);
  }
 
  public void do_testGazetteerApplication2(int backendNr)
      throws ResourceInstantiationException, ExecutionException, IOException {
    System.out.println("Running gazetteer application test 2 for news1pre for backend "+backendNr);
    FeatureMap parms = Factory.newFeatureMap();
    File defFile = new File(testingDir,"annie/lists.def");
    URL gazURL = defFile.toURI().toURL();
    parms.put("configFileURL", gazURL);
    parms.put("backendNr",backendNr);
    ExtendedGazetteer2 eg = (ExtendedGazetteer2)Factory.createResource(
            "com.jpetrak.gate.stringannotation.extendedgazetteer2.ExtendedGazetteer2", parms);
    // load the document
    eg.setOutputAnnotationSet("EXT");
    parms = Factory.newFeatureMap();
    File docFile = new File(testingDir,"news1pre.xml");
    parms.put("sourceUrl",docFile.toURI().toURL());
    Document doc = (Document)
         Factory.createResource("gate.corpora.DocumentImpl", parms);
    AnnotationSet lookups = doc.getAnnotations("EXT").get("Lookup");
    assertEquals(0,lookups.size());
    // run the gazetteer on the document
    eg.setDocument(doc);
    eg.execute();
    AnnotationDiffer differ = new AnnotationDiffer();
    differ.setSignificantFeaturesSet(new HashSet<String>());
    AnnotationSet keys = doc.getAnnotations().get("Lookup");
    System.out.println("Lookups old: "+keys.size());
    AnnotationSet responses = doc.getAnnotations("EXT").get("Lookup");
    System.out.println("Lookups new: "+responses.size());
    differ.calculateDiff(keys, responses);
    int correct = differ.getCorrectMatches();
    int falsePositives = differ.getFalsePositivesStrict();
    int missing = differ.getMissing();
    System.out.println("Diff: correct="+correct+" false positives="+falsePositives+" missing="+missing);
    File outFile = new File(testingDir,"news1pre_procBE"+backendNr+".xml");
    FileUtils.writeStringToFile(outFile, doc.toXml(),"UTF-8");
    // the 33 false Positives come from duplicates that are introduced from several list files
    // and which are not removed by the ExtGaz
    // the 2 missing come from matches within hyphenated words (after a hyphen) which are
    // not found by ExtGaz because they occur inside a single Token (hyphens do not split up tokens)
    assertEquals(194,correct);
    // The backend 2 does not do any duplicate tests at all, therefore the duplicate entry "N. Ireland" in
    // provinces.lst shows up twice for BE2, but only once for BE1
    if(backendNr==1) {
      assertEquals(32,falsePositives);
    } else {
      assertEquals(33,falsePositives);
    }
    assertEquals(2,missing);
    System.out.println("Gazetteer application test 2 finished for backend nr: "+backendNr);
  }

  @Test
  public void testFeatureGazetteer1() throws MalformedURLException, ResourceInstantiationException, ExecutionException {
    int backendNr = 3;
    System.out.println("Running FEATURE GAZETTEER application test for backend "+backendNr);
    FeatureMap parms = Factory.newFeatureMap();
    File defFile = new File(testingDir,"extgaz2.def");
    URL gazURL = defFile.toURI().toURL();
    parms.put("configFileURL", gazURL);
    parms.put("backendNr",backendNr);
    FeatureGazetteer eg = (FeatureGazetteer)Factory.createResource(
            "com.jpetrak.gate.stringannotation.extendedgazetteer2.FeatureGazetteer", parms);
    // test matching directly
    Iterator<Lookup> ret = null;
    ret = eg.doMatch("some", true,true);
    System.out.println("Matching same: "+ret);
    // check that it works with a gazetteer list that does not have any features
    defFile = new File(testingDir,"extgaz3.def");
    gazURL = defFile.toURI().toURL();
    parms.put("configFileURL", gazURL);
    eg = (FeatureGazetteer)Factory.createResource(
        "com.jpetrak.gate.stringannotation.extendedgazetteer2.FeatureGazetteer", parms);
    ret = eg.doMatch("word", true,true);
    System.out.println("Matching extgaz3 'word': "+eg.lookups2FeatureMaps(ret));
    ret = eg.doMatch("word", false,false);
    System.out.println("Matching extgaz3 'word': "+eg.lookups2FeatureMaps(ret));
    ret = eg.doMatch("word", true,true);
    System.out.println("Matching extgaz3 'word': "+eg.lookups2FeatureMaps(ret));
    ret = eg.doMatch("notthere", true,true);
    System.out.println("Matching extgaz3 'notthere': "+eg.lookups2FeatureMaps(ret));
    ret = eg.doMatch("test", true,true);
    System.out.println("Matching extgaz3 'test': "+eg.lookups2FeatureMaps(ret));
    ret = eg.doMatch("text", true,true);
    System.out.println("Matching extgaz3 'text': "+eg.lookups2FeatureMaps(ret));
    ret = eg.doMatch("thewordyes", false,false);
    System.out.println("Matching extgaz3 'thewordyes': "+eg.lookups2FeatureMaps(ret));
   
    System.out.println("Feature Gazetteer application test finished");
  }
 

  @Test
  public void testStoreCharMapPhase1() {
    StoreCharMapPhase1 store = new StoreCharMapPhase1(new StoreArrayOfCharArrays());
    System.out.println("Adding first:1");
    int i = store.put(-1, 'a', 1);
    assertEquals(0,i);
    System.out.println("Adding first:2");
    i = store.put(0,'b', 2);
    assertEquals(0,i);
    System.out.println("Adding second");
    i = store.put(-1,'a',3);
    assertEquals(1,i);
  }

  @Test
  public void testStoreStates1() {
    StoreArrayOfCharArrays backing = new StoreArrayOfCharArrays();
    StoreStates store = new StoreStates(backing);
    store.test();
    // creating a StoreStates object also creates the initial state, so the first
    // index must be 5
    int s1 = store.newCharMapState();
    assertEquals(5,s1);
    // also the initial state must have index 0
    assertEquals(0,store.initialState);
    int s2 = store.newCharMapState();
    assertEquals(10,s2);
    int s3 = store.newSingleCharState();
    assertEquals(15,s3);
    assertEquals(true,store.getIsCharMapState(s1));
    assertEquals(true,store.getIsCharMapState(s2));
    assertEquals(false,store.getIsCharMapState(s3));
    store.setLookupIndex(s1, 123);
    store.setLookupIndex(s2, 234);
    store.setLookupIndex(s3, 345);
    assertEquals(true,store.getIsCharMapState(s1));
    assertEquals(true,store.getIsCharMapState(s2));
    assertEquals(false,store.getIsCharMapState(s3));
    assertEquals(123,store.getLookupIndex(s1));
    assertEquals(234,store.getLookupIndex(s2));
    assertEquals(345,store.getLookupIndex(s3));
    assertEquals(-1,store.getCharMapIndex(s1));
    assertEquals(-1,store.getCharMapIndex(s2));
    assertEquals(0,store.getSingleCharStateChar(s3));
    store.put(store.initialState, 'a', s1);
    store.put(store.initialState, 'b', s2);
    store.put(store.initialState, 'c', s3);
    assertEquals(s1,store.next(store.initialState, 'a'));
    assertEquals(s2,store.next(store.initialState, 'b'));
    assertEquals(s3,store.next(store.initialState, 'c'));
  }

  @Test
  public void testGazStoreTrie3() throws IOException {
    System.out.println("GazStoreTrie3: *******************************");
    GazStoreTrie3 gs = new GazStoreTrie3();
    FeatureMap fm1 = Factory.newFeatureMap();
    fm1.put("feature1","value1");
    int info1 = gs.addListInfo("Type1", "TheFile", fm1);
    String[] kv1 = new String[2];
    kv1[0] = "key1of1";
    kv1[1] = "value1of1";
    gs.addLookup("entry", info1, kv1);
    String[] kv2 = new String[4];
    kv2[0] = "KEY1of2";
    kv2[1] = "VALUE1of2";
    kv2[2] = "KEY2of2";
    kv2[3] = "VALUE2of2";
    gs.addLookup("as", info1, kv2);
    State init = gs.getInitialState();
    System.out.println("Initial State: "+init);
    State s1 = init.next('a');
    System.out.println("State after a: "+s1);
    System.out.println("isFinal: "+s1.isFinal());
    State s2 = s1.next('s');
    System.out.println("State after s: "+s2);
    System.out.println("isFinal: "+s2.isFinal());
    Iterator<Lookup> lookupIter = gs.getLookups(s2);
    System.out.println("Have lookups: "+lookupIter.hasNext());
    while(lookupIter.hasNext()) {
      Lookup l = lookupIter.next();
      System.out.println("Have a lookup"+l);
    }
    File someFile = new File("tmp.gazbin");
    try {
      gs.save(someFile);
    } catch (FileNotFoundException e) {
      e.printStackTrace();
      assertTrue("could not save trie", false);
      return;
    }
    GazStoreTrie3 gs2 = new GazStoreTrie3();
    try {
      gs2 = (GazStoreTrie3)gs2.load(someFile);
    } catch (FileNotFoundException e) {
      e.printStackTrace();
      assertTrue("could not load trie",false);
      return;
    }
    State init_2 = gs2.getInitialState();
    System.out.println("Initial State: "+init_2);
    State s1_2 = init_2.next('a');
    System.out.println("State after a: "+s1_2);
    System.out.println("isFinal: "+s1_2.isFinal());
    State s2_2 = s1_2.next('s');
    System.out.println("State after s: "+s2_2);
    System.out.println("isFinal: "+s2_2.isFinal());
    Iterator<Lookup> lookupIter_2 = gs2.getLookups(s2_2);
    System.out.println("Have lookups: "+lookupIter_2.hasNext());
    while(lookupIter_2.hasNext()) {
      Lookup l = lookupIter_2.next();
      System.out.println("Have a lookup"+l);
    }
   
    System.out.println("************* end ********************");
 
 
}
TOP

Related Classes of com.jpetrak.gate.stringannotation.extendedgazetteer2.tests.Tests1

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.