Package joshua.decoder.ff.lm.buildin_lm

Examples of joshua.decoder.ff.lm.buildin_lm.TrieLM


 
  @Test(dependsOnMethods={"setup","testIteration"})
  public void testChildren() throws FileNotFoundException {
    ArpaFile arpaFile = new ArpaFile(arpaFileName, vocab);
   
    TrieLM lm = new TrieLM(arpaFile);
//    System.err.println(lm.getChildren().size());
    Assert.assertNotSame(lm.getChildren().size(), 0);
  }
View Full Code Here


 
  @Test(dependsOnMethods={"setup","testIteration","testChildren"})
  public void testTrie() throws FileNotFoundException {
    ArpaFile arpaFile = new ArpaFile(arpaFileName, vocab);
   
    TrieLM lm = new TrieLM(arpaFile);

    // Test unigrams known to be in the language model
    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("a")),-1.992672, 0.000001f);
    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("because")),-2.713723, 0.000001f);
    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("boycott")),-4.678545, 0.000001f);
    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("of")),-1.609573, 0.000001f);
    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("parliament")),-3.875917, 0.000001f);
    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("potato")),-9.753210, 0.000001f);
    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("resumption")),-4.678545, 0.000001f);
    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("the")),-1.712444, 0.000001f);
   
    // Test unigrams known to NOT be in the language model
    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("banana")), -JoshuaConfiguration.lm_ceiling_cost, 0.000001f);
   
   
    // Test bigrams known to be in the language model
    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("because of")), -0.3552987, 0.000001f);
    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("of the")), -0.7507797, 0.000001f);
    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("resumption of")), -0.7266324, 0.000001f);
    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("the resumption")), -3.936147, 0.000001f);
   
    // Test trigrams known to be in the language model
    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("because of the")), -0.6309999f, 0.000001f);
 
 
    // Test bigrams know to NOT be in the language model (but the unigrams are)
    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("a boycott")), -4.678545f + -0.1195484f, 0.000001f);
    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("of parliament")), -3.875917f + -0.1991907f, 0.000001f);
    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("the potato")), -9.753210f + -0.1606644f, 0.000001f);
    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("potato parliament")), -3.875917f + -0.0f, 0.000001f);
   
    // Test trigrams know to NOT be in the language model (but the bigrams are)
    int[] words = vocab.getIDs("because of a");
    double f = lm.ngramLogProbability(words);
    Assert.assertEquals(f, -1.403534f + -0.03083654f, 0.000001f);
//    //Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("of the parliament")), -3.875917f + -0.05237135f, 0.000001f);
   
  }
View Full Code Here

    } else if (JoshuaConfiguration.use_trie_lm) {
      if (JoshuaConfiguration.use_left_equivalent_state
          || JoshuaConfiguration.use_right_equivalent_state) {
            throw new IllegalArgumentException("using Trie LM, we cannot use suffix/prefix stuff");
          }
          this.languageModel = new TrieLM(
              this.symbolTable,
              JoshuaConfiguration.lm_file);
        } else {
     
//      logger.info("Reading language model from " + JoshuaConfiguration.lm_file + " into internal trie");
View Full Code Here

TOP

Related Classes of joshua.decoder.ff.lm.buildin_lm.TrieLM

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.