Package joshua.corpus.suffix_array

Source Code of joshua.corpus.suffix_array.SuffixArrayTest

/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/

package joshua.corpus.suffix_array;

import java.io.IOException;

import joshua.corpus.CorpusArray;
import joshua.corpus.MatchedHierarchicalPhrases;
import joshua.corpus.Phrase;
import joshua.corpus.suffix_array.BasicPhrase;
import joshua.corpus.suffix_array.SuffixArray;
import joshua.corpus.suffix_array.Suffixes;
import joshua.corpus.suffix_array.mm.MemoryMappedSuffixArray;
import joshua.corpus.vocab.SymbolTable;
import joshua.corpus.vocab.Vocabulary;


import org.testng.Assert;
import org.testng.annotations.Parameters;
import org.testng.annotations.Test;

/**
* Unit tests for suffix array.
*
* @author Lane Schwartz
*/
public class SuffixArrayTest {

  private final Suffixes suffixArray;
  private final Vocabulary vocab;
 
  @Parameters({"binaryFileName"})
  public SuffixArrayTest(String binaryFileName) throws IOException, ClassNotFoundException {
   
    // Adam Lopez's example...
    String corpusString = "it makes him and it mars him , it sets him on and it takes him off .";

    vocab = new Vocabulary();
    Phrase exampleSentence = new BasicPhrase(corpusString, vocab);
   
    exampleSentence = new BasicPhrase(corpusString, vocab);
    int[] sentences = new int[1];
    sentences[0] = 0;
    int[] corpus = new int[exampleSentence.size()];
    for(int i = 0; i < exampleSentence.size(); i++) {
      corpus[i] = exampleSentence.getWordID(i);
    }
   
    CorpusArray corpusArray = new CorpusArray(corpus, sentences, vocab);
   
    if (binaryFileName==null || binaryFileName.trim().length()==0)
      suffixArray = new SuffixArray(corpusArray);
    else
      suffixArray = new MemoryMappedSuffixArray(binaryFileName, corpusArray, MemoryMappedSuffixArray.DEFAULT_CACHE_CAPACITY);
   
  }
 
 
  @Test
  public void findTriviallyHieroPhrase() {
    Assert.assertNotNull(vocab);
    Assert.assertNotNull(suffixArray);
   
    Pattern pattern = new Pattern(vocab, vocab.getID("it"), vocab.getID("makes"), vocab.getID("him"));
    Assert.assertEquals(pattern.arity(), 0);
    Assert.assertEquals(pattern.size(), 3);
   
    int minNonterminalSpan = 2;
    int maxPhraseSpan = 5;
   
    MatchedHierarchicalPhrases matches =
      suffixArray.createHierarchicalPhrases(pattern, minNonterminalSpan, maxPhraseSpan);
   
    Assert.assertNotNull(matches);
    Assert.assertEquals(matches.getPattern(), pattern);
    Assert.assertEquals(matches.arity(), 0);
    Assert.assertEquals(matches.size(), 1);
  }
 
  @Test(dependsOnMethods={"findTriviallyHieroPhrase"})
  public void findHieroPhrase() {
   
    Assert.assertNotNull(vocab);
    Assert.assertNotNull(suffixArray);
   
    {
      Pattern pattern = new Pattern(vocab, vocab.getID("it"), vocab.getID(SymbolTable.X_STRING));
      Assert.assertEquals(pattern.arity(), 1);
      Assert.assertEquals(pattern.size(), 2);
     
      int minNonterminalSpan = 2;
      int maxPhraseSpan = 5;

      MatchedHierarchicalPhrases matches =
        suffixArray.createHierarchicalPhrases(pattern, minNonterminalSpan, maxPhraseSpan);

      Assert.assertNotNull(matches);
      Assert.assertEquals(matches.getPattern(), pattern);
      Assert.assertEquals(matches.arity(), 1);
      Assert.assertEquals(matches.size(), 4);
    }
   
    {
      Pattern pattern = new Pattern(vocab, vocab.getID("it"), vocab.getID(SymbolTable.X_STRING), vocab.getID("and"));
      Assert.assertEquals(pattern.arity(), 1);
      Assert.assertEquals(pattern.size(), 3);

      int minNonterminalSpan = 2;
      int maxPhraseSpan = 5;

      MatchedHierarchicalPhrases matches =
        suffixArray.createHierarchicalPhrases(pattern, minNonterminalSpan, maxPhraseSpan);

      Assert.assertNotNull(matches);
      Assert.assertEquals(matches.getPattern(), pattern);
      Assert.assertEquals(matches.arity(), 1);
      Assert.assertEquals(matches.size(), 2);
    }
  }
 
 
  @Test
  public void findPhrase() {
   
    // Look up phrase "it makes him"
   
    Phrase phrase = new BasicPhrase("it makes him", vocab);
    int[] bounds = suffixArray.findPhrase(phrase);
   
    int expectedSuffixArrayStartIndex = 0;
    int expectedSuffixArrayEndIndex = 0;
   
    Assert.assertEquals(bounds.length, 2);
    Assert.assertEquals(bounds[0], expectedSuffixArrayStartIndex);
    Assert.assertEquals(bounds[1], expectedSuffixArrayEndIndex);
   
   
    // Look up phrase "and it"
   
    phrase = new BasicPhrase("and it", vocab);
    bounds = suffixArray.findPhrase(phrase);
   
    expectedSuffixArrayStartIndex = 9;
    expectedSuffixArrayEndIndex = 10;
   
    Assert.assertEquals(bounds.length, 2);
    Assert.assertEquals(bounds[0], expectedSuffixArrayStartIndex);
    Assert.assertEquals(bounds[1], expectedSuffixArrayEndIndex);
  }

}
TOP

Related Classes of joshua.corpus.suffix_array.SuffixArrayTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.