/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package joshua.corpus.suffix_array;
import java.io.IOException;
import joshua.corpus.CorpusArray;
import joshua.corpus.MatchedHierarchicalPhrases;
import joshua.corpus.Phrase;
import joshua.corpus.suffix_array.BasicPhrase;
import joshua.corpus.suffix_array.SuffixArray;
import joshua.corpus.suffix_array.Suffixes;
import joshua.corpus.suffix_array.mm.MemoryMappedSuffixArray;
import joshua.corpus.vocab.SymbolTable;
import joshua.corpus.vocab.Vocabulary;
import org.testng.Assert;
import org.testng.annotations.Parameters;
import org.testng.annotations.Test;
/**
 * Unit tests for suffix array.
 * <p>
 * Every test runs against a suffix array built over one small example
 * corpus (see the constructor). Depending on the {@code binaryFileName}
 * test parameter, the fixture is either an in-memory {@link SuffixArray}
 * or a {@link MemoryMappedSuffixArray}.
 *
 * @author Lane Schwartz
 */
public class SuffixArrayTest {

    /** Value passed as the minimum nonterminal span in every hierarchical phrase lookup. */
    private static final int MIN_NONTERMINAL_SPAN = 2;

    /** Value passed as the maximum phrase span in every hierarchical phrase lookup. */
    private static final int MAX_PHRASE_SPAN = 5;

    /** Suffix array under test, built once in the constructor. */
    private final Suffixes suffixArray;

    /** Vocabulary that maps the words of the example corpus to integer IDs. */
    private final Vocabulary vocab;

    /**
     * Builds the example corpus and the suffix array under test.
     *
     * @param binaryFileName name of a binary file handed to
     *        {@link MemoryMappedSuffixArray}; when {@code null} or blank,
     *        an in-memory {@link SuffixArray} is constructed instead
     * @throws IOException declared for the suffix array construction
     * @throws ClassNotFoundException declared for the suffix array construction
     */
    @Parameters({"binaryFileName"})
    public SuffixArrayTest(String binaryFileName) throws IOException, ClassNotFoundException {

        // Adam Lopez's example...
        String corpusString = "it makes him and it mars him , it sets him on and it takes him off .";

        vocab = new Vocabulary();

        // The phrase only needs to be built once; it supplies the word IDs below.
        Phrase exampleSentence = new BasicPhrase(corpusString, vocab);

        // One-element sentences array holding 0, passed to CorpusArray as-is.
        int[] sentences = new int[1];
        sentences[0] = 0;

        // Copy the word IDs of the example phrase into a flat corpus array.
        int corpusLength = exampleSentence.size();
        int[] corpus = new int[corpusLength];
        for (int i = 0; i < corpusLength; i++) {
            corpus[i] = exampleSentence.getWordID(i);
        }

        CorpusArray corpusArray = new CorpusArray(corpus, sentences, vocab);

        if (binaryFileName == null || binaryFileName.trim().length() == 0) {
            suffixArray = new SuffixArray(corpusArray);
        } else {
            suffixArray = new MemoryMappedSuffixArray(binaryFileName, corpusArray, MemoryMappedSuffixArray.DEFAULT_CACHE_CAPACITY);
        }
    }

    /**
     * Looks up the pattern "it makes him", which contains no X symbol,
     * and expects exactly one match with arity zero.
     */
    @Test
    public void findTriviallyHieroPhrase() {

        Assert.assertNotNull(vocab);
        Assert.assertNotNull(suffixArray);

        Pattern pattern = new Pattern(vocab, vocab.getID("it"), vocab.getID("makes"), vocab.getID("him"));

        assertHierarchicalMatches(pattern, 0, 3, 1);
    }

    /**
     * Looks up two patterns containing the {@link SymbolTable#X_STRING}
     * symbol: "it X" (four matches expected) and "it X and" (two matches
     * expected). Both are expected to have arity one.
     */
    @Test(dependsOnMethods={"findTriviallyHieroPhrase"})
    public void findHieroPhrase() {

        Assert.assertNotNull(vocab);
        Assert.assertNotNull(suffixArray);

        {
            Pattern pattern = new Pattern(vocab, vocab.getID("it"), vocab.getID(SymbolTable.X_STRING));

            assertHierarchicalMatches(pattern, 1, 2, 4);
        }

        {
            Pattern pattern = new Pattern(vocab, vocab.getID("it"), vocab.getID(SymbolTable.X_STRING), vocab.getID("and"));

            assertHierarchicalMatches(pattern, 1, 3, 2);
        }
    }

    /**
     * Checks the suffix array bounds returned for two contiguous phrases.
     */
    @Test
    public void findPhrase() {

        // Same fixture guards as the other tests, so a broken setup fails clearly.
        Assert.assertNotNull(vocab);
        Assert.assertNotNull(suffixArray);

        // Look up phrase "it makes him"
        assertPhraseBounds("it makes him", 0, 0);

        // Look up phrase "and it"
        assertPhraseBounds("and it", 9, 10);
    }

    /**
     * Checks a pattern's own arity and size, looks it up in the suffix array
     * with the shared span limits, and checks the resulting matches.
     *
     * @param pattern pattern to look up
     * @param expectedArity expected arity of both the pattern and the matches
     * @param expectedPatternSize expected value of {@code pattern.size()}
     * @param expectedMatchCount expected value of {@code matches.size()}
     */
    private void assertHierarchicalMatches(Pattern pattern, int expectedArity, int expectedPatternSize, int expectedMatchCount) {

        // Validate the pattern itself before querying the suffix array.
        Assert.assertEquals(pattern.arity(), expectedArity);
        Assert.assertEquals(pattern.size(), expectedPatternSize);

        MatchedHierarchicalPhrases matches =
            suffixArray.createHierarchicalPhrases(pattern, MIN_NONTERMINAL_SPAN, MAX_PHRASE_SPAN);

        Assert.assertNotNull(matches);
        Assert.assertEquals(matches.getPattern(), pattern);
        Assert.assertEquals(matches.arity(), expectedArity);
        Assert.assertEquals(matches.size(), expectedMatchCount);
    }

    /**
     * Looks up a phrase in the suffix array and checks that the result is
     * a two-element array holding the expected start and end indices.
     *
     * @param phraseText space-separated words of the phrase to look up
     * @param expectedSuffixArrayStartIndex expected value of {@code bounds[0]}
     * @param expectedSuffixArrayEndIndex expected value of {@code bounds[1]}
     */
    private void assertPhraseBounds(String phraseText, int expectedSuffixArrayStartIndex, int expectedSuffixArrayEndIndex) {

        Phrase phrase = new BasicPhrase(phraseText, vocab);

        int[] bounds = suffixArray.findPhrase(phrase);

        Assert.assertEquals(bounds.length, 2);
        Assert.assertEquals(bounds[0], expectedSuffixArrayStartIndex);
        Assert.assertEquals(bounds[1], expectedSuffixArrayEndIndex);
    }
}