// Load the ARPA file named by args[0]; it supplies both the symbol table and
// the input from which the trie language model is built.
logger.info("Constructing ARPA file");
ArpaFile arpaFile = new ArpaFile(args[0]);
logger.info("Getting symbol table");
SymbolTable vocab = arpaFile.getVocab();
logger.info("Constructing TrieLM");
TrieLM lm = new TrieLM(arpaFile);

// args[2] is the n-gram order. parseInt yields the primitive directly instead
// of boxing to Integer and unboxing again; a malformed value still raises
// NumberFormatException.
int n = Integer.parseInt(args[2]);
logger.info("N-gram order will be " + n);

// args[1] is the text file that is scanned below.
// NOTE(review): no close() for this Scanner is visible in this section --
// confirm it is closed once scanning is finished.
Scanner scanner = new Scanner(new File(args[1]));

// Scratch lists that are cleared and refilled while scanning:
// wordList holds the words of the current line, window the current n-gram.
LinkedList<String> wordList = new LinkedList<String>();
LinkedList<String> window = new LinkedList<String>();
logger.info("Starting to scan " + args[1]);
while (scanner.hasNext()) {
// Read the next raw line and echo it to the log.
// NOTE(review): the enclosing loop condition tests scanner.hasNext() (token
// based) while the read here is nextLine(); confirm hasNextLine() was not the
// intended check.
logger.info("Getting next line...");
String line = scanner.nextLine();
logger.info("Line: " + line);

// Tokenize the line and rebuild wordList as: <s> token... </s>
String[] words = Regex.spaces.split(line);
wordList.clear();
wordList.addLast("<s>");
for (int idx = 0; idx < words.length; idx++) {
    wordList.addLast(words[idx]);
}
wordList.addLast("</s>");
// Look up the vocabulary ID of every entry in wordList, in order.
// The list is iterated directly: wordList is a LinkedList, so the positional
// get(i) used previously had to walk the list on every call.
ArrayList<Integer> sentence = new ArrayList<Integer>(wordList.size());
for (String word : wordList) {
    sentence.add(vocab.getID(word));
}
while (! wordList.isEmpty()) {
// Refill the window with the words at the front of wordList, stopping once it
// holds n of them (it ends up shorter when fewer than n words remain).
window.clear();
for (String candidate : wordList) {
    if (window.size() >= n) {
        break;
    }
    window.add(candidate);
}
// Drop the head word so the next pass starts one position further along.
wordList.removeFirst();
{
    // Convert the window's words to vocabulary IDs, preserving their order.
    int[] windowIDs = new int[window.size()];
    int slot = 0;
    for (String entry : window) {
        windowIDs[slot++] = vocab.getID(entry);
    }
    // The order argument is always n, even when the window holds fewer than
    // n words.
    logger.info("logProb " + window + " = " + lm.ngramLogProbability(windowIDs, n));
}