} else {
out = new PrintStream(outputFile,"UTF-8");
logger.info("Rules will be written to " + outputFile);
}
ParallelCorpusGrammarFactory parallelCorpus = this.getGrammarFactory();
logger.info("Getting symbol table");
SymbolTable sourceVocab = parallelCorpus.getSourceCorpus().getVocabulary();
int lineNumber = 0;
boolean oneTreePerSentence = ! this.keepTree;
logger.info("Will read test sentences from " + testFileName);
Scanner testFileScanner = new Scanner(new File(testFileName), encoding);
logger.info("Read test sentences from " + testFileName);
PrefixTree prefixTree = null;
while (testFileScanner.hasNextLine() && (lineNumber-startingSentence+1)<maxTestSentences) {
String line = testFileScanner.nextLine();
lineNumber++;
if (lineNumber < startingSentence) continue;
int[] words = sourceVocab.getIDs(line);
if (oneTreePerSentence || null==prefixTree)
{
// prefixTree = new PrefixTree(sourceSuffixArray, targetCorpusArray, alignments, sourceSuffixArray.getVocabulary(), lexProbs, ruleExtractor, maxPhraseSpan, maxPhraseLength, maxNonterminals, minNonterminalSpan);
if (logger.isLoggable(Level.INFO)) logger.info("Constructing new prefix tree");
Node.resetNodeCounter();
prefixTree = new PrefixTree(parallelCorpus);
prefixTree.setPrintStream(out);
prefixTree.sentenceInitialX = this.sentenceInitialX;
prefixTree.sentenceFinalX = this.sentenceFinalX;
prefixTree.edgeXMayViolatePhraseSpan = this.edgeXViolates;
}
try {
if (logger.isLoggable(Level.INFO)) logger.info("Processing source line " + lineNumber + ": " + line);
prefixTree.add(words);
} catch (OutOfMemoryError e) {
logger.warning("Out of memory - attempting to clear cache to free space");
parallelCorpus.getSuffixArray().getCachedHierarchicalPhrases().clear();
// targetSuffixArray.getCachedHierarchicalPhrases().clear();
prefixTree = null;
System.gc();
logger.info("Cleared cache and collected garbage. Now attempting to re-construct prefix tree...");
// prefixTree = new PrefixTree(sourceSuffixArray, targetCorpusArray, alignments, sourceSuffixArray.getVocabulary(), lexProbs, ruleExtractor, maxPhraseSpan, maxPhraseLength, maxNonterminals, minNonterminalSpan);