//////////////////////////////////
// Source language corpus array //
//////////////////////////////////
// Announce which kind of source corpus is about to be built, then build it.
// The log call comes first so the message precedes the construction work.
if (logger.isLoggable(Level.INFO)) {
    logger.info(binaryCorpus
            ? "Constructing memory mapped source language corpus array."
            : "Constructing source language corpus array.");
}
// With binaryCorpus set, a MemoryMappedCorpusArray is created over sourceFileName;
// otherwise SuffixArrayFactory builds the corpus from sourceFileName using the
// supplied word and sentence counts.
final Corpus sourceCorpusArray = binaryCorpus
        ? new MemoryMappedCorpusArray(commonVocab, sourceFileName)
        : SuffixArrayFactory.createCorpusArray(
                sourceFileName, commonVocab, numSourceWords, numSourceSentences);
//////////////////////////////////
// Source language suffix array //
//////////////////////////////////
// Name of the file handed to MemoryMappedSuffixArray when binaryCorpus is set.
String binarySourceSuffixArrayFileName = sourceSuffixesFileName;
// Log the chosen construction route before doing the work.
if (logger.isLoggable(Level.INFO)) {
    logger.info(binaryCorpus
            ? "Constructing source language suffix array from binary file " + binarySourceSuffixArrayFileName
            : "Constructing source language suffix array from source corpus.");
}
// With binaryCorpus set, the suffix array is a MemoryMappedSuffixArray over the named
// file; otherwise SuffixArrayFactory creates it from the source corpus array.
// Both routes receive the same cacheSize.
Suffixes sourceSuffixArray = binaryCorpus
        ? new MemoryMappedSuffixArray(binarySourceSuffixArrayFileName, sourceCorpusArray, cacheSize)
        : SuffixArrayFactory.createSuffixArray(sourceCorpusArray, cacheSize);
//////////////////////////////////
// Target language corpus array //
//////////////////////////////////
// Announce which kind of target corpus is about to be built, then build it.
// The log call comes first so the message precedes the construction work.
if (logger.isLoggable(Level.INFO)) {
    logger.info(binaryCorpus
            ? "Constructing memory mapped target language corpus array."
            : "Constructing target language corpus array.");
}
// With binaryCorpus set, a MemoryMappedCorpusArray is created over targetFileName;
// otherwise SuffixArrayFactory builds the corpus from targetFileName using the
// supplied word and sentence counts.
final Corpus targetCorpusArray = binaryCorpus
        ? new MemoryMappedCorpusArray(commonVocab, targetFileName)
        : SuffixArrayFactory.createCorpusArray(
                targetFileName, commonVocab, numTargetWords, numTargetSentences);
//////////////////////////////////
// Target language suffix array //
//////////////////////////////////
// Name of the file handed to MemoryMappedSuffixArray when binaryCorpus is set.
String binaryTargetSuffixArrayFileName = targetSuffixesFileName;
// Log the chosen construction route before doing the work.
if (logger.isLoggable(Level.INFO)) {
    logger.info(binaryCorpus
            ? "Constructing target language suffix array from binary file " + binaryTargetSuffixArrayFileName
            : "Constructing target language suffix array from target corpus.");
}
// With binaryCorpus set, the suffix array is a MemoryMappedSuffixArray over the named
// file; otherwise SuffixArrayFactory creates it from the target corpus array.
// Both routes receive the same cacheSize.
Suffixes targetSuffixArray = binaryCorpus
        ? new MemoryMappedSuffixArray(binaryTargetSuffixArrayFileName, targetCorpusArray, cacheSize)
        : SuffixArrayFactory.createSuffixArray(targetCorpusArray, cacheSize);
// Sanity check: the two corpora must report the same number of sentences.
// Once the check passes, trainingSize is the sentence count shared by both sides.
int trainingSize = sourceCorpusArray.getNumSentences();
if (targetCorpusArray.getNumSentences() != trainingSize) {
    // Mismatched counts are treated as fatal; abort with an unchecked exception.
    throw new RuntimeException(
            "Source and target corpora have different number of sentences. This is bad.");
}
/////////////////////