// use the System properties in case the user specified them as
// -Dprop=<val> to the JVM directly.
Properties props = setupProperties();
FixedDurationTemporalRandomIndexing fdTri =
new FixedDurationTemporalRandomIndexing(props);
// The user may also restrict the set of words for which semantics are
// computed. If so, read that word list and set it as the semantic
// filter so that semantics are computed for only those words.
if (argOptions.hasOption("semanticFilter")) {
String fileName = argOptions.getStringOption("semanticFilter");
BufferedReader br = new BufferedReader(new FileReader(fileName));
Set<String> wordsToCompute = new HashSet<String>();
for (String line = null; (line = br.readLine()) != null; ) {
for (String s : line.split("\\s+")) {
wordsToCompute.add(s);
}
}
LOGGER.info("computing semantics for only " + wordsToCompute.size()
+ " words");
fdTri.setSemanticFilter(wordsToCompute);
}
// Load the word-to-IndexVector mappings if they were specified.
if (argOptions.hasOption("loadVectors")) {
String fileName = argOptions.getStringOption("loadVectors");
LOGGER.info("loading index vectors from " + fileName);
Map<String,TernaryVector> wordToIndexVector =
IndexVectorUtil.load(new File(fileName));
fdTri.setWordToIndexVector(wordToIndexVector);
}
String formatName = (argOptions.hasOption("outputFormat"))
? argOptions.getStringOption("outputFormat").toUpperCase()
: "TEXT";
format = SSpaceFormat.valueOf(formatName.toUpperCase());
parseDocumentsMultiThreaded(fdTri, docIter, timeSpan, numThreads);
long startTime = System.currentTimeMillis();
fdTri.processSpace(props);
long endTime = System.currentTimeMillis();
LOGGER.info(String.format("processed space in %.3f seconds%n",
((endTime - startTime) / 1000d)));
// save the word-to-IndexVector mapping if specified to do so
if (argOptions.hasOption("saveVectors")) {
String fileName = argOptions.getStringOption("saveVectors");
LOGGER.info("saving index vectors to " + fileName);
IndexVectorUtil.save(fdTri.getWordToIndexVector(),
new File(fileName));
}
}