// Process the command-line options.
CommandOption.setSummary (TopicTrain.class,
    "A tool for training and test streamline topic model.");
CommandOption.process (TopicTrain.class, args);

// The model is built from the training input and every later step
// dereferences it, so fail fast with a clear message instead of hitting a
// NullPointerException further down when no input file was supplied.
if (inputFile.value == null) {
    throw new IllegalArgumentException(
        "No training input file was specified; a topic model cannot be trained without one.");
}

// Load the training instances and build the model from them.
InstanceList instances = InstanceList.load (new File(inputFile.value));
System.out.println ("Training Data loaded.");
LDAStream lda = new LDAStream(numTopics.value, alpha.value, beta.value);
lda.addInstances(instances);

// Testing instances are optional; they are attached only when a test file is given.
if (testFile.value != null) {
    InstanceList testing = InstanceList.load(new File(testFile.value));
    lda.setTestingInstances(testing);
}

// Configure the run. An interval (or seed) of 0 means "not requested",
// in which case the corresponding setter is skipped.
lda.setTopicDisplay(showTopicsInterval.value, topWords.value);
if (outputModelInterval.value != 0) {
    lda.setModelOutput(outputModelInterval.value, outputModelFilename.value);
}
lda.setNumIterations(numIterations.value);
if (randomSeed.value != 0) {
    lda.setRandomSeed(randomSeed.value);
}
if (outputStateInterval.value != 0) {
    lda.setSaveState(outputStateInterval.value, stateFile.value);
}

lda.estimate();

// Save the model; typeTopicCounts and tokensPerTopic are needed for
// empirical likelihood. The model is written next to the input file,
// with ".model" appended to the input path.
lda.write(new File (inputFile.value + ".model"));

// Optional reports: each is produced only when its output file was given.
if (topicKeysFile.value != null) {
    lda.printTopWords(new File(topicKeysFile.value), topWords.value, false);
}
if (topicTypesFile.value != null) {
    lda.printPhi(new File(topicTypesFile.value), 1e-4);
}
if (stateFile.value != null) {
    lda.printState (lda.getData(), new File(stateFile.value));
}
if (docTopicsFile.value != null) {
    PrintWriter docTopicsWriter = new PrintWriter (new FileWriter (new File(docTopicsFile.value)));
    try {
        lda.printDocumentTopics(lda.getData(), docTopicsWriter,
            docTopicsThreshold.value, docTopicsMax.value);
    } finally {
        // This code created the writer, so it closes it here; that flushes
        // buffered output and releases the file handle. Closing an already
        // closed PrintWriter is a no-op, so this is safe even if
        // printDocumentTopics closes the writer itself (not visible here).
        docTopicsWriter.close();
    }
}