instances.addThruPipe(new DataBagSourceIterator(documents));
// Create a model with numTopics, alpha_t = 0.01, beta_w = 0.01
// Note that the first parameter is passed as the sum over topics, while
// the second is the parameter for a single dimension of the Dirichlet prior.
ParallelTopicModel model = new ParallelTopicModel(numTopics, 1.0, 0.01);
model.addInstances(instances);
model.setNumThreads(1); // Important, since this is being run in the reduce, just use one thread
model.setTopicDisplay(0,0);
model.setNumIterations(2000);
model.estimate();
// Get the results
Alphabet dataAlphabet = instances.getDataAlphabet();
ArrayList<TopicAssignment> assignments = model.getData();
// Convert the results into comprehensible topics
for (int topicNum = 0; topicNum < model.getNumTopics(); topicNum++) {
TreeSet<IDSorter> sortedWords = model.getSortedWords().get(topicNum);
Iterator<IDSorter> iterator = sortedWords.iterator();
DataBag topic = BagFactory.getInstance().newDefaultBag();
// Get the set of keywords with weights for this topic and add them as tuples