public void inferenceWithTheta(int maxIteration, InstanceList theta){
this.test = new ArrayList<Topication>(); //initialize test
//initial sampling on testdata
ArrayList<LabelSequence> topicSequences = new ArrayList<LabelSequence>();
for (Instance instance : testing) {
LabelSequence topicSequence = new LabelSequence(topicAlphabet, new int[instanceLength(instance)]);
if (false) {
// This method not yet obeying its last "false" argument, and must be for this to work
//sampleTopicsForOneDoc((FeatureSequence)instance.getData(), topicSequence, false, false);
} else {
Randoms r = new Randoms();
FeatureSequence fs = (FeatureSequence) instance.getData();
int[] topics = topicSequence.getFeatures();
for (int i = 0; i < topics.length; i++) {
int type = fs.getIndexAtPosition(i);
topics[i] = r.nextInt(numTopics);
}
}
topicSequences.add (topicSequence);
}
//construct test
assert (testing.size() == topicSequences.size());
for (int i = 0; i < testing.size(); i++) {
Topication t = new Topication (testing.get(i), this, topicSequences.get(i));
test.add (t);
// Include sufficient statistics for this one doc
// add count on new data to n[k][w] and n[k][*]
// pay attention to unseen words
FeatureSequence tokenSequence = (FeatureSequence) t.instance.getData();
LabelSequence topicSequence = t.topicSequence;
for (int pi = 0; pi < topicSequence.getLength(); pi++) {
int topic = topicSequence.getIndexAtPosition(pi);
int type = tokenSequence.getIndexAtPosition(pi);
if(topic != -1) // type seen in training
{
typeTopicCounts[type].adjustOrPutValue(topic, 1, 1);
tokensPerTopic[topic]++;
}
}
}
long startTime = System.currentTimeMillis();
//loop
int iter = 0;
for ( ; iter <= maxIteration; iter++) {
if(iter%100==0)
{
System.out.print("Iteration: " + iter);
System.out.println();
}
int numDocs = test.size(); // TODO
for (int di = 0; di < numDocs; di++) {
FeatureVector fvTheta = (FeatureVector) theta.get(di).getData();
double[] topicDistribution = fvTheta.getValues();
FeatureSequence tokenSequence = (FeatureSequence) test.get(di).instance.getData();
LabelSequence topicSequence = test.get(di).topicSequence;
sampleTopicsForOneDocWithTheta (tokenSequence, topicSequence, topicDistribution);
}
}
long seconds = Math.round((System.currentTimeMillis() - startTime)/1000.0);