private void sampleTopicsForOneTestDoc(FeatureSequence tokenSequence,
LabelSequence topicSequence) {
// TODO Auto-generated method stub
int[] oneDocTopics = topicSequence.getFeatures();
TIntIntHashMap currentTypeTopicCounts;
int type, oldTopic, newTopic;
double tw;
double[] topicWeights = new double[numTopics];
double topicWeightsSum;
int docLength = tokenSequence.getLength();
// populate topic counts
int[] localTopicCounts = new int[numTopics];
for (int ti = 0; ti < numTopics; ti++){
localTopicCounts[ti] = 0;
}
for (int position = 0; position < docLength; position++) {
if(oneDocTopics[position] != -1) {
localTopicCounts[oneDocTopics[position]] ++;
}
}
// Iterate over the positions (words) in the document
for (int si = 0; si < docLength; si++) {
type = tokenSequence.getIndexAtPosition(si);
oldTopic = oneDocTopics[si];
if(oldTopic == -1) {
continue;
}
// Remove this token from all counts
localTopicCounts[oldTopic] --;
currentTypeTopicCounts = typeTopicCounts[type];
assert(currentTypeTopicCounts.get(oldTopic) >= 0);
if (currentTypeTopicCounts.get(oldTopic) == 1) {
currentTypeTopicCounts.remove(oldTopic);
}
else {
currentTypeTopicCounts.adjustValue(oldTopic, -1);
}
tokensPerTopic[oldTopic]--;
// Build a distribution over topics for this token
Arrays.fill (topicWeights, 0.0);
topicWeightsSum = 0;
for (int ti = 0; ti < numTopics; ti++) {
tw = ((currentTypeTopicCounts.get(ti) + beta) / (tokensPerTopic[ti] + betaSum))
* ((localTopicCounts[ti] + alpha[ti])); // (/docLen-1+tAlpha); is constant across all topics
topicWeightsSum += tw;
topicWeights[ti] = tw;
}
// Sample a topic assignment from this distribution
newTopic = random.nextDiscrete (topicWeights, topicWeightsSum);
// Put that new topic into the counts
oneDocTopics[si] = newTopic;
currentTypeTopicCounts.adjustOrPutValue(newTopic, 1, 1);
localTopicCounts[newTopic] ++;
tokensPerTopic[newTopic]++;
}
}