}
}
double[] logPhi = null;
HMapII content = value.getContent();
if (content == null) {
System.err.println("Error: content was null for document " + key.toString());
return;
}
// be careful when adjusting this initial value
int gammaUpdateIterationCount = 1;
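// variational E-step for this document: alternate between updating phi (per term and
// topic) and gamma (per topic) until the maximum number of iterations is reached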
do {
likelihoodPhi = 0;
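// overwrite gamma with its digamma in place (updatePhi expects digamma(gamma))
// and seed the gamma accumulator with log(alpha)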
for (int i = 0; i < numberOfTopics; i++) {
tempGamma[i] = Gamma.digamma(tempGamma[i]);
updateLogGamma[i] = Math.log(alpha[i]);
}
itr = content.keySet().iterator();
while (itr.hasNext()) {
int termID = itr.next();
// acquire (or allocate) the log phi vector for this term
if (logPhiTable.containsKey(termID)) {
// reuse existing object
logPhi = logPhiTable.get(termID);
} else {
logPhi = new double[numberOfTopics];
logPhiTable.put(termID, logPhi);
}
int termCounts = content.get(termID);
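// look up the expected log beta row for this term, then update phi across all topics,
// accumulating this term's contribution to the phi part of the lower bound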
tempLogBeta = retrieveBeta(numberOfTopics, expectLogBeta, termID, numberOfTerms);
likelihoodPhi += updatePhi(numberOfTopics, termCounts, tempLogBeta, tempGamma, logPhi,
updateLogGamma);
}
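// exponentiate the accumulated log values to obtain the updated gamma vector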
for (int i = 0; i < numberOfTopics; i++) {
tempGamma[i] = Math.exp(updateLogGamma[i]);
}
gammaUpdateIterationCount++;
// send out a heart-beat message (on roughly 1% of iterations) so the framework sees progress
if (Math.random() < 0.01) {
reporter.incrCounter(ParameterCounter.DUMMY_COUNTER, 1);
}
} while (gammaUpdateIterationCount < maximumGammaIteration);
// compute the sum of the gamma vector and its contribution to the likelihood
double sumGamma = 0;
double likelihoodGamma = 0;
for (int i = 0; i < numberOfTopics; i++) {
sumGamma += tempGamma[i];
likelihoodGamma += Gamma.lngamma(tempGamma[i]);
}
likelihoodGamma -= Gamma.lngamma(sumGamma);
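// document contribution to the variational lower bound: alpha, gamma, and phi terms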
double documentLogLikelihood = likelihoodAlpha + likelihoodGamma + likelihoodPhi;
reporter.incrCounter(ParameterCounter.LOG_LIKELIHOOD,
(long) (-documentLogLikelihood * Settings.DEFAULT_COUNTER_SCALE));
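// accumulate per-topic sufficient statistics for the alpha update:
// digamma(gamma_k) - digamma(sum of gamma)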
double digammaSumGamma = Gamma.digamma(sumGamma);
for (int i = 0; i < numberOfTopics; i++) {
totalAlphaSufficientStatistics[i] += Gamma.digamma(tempGamma[i]) - digammaSumGamma;
}
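// keep a reference to the collector, presumably so cached statistics can be flushed
// after the last document has been processed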
outputCollector = output;
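// in-mapper combining path: aggregate phi statistics for frequent terms locally
// before emitting them to the reducers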
if (!directEmit) {
if (learning) {
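// flush the cached phi statistics when free memory runs low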
if (Runtime.getRuntime().freeMemory() < Settings.MEMORY_THRESHOLD) {
itr = totalPhi.keySet().iterator();
while (itr.hasNext()) {
int termID = itr.next();
logPhi = totalPhi.get(termID);
for (int i = 0; i < numberOfTopics; i++) {
outputValue.set(logPhi[i]);
// a *positive* topic index indicates the output is a phi value
outputKey.set(i + 1, termID);
output.collect(outputKey, outputValue);
}
}
totalPhi.clear();
// for (int i = 0; i < numberOfTopics; i++) {
// a *zero* first index and a *positive* topic index indicate the output is a
// sufficient statistic for alpha updating
// outputKey.set(0, i + 1);
// outputValue.set(totalAlphaSufficientStatistics[i]);
// output.collect(outputKey, outputValue);
// totalAlphaSufficientStatistics[i] = 0;
// }
}
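// merge this document's phi values into the cache for frequent terms;
// emit phi for all other terms immediately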
itr = content.keySet().iterator();
while (itr.hasNext()) {
int termID = itr.next();
if (termID < Settings.TOP_WORDS_FOR_CACHING) {
if (totalPhi.containsKey(termID)) {
logPhi = logPhiTable.get(termID);
tempLogBeta = totalPhi.get(termID);
for (int i = 0; i < numberOfTopics; i++) {
tempLogBeta[i] = LogMath.add(logPhi[i], tempLogBeta[i]);
}
} else {
// store a copy: the array held in logPhiTable is reused for later documents, so
// caching the reference directly would corrupt the accumulated statistics
totalPhi.put(termID, logPhiTable.get(termID).clone());
}
} else {
logPhi = logPhiTable.get(termID);
for (int i = 0; i < numberOfTopics; i++) {
outputValue.set(logPhi[i]);
// a *positive* topic index indicates the output is a phi value
outputKey.set(i + 1, termID);
output.collect(outputKey, outputValue);
}
}
}
}
} else {
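// direct emission: phi values are emitted per document without in-mapper aggregation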
if (learning) {
itr = content.keySet().iterator();
while (itr.hasNext()) {
int termID = itr.next();
// retrieve and emit only the phi values of the current document
logPhi = logPhiTable.get(termID);
for (int i = 0; i < numberOfTopics; i++) {