// Log(1 + D_ij)/SQRT( SIGMA(k, D_kj) )
wordList.forEachPair(new ObjectIntProcedure<String>() {
/**
 * Emits one weight record for a single (token, count) pair of the current document.
 *
 * The key is the tuple (WEIGHT, label, token); the value is
 * log(1 + dKJ) / lengthNormalisation.
 * NOTE(review): lengthNormalisation is captured from the enclosing scope;
 * presumably it is the SQRT(SIGMA(k, D_kj)) term of the formula quoted above
 * the loop - confirm against the enclosing method.
 *
 * @param token the token being visited
 * @param dKJ the count associated with the token
 * @return always {@code true}
 *         (NOTE(review): presumably tells forEachPair to keep iterating - confirm)
 * @throws IllegalStateException wrapping any IOException raised by the collector
 */
@Override
public boolean apply(String token, int dKJ) {
// Pure arithmetic, so it can be evaluated before touching the collector.
final double weight = Math.log(1.0 + dKJ) / lengthNormalisation;
try {
StringTuple weightKey = new StringTuple();
weightKey.add(BayesConstants.WEIGHT);
weightKey.add(label);
weightKey.add(token);
DoubleWritable weightValue = new DoubleWritable(weight);
output.collect(weightKey, weightValue);
return true;
} catch (IOException ioe) {
// The callback signature has no checked exceptions, so rethrow unchecked with the cause.
throw new IllegalStateException(ioe);
}
}
});
reporter.setStatus("Bayes Feature Mapper: Document Label: " + label);
// Output Document Frequency per Word per Class
// Corpus Document Frequency (FEATURE_COUNT)
// Corpus Term Frequency (FEATURE_TF)
wordList.forEachPair(new ObjectIntProcedure<String>() {
/**
 * Emits three counting records for a single (token, count) pair, in this order:
 * <ol>
 * <li>(DOCUMENT_FREQUENCY, label, token) with value ONE</li>
 * <li>(FEATURE_COUNT, token) with value ONE</li>
 * <li>(FEATURE_TF, token) with value dKJ</li>
 * </ol>
 *
 * @param token the token being visited
 * @param dKJ the count associated with the token
 * @return always {@code true}
 *         (NOTE(review): presumably tells forEachPair to keep iterating - confirm)
 * @throws IllegalStateException wrapping any IOException raised by the collector
 */
@Override
public boolean apply(String token, int dKJ) {
// Value of the term-frequency record; built up front, emitted last.
final DoubleWritable termFrequency = new DoubleWritable(dKJ);
try {
// 1) document frequency of the token within this label
StringTuple perLabelDfKey = new StringTuple();
perLabelDfKey.add(BayesConstants.DOCUMENT_FREQUENCY);
perLabelDfKey.add(label);
perLabelDfKey.add(token);
output.collect(perLabelDfKey, ONE);

// 2) document frequency of the token, independent of label
StringTuple featureCountKey = new StringTuple();
featureCountKey.add(BayesConstants.FEATURE_COUNT);
featureCountKey.add(token);
output.collect(featureCountKey, ONE);

// 3) term frequency of the token, independent of label
StringTuple featureTfKey = new StringTuple();
featureTfKey.add(BayesConstants.FEATURE_TF);
featureTfKey.add(token);
output.collect(featureTfKey, termFrequency);
return true;
} catch (IOException ioe) {
// The callback signature has no checked exceptions, so rethrow unchecked with the cause.
throw new IllegalStateException(ioe);
}
}
});
// Emit a count of one for this label so that the number of documents per
// class can be calculated.
StringTuple labelCountTuple = new StringTuple();
labelCountTuple.add(BayesConstants.LABEL_COUNT);
labelCountTuple.add(label);
output.collect(labelCountTuple, ONE);
}