// Output Length Normalized + TF Transformed Frequency per Word per Class
// Log(1 + D_ij)/SQRT( SIGMA(k, D_kj) )
for (Map.Entry<String, int[]> entry : wordList.entrySet()) {
// the map key is the word; the emitted key is the tuple (WEIGHT, label, word)
String token = entry.getKey();
StringTuple tuple = new StringTuple();
tuple.add(BayesConstants.WEIGHT);
tuple.add(label);
tuple.add(token);
DoubleWritable f = new DoubleWritable(Math.log(1.0 + entry.getValue()[0]) / lengthNormalisation);
output.collect(tuple, f);
}
reporter.setStatus("Bayes Feature Mapper: Document Label: " + label);
// Output Document Frequency per Word per Class
for (String token : wordList.keySet()) {
// emitted keys are (DOCUMENT_FREQUENCY, label, word) and (FEATURE_COUNT, word)
StringTuple dfTuple = new StringTuple();
dfTuple.add(BayesConstants.DOCUMENT_FREQUENCY);
dfTuple.add(label);
dfTuple.add(token);
output.collect(dfTuple, one);
StringTuple tokenCountTuple = new StringTuple();
tokenCountTuple.add(BayesConstants.FEATURE_COUNT);
tokenCountTuple.add(token);
output.collect(tokenCountTuple, one);
}
// Emit a count of one for this label so that the number of documents per
// class can be calculated
StringTuple labelCountTuple = new StringTuple();
labelCountTuple.add(BayesConstants.LABEL_COUNT);
labelCountTuple.add(label);
output.collect(labelCountTuple, one);
}