public class DeltaTFIDFComputerUni extends FeatureValueComputer {
@Override
public LinkedHashMap<Integer, Double> computeFeatures(DataUnit example, ArrayList<Feature> features, CorpusHolder corpus) {
LinkedHashMap<Integer, Double> featuresValues = new LinkedHashMap<Integer, Double>();
UniGram tempUnigram;
// The equation used to calculate the feature value:
// V = Count * log( (NumPositiveExamples * Count of term in negative corpus) / (NumNegativeExamples * Count of term in positive corpus) )
double featureValue;
int countInDoc = 0;
double countInPos;
double countInNeg;
double posToNegRatio = (double) corpus.getNumPositives() / (double) corpus.getNumNegatives();
HashMap<String, Integer> wordsWithCount = TextSplitter.getWordsCount(example.getDataBody());
/*while (wordsIterator.hasNext()) {
tempEnt = wordsIterator.next();
featIndex = uniGrams.indexOf(tempEnt.getKey());
if (featIndex >= 0) {
tempUnigram = (UniGram) features.get(featIndex);
countInDoc = tempEnt.getValue();
countInPos = tempUnigram.getPositiveCount() + 0.0001;
countInNeg = tempUnigram.getNegativeCount() + 0.0001;
featureValue = countInDoc * (Math.log((posToNegRatio * (countInNeg / countInPos))));
featuresValues.put(featIndex, featureValue);
}
}*/
for (int i = 0; i < features.size(); i++) {
tempUnigram = (UniGram) features.get(i);
if (wordsWithCount.containsKey(((UniGram)features.get(i)).getUniGram())) {
countInDoc = wordsWithCount.get(((UniGram)features.get(i)).getUniGram());
countInPos = tempUnigram.getPositiveCount() + 0.0001;
countInNeg = tempUnigram.getNegativeCount() + 0.0001;
featureValue = countInDoc * (Math.log((posToNegRatio * (countInNeg / countInPos))));
featuresValues.put(i, 1.0);
}
}
return featuresValues;