// The three textual forms of the candidate used below: its name, its best
// full form and its title.
String name = candidate.getName();
String original = candidate.getBestFullForm();
String title = candidate.getTitle();

// TFxIDF ingredients: the count stored for this name in the global
// dictionary (0 when the name has no entry) ...
Counter counterGlobal = (Counter) globalDictionary.get(name);
double globalVal = 0;
if (counterGlobal != null) {
    // While training, the stored count is lowered by one.
    // NOTE(review): presumably this removes the current document's own
    // contribution to the count -- confirm against where the dictionary is built.
    globalVal = counterGlobal.value() - (training ? 1.0 : 0.0);
}

// ... and the candidate's term frequency, combined with a smoothed idf
// (one is added to both the count and the number of documents).
double tf = candidate.getTermFrequency();
double idf = -Math.log((globalVal + 1) / (numDocs + 1.0));

// Debug output, disabled:
// System.out.println(candidate + " count: " + candidate.getFrequency()
//     + " tf: " + tf + " glob val: " + globalVal
//     + " numDocs: " + numDocs + " idf: " + idf);

// Basic features: the TFxIDF product and the first-occurrence value.
if (useBasicFeatures) {
    newInst[tfidfIndex] = tf * idf;
    newInst[firstOccurIndex] = candidate.getFirstOccurrence();
}

// Frequency features: tf and idf stored as separate values.
if (useFrequencyFeatures) {
    newInst[tfIndex] = tf;
    newInst[idfIndex] = idf;
}

// Position features: the last-occurrence value and the spread.
if (usePositionsFeatures) {
    newInst[lastOccurIndex] = candidate.getLastOccurrence();
    newInst[spreadOccurIndex] = candidate.getSpread();
}
// Keyphraseness feature: the count stored for this candidate in the
// keyphrase dictionary, or 0 when the dictionary has no entry for it.
if (useKeyphrasenessFeature) {
    // For the "wikipedia" vocabulary the lookup key is the candidate's title.
    // Note that this reassigns `name`, so any later code that reads `name`
    // sees the title as well. The constant-first comparison keeps an unset
    // vocabulary name from throwing.
    if ("wikipedia".equals(vocabularyName)) {
        name = title;
    }

    Counter domainKeyphr = keyphraseDictionary.get(name);
    if (domainKeyphr == null) {
        // No entry for this key. This must be checked before the training
        // branch: previously that branch dereferenced the counter
        // unconditionally and threw a NullPointerException whenever
        // hashKeyphrases contained a key the dictionary did not.
        newInst[domainKeyphIndex] = 0;
    } else if (training && hashKeyphrases != null
            && hashKeyphrases.containsKey(name)) {
        // While training, a key present in hashKeyphrases has its stored
        // count lowered by one.
        // NOTE(review): presumably this discounts the current document's own
        // keyphrase assignment -- confirm against where the dictionary is built.
        newInst[domainKeyphIndex] = domainKeyphr.value() - 1;
    } else {
        newInst[domainKeyphIndex] = domainKeyphr.value();
    }
}