private double[] featVals(String id, FastVector phraseInfo,
boolean training, HashMap<String,Counter> hashKeysEval,
HashMap<String,Counter> hashKeyphrases, int length, HashMap<String,FastVector> hash) {
// Compute feature values
Counter counterLocal = (Counter)phraseInfo.elementAt(1);
double[] newInst = new double[m_NumFeatures + 1];
// Compute TFxIDF
Counter counterGlobal = (Counter)m_Dictionary.get(id);
double localVal = counterLocal.value(), globalVal = 0;
if (counterGlobal != null) {
globalVal = counterGlobal.value();
if (training) {
globalVal = globalVal - 1;
}
}
// Just divide by length to get an approximation of the probability
// that a phrase in the document is our phrase
// newInst[m_TfidfIndex] = (localVal / ((double)length));
newInst[m_TfidfIndex] = (localVal / ((double)length)) *
(-Math.log((globalVal + 1)/ ((double)m_NumDocs + 1)));
// Compute first occurrence
Counter counterFirst = (Counter)phraseInfo.elementAt(0);
newInst[m_FirstOccurIndex] = (double)counterFirst.value() /
(double)length;
// Is keyphrase frequency attribute being used?
if (m_KFused) {
Counter keyphraseC = (Counter)m_KeyphraseDictionary.get(id);
if ((training) && (hashKeyphrases != null) &&
(hashKeyphrases.containsKey(id))) {
newInst[m_KeyFreqIndex] = keyphraseC.value() - 1;
} else {
if (keyphraseC != null) {
newInst[m_KeyFreqIndex] = keyphraseC.value();
} else {
newInst[m_KeyFreqIndex] = 0;
}
}
}