sLogger = logger;
}
// sLogger.setLevel(Level.DEBUG);
HMapSFW v = new HMapSFW();
float normalization=0;
for(edu.umd.cloud9.util.map.MapIF.Entry entry : tfTable.entrySet()){
// retrieve term string, tf and df
String eTerm = eVocab.get(entry.getKey());
float tf = entry.getValue();
int eId = dict.getId(eTerm);
if(eId < 1){ //OOV
continue;
}
int df = dfTable.getDf(eId);
// compute score via scoring model
float score = ((Bm25) scoringModel).computeDocumentWeight(tf, df, docLen);
if(df<1){
sLogger.warn("Suspicious DF WARNING = "+eTerm+" "+tf+" "+df+" "+score);
}
sLogger.debug(eTerm+" "+tf+" "+df+" "+score);
if(score>0){
v.put(eTerm, score);
if(isNormalize){
normalization+=Math.pow(score, 2);
}
}
}
// length-normalize doc vector
if(isNormalize){
normalization = (float) Math.sqrt(normalization);
for(Entry<String> e : v.entrySet()){
v.put(e.getKey(), e.getValue()/normalization);
}
}
return v;
}