/**
 * Builds a weighted term vector for one document from its term-frequency table.
 *
 * <p>For every term in {@code tfTable} whose document frequency (looked up in
 * {@code globalStats}) is at least 1, a weight is computed with
 * {@code Bm25.computeDocumentWeight(tf, df, docLen)}. Only strictly positive
 * weights are stored. When {@code isNormalize} is set, the stored weights are
 * divided by the Euclidean (L2) norm of the vector.
 *
 * @param docLen       document length passed through to the scoring model
 * @param tfTable      term -> term frequency for the document
 * @param eVocabSrc    not used by this method; kept for interface compatibility
 * @param scoringModel scoring model; it is cast to {@code Bm25}, so any other
 *                     implementation fails with a {@code ClassCastException}
 *                     as soon as the first in-vocabulary term is scored
 * @param globalStats  source of document frequencies; terms with df &lt; 1 are
 *                     treated as out-of-vocabulary and skipped
 * @param isNormalize  whether to length-normalize the resulting vector
 * @param sLogger      not used by this method; kept for interface compatibility
 * @return a new term -> weight map (empty if no term received a positive weight)
 */
public static HMapSFW createTermDocVector(int docLen, HMapSIW tfTable, Vocab eVocabSrc, ScoringModel scoringModel, PrefixEncodedGlobalStats globalStats, boolean isNormalize, Logger sLogger) {
  HMapSFW v = new HMapSFW();

  // Accumulate the squared norm in double: narrowing to float on every term
  // loses precision and lets very small weights underflow to a zero norm.
  double sumOfSquares = 0.0;

  for (edu.umd.cloud9.util.map.MapKI.Entry<String> entry : tfTable.entrySet()) {
    // retrieve term string, tf and df
    String eTerm = entry.getKey();
    int tf = entry.getValue();
    int df = globalStats.getDF(eTerm);
    if (df < 1) { // OOV
      continue;
    }

    // compute score via scoring model
    float score = ((Bm25) scoringModel).computeDocumentWeight(tf, df, docLen);
    if (score > 0) {
      v.put(eTerm, score);
      if (isNormalize) {
        sumOfSquares += (double) score * score;
      }
    }
  }

  // length-normalize doc vector; the guard keeps us from ever dividing by zero
  if (isNormalize && sumOfSquares > 0.0) {
    double norm = Math.sqrt(sumOfSquares);
    // Only values of existing keys are overwritten here; no keys are added or removed.
    for (Entry<String> e : v.entrySet()) {
      v.put(e.getKey(), (float) (e.getValue() / norm));
    }
  }
  return v;
}