  for ( TermInfo ti : phraseInfo.getTermsInfos() ) {
    // Only the first occurrence of each distinct term contributes to the phrase boost,
    // but every occurrence counts towards the fragment length.
    if ( distinctTerms.add( ti.getText() ) ) {
      phraseTotalBoost += ti.getWeight() * phraseInfo.getBoost();
    }
    length++;
  }
  tempSubInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(),
      phraseInfo.getSeqnum(), phraseTotalBoost ) );
}
// We want the number of terms per fragment (length) to be reflected in the weight. Otherwise a
// one-word query would give every fragment the same weight, regardless of how many words it
// contains. To keep fragments that simply contain many words from "outranking" more relevant
// fragments, we dampen the length with a square-root normalization.
float norm = length * ( 1 / (float)Math.sqrt( length ) ); // equivalent to (float)Math.sqrt( length )
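// For illustration: since norm == sqrt(length), a fragment with 1 matching term occurrence is
// scaled by 1.0, one with 4 occurrences by 2.0, and one with 16 by 4.0, so the weight still
// grows with term count, but sublinearly.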
float totalBoost = 0;
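// Scale each phrase's boost by the fragment-level length norm and accumulate the fragment's total boost.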
for ( SubInfo tempSubInfo : tempSubInfos ) {
  float subInfoBoost = tempSubInfo.getBoost() * norm;
  realSubInfos.add( new SubInfo( tempSubInfo.getText(), tempSubInfo.getTermsOffsets(),
      tempSubInfo.getSeqnum(), subInfoBoost ) );
  totalBoost += subInfoBoost;
}
getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, realSubInfos, totalBoost ) );
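// A minimal usage sketch, assuming the code above belongs to a WeightedFieldFragList consumed by
// Lucene's FastVectorHighlighter via a WeightedFragListBuilder (an assumption based on the class
// names used above, not confirmed by this excerpt):
//
//   FastVectorHighlighter highlighter = new FastVectorHighlighter(
//       true, true, new WeightedFragListBuilder(), new ScoreOrderFragmentsBuilder() );
//   FieldQuery fieldQuery = highlighter.getFieldQuery( query );
//   String[] bestFragments = highlighter.getBestFragments(
//       fieldQuery, indexReader, docId, "content", 100, 3 );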