public boolean modifyScores(Index index, MatchingQueryTerms query, ResultSet resultSet) {
// The rest of the method applies proximity weighting as outlined
// by Yves Rasolofo for queries of 1 < length < 5.
//TODO replace ApplicationSetup.BLOCK_QUERYING
InvertedIndex invertedIndex = index.getInvertedIndex();
if (invertedIndex instanceof BlockInvertedIndex &&
query.length() > 1 && query.length() < 5) {
Lexicon<String> lexicon = index.getLexicon();
int[] docids = resultSet.getDocids();
double[] scores = resultSet.getScores();
//check when the application of proximity started.
long proximityStart = System.currentTimeMillis();
// the constants used by the algorithm
double N = index.getCollectionStatistics().getNumberOfDocuments();
int blockSize = ApplicationSetup.BLOCK_SIZE;
//The okapi constants for use with the proximity algorithm
double k = 2.0d;
double k1 = 1.2d;
double k3 = 1000d;
double b = 0.9d;
int topDocs = 100;
double avdl =
1.0D * index.getCollectionStatistics().getAverageDocumentLength();
double K = k * ((1 - b) + (b * (1 / avdl)));
// an array holding the proximity weight for each docid
// corresponds to the scores array
double[] TPRSV = new double[scores.length];
//arrays to reference the first term's block information
int[][] term1Pointers;
int[] term1blockfreqs;
int[] term1blockids;
int[] term1docids;
//int[] term1freqs;
//term2Pointers holds the information for the second term of each pair;
//each of the other arrays is used to reduce the number of references
int[][] term2Pointers;
int[] term2docids;
//int[] term2termfreqs;
int[] term2blockfreqs;
int[] term2blockids;
// calculate all the possible combinations of query term pairs
ArrayList<String[]> queryTermPairs = generateQueryTermPairs(query);
//Iterator termPairIterator<ArrayList<String>> = queryTermPairs.iterator();
// for all term pairs
for (String[] queryTermPair : queryTermPairs)
{
final String term1 = queryTermPair[0];
final String term2 = queryTermPair[1];
//we seek the query term in the lexicon
LexiconEntry tEntry1 = lexicon.getLexiconEntry(term1);
if (tEntry1 == null)//and if it is not found, we continue with the next term pair
continue;
//double term1KeyFrequency = query.getTermWeight(term1);
double term1DocumentFrequency = (double)tEntry1.getDocumentFrequency();
//we seek the 2nd query term in the lexicon
LexiconEntry tEntry2 = lexicon.getLexiconEntry(term2);
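//and if it is not found, we continue with the next term pair
//NOTE(review): the condition below re-tests tEntry1, not tEntry2, so a missing
//second term is not skipped here and tEntry2 is dereferenced unchecked afterwards.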
if (tEntry1 == null)
continue;
//double term2KeyFrequency = query.getTermWeight(term2);
double term2DocumentFrequency = (double)tEntry2.getDocumentFrequency();
term1Pointers = invertedIndex.getDocuments(tEntry1);
term1docids = term1Pointers[0];
term1blockfreqs = term1Pointers[2];
term1blockids = term1Pointers[3];
term2Pointers = invertedIndex.getDocuments(tEntry2);
term2docids = term2Pointers[0];
term2blockfreqs = term2Pointers[2];
term2blockids = term2Pointers[3];
int length1 = term1docids.length;
int length2 = term2docids.length;