// Remember, token positions are numbered starting from one...
if (positions.containsKey(term)) {
positions.get(term).add(i + 1);
} else {
ArrayListOfInts l = new ArrayListOfInts();
l.add(i + 1);
positions.put(term, l);
}
}
int doclength = 0;
Iterator<Map.Entry<String, ArrayListOfInts>> it = positions.entrySet().iterator();
Map.Entry<String, ArrayListOfInts> e;
ArrayListOfInts positionsList;
while (it.hasNext()) {
e = it.next();
positionsList = e.getValue();
// We're storing tfs as shorts, so check for overflow...
if (positionsList.size() >= TF_CUT) {
// There are a few ways to handle this... If we're getting such a high tf, then it most
// likely means that this is a junk doc.
LOG.warn("Error: tf of " + e.getValue()
+ " will overflow max short value. docno=" + doc.getDocid() + ", term="
+ e.getKey());
it.remove();
} else {
positionsList.trimToSize();
doclength += positionsList.size();
}
}
if ( positions.size() == 0 ) {
return positions;
}
positions.put("", new ArrayListOfInts(new int[] { doclength }));
return positions;
}