  // TODO: this function is too big and could use some refactoring, but
  // we also need a facet cache, and a refactoring of SimpleFacets itself,
  // instead of trying to pass all the various params around.
  FieldType ft = searcher.getSchema().getFieldType(fieldName);
  NamedList res = new NamedList();
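
  // FieldCache.StringIndex gives two parallel views of the field: lookup[]
  // holds every distinct term in sorted (index) order, with lookup[0]==null
  // reserved for documents that have no value, and order[doc] is the
  // position of that doc's term within lookup[].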
  FieldCache.StringIndex si = FieldCache.DEFAULT.getStringIndex(searcher.getReader(), fieldName);
  final String[] terms = si.lookup;
  final int[] termNum = si.order;

  // treat an empty prefix exactly like no prefix at all
  if (prefix!=null && prefix.length()==0) prefix=null;

  int startTermIndex, endTermIndex;
  if (prefix!=null) {
    startTermIndex = Arrays.binarySearch(terms,prefix,nullStrComparator);
    if (startTermIndex<0) startTermIndex=-startTermIndex-1;
    // find the end term.  \uffff is a noncharacter code point that should
    // never appear in an indexed term; since it is only ever passed to
    // compareTo() that's fine, and it is guaranteed to sort after every
    // legal character.
    endTermIndex = Arrays.binarySearch(terms,prefix+"\uffff\uffff\uffff\uffff",nullStrComparator);
    endTermIndex = -endTermIndex-1;
  } else {
    // start at 1 to skip the null entry at position 0, which stands in for
    // documents that have no value for this field
    startTermIndex=1;
    endTermIndex=terms.length;
  }
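
  // [startTermIndex,endTermIndex) now brackets the candidate terms.  As a
  // hypothetical example: with terms={null,"cab","car","cat","dog"} and
  // prefix "ca", the searches above yield startTermIndex=1, endTermIndex=4.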
  final int nTerms=endTermIndex-startTermIndex;

  if (nTerms>0) {

    // count collection array only needs to be as big as the number of terms we are
    // going to collect counts for.
    final int[] counts = new int[nTerms];
    DocIterator iter = docs.iterator();
    while (iter.hasNext()) {
      int term = termNum[iter.nextDoc()];
      int arrIdx = term-startTermIndex;
      if (arrIdx>=0 && arrIdx<nTerms) counts[arrIdx]++;
    }
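
    // the counting pass is a single O(|docs|) sweep: one ord lookup per doc,
    // with the range check discarding ords outside the window (including
    // ord 0, i.e. docs that have no value for this field)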

    // IDEA: we could also maintain a count of "other"... everything that fell outside
    // of the top 'N'

    int off=offset;
    int lim=limit>=0 ? limit : Integer.MAX_VALUE;

    if (sort) {
      int maxsize = limit>0 ? offset+limit : Integer.MAX_VALUE-1;
      maxsize = Math.min(maxsize, nTerms);

      final BoundedTreeSet<CountPair<String,Integer>> queue = new BoundedTreeSet<CountPair<String,Integer>>(maxsize);
      // only counts strictly greater than 'min' can make the top 'N'; seeding
      // it with mincount-1 keeps anything below mincount out of the queue
      int min=mincount-1;
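
      // BoundedTreeSet is assumed here to evict its greatest element (per the
      // CountPair ordering) once it grows past 'maxsize', and CountPair to
      // sort higher counts first, so queue.last() is the weakest pair kept.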
      for (int i=0; i<nTerms; i++) {
        int c = counts[i];
        if (c>min) {
          // NOTE: we use c>min rather than c>=min as an optimization because we are going in
          // index order, so we already know that the keys are ordered.  This can be very
          // important if a lot of the counts are repeated (like zero counts would be).
          queue.add(new CountPair<String,Integer>(terms[startTermIndex+i], c));
          if (queue.size()>=maxsize) min=queue.last().val;
        }
      }
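
      // the queue now holds at most offset+limit pairs in descending-count
      // order, so paging is a simple skip-then-take scan: --off>=0 skips the
      // first 'offset' entries, and --lim<0 stops after 'limit' entries.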
      // now select the right page from the results
      for (CountPair<String,Integer> p : queue) {
        if (--off>=0) continue;
        if (--lim<0) break;
        res.add(ft.indexedToReadable(p.key), p.val);
      }
    } else {
      // add results in index order
      int i=0;
      if (mincount<=0) {
        // if mincount<=0, then we won't discard any terms and we know exactly
        // where to start.
        i=off;
        off=0;
      }
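
      // starting at i=off is safe because with mincount<=0 no term is
      // filtered out, so the offset-th surviving term is just the offset-th
      // term in the window; otherwise --off only fires for terms that pass
      // the mincount test, thanks to ||'s short-circuit below.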
      for (; i<nTerms; i++) {
        int c = counts[i];
        if (c<mincount || --off>=0) continue;
        if (--lim<0) break;
        res.add(ft.indexedToReadable(terms[startTermIndex+i]), c);
      }
    }
  }
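
  // the "missing" bucket is reported under a null key; getFieldMissingCount
  // presumably counts docs in the base set with no value at all for the field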
  if (missing) {
    res.add(null, getFieldMissingCount(searcher,docs,fieldName));
  }

  return res;
}