public NamedList getCounts(SolrIndexSearcher searcher, DocSet baseDocs, int offset, int limit, Integer mincount, boolean missing, String sort, String prefix) throws IOException {
use.incrementAndGet();
FieldType ft = searcher.getSchema().getFieldType(field);
NamedList res = new NamedList(); // order is important
DocSet docs = baseDocs;
int baseSize = docs.size();
int maxDoc = searcher.maxDoc();
if (baseSize >= mincount) {
final int[] index = this.index;
final int[] counts = new int[numTermsInField];
//
// If there is a prefix, find its start and end term numbers
//
int startTerm = 0;
int endTerm = numTermsInField; // one past the end
NumberedTermEnum te = ti.getEnumerator(searcher.getReader());
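// skipTo(prefix) positions the enumerator at the first term >= prefix;
// skipping to prefix + "\uffff..." then yields the first term past all terms
// starting with the prefix (assuming terms sort in plain UTF-16 order),
// giving an exclusive upper bound for the range.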
if (prefix != null && prefix.length() > 0) {
te.skipTo(prefix);
startTerm = te.getTermNumber();
te.skipTo(prefix + "\uffff\uffff\uffff\uffff");
endTerm = te.getTermNumber();
}
/***********
// Alternative 2: get the docSet of the prefix (could take a while) and
// then do the intersection with the baseDocSet first.
if (prefix != null && prefix.length() > 0) {
docs = searcher.getDocSet(new ConstantScorePrefixQuery(new Term(field, ft.toInternal(prefix))), docs);
// The issue with this method is that it can return 0 counts for terms without
// the prefix. We can't simply filter those terms out later because doing so
// may mean that we didn't collect enough terms in the queue (in the sorted case).
}
***********/
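// When more than half the documents match, it's cheaper to count the terms
// of the documents that *don't* match and subtract from the per-term totals
// (the maxTermCounts[i] - counts[i] below). This is only valid when every
// term is being counted (no prefix) and the doc set exposes its bitset.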
boolean doNegative = baseSize > (maxDoc >> 1) && termInstances > 0
&& startTerm==0 && endTerm==numTermsInField
&& docs instanceof BitDocSet;
if (doNegative) {
OpenBitSet bs = (OpenBitSet)((BitDocSet)docs).getBits().clone();
bs.flip(0, maxDoc);
// TODO: when an iterator across negative elements is available, use that
// instead of creating a new bitset and inverting.
docs = new BitDocSet(bs, maxDoc - baseSize);
// simply negating will mean that we have deleted docs in the set.
// that should be OK, as their entries in our table should be empty.
}
// For the biggest terms, do straight set intersections
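// (Terms too frequent to be stored in the packed per-document term lists are
// presumably tracked separately in bigTerms, so their counts are computed by
// doc-set intersection instead.)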
for (TopTerm tt : bigTerms.values()) {
// TODO: counts could be deferred if sorted==false
if (tt.termNum >= startTerm && tt.termNum < endTerm) {
counts[tt.termNum] = searcher.numDocs(new TermQuery(tt.term), docs);
}
}
// TODO: we could short-circuit counting altogether for sorted faceting
// where we already have enough terms from the bigTerms
// TODO: we could shrink the size of the collection array, and additionally
// break out once the term number exceeds endTerm, but that would require two
// extra conditionals in the inner loop (although they would be predictable
// for the non-prefix case). Perhaps a separate copy of the code would be
// warranted.
if (termInstances > 0) {
DocIterator iter = docs.iterator();
while (iter.hasNext()) {
int doc = iter.nextDoc();
int code = index[doc];
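// 'code' is either an inline term list or a pointer: if the low byte is 1,
// the remaining 24 bits are a byte offset into one of the tnums arrays
// (selected by bits 16-23 of the docid); otherwise the int itself holds the
// vByte-encoded term-number deltas, lowest byte first.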
if ((code & 0xff)==1) {
int pos = code>>>8;
int whichArray = (doc >>> 16) & 0xff;
byte[] arr = tnums[whichArray];
int tnum = 0;
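// Decode vByte deltas: each byte contributes 7 bits, and a set high bit
// means the value continues in the next byte; e.g. bytes 0x85 0x07 decode
// to (5<<7)|7 = 647. A decoded delta of 0 terminates the list.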
for(;;) {
int delta = 0;
for(;;) {
byte b = arr[pos++];
delta = (delta << 7) | (b & 0x7f);
if ((b & 0x80) == 0) break;
}
if (delta == 0) break;
tnum += delta - TNUM_OFFSET;
counts[tnum]++;
}
} else {
int tnum = 0;
int delta = 0;
for (;;) {
delta = (delta << 7) | (code & 0x7f);
if ((code & 0x80)==0) {
if (delta==0) break;
tnum += delta - TNUM_OFFSET;
counts[tnum]++;
delta = 0;
}
code >>>= 8;
}
}
}
}
int off=offset;
int lim=limit>=0 ? limit : Integer.MAX_VALUE; // a negative limit means "no limit"
if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
int maxsize = limit>0 ? offset+limit : Integer.MAX_VALUE-1;
maxsize = Math.min(maxsize, numTermsInField);
final BoundedTreeSet<Long> queue = new BoundedTreeSet<Long>(maxsize);
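// BoundedTreeSet keeps only the 'maxsize' smallest elements, dropping the
// largest on overflow; with the pair encoding below, "smallest" means the
// highest counts.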
int min=mincount-1; // the smallest value in the top 'N' values
for (int i=startTerm; i<endTerm; i++) {
int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
if (c>min) {
// NOTE: we use c>min rather than c>=min as an optimization because we are going in
// index order, so we already know that the keys are ordered. This can be very
// important if a lot of the counts are repeated (like zero counts would be).
// minimize object creation and speed comparison by creating a long that
// encompasses both count and term number.
// Since smaller values are kept in the TreeSet, make higher counts smaller.
//
// For equal counts, lower term numbers should come first; they stay in the
// low bits unmodified, so a lower term number compares as smaller and is
// kept. (The commented-out priority-queue variant below needs the opposite
// order, hence its 0x7fffffff-i.)
//long pair = (((long)c)<<32) | (0x7fffffff-i) ; // use if priority queue
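// Example: c=5, i=3 yields 0xFFFFFFFB00000003; a larger count makes the
// high word more negative (sorting first), and ties resolve toward the
// lower term number in the low bits.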
long pair = (((long)-c)<<32) | i;
queue.add(pair); // autoboxing; no need for an explicit new Long(pair)
if (queue.size()>=maxsize) min=-(int)(queue.last().longValue() >>> 32);
}
}
final int[] tnums = new int[Math.min(Math.max(0, queue.size()-off), lim)]; // NOTE: this local shadows the byte[][] tnums field used above
final int[] indirect = counts; // reuse the counts array for the index into the tnums array
assert indirect.length >= tnums.length;
int tnumCount = 0;
for (Long p : queue) {
if (--off>=0) continue;
if (--lim<0) break;
int c = -(int)(p.longValue() >>> 32);
//int tnum = 0x7fffffff - (int)p.longValue(); // use if priority queue
int tnum = (int)p.longValue();
indirect[tnumCount] = tnumCount;
tnums[tnumCount++] = tnum;
// String label = ft.indexedToReadable(getTermText(te, tnum));
// add a null label for now... we'll fill it in later.
res.add(null, c);
}
// now sort the indexes by the term numbers
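// Resolving labels in ascending term-number order lets the term enumerator
// advance forward through the index rather than seek randomly; 'indirect'
// remembers each term's slot in 'res' so the labels land in the right place.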
PrimUtils.sort(0, tnumCount, indirect, new PrimUtils.IntComparator() {
@Override
public int compare(int a, int b) {
return tnums[a] - tnums[b];
}
});
// convert the term numbers to term values and set as the label
for (int i=0; i<tnumCount; i++) {
int idx = indirect[i];
int tnum = tnums[idx];
String label = ft.indexedToReadable(getTermText(te, tnum));
res.setName(idx, label);
}
} else {
// add results in index order
int i=startTerm;
if (mincount<=0) {
// if mincount<=0, then we won't discard any terms and we know exactly
// where to start.
i=startTerm+off;
off=0;
}
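// e.g. with mincount<=0 and an offset of 5, iteration simply starts at
// startTerm+5 rather than skipping the first five qualifying terms.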
for (; i<endTerm; i++) {
int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
if (c<mincount || --off>=0) continue;
if (--lim<0) break;
String label = ft.indexedToReadable(getTermText(te, i));
res.add(label, c);
}
}
te.close(); // TODO: close in a finally block so the enumerator isn't leaked if an exception is thrown above