Examples of DocIdSetIterator

org.apache.lucene.search.DocIdSetIterator
This abstract class defines methods to iterate over a set of non-decreasing doc IDs. Note that this class assumes it iterates over doc IDs, and therefore {@link #NO_MORE_DOCS} is set to {@value #NO_MORE_DOCS} in order to be used as a sentinel object. Implementations of this class are expected to consider {@link Integer#MAX_VALUE} as an invalid value.

Examples of org.apache.lucene.search.DocIdSetIterator


    int i = 0;
    int votes = 0; //could be smarter but would make the code even more complex for a minor optimization out of cycle.
    // enter main loop:
    while ( true ) {
      final DocIdSetIterator iterator = iterators[i];
      int position = targetPosition;
      if ( !iteratorAlreadyOnTargetPosition( targetPosition, iterator ) ) {
        position = iterator.advance( targetPosition );
      }
      if ( position == DocIdSetIterator.NO_MORE_DOCS ) {
        return result;
      } //exit condition
      if ( position == targetPosition ) {

View Full Code Here

Examples of org.apache.lucene.search.DocIdSetIterator


    boolean allIteratorsShareSameFirstTarget = true;


    //iterator initialize, just one "next" for each DocIdSetIterator
    for ( int i = 1; i < iterators.length; i++ ) {
      final DocIdSetIterator iterator = iterators[i];
      final int position = iterator.nextDoc();
      if ( position == DocIdSetIterator.NO_MORE_DOCS ) {
        //current iterator has no values, so skip all
        return DocIdSetIterator.NO_MORE_DOCS;
      }
      if ( targetPosition != position ) {

View Full Code Here

Examples of org.apache.lucene.search.DocIdSetIterator

      if (dv == null) { // this reader does not have DocValues for the requested category list
        continue;
      }
      
      BytesRef scratch = new BytesRef();
      DocIdSetIterator docs = hits.bits.iterator();
      
      int doc;
      while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        //System.out.println("  doc=" + doc);
        // TODO: use OrdinalsReader?  we'd need to add a
        // BytesRef getAssociation()?
        dv.get(doc, scratch);
        byte[] bytes = scratch.bytes;

View Full Code Here

Examples of org.apache.lucene.search.DocIdSetIterator

      if (dv == null) { // this reader does not have DocValues for the requested category list
        continue;
      }
    
      BytesRef scratch = new BytesRef();
      DocIdSetIterator docs = hits.bits.iterator();
      
      int doc;
      while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        //System.out.println("  doc=" + doc);
        // TODO: use OrdinalsReader?  we'd need to add a
        // BytesRef getAssociation()?
        dv.get(doc, scratch);
        byte[] bytes = scratch.bytes;

View Full Code Here

Examples of org.apache.lucene.search.DocIdSetIterator

      SortedSetDocValues segValues = reader.getSortedSetDocValues(field);
      if (segValues == null) {
        continue;
      }


      DocIdSetIterator docs = hits.bits.iterator();


      // TODO: yet another option is to count all segs
      // first, only in seg-ord space, and then do a
      // merge-sort-PQ in the end to only "resolve to
      // global" those seg ords that can compete, if we know
      // we just want top K?  ie, this is the same algo
      // that'd be used for merging facets across shards
      // (distributed faceting).  but this has much higher
      // temp ram req'ts (sum of number of ords across all
      // segs)
      if (ordinalMap != null) {
        int segOrd = hits.context.ord;


        int numSegOrds = (int) segValues.getValueCount();


        if (hits.totalHits < numSegOrds/10) {
          //System.out.println("    remap as-we-go");
          // Remap every ord to global ord as we iterate:
          int doc;
          while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            //System.out.println("    doc=" + doc);
            segValues.setDocument(doc);
            int term = (int) segValues.nextOrd();
            while (term != SortedSetDocValues.NO_MORE_ORDS) {
              //System.out.println("      segOrd=" + segOrd + " ord=" + term + " globalOrd=" + ordinalMap.getGlobalOrd(segOrd, term));
              counts[(int) ordinalMap.getGlobalOrd(segOrd, term)]++;
              term = (int) segValues.nextOrd();
            }
          }
        } else {
          //System.out.println("    count in seg ord first");


          // First count in seg-ord space:
          final int[] segCounts = new int[numSegOrds];
          int doc;
          while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            //System.out.println("    doc=" + doc);
            segValues.setDocument(doc);
            int term = (int) segValues.nextOrd();
            while (term != SortedSetDocValues.NO_MORE_ORDS) {
              //System.out.println("      ord=" + term);
              segCounts[term]++;
              term = (int) segValues.nextOrd();
            }
          }


          // Then, migrate to global ords:
          for(int ord=0;ord<numSegOrds;ord++) {
            int count = segCounts[ord];
            if (count != 0) {
              //System.out.println("    migrate segOrd=" + segOrd + " ord=" + ord + " globalOrd=" + ordinalMap.getGlobalOrd(segOrd, ord));
              counts[(int) ordinalMap.getGlobalOrd(segOrd, ord)] += count;
            }
          }
        }
      } else {
        // No ord mapping (e.g., single segment index):
        // just aggregate directly into counts:
        int doc;
        while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
          segValues.setDocument(doc);
          int term = (int) segValues.nextOrd();
          while (term != SortedSetDocValues.NO_MORE_ORDS) {
            counts[term]++;
            term = (int) segValues.nextOrd();

View Full Code Here

Examples of org.apache.lucene.search.DocIdSetIterator

      
      int scoresIdx = 0;
      float[] scores = hits.scores;


      FunctionValues functionValues = valueSource.getValues(context, hits.context);
      DocIdSetIterator docs = hits.bits.iterator();
      
      int doc;
      while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        ords.get(doc, scratch);
        if (keepScores) {
          scorer.docID = doc;
          scorer.score = scores[scoresIdx++];
        }

View Full Code Here

Examples of org.apache.lucene.search.DocIdSetIterator

    Collector[] sidewaysCollectors = new Collector[numDims-numBits];
    long drillDownCost = 0;
    int disiUpto = 0;
    int bitsUpto = 0;
    for (int dim=0;dim<numDims;dim++) {
      DocIdSetIterator disi = dims[dim].disi;
      if (dims[dim].bits == null) {
        disis[disiUpto] = disi;
        sidewaysCollectors[disiUpto] = dims[dim].sidewaysCollector;
        disiUpto++;
        if (disi != null) {
          drillDownCost += disi.cost();
        }
      } else {
        bits[bitsUpto] = dims[dim].bits;
        bitsSidewaysCollectors[bitsUpto] = dims[dim].sidewaysCollector;
        bitsUpto++;

View Full Code Here

Examples of org.apache.lucene.search.DocIdSetIterator

    nextDoc: while (docID != NO_MORE_DOCS) {
      Collector failedCollector = null;
      for (int i=0;i<disis.length;i++) {
        // TODO: should we sort this 2nd dimension of
        // docsEnums from most frequent to least?
        DocIdSetIterator disi = disis[i];
        if (disi != null && disi.docID() < docID) {
          disi.advance(docID);
        }
        if (disi == null || disi.docID() > docID) {
          if (failedCollector != null) {
            // More than one dim fails on this document, so
            // it's neither a hit nor a near-miss; move to
            // next doc:
            docID = baseScorer.nextDoc();

View Full Code Here

Examples of org.apache.lucene.search.DocIdSetIterator


      // First dim:
      //if (DEBUG) {
      //  System.out.println("  dim0");
      //}
      DocIdSetIterator disi = disis[0];
      if (disi != null) {
        int docID = disi.docID();
        while (docID < nextChunkStart) {
          int slot = docID & MASK;


          if (docIDs[slot] != docID) {
            seen.set(slot);
            // Mark slot as valid:
            //if (DEBUG) {
            //  System.out.println("    set docID=" + docID + " id=" + context.reader().document(docID).get("id"));
            //}
            docIDs[slot] = docID;
            missingDims[slot] = 1;
            counts[slot] = 1;
          }


          docID = disi.nextDoc();
        }
      }
      
      // Second dim:
      //if (DEBUG) {
      //  System.out.println("  dim1");
      //}
      disi = disis[1];
      if (disi != null) {
        int docID = disi.docID();
        while (docID < nextChunkStart) {
          int slot = docID & MASK;


          if (docIDs[slot] != docID) {
            // Mark slot as valid:
            seen.set(slot);
            //if (DEBUG) {
            //  System.out.println("    set docID=" + docID + " missingDim=0 id=" + context.reader().document(docID).get("id"));
            //}
            docIDs[slot] = docID;
            missingDims[slot] = 0;
            counts[slot] = 1;
          } else {
            // TODO: single-valued dims will always be true
            // below; we could somehow specialize
            if (missingDims[slot] >= 1) {
              missingDims[slot] = 2;
              counts[slot] = 2;
              //if (DEBUG) {
              //  System.out.println("    set docID=" + docID + " missingDim=2 id=" + context.reader().document(docID).get("id"));
              //}
            } else {
              counts[slot] = 1;
              //if (DEBUG) {
              //  System.out.println("    set docID=" + docID + " missingDim=" + missingDims[slot] + " id=" + context.reader().document(docID).get("id"));
              //}
            }
          }


          docID = disi.nextDoc();
        }
      }


      // After this we can "upgrade" to conjunction, because
      // any doc not seen by either dim 0 or dim 1 cannot be
      // a hit or a near miss:


      //if (DEBUG) {
      //  System.out.println("  baseScorer");
      //}


      // Fold in baseScorer, using advance:
      int filledCount = 0;
      int slot0 = 0;
      while (slot0 < CHUNK && (slot0 = seen.nextSetBit(slot0)) != -1) {
        int ddDocID = docIDs[slot0];
        assert ddDocID != -1;


        int baseDocID = baseScorer.docID();
        if (baseDocID < ddDocID) {
          baseDocID = baseScorer.advance(ddDocID);
        }
        if (baseDocID == ddDocID) {
          //if (DEBUG) {
          //  System.out.println("    keep docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id"));
          //}
          scores[slot0] = baseScorer.score();
          filledSlots[filledCount++] = slot0;
          counts[slot0]++;
        } else {
          //if (DEBUG) {
          //  System.out.println("    no docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id"));
          //}
          docIDs[slot0] = -1;


          // TODO: we could jump slot0 forward to the
          // baseDocID ... but we'd need to set docIDs for
          // intervening slots to -1
        }
        slot0++;
      }
      seen.clear(0, CHUNK);


      if (filledCount == 0) {
        if (nextChunkStart >= maxDoc) {
          break;
        }
        nextChunkStart += CHUNK;
        continue;
      }
      
      // TODO: factor this out & share w/ union scorer,
      // except we start from dim=2 instead:
      for (int dim=2;dim<numDims;dim++) {
        //if (DEBUG) {
        //  System.out.println("  dim=" + dim + " [" + dims[dim].dim + "]");
        //}
        disi = disis[dim];
        if (disi != null) {
          int docID = disi.docID();
          while (docID < nextChunkStart) {
            int slot = docID & MASK;
            if (docIDs[slot] == docID && counts[slot] >= dim) {
              // TODO: single-valued dims will always be true
              // below; we could somehow specialize
              if (missingDims[slot] >= dim) {
                //if (DEBUG) {
                //  System.out.println("    set docID=" + docID + " count=" + (dim+2));
                //}
                missingDims[slot] = dim+1;
                counts[slot] = dim+2;
              } else {
                //if (DEBUG) {
                //  System.out.println("    set docID=" + docID + " missing count=" + (dim+1));
                //}
                counts[slot] = dim+1;
              }
            }


            // TODO: sometimes use advance?
            docID = disi.nextDoc();
          }
        }
      }


      // Collect:

View Full Code Here

Examples of org.apache.lucene.search.DocIdSetIterator

          }
        });
        // Asserting bit set...
        if (VERBOSE) {
          System.out.println("expected cardinality:" + expectedResult.cardinality());
          DocIdSetIterator iterator = expectedResult.iterator();
          for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
            System.out.println(String.format(Locale.ROOT, "Expected doc[%d] with id value %s", doc, indexSearcher.doc(doc).get("id")));
          }
          System.out.println("actual cardinality:" + actualResult.cardinality());
          iterator = actualResult.iterator();
          for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
            System.out.println(String.format(Locale.ROOT, "Actual doc[%d] with id value %s", doc, indexSearcher.doc(doc).get("id")));
          }
        }
        assertEquals(expectedResult, actualResult);

View Full Code Here

0 1 2 3 4 5

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.