Package org.apache.lucene.index

Examples of org.apache.lucene.index.DocsEnum


      this.cost = cost;
    }

    protected void fillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs, TermsEnum termsEnum) throws IOException {
      BytesRef spare = new BytesRef();
      DocsEnum docsEnum = null;
      for (int i = 0; i < terms.size(); i++) {
        if (termsEnum.seekExact(terms.get(ords[i], spare))) {
          docsEnum = termsEnum.docs(acceptDocs, docsEnum, DocsEnum.FLAG_NONE);
          float score = TermsIncludingScoreQuery.this.scores[ords[i]];
          for (int doc = docsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.nextDoc()) {
            matchingDocs.set(doc);
            // In the case the same doc is also related to a another doc, a score might be overwritten. I think this
            // can only happen in a many-to-many relation
            scores[doc] = score;
          }
View Full Code Here


    }

    @Override
    protected void fillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs, TermsEnum termsEnum) throws IOException {
      BytesRef spare = new BytesRef();
      DocsEnum docsEnum = null;
      for (int i = 0; i < terms.size(); i++) {
        if (termsEnum.seekExact(terms.get(ords[i], spare))) {
          docsEnum = termsEnum.docs(acceptDocs, docsEnum, DocsEnum.FLAG_NONE);
          float score = TermsIncludingScoreQuery.this.scores[ords[i]];
          for (int doc = docsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.nextDoc()) {
            // I prefer this:
            /*if (scores[doc] < score) {
              scores[doc] = score;
              matchingDocs.set(doc);
            }*/
 
View Full Code Here

      if (terms == null) {
        continue;
      }
      Bits liveDocs = MultiFields.getLiveDocs(indexReader);
      TermsEnum te = terms.iterator(null);
      DocsEnum de = null;
      while (te.next() != null) {
        de = _TestUtil.docs(random(), te, liveDocs, de, DocsEnum.FLAG_NONE);
        int cnt = 0;
        while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          cnt++;
        }
        res.put(new CategoryPath(te.term().utf8ToString().split(delim)), cnt);
      }
    }
View Full Code Here

      if (multipleValuesPerDocument) {
        if (scoreDocsInOrder) {
          AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.wrap(toSearcher.getIndexReader());
          Terms terms = slowCompositeReader.terms(toField);
          if (terms != null) {
            DocsEnum docsEnum = null;
            TermsEnum termsEnum = null;
            SortedSet<BytesRef> joinValues = new TreeSet<BytesRef>(BytesRef.getUTF8SortedAsUnicodeComparator());
            joinValues.addAll(joinValueToJoinScores.keySet());
            for (BytesRef joinValue : joinValues) {
              termsEnum = terms.iterator(termsEnum);
              if (termsEnum.seekExact(joinValue)) {
                docsEnum = termsEnum.docs(slowCompositeReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
                JoinScore joinScore = joinValueToJoinScores.get(joinValue);

                for (int doc = docsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.nextDoc()) {
                  // First encountered join value determines the score.
                  // Something to keep in mind for many-to-many relations.
                  if (!docToJoinScore.containsKey(doc)) {
                    docToJoinScore.put(doc, joinScore);
                  }
View Full Code Here

        if (otherMatchingDocs == null) {
          continue;
        }

        for (RandomDoc otherSideDoc : otherMatchingDocs) {
          DocsEnum docsEnum = MultiFields.getTermDocsEnum(topLevelReader, MultiFields.getLiveDocs(topLevelReader), "id", new BytesRef(otherSideDoc.id), 0);
          assert docsEnum != null;
          int doc = docsEnum.nextDoc();
          expectedResult.set(doc);
        }
      }
    }
    return expectedResult;
View Full Code Here

    doc.add(new TextField("partnum", "Q37", Field.Store.YES));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = DirectoryReader.open(dir);
    DocsEnum td = _TestUtil.docs(random(),
                                 reader,
                                 "partnum",
                                 new BytesRef("Q36"),
                                 MultiFields.getLiveDocs(reader),
                                 null,
                                 0);
    assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    td = _TestUtil.docs(random(),
                        reader,
                        "partnum",
                        new BytesRef("Q37"),
                        MultiFields.getLiveDocs(reader),
                        null,
                        0);
    assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  }
View Full Code Here

  @Test
  public void testDocsEnum() throws Exception {
    Bits mappedLiveDocs = randomLiveDocs(reader.maxDoc());
    TermsEnum termsEnum = reader.terms(DOCS_ENUM_FIELD).iterator(null);
    assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef(DOCS_ENUM_TERM)));
    DocsEnum docs = termsEnum.docs(mappedLiveDocs, null);

    int doc;
    int prev = -1;
    while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      assertTrue("document " + doc + " marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(doc));
      assertEquals("incorrect value; doc " + doc, sortedValues[doc].intValue(), Integer.parseInt(reader.document(doc).get(ID_FIELD)));
      while (++prev < doc) {
        assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev));
      }
    }
    while (++prev < reader.maxDoc()) {
      assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.get(prev));
    }

    DocsEnum reuse = docs;
    docs = termsEnum.docs(mappedLiveDocs, reuse);
    if (docs instanceof SortingDocsEnum) {
      assertTrue(((SortingDocsEnum) docs).reused(reuse)); // make sure reuse worked
    }
    doc = -1;
View Full Code Here

        setOther(postings, reuse); // postings.other = reuse
      }
      return postings.reset(liveDocs, termState);
    } else {
      if (reuse instanceof PulsingDocsEnum) {
        DocsEnum wrapped = wrappedPostingsReader.docs(field, termState.wrappedTermState, liveDocs, getOther(reuse), flags);
        setOther(wrapped, reuse); // wrapped.other = reuse
        return wrapped;
      } else {
        return wrappedPostingsReader.docs(field, termState.wrappedTermState, liveDocs, reuse, flags);
      }
View Full Code Here

      hasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) > 0;
      hasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) > 0;
      hasPayloads = fieldInfo.hasPayloads();

      BytesRef term;
      DocsEnum docsEnum = null;
      DocsAndPositionsEnum docsAndPositionsEnum = null;
      final TermsEnum termsEnum = termsIn.iterator(null);
      int termOffset = 0;

      final IntArrayWriter scratch = new IntArrayWriter();

      // Used for payloads, if any:
      final RAMOutputStream ros = new RAMOutputStream();

      // if (DEBUG) {
      //   System.out.println("\nLOAD terms seg=" + state.segmentInfo.name + " field=" + field + " hasOffsets=" + hasOffsets + " hasFreq=" + hasFreq + " hasPos=" + hasPos + " hasPayloads=" + hasPayloads);
      // }

      while ((term = termsEnum.next()) != null) {
        final int docFreq = termsEnum.docFreq();
        final long totalTermFreq = termsEnum.totalTermFreq();

        // if (DEBUG) {
        //   System.out.println("  term=" + term.utf8ToString());
        // }

        termOffsets[count] = termOffset;

        if (termBytes.length < (termOffset + term.length)) {
          termBytes = ArrayUtil.grow(termBytes, termOffset + term.length);
        }
        System.arraycopy(term.bytes, term.offset, termBytes, termOffset, term.length);
        termOffset += term.length;
        termOffsets[count+1] = termOffset;

        if (hasPos) {
          docsAndPositionsEnum = termsEnum.docsAndPositions(null, docsAndPositionsEnum);
        } else {
          docsEnum = termsEnum.docs(null, docsEnum);
        }

        final TermAndSkip ent;

        final DocsEnum docsEnum2;
        if (hasPos) {
          docsEnum2 = docsAndPositionsEnum;
        } else {
          docsEnum2 = docsEnum;
        }

        int docID;

        if (docFreq <= lowFreqCutoff) {

          ros.reset();

          // Pack postings for low-freq terms into a single int[]:
          while ((docID = docsEnum2.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
            scratch.add(docID);
            if (hasFreq) {
              final int freq = docsEnum2.freq();
              scratch.add(freq);
              if (hasPos) {
                for(int pos=0;pos<freq;pos++) {
                  scratch.add(docsAndPositionsEnum.nextPosition());
                  if (hasOffsets) {
                    scratch.add(docsAndPositionsEnum.startOffset());
                    scratch.add(docsAndPositionsEnum.endOffset());
                  }
                  if (hasPayloads) {
                    final BytesRef payload = docsAndPositionsEnum.getPayload();
                    if (payload != null) {
                      scratch.add(payload.length);
                      ros.writeBytes(payload.bytes, payload.offset, payload.length);
                    } else {
                      scratch.add(0);
                    }
                  }
                }
              }
            }
          }

          final byte[] payloads;
          if (hasPayloads) {
            ros.flush();
            payloads = new byte[(int) ros.length()];
            ros.writeTo(payloads, 0);
          } else {
            payloads = null;
          }

          final int[] postings = scratch.get();
       
          ent = new LowFreqTerm(postings, payloads, docFreq, (int) totalTermFreq);
        } else {
          final int[] docs = new int[docFreq];
          final int[] freqs;
          final int[][] positions;
          final byte[][][] payloads;
          if (hasFreq) {
            freqs = new int[docFreq];
            if (hasPos) {
              positions = new int[docFreq][];
              if (hasPayloads) {
                payloads = new byte[docFreq][][];
              } else {
                payloads = null;
              }
            } else {
              positions = null;
              payloads = null;
            }
          } else {
            freqs = null;
            positions = null;
            payloads = null;
          }

          // Use separate int[] for the postings for high-freq
          // terms:
          int upto = 0;
          while ((docID = docsEnum2.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
            docs[upto] = docID;
            if (hasFreq) {
              final int freq = docsEnum2.freq();
              freqs[upto] = freq;
              if (hasPos) {
                final int mult;
                if (hasOffsets) {
                  mult = 3;
View Full Code Here

   
    IndexReader ir = iw.getReader();
    iw.close();

    TermsEnum te = MultiFields.getTerms(ir, "field").iterator(null);
    DocsEnum de = null;
   
    for (int i = 0; i < 10050; i++) {
      String expected = df.format(i);
      assertEquals(expected, te.next().utf8ToString());
      de = _TestUtil.docs(random(), te, null, de, DocsEnum.FLAG_NONE);
      assertTrue(de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
      assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
    }
    ir.close();

    _TestUtil.checkIndex(dir);
    dir.close();
View Full Code Here

TOP

Related Classes of org.apache.lucene.index.DocsEnum

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.