Package org.apache.lucene.index

Examples of org.apache.lucene.index.DocsAndPositionsEnum


          case 1: posFlags = DocsAndPositionsEnum.FLAG_OFFSETS; break;
          case 2: posFlags = DocsAndPositionsEnum.FLAG_PAYLOADS; break;
          default: posFlags = DocsAndPositionsEnum.FLAG_OFFSETS | DocsAndPositionsEnum.FLAG_PAYLOADS; break;
        }
        // TODO: cast to DocsAndPositionsEnum?
        DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, posFlags);
        if (docsAndPositions != null) {
          return docsAndPositions;
        }
      }
      flags |= DocsEnum.FLAG_FREQS;
View Full Code Here


          if (terms == null) {
            continue;
          }
          TermsEnum termsEnum = terms.iterator(null);
          BytesRef text;
          DocsAndPositionsEnum docsAndPositions = null;
          List<BoboTerm> boboTermList = new ArrayList<BoboTerm>();
          while ((text = termsEnum.next()) != null) {
            BoboTerm boboTerm = new BoboTerm();
            boboTerm.term = text.utf8ToString();
            boboTerm.freq = (int) termsEnum.totalTermFreq();
            docsAndPositions = termsEnum.docsAndPositions(null, docsAndPositions);
            if (docsAndPositions != null) {
              docsAndPositions.nextDoc();
              boboTerm.positions = new ArrayList<Integer>();
              boboTerm.startOffsets = new ArrayList<Integer>();
              boboTerm.endOffsets = new ArrayList<Integer>();
              for (int t = 0; t < boboTerm.freq; ++t) {
                boboTerm.positions.add(docsAndPositions.nextPosition());
                boboTerm.startOffsets.add(docsAndPositions.startOffset());
                boboTerm.endOffsets.add(docsAndPositions.endOffset());
              }
            }
            boboTermList.add(boboTerm);
          }
          tvMap.put(field, boboTermList);
View Full Code Here

      _reader = reader;
    }

    @Override
    public void load() throws Exception {
      DocsAndPositionsEnum docPosEnum = _reader.termPositionsEnum(_sizeTerm);
      if (docPosEnum == null) {
        return;
      }
      int docID = -1;
      while ((docID = docPosEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
        if (docPosEnum.freq() > 0) {
          docPosEnum.nextPosition();
          int len = bytesToInt(docPosEnum.getPayload().bytes);
          allocate(docID, Math.min(len, _maxItems), true);
        }
      }
    }
View Full Code Here

*/
public class TermVectorMapper {

  public static List<IntPair> map(Terms terms, TermsEnum reuse, boolean acceptTermsOnly, boolean convertOffsets) throws IOException {
    TermsEnum te = terms.iterator(reuse);
    DocsAndPositionsEnum dpe = null;
    List<IntPair> res = new ArrayList<IntPair>();
    while (te.next() != null) {
      DocsAndPositionsEnum newDpe = te.docsAndPositions(null, dpe, DocsAndPositionsEnum.FLAG_OFFSETS);
      if (newDpe == null) { // no positions and no offsets - just add terms if allowed
        if (!acceptTermsOnly) {
          return null;
        }
        int freq = (int)te.totalTermFreq();
View Full Code Here

      final TermsEnum termsEnum = this.getTermsEnum(context);
      if (termsEnum == null) {
        return null;
      }

      final DocsAndPositionsEnum docsEnum = termsEnum.docsAndPositions(acceptDocs, null);
      final DocsNodesAndPositionsEnum sirenDocsEnum = NodeTermQuery.this.getDocsNodesAndPositionsEnum(docsEnum);
      return new NodeTermScorer(this, sirenDocsEnum, this.createDocScorer(context));
    }
View Full Code Here

    }
    Token tokensInOriginalOrder[] = new Token[totalTokens];
    ArrayList<Token> unsortedTokens = null;
    termsEnum = tpv.iterator(null);
    BytesRef text;
    DocsAndPositionsEnum dpEnum = null;
    while ((text = termsEnum.next()) != null) {

      dpEnum = termsEnum.docsAndPositions(null, dpEnum);
      if (dpEnum == null) {
        throw new IllegalArgumentException(
            "Required TermVector Offset information was not found");
      }
      final String term = text.utf8ToString();

      dpEnum.nextDoc();
      final int freq = dpEnum.freq();
      for(int posUpto=0;posUpto<freq;posUpto++) {
        final int pos = dpEnum.nextPosition();
        if (dpEnum.startOffset() < 0) {
          throw new IllegalArgumentException(
              "Required TermVector Offset information was not found");
        }
        final Token token = new Token(term,
                                      dpEnum.startOffset(),
                                      dpEnum.endOffset());
        if (hasPayloads) {
          // Must make a deep copy of the returned payload,
          // since D&PEnum API is allowed to re-use on every
          // call:
          token.setPayload(BytesRef.deepCopyOf(dpEnum.getPayload()));
        }

        if (tokenPositionsGuaranteedContiguous && pos != -1) {
          // We have positions stored and a guarantee that the token position
          // information is contiguous
View Full Code Here

   
  private Map<Integer,Object> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages) throws IOException
    Map<Integer,Object> highlights = new HashMap<Integer,Object>();
   
    // reuse in the real sense... for docs in same segment we just advance our old enum
    DocsAndPositionsEnum postings[] = null;
    TermsEnum termsEnum = null;
    int lastLeaf = -1;

    PassageFormatter fieldFormatter = getFormatter(field);
    if (fieldFormatter == null) {
View Full Code Here

    }
    PriorityQueue<OffsetsEnum> pq = new PriorityQueue<OffsetsEnum>();
    float weights[] = new float[terms.length];
    // initialize postings
    for (int i = 0; i < terms.length; i++) {
      DocsAndPositionsEnum de = postings[i];
      int pDoc;
      if (de == EMPTY) {
        continue;
      } else if (de == null) {
        postings[i] = EMPTY; // initially
        if (!termsEnum.seekExact(terms[i])) {
          continue; // term not found
        }
        de = postings[i] = termsEnum.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_OFFSETS);
        if (de == null) {
          // no positions available
          throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
        }
        pDoc = de.advance(doc);
      } else {
        pDoc = de.docID();
        if (pDoc < doc) {
          pDoc = de.advance(doc);
        }
      }

      if (doc == pDoc) {
        weights[i] = scorer.weight(contentLength, de.freq());
        de.nextPosition();
        pq.add(new OffsetsEnum(de, i));
      }
    }
   
    pq.add(new OffsetsEnum(EMPTY, Integer.MAX_VALUE)); // a sentinel for termination
   
    PriorityQueue<Passage> passageQueue = new PriorityQueue<Passage>(n, new Comparator<Passage>() {
      @Override
      public int compare(Passage left, Passage right) {
        if (left.score < right.score) {
          return -1;
        } else if (left.score > right.score) {
          return 1;
        } else {
          return left.startOffset - right.startOffset;
        }
      }
    });
    Passage current = new Passage();
   
    OffsetsEnum off;
    while ((off = pq.poll()) != null) {
      final DocsAndPositionsEnum dp = off.dp;
      int start = dp.startOffset();
      if (start == -1) {
        throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
      }
      int end = dp.endOffset();
      // LUCENE-5166: this hit would span the content limit... however more valid
      // hits may exist (they are sorted by start). so we pretend like we never
      // saw this term, it won't cause a passage to be added to passageQueue or anything.
      assert EMPTY.startOffset() == Integer.MAX_VALUE;
      if (start < contentLength && end > contentLength) {
        continue;
      }
      if (start >= current.endOffset) {
        if (current.startOffset >= 0) {
          // finalize current
          current.score *= scorer.norm(current.startOffset);
          // new sentence: first add 'current' to queue
          if (passageQueue.size() == n && current.score < passageQueue.peek().score) {
            current.reset(); // can't compete, just reset it
          } else {
            passageQueue.offer(current);
            if (passageQueue.size() > n) {
              current = passageQueue.poll();
              current.reset();
            } else {
              current = new Passage();
            }
          }
        }
        // if we exceed limit, we are done
        if (start >= contentLength) {
          Passage passages[] = new Passage[passageQueue.size()];
          passageQueue.toArray(passages);
          for (Passage p : passages) {
            p.sort();
          }
          // sort in ascending order
          Arrays.sort(passages, new Comparator<Passage>() {
            @Override
            public int compare(Passage left, Passage right) {
              return left.startOffset - right.startOffset;
            }
          });
          return passages;
        }
        // advance breakiterator
        assert BreakIterator.DONE < 0;
        current.startOffset = Math.max(bi.preceding(start+1), 0);
        current.endOffset = Math.min(bi.next(), contentLength);
      }
      int tf = 0;
      while (true) {
        tf++;
        current.addMatch(start, end, terms[off.id]);
        if (off.pos == dp.freq()) {
          break; // removed from pq
        } else {
          off.pos++;
          dp.nextPosition();
          start = dp.startOffset();
          end = dp.endOffset();
        }
        if (start >= current.endOffset || end > contentLength) {
          pq.offer(off);
          break;
        }
View Full Code Here

      final TermsEnum termsEnum = fieldTerms.iterator(null);

      for (int pos=0; pos<postingsFreqs.length; pos++) {
        Term[] terms = termArrays.get(pos);

        final DocsAndPositionsEnum postingsEnum;
        int docFreq;

        if (terms.length > 1) {
          postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum);
View Full Code Here

      if (termState == null) {
        // Term doesn't exist in reader
        continue;
      }
      termsEnum.seekExact(term.bytes(), termState);
      DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);
      if (postings == null) {
        // term does exist, but has no positions
        throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
      }
      cost += postings.cost();
      docsEnums.add(postings);
    }

    _queue = new DocsQueue(docsEnums);
    _posList = new IntQueue();
View Full Code Here

TOP

Related Classes of org.apache.lucene.index.DocsAndPositionsEnum

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.