Package org.apache.lucene.index

Examples of org.apache.lucene.index.TermsEnum


  private Map<Integer,Object> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages) throws IOException
    Map<Integer,Object> highlights = new HashMap<Integer,Object>();
   
    // reuse in the real sense... for docs in same segment we just advance our old enum
    DocsAndPositionsEnum postings[] = null;
    TermsEnum termsEnum = null;
    int lastLeaf = -1;

    PassageFormatter fieldFormatter = getFormatter(field);
    if (fieldFormatter == null) {
      throw new NullPointerException("PassageFormatter cannot be null");
View Full Code Here


      }
    }
    Terms terms = MultiFields.getTerms(r, "body");
    if (terms != null) {
      final IntsRef scratchIntsRef = new IntsRef();
      final TermsEnum termsEnum = terms.iterator(null);
      if (VERBOSE) {
        System.out.println("TEST: got termsEnum=" + termsEnum);
      }
      BytesRef term;
      int ord = 0;

      Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton();   
      final TermsEnum termsEnum2 = terms.intersect(new CompiledAutomaton(automaton, false, false), null);

      while((term = termsEnum.next()) != null) {
        BytesRef term2 = termsEnum2.next();
        assertNotNull(term2);
        assertEquals(term, term2);
        assertEquals(termsEnum.docFreq(), termsEnum2.docFreq());
        assertEquals(termsEnum.totalTermFreq(), termsEnum2.totalTermFreq());

        if (ord == 0) {
          try {
            termsEnum.ord();
          } catch (UnsupportedOperationException uoe) {
View Full Code Here

        }
        assertEquals((exists ? "" : "non-exist ") + "id=" + id, exists ? 1 : 0, s.search(new TermQuery(new Term("id", id)), 1).totalHits);
      }

      // Verify w/ MultiTermsEnum
      final TermsEnum termsEnum = MultiFields.getTerms(r, "id").iterator(null);
      for(int iter=0;iter<2*NUM_IDS;iter++) {
        final String id;
        final String nextID;
        final boolean exists;

        if (random().nextBoolean()) {
          id = allIDsList.get(random().nextInt(allIDsList.size()));
          exists = !outOfBounds.contains(id);
          nextID = null;
          if (VERBOSE) {
            System.out.println("TEST: exactOnly " + (exists ? "" : "non-exist ") + "id=" + id);
          }
        } else {
          // Pick ID between two IDs:
          exists = false;
          final int idv = random().nextInt(NUM_IDS-1);
          if (cycle == 0) {
            id = String.format(Locale.ROOT, "%07da", idv);
            nextID = String.format(Locale.ROOT, "%07d", idv+1);
          } else {
            id = sortedAllIDsList.get(idv) + "a";
            nextID = sortedAllIDsList.get(idv+1);
          }
          if (VERBOSE) {
            System.out.println("TEST: not exactOnly id=" + id + " nextID=" + nextID);
          }
        }

        final TermsEnum.SeekStatus status;
        if (nextID == null) {
          if (termsEnum.seekExact(new BytesRef(id))) {
            status = TermsEnum.SeekStatus.FOUND;
          } else {
            status = TermsEnum.SeekStatus.NOT_FOUND;
          }
        } else {
          status = termsEnum.seekCeil(new BytesRef(id));
        }

        if (nextID != null) {
          assertEquals(TermsEnum.SeekStatus.NOT_FOUND, status);
          assertEquals("expected=" + nextID + " actual=" + termsEnum.term().utf8ToString(), new BytesRef(nextID), termsEnum.term());
        } else if (!exists) {
          assertTrue(status == TermsEnum.SeekStatus.NOT_FOUND ||
                     status == TermsEnum.SeekStatus.END);
        } else {
          assertEquals(TermsEnum.SeekStatus.FOUND, status);
View Full Code Here

    }
  }
 
  protected void maxEditDistanceChanged(BytesRef lastTerm, int maxEdits, boolean init)
      throws IOException {
    TermsEnum newEnum = getAutomatonEnum(maxEdits, lastTerm);
    // instead of assert, we do a hard check in case someone uses our enum directly
    // assert newEnum != null;
    if (newEnum == null) {
      assert maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
      throw new IllegalArgumentException("maxEdits cannot be > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE");
View Full Code Here

 
  private int countTerms(MultiTermQuery q) throws Exception {
    final Terms terms = MultiFields.getTerms(reader, q.getField());
    if (terms == null)
      return 0;
    final TermsEnum termEnum = q.getTermsEnum(terms);
    assertNotNull(termEnum);
    int count = 0;
    BytesRef cur, last = null;
    while ((cur = termEnum.next()) != null) {
      count++;
      if (last != null) {
        assertTrue(last.compareTo(cur) < 0);
      }
      last = BytesRef.deepCopyOf(cur);
View Full Code Here

   
    LinkedList<Term> termsWithPrefix = new LinkedList<Term>();
   
    // this TermEnum gives "piccadilly", "pie" and "pizza".
    String prefix = "pi";
    TermsEnum te = MultiFields.getFields(reader).terms("body").iterator(null);
    te.seekCeil(new BytesRef(prefix));
    do {
      String s = te.term().utf8ToString();
      if (s.startsWith(prefix)) {
        termsWithPrefix.add(new Term("body", s));
      } else {
        break;
      }
    } while (te.next() != null);
   
    query1.add(termsWithPrefix.toArray(new Term[0]));
    query2.add(termsWithPrefix.toArray(new Term[0]));
   
    ScoreDoc[] result;
View Full Code Here

  public void mergeSortedField(FieldInfo fieldInfo, final MergeState mergeState, List<SortedDocValues> toMerge) throws IOException {
    final AtomicReader readers[] = mergeState.readers.toArray(new AtomicReader[toMerge.size()]);
    final SortedDocValues dvs[] = toMerge.toArray(new SortedDocValues[toMerge.size()]);
   
    // step 1: iterate thru each sub and mark terms still in use
    TermsEnum liveTerms[] = new TermsEnum[dvs.length];
    for (int sub = 0; sub < liveTerms.length; sub++) {
      AtomicReader reader = readers[sub];
      SortedDocValues dv = dvs[sub];
      Bits liveDocs = reader.getLiveDocs();
      if (liveDocs == null) {
View Full Code Here

  public void mergeSortedSetField(FieldInfo fieldInfo, final MergeState mergeState, List<SortedSetDocValues> toMerge) throws IOException {
    final AtomicReader readers[] = mergeState.readers.toArray(new AtomicReader[toMerge.size()]);
    final SortedSetDocValues dvs[] = toMerge.toArray(new SortedSetDocValues[toMerge.size()]);
   
    // step 1: iterate thru each sub and mark terms still in use
    TermsEnum liveTerms[] = new TermsEnum[dvs.length];
    for (int sub = 0; sub < liveTerms.length; sub++) {
      AtomicReader reader = readers[sub];
      SortedSetDocValues dv = dvs[sub];
      Bits liveDocs = reader.getLiveDocs();
      if (liveDocs == null) {
View Full Code Here

      if (fieldTerms == null) {
        return null;
      }

      // Reuse single TermsEnum below:
      final TermsEnum termsEnum = fieldTerms.iterator(null);

      for (int pos=0; pos<postingsFreqs.length; pos++) {
        Term[] terms = termArrays.get(pos);

        final DocsAndPositionsEnum postingsEnum;
        int docFreq;

        if (terms.length > 1) {
          postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum);

          // coarse -- this overcounts since a given doc can
          // have more than one term:
          docFreq = 0;
          for(int termIdx=0;termIdx<terms.length;termIdx++) {
            final Term term = terms[termIdx];
            TermState termState = termContexts.get(term).get(context.ord);
            if (termState == null) {
              // Term not in reader
              continue;
            }
            termsEnum.seekExact(term.bytes(), termState);
            docFreq += termsEnum.docFreq();
          }

          if (docFreq == 0) {
            // None of the terms are in this reader
            return null;
          }
        } else {
          final Term term = terms[0];
          TermState termState = termContexts.get(term).get(context.ord);
          if (termState == null) {
            // Term not in reader
            return null;
          }
          termsEnum.seekExact(term.bytes(), termState);
          postingsEnum = termsEnum.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);

          if (postingsEnum == null) {
            // term does exist, but has no positions
            assert termsEnum.docs(liveDocs, null, DocsEnum.FLAG_NONE) != null: "termstate found but no term exists in reader";
            throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
          }

          docFreq = termsEnum.docFreq();
        }

        postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms);
      }
View Full Code Here

    @Override
    public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
        boolean topScorer, Bits acceptDocs) throws IOException {
      assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
      final TermsEnum termsEnum = getTermsEnum(context);
      if (termsEnum == null) {
        return null;
      }
      DocsEnum docs = termsEnum.docs(acceptDocs, null);
      assert docs != null;
      return new TermScorer(this, docs, similarity.simScorer(stats, context));
    }
View Full Code Here

TOP

Related Classes of org.apache.lucene.index.TermsEnum

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.