Package org.apache.lucene.index

Examples of org.apache.lucene.index.TermsEnum


      if (state == null) { // term is not present in that reader
        assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term;
        return null;
      }
      //System.out.println("LD=" + reader.getLiveDocs() + " set?=" + (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null"));
      final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null);
      termsEnum.seekExact(term.bytes(), state);
      return termsEnum;
    }
View Full Code Here


   
    LinkedList<Term> termsWithPrefix = new LinkedList<Term>();
   
    // this TermEnum gives "piccadilly", "pie" and "pizza".
    String prefix = "pi";
    TermsEnum te = MultiFields.getFields(reader).terms("body").iterator(null);
    te.seekCeil(new BytesRef(prefix));
    do {
      String s = te.term().utf8ToString();
      if (s.startsWith(prefix)) {
        termsWithPrefix.add(new Term("body", s));
      } else {
        break;
      }
    } while (te.next() != null);
   
    query1.add(termsWithPrefix.toArray(new Term[0]));
    assertEquals("body:\"blueberry (piccadilly pie pizza)\"", query1.toString());
    query2.add(termsWithPrefix.toArray(new Term[0]));
    assertEquals("body:\"strawberry (piccadilly pie pizza)\"", query2
        .toString());
   
    ScoreDoc[] result;
    result = searcher.search(query1, null, 1000).scoreDocs;
    assertEquals(2, result.length);
    result = searcher.search(query2, null, 1000).scoreDocs;
    assertEquals(0, result.length);
   
    // search for "blue* pizza":
    MultiPhraseQuery query3 = new MultiPhraseQuery();
    termsWithPrefix.clear();
    prefix = "blue";
    te.seekCeil(new BytesRef(prefix));
   
    do {
      if (te.term().utf8ToString().startsWith(prefix)) {
        termsWithPrefix.add(new Term("body", te.term().utf8ToString()));
      }
    } while (te.next() != null);
   
    query3.add(termsWithPrefix.toArray(new Term[0]));
    query3.add(new Term("body", "pizza"));
   
    result = searcher.search(query3, null, 1000).scoreDocs;
View Full Code Here

      assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
    }

    int nTerms = termsIndex.getValueCount();

    TermsEnum tenum = termsIndex.termsEnum();
    BytesRef val = new BytesRef();
    for (int i=0; i<nTerms; i++) {
      BytesRef val1 = tenum.next();
      termsIndex.lookupOrd(i, val);
      // System.out.println("i="+i);
      assertEquals(val, val1);
    }

    // seek the enum around (note this isn't a great test here)
    int num = atLeast(100);
    for (int i = 0; i < num; i++) {
      int k = random().nextInt(nTerms);
      termsIndex.lookupOrd(k, val);
      assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
      assertEquals(val, tenum.term());
    }

    for(int i=0;i<nTerms;i++) {
      termsIndex.lookupOrd(i, val);
      assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
      assertEquals(val, tenum.term());
    }

    // test bad field
    termsIndex = cache.getTermsIndex(reader, "bogusfield");
View Full Code Here

    List<Term> sample = new ArrayList<Term>();
    Fields fields = MultiFields.getFields(reader);
    for (String field : fields) {
      Terms terms = fields.terms(field);
      assertNotNull(terms);
      TermsEnum termsEnum = terms.iterator(null);
      while (termsEnum.next() != null) {
        if (sample.size() >= size) {
          int pos = random.nextInt(size);
          sample.set(pos, new Term(field, termsEnum.term()));
        } else {
          sample.add(new Term(field, termsEnum.term()));
        }
      }
    }
    Collections.shuffle(sample);
    return sample;
View Full Code Here

 
  private int countTerms(MultiTermQuery q) throws Exception {
    final Terms terms = MultiFields.getTerms(reader, q.getField());
    if (terms == null)
      return 0;
    final TermsEnum termEnum = q.getTermsEnum(terms);
    assertNotNull(termEnum);
    int count = 0;
    BytesRef cur, last = null;
    while ((cur = termEnum.next()) != null) {
      count++;
      if (last != null) {
        assertTrue(last.compareTo(cur) < 0);
      }
      last = BytesRef.deepCopyOf(cur);
View Full Code Here

          sort = prevSearchState.sort;
        } else {
          if (terms == null && docCount > minDocsToMakeTerms) {
            // TODO: try to "focus" on high freq terms sometimes too
            // TODO: maybe also periodically reset the terms...?
            final TermsEnum termsEnum = MultiFields.getTerms(mockReader, "body").iterator(null);
            terms = new ArrayList<BytesRef>();
            while(termsEnum.next() != null) {
              terms.add(BytesRef.deepCopyOf(termsEnum.term()));
            }
            if (VERBOSE) {
              System.out.println("TEST: init terms: " + terms.size() + " terms");
            }
            if (terms.size() == 0) {
View Full Code Here

    DirectoryReader open = DirectoryReader.open(dir);
    for (AtomicReaderContext ctx : open.leaves()) {
      AtomicReader indexReader = ctx.reader();
      Terms terms = indexReader.terms("body");
      TermsEnum iterator = terms.iterator(null);
      IdentityHashMap<DocsEnum, Boolean> enums = new IdentityHashMap<DocsEnum, Boolean>();
      MatchNoBits bits = new Bits.MatchNoBits(indexReader.maxDoc());
      while ((iterator.next()) != null) {
        DocsEnum docs = iterator.docs(random().nextBoolean() ? bits : new Bits.MatchNoBits(indexReader.maxDoc()), null, random().nextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
        enums.put(docs, true);
      }
     
      assertEquals(terms.size(), enums.size());
    }
View Full Code Here

    writer.commit();

    DirectoryReader open = DirectoryReader.open(dir);
    for (AtomicReaderContext ctx : open.leaves()) {
      Terms terms = ctx.reader().terms("body");
      TermsEnum iterator = terms.iterator(null);
      IdentityHashMap<DocsEnum, Boolean> enums = new IdentityHashMap<DocsEnum, Boolean>();
      MatchNoBits bits = new Bits.MatchNoBits(open.maxDoc());
      DocsEnum docs = null;
      while ((iterator.next()) != null) {
        docs = iterator.docs(bits, docs, random().nextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
        enums.put(docs, true);
      }
     
      assertEquals(1, enums.size());
      enums.clear();
      iterator = terms.iterator(null);
      docs = null;
      while ((iterator.next()) != null) {
        docs = iterator.docs(new Bits.MatchNoBits(open.maxDoc()), docs, random().nextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
        enums.put(docs, true);
      }
      assertEquals(terms.size(), enums.size());
     
      enums.clear();
      iterator = terms.iterator(null);
      docs = null;
      while ((iterator.next()) != null) {
        docs = iterator.docs(null, docs, random().nextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
        enums.put(docs, true);
      }
      assertEquals(1, enums.size())
    }
    IOUtils.close(writer, open, dir);
View Full Code Here

    List<AtomicReaderContext> leaves = firstReader.leaves();
    List<AtomicReaderContext> leaves2 = secondReader.leaves();
   
    for (AtomicReaderContext ctx : leaves) {
      Terms terms = ctx.reader().terms("body");
      TermsEnum iterator = terms.iterator(null);
      IdentityHashMap<DocsEnum, Boolean> enums = new IdentityHashMap<DocsEnum, Boolean>();
      MatchNoBits bits = new Bits.MatchNoBits(firstReader.maxDoc());
      iterator = terms.iterator(null);
      DocsEnum docs = null;
      BytesRef term = null;
      while ((term = iterator.next()) != null) {
        docs = iterator.docs(null, randomDocsEnum("body", term, leaves2, bits), random().nextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
        enums.put(docs, true);
      }
      assertEquals(terms.size(), enums.size());
     
      iterator = terms.iterator(null);
      enums.clear();
      docs = null;
      while ((term = iterator.next()) != null) {
        docs = iterator.docs(bits, randomDocsEnum("body", term, leaves2, bits), random().nextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
        enums.put(docs, true);
      }
      assertEquals(terms.size(), enums.size());
    }
    IOUtils.close(writer, firstReader, secondReader, dir);
View Full Code Here

    AtomicReader indexReader = readers.get(random().nextInt(readers.size())).reader();
    Terms terms = indexReader.terms(field);
    if (terms == null) {
      return null;
    }
    TermsEnum iterator = terms.iterator(null);
    if (iterator.seekExact(term)) {
      return iterator.docs(bits, null, random().nextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
    }
    return null;
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.index.TermsEnum

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Corporation. Contact coftware#gmail.com.