Package org.apache.lucene.util

Examples of org.apache.lucene.util.BytesRef$UTF8SortedAsUnicodeComparator


        Terms terms = ctx.reader().terms(Consts.FULL);
        if (terms != null) { // cannot really happen, but be on the safe side
          termsEnum = terms.iterator(termsEnum);
          while (termsEnum.next() != null) {
            if (!cache.isFull()) {
              BytesRef t = termsEnum.term();
              // Since we guarantee uniqueness of categories, each term has exactly
              // one document. Also, since we do not allow removing categories (and
              // hence documents), there are no deletions in the index. Therefore, it
              // is sufficient to call next(), and then doc(), exactly once with no
              // 'validation' checks.
              CategoryPath cp = new CategoryPath(t.utf8ToString(), delimiter);
              docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
              boolean res = cache.put(cp, docsEnum.nextDoc() + ctx.docBase);
              assert !res : "entries should not have been evicted from the cache";
            } else {
              // the cache is full and the next put() will evict entries from it, therefore abort the iteration.
View Full Code Here


    int ret=0;
    DocsEnum tdocs = null;
    final Random random = new Random(random().nextLong());
    for (int i=0; i<iter; i++) {
      tenum.seekCeil(new BytesRef("val"));
      tdocs = _TestUtil.docs(random, tenum, MultiFields.getLiveDocs(reader), tdocs, DocsEnum.FLAG_NONE);
      while (tdocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        ret += tdocs.docID();
      }
    }
View Full Code Here

    final int numDocs = atLeast(100);
    for(int i=0;i<numDocs;i++) {
      Document d = new Document();
      long number = random().nextLong();
      d.add(new NumericDocValuesField("number", number));
      BytesRef bytes = new BytesRef(_TestUtil.randomRealisticUnicodeString(random()));
      d.add(new BinaryDocValuesField("bytes", bytes));
      binary.add(bytes);
      bytes = new BytesRef(_TestUtil.randomRealisticUnicodeString(random()));
      d.add(new SortedDocValuesField("sorted", bytes));
      sorted.add(bytes);
      w.addDocument(d);
      numbers.add(number);
    }

    w.forceMerge(1);
    final IndexReader r = w.getReader();
    w.close();

    assertEquals(1, r.leaves().size());
    final AtomicReader ar = r.leaves().get(0).reader();

    int numThreads = _TestUtil.nextInt(random(), 2, 5);
    List<Thread> threads = new ArrayList<Thread>();
    final CountDownLatch startingGun = new CountDownLatch(1);
    for(int t=0;t<numThreads;t++) {
      final Random threadRandom = new Random(random().nextLong());
      Thread thread = new Thread() {
          @Override
          public void run() {
            try {
              //NumericDocValues ndv = ar.getNumericDocValues("number");
              FieldCache.Longs ndv = FieldCache.DEFAULT.getLongs(ar, "number", false);
              //BinaryDocValues bdv = ar.getBinaryDocValues("bytes");
              BinaryDocValues bdv = FieldCache.DEFAULT.getTerms(ar, "bytes", false);
              SortedDocValues sdv = FieldCache.DEFAULT.getTermsIndex(ar, "sorted");
              startingGun.await();
              int iters = atLeast(1000);
              BytesRef scratch = new BytesRef();
              BytesRef scratch2 = new BytesRef();
              for(int iter=0;iter<iters;iter++) {
                int docID = threadRandom.nextInt(numDocs);
                switch(threadRandom.nextInt(6)) {
                case 0:
                  assertEquals((byte) numbers.get(docID).longValue(), FieldCache.DEFAULT.getBytes(ar, "number", false).get(docID));
View Full Code Here

      if (random.nextBoolean()) {
        s = _TestUtil.randomSimpleString(random);
      } else {
        s = _TestUtil.randomUnicodeString(random);
      }
      final BytesRef br = new BytesRef(s);

      if (!allowDups) {
        if (seen.contains(s)) {
          continue;
        }
        seen.add(s);
      }

      if (VERBOSE) {
        System.out.println("  " + numDocs + ": s=" + s);
      }
     
      final Document doc = new Document();
      doc.add(new SortedDocValuesField("stringdv", br));
      doc.add(new NumericDocValuesField("id", numDocs));
      docValues.add(br);
      writer.addDocument(doc);
      numDocs++;

      if (random.nextInt(40) == 17) {
        // force flush
        writer.getReader().close();
      }
    }

    writer.forceMerge(1);
    final DirectoryReader r = writer.getReader();
    writer.close();
   
    final AtomicReader sr = getOnlySegmentReader(r);

    final long END_TIME = System.currentTimeMillis() + (TEST_NIGHTLY ? 30 : 1);

    final int NUM_THREADS = _TestUtil.nextInt(random(), 1, 10);
    Thread[] threads = new Thread[NUM_THREADS];
    for(int thread=0;thread<NUM_THREADS;thread++) {
      threads[thread] = new Thread() {
          @Override
          public void run() {
            Random random = random();           
            final SortedDocValues stringDVDirect;
            final NumericDocValues docIDToID;
            try {
              stringDVDirect = sr.getSortedDocValues("stringdv");
              docIDToID = sr.getNumericDocValues("id");
              assertNotNull(stringDVDirect);
            } catch (IOException ioe) {
              throw new RuntimeException(ioe);
            }
            while(System.currentTimeMillis() < END_TIME) {
              final SortedDocValues source;
              source = stringDVDirect;
              final BytesRef scratch = new BytesRef();

              for(int iter=0;iter<100;iter++) {
                final int docID = random.nextInt(sr.maxDoc());
                source.get(docID, scratch);
                assertEquals(docValues.get((int) docIDToID.get(docID)), scratch);
View Full Code Here

      Document document = lineFileDocs.nextDoc();
      // grab the title and add some SortedSet instances for fun
      String title = document.get("titleTokenized");
      String split[] = title.split("\\s+");
      for (String trash : split) {
        document.add(new SortedSetDocValuesField("sortedset", new BytesRef(trash)));
      }
      // add a numeric dv field sometimes
      document.removeFields("sparsenumeric");
      if (random.nextInt(4) == 2) {
        document.add(new NumericDocValuesField("sparsenumeric", random.nextInt()));
View Full Code Here

          positions[j] = new PositionData[termFreq];
          int position = 0;
          for(int k=0;k<termFreq;k++) {
            position += _TestUtil.nextInt(random(), 1, 10);

            final BytesRef payload;
            if (storePayloads && random().nextInt(4) == 0) {
              final byte[] bytes = new byte[1+random().nextInt(5)];
              for(int l=0;l<bytes.length;l++) {
                bytes[l] = (byte) random().nextInt(255);
              }
              payload = new BytesRef(bytes);
            } else {
              payload = null;
            }

            positions[j][k] = new PositionData(position, payload);
View Full Code Here

    final TermsEnum termsEnum = terms2.iterator(null);

    DocsEnum docsEnum = null;
    for(int i=0;i<NUM_TERMS;i++) {
      final BytesRef term = termsEnum.next();
      assertNotNull(term);
      assertEquals(terms[i].text2, term.utf8ToString());

      // do this twice to stress test the codec's reuse, ie,
      // make sure it properly fully resets (rewinds) its
      // internal state:
      for(int iter=0;iter<2;iter++) {
        docsEnum = _TestUtil.docs(random(), termsEnum, null,  docsEnum, DocsEnum.FLAG_NONE);
        assertEquals(terms[i].docs[0], docsEnum.nextDoc());
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
      }
    }
    assertNull(termsEnum.next());

    for(int i=0;i<NUM_TERMS;i++) {
      assertEquals(termsEnum.seekCeil(new BytesRef(terms[i].text2)), TermsEnum.SeekStatus.FOUND);
    }

    assertFalse(fieldsEnum.hasNext());
    reader.close();
    dir.close();
View Full Code Here

      doc.add(new StringField("f", "doc", Store.NO));
      writer.addDocument(doc);
    }
    writer.close();
   
    Term term = new Term("f", new BytesRef("doc"));
    DirectoryReader reader = DirectoryReader.open(dir);
    for (AtomicReaderContext ctx : reader.leaves()) {
      DocsEnum de = ctx.reader().termDocsEnum(term);
      while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        assertEquals("wrong freq for doc " + de.docID(), 1, de.freq());
View Full Code Here

    int[] docs;
    PositionData[][] positions;
    FieldData field;

    public TermData(final String text, final int[] docs, final PositionData[][] positions) {
      this.text = new BytesRef(text);
      this.text2 = text;
      this.docs = docs;
      this.positions = positions;
    }
View Full Code Here

        assertEquals(positions[i].pos, pos);
        if (positions[i].payload != null) {
          assertNotNull(posEnum.getPayload());
          if (random().nextInt(3) < 2) {
            // Verify the payload bytes
            final BytesRef otherPayload = posEnum.getPayload();
            assertTrue("expected=" + positions[i].payload.toString() + " got=" + otherPayload.toString(), positions[i].payload.equals(otherPayload));
          }
        } else {
          assertNull(posEnum.getPayload());
        }
      }
View Full Code Here

TOP

Related Classes of org.apache.lucene.util.BytesRef$UTF8SortedAsUnicodeComparator

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by Oracle Inc. Contact coftware#gmail.com.