Package org.apache.lucene.util

Examples of org.apache.lucene.util.BytesRef$UTF8SortedAsUTF16Comparator


        }

        int upto = 0;
        // Test straight enum of the terms:
        while(true) {
          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          final BytesRef expected = new BytesRef(field.terms[upto++].text2);
          assertTrue("expected=" + expected + " vs actual " + term, expected.bytesEquals(term));
        }
        assertEquals(upto, field.terms.length);

        // Test random seek:
        TermData term = field.terms[random().nextInt(field.terms.length)];
        TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(term.text2));
        assertEquals(status, TermsEnum.SeekStatus.FOUND);
        assertEquals(term.docs.length, termsEnum.docFreq());
        if (field.omitTF) {
          this.verifyDocs(term.docs, term.positions, _TestUtil.docs(random(), termsEnum, null, null, DocsEnum.FLAG_NONE), false);
        } else {
          this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null), true);
        }

        // Test random seek by ord:
        final int idx = random().nextInt(field.terms.length);
        term = field.terms[idx];
        boolean success = false;
        try {
          termsEnum.seekExact(idx);
          success = true;
        } catch (UnsupportedOperationException uoe) {
          // ok -- skip it
        }
        if (success) {
          assertEquals(status, TermsEnum.SeekStatus.FOUND);
          assertTrue(termsEnum.term().bytesEquals(new BytesRef(term.text2)));
          assertEquals(term.docs.length, termsEnum.docFreq());
          if (field.omitTF) {
            this.verifyDocs(term.docs, term.positions, _TestUtil.docs(random(), termsEnum, null, null, DocsEnum.FLAG_NONE), false);
          } else {
            this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null), true);
          }
        }

        // Test seek to non-existent terms:
        if (VERBOSE) {
          System.out.println("TEST: seek non-exist terms");
        }
        for(int i=0;i<100;i++) {
          final String text2 = _TestUtil.randomUnicodeString(random()) + ".";
          status = termsEnum.seekCeil(new BytesRef(text2));
          assertTrue(status == TermsEnum.SeekStatus.NOT_FOUND ||
                     status == TermsEnum.SeekStatus.END);
        }

        // Seek to each term, backwards:
        if (VERBOSE) {
          System.out.println("TEST: seek terms backwards");
        }
        for(int i=field.terms.length-1;i>=0;i--) {
          assertEquals(Thread.currentThread().getName() + ": field=" + field.fieldInfo.name + " term=" + field.terms[i].text2, TermsEnum.SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef(field.terms[i].text2)));
          assertEquals(field.terms[i].docs.length, termsEnum.docFreq());
        }

        // Seek to each term by ord, backwards
        for(int i=field.terms.length-1;i>=0;i--) {
          try {
            termsEnum.seekExact(i);
            assertEquals(field.terms[i].docs.length, termsEnum.docFreq());
            assertTrue(termsEnum.term().bytesEquals(new BytesRef(field.terms[i].text2)));
          } catch (UnsupportedOperationException uoe) {
          }
        }

        // Seek to non-existent empty-string term
        status = termsEnum.seekCeil(new BytesRef(""));
        assertNotNull(status);
        //assertEquals(TermsEnum.SeekStatus.NOT_FOUND, status);

        // Make sure we're now pointing to first term
        assertTrue(termsEnum.term().bytesEquals(new BytesRef(field.terms[0].text2)));

        // Test docs enum
        termsEnum.seekCeil(new BytesRef(""));
        upto = 0;
        do {
          term = field.terms[upto];
          if (random().nextInt(3) == 1) {
            final DocsEnum docs;
View Full Code Here


    // compute top-K
    List<FacetResult> results = new ArrayList<FacetResult>();

    int[] counts = facetArrays.getIntArray();

    BytesRef scratch = new BytesRef();

    for (FacetRequest request : searchParams.facetRequests) {
      String dim = request.categoryPath.components[0];
      SortedSetDocValuesReaderState.OrdRange ordRange = state.getOrdRange(dim);
      // checked in ctor:
      assert ordRange != null;

      if (request.numResults >= ordRange.end - ordRange.start + 1) {
        // specialize this case, user is interested in all available results
        ArrayList<FacetResultNode> nodes = new ArrayList<FacetResultNode>();
        int dimCount = 0;
        for(int ord=ordRange.start; ord<=ordRange.end; ord++) {
          //System.out.println("  ord=" + ord + " count= "+ counts[ord] + " bottomCount=" + bottomCount);
          if (counts[ord] != 0) {
            dimCount += counts[ord];
            FacetResultNode node = new FacetResultNode(ord, counts[ord]);
            dv.lookupOrd(ord, scratch);
            node.label = new CategoryPath(scratch.utf8ToString().split(state.separatorRegex, 2));
            nodes.add(node);
          }
        }

        Collections.sort(nodes, new Comparator<FacetResultNode>() {
            @Override
            public int compare(FacetResultNode o1, FacetResultNode o2) {
              // First by highest count
              int value = (int) (o2.value - o1.value);
              if (value == 0) {
                // ... then by lowest ord:
                value = o1.ordinal - o2.ordinal;
              }
              return value;
            }
          });
     
        CategoryListParams.OrdinalPolicy op = searchParams.indexingParams.getCategoryListParams(request.categoryPath).getOrdinalPolicy(dim);
        if (op == CategoryListParams.OrdinalPolicy.ALL_BUT_DIMENSION) {
          dimCount = 0;
        }

        FacetResultNode rootNode = new FacetResultNode(-1, dimCount);
        rootNode.label = new CategoryPath(new String[] {dim});
        rootNode.subResults = nodes;
        results.add(new FacetResult(request, rootNode, nodes.size()));
        continue;
      }

      TopCountPQ q = new TopCountPQ(request.numResults);

      int bottomCount = 0;

      //System.out.println("collect");
      int dimCount = 0;
      int childCount = 0;
      FacetResultNode reuse = null;
      for(int ord=ordRange.start; ord<=ordRange.end; ord++) {
        //System.out.println("  ord=" + ord + " count= "+ counts[ord] + " bottomCount=" + bottomCount);
        if (counts[ord] > 0) {
          childCount++;
          if (counts[ord] > bottomCount) {
            dimCount += counts[ord];
            //System.out.println("    keep");
            if (reuse == null) {
              reuse = new FacetResultNode(ord, counts[ord]);
            } else {
              reuse.ordinal = ord;
              reuse.value = counts[ord];
            }
            reuse = q.insertWithOverflow(reuse);
            if (q.size() == request.numResults) {
              bottomCount = (int) q.top().value;
              //System.out.println("    new bottom=" + bottomCount);
            }
          }
        }
      }

      CategoryListParams.OrdinalPolicy op = searchParams.indexingParams.getCategoryListParams(request.categoryPath).getOrdinalPolicy(dim);
      if (op == CategoryListParams.OrdinalPolicy.ALL_BUT_DIMENSION) {
        dimCount = 0;
      }

      FacetResultNode rootNode = new FacetResultNode(-1, dimCount);
      rootNode.label = new CategoryPath(new String[] {dim});

      FacetResultNode[] childNodes = new FacetResultNode[q.size()];
      for(int i=childNodes.length-1;i>=0;i--) {
        childNodes[i] = q.pop();
        dv.lookupOrd(childNodes[i].ordinal, scratch);
        childNodes[i].label = new CategoryPath(scratch.utf8ToString().split(state.separatorRegex, 2));
      }
      rootNode.subResults = Arrays.asList(childNodes);
     
      results.add(new FacetResult(request, rootNode, childCount));
    }
View Full Code Here

    // TODO: we can make this more efficient if eg we can be
    // "involved" when OrdinalMap is being created?  Ie see
    // each term/ord it's assigning as it goes...
    String lastDim = null;
    int startOrd = -1;
    BytesRef spare = new BytesRef();

    // TODO: this approach can work for full hierarchy?;
    // TaxoReader can't do this since ords are not in
    // "sorted order" ... but we should generalize this to
    // support arbitrary hierarchy:
    for(int ord=0;ord<valueCount;ord++) {
      dv.lookupOrd(ord, spare);
      String[] components = spare.utf8ToString().split(separatorRegex, 2);
      if (components.length != 2) {
        throw new IllegalArgumentException("this class can only handle 2 level hierarchy (dim/value); got: " + spare.utf8ToString());
      }
      if (!components[0].equals(lastDim)) {
        if (lastDim != null) {
          prefixToOrdRange.put(lastDim, new OrdRange(startOrd, ord-1));
        }
View Full Code Here

        .setMergePolicy(newLogMergePolicy(false, 10))
        .setOpenMode(IndexWriterConfig.OpenMode.CREATE));

    Document doc = new Document();
    byte bytes[] = new byte[2];
    BytesRef data = new BytesRef(bytes);
    SortedDocValuesField dvField = new SortedDocValuesField("dv", data);
    doc.add(dvField);
   
    for (int i = 0; i < Integer.MAX_VALUE; i++) {
      bytes[0] = (byte)(i >> 8);
      bytes[1] = (byte) i;
      w.addDocument(doc);
      if (i % 100000 == 0) {
        System.out.println("indexed: " + i);
        System.out.flush();
      }
    }
   
    w.forceMerge(1);
    w.close();
   
    System.out.println("verifying...");
    System.out.flush();
   
    DirectoryReader r = DirectoryReader.open(dir);
    int expectedValue = 0;
    for (AtomicReaderContext context : r.leaves()) {
      AtomicReader reader = context.reader();
      BytesRef scratch = new BytesRef();
      BinaryDocValues dv = reader.getSortedDocValues("dv");
      for (int i = 0; i < reader.maxDoc(); i++) {
        bytes[0] = (byte)(expectedValue >> 8);
        bytes[1] = (byte) expectedValue;
        dv.get(i, scratch);
View Full Code Here

        .setMergePolicy(newLogMergePolicy(false, 10))
        .setOpenMode(IndexWriterConfig.OpenMode.CREATE));

    Document doc = new Document();
    byte bytes[] = new byte[4];
    BytesRef data = new BytesRef(bytes);
    SortedDocValuesField dvField = new SortedDocValuesField("dv", data);
    doc.add(dvField);
   
    long seed = random().nextLong();
    Random random = new Random(seed);
   
    for (int i = 0; i < Integer.MAX_VALUE; i++) {
      random.nextBytes(bytes);
      w.addDocument(doc);
      if (i % 100000 == 0) {
        System.out.println("indexed: " + i);
        System.out.flush();
      }
    }
   
    w.forceMerge(1);
    w.close();
   
    System.out.println("verifying...");
    System.out.flush();
   
    DirectoryReader r = DirectoryReader.open(dir);
    random.setSeed(seed);
    for (AtomicReaderContext context : r.leaves()) {
      AtomicReader reader = context.reader();
      BytesRef scratch = new BytesRef();
      BinaryDocValues dv = reader.getSortedDocValues("dv");
      for (int i = 0; i < reader.maxDoc(); i++) {
        random.nextBytes(bytes);
        dv.get(i, scratch);
        assertEquals(data, scratch);
View Full Code Here

      // Add sorted-set DV fields, one per value:
      for(CategoryPath cp : e.getValue()) {
        if (cp.length != 2) {
          throw new IllegalArgumentException("only flat facets (dimension + label) are currently supported; got " + cp);
        }
        doc.add(new SortedSetDocValuesField(dvField, new BytesRef(cp.toString(indexingParams.getFacetDelimChar()))));
      }

      // add the drill-down field
      DrillDownStream drillDownStream = getDrillDownStream(e.getValue());
      Field drillDown = new Field(clp.field, drillDownStream, drillDownFieldType());
View Full Code Here

  public int[] toDocsArray(Term term, Bits bits, IndexReader reader)
      throws IOException {
    Fields fields = MultiFields.getFields(reader);
    Terms cterms = fields.terms(term.field);
    TermsEnum ctermsEnum = cterms.iterator(null);
    if (ctermsEnum.seekExact(new BytesRef(term.text()))) {
      DocsEnum docsEnum = _TestUtil.docs(random(), ctermsEnum, bits, null, DocsEnum.FLAG_NONE);
      return toArray(docsEnum);
    }
    return null;
  }
View Full Code Here

      Terms vector = reader.get(j).terms(testFields[0]);
      assertNotNull(vector);
      assertEquals(testTerms.length, vector.size());
      TermsEnum termsEnum = vector.iterator(null);
      for (int i = 0; i < testTerms.length; i++) {
        final BytesRef text = termsEnum.next();
        assertNotNull(text);
        String term = text.utf8ToString();
        //System.out.println("Term: " + term);
        assertEquals(testTerms[i], term);
      }
      assertNull(termsEnum.next());
    }
View Full Code Here

      assertNotNull(vector);
      assertEquals(testTerms.length, vector.size());
      TermsEnum termsEnum = vector.iterator(null);
      DocsEnum docsEnum = null;
      for (int i = 0; i < testTerms.length; i++) {
        final BytesRef text = termsEnum.next();
        assertNotNull(text);
        String term = text.utf8ToString();
        //System.out.println("Term: " + term);
        assertEquals(testTerms[i], term);
       
        docsEnum = _TestUtil.docs(random(), termsEnum, null, docsEnum, DocsEnum.FLAG_NONE);
        assertNotNull(docsEnum);
View Full Code Here

    assertNotNull(vector);
    assertEquals(testTerms.length, vector.size());
    TermsEnum termsEnum = vector.iterator(null);
    DocsAndPositionsEnum dpEnum = null;
    for (int i = 0; i < testTerms.length; i++) {
      final BytesRef text = termsEnum.next();
      assertNotNull(text);
      String term = text.utf8ToString();
      //System.out.println("Term: " + term);
      assertEquals(testTerms[i], term);

      dpEnum = termsEnum.docsAndPositions(null, dpEnum);
      assertNotNull(dpEnum);
      int doc = dpEnum.docID();
      assertEquals(-1, doc);
      assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
      assertEquals(dpEnum.freq(), positions[i].length);
      for (int j = 0; j < positions[i].length; j++) {
        assertEquals(positions[i][j], dpEnum.nextPosition());
      }
      assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());

      dpEnum = termsEnum.docsAndPositions(null, dpEnum);
      doc = dpEnum.docID();
      assertEquals(-1, doc);
      assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
      assertNotNull(dpEnum);
      assertEquals(dpEnum.freq(), positions[i].length);
      for (int j = 0; j < positions[i].length; j++) {
        assertEquals(positions[i][j], dpEnum.nextPosition());
        assertEquals(j*10, dpEnum.startOffset());
        assertEquals(j*10 + testTerms[i].length(), dpEnum.endOffset());
      }
      assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());
    }

    Terms freqVector = reader.get(0).terms(testFields[1]); //no pos, no offset
    assertNotNull(freqVector);
    assertEquals(testTerms.length, freqVector.size());
    termsEnum = freqVector.iterator(null);
    assertNotNull(termsEnum);
    for (int i = 0; i < testTerms.length; i++) {
      final BytesRef text = termsEnum.next();
      assertNotNull(text);
      String term = text.utf8ToString();
      //System.out.println("Term: " + term);
      assertEquals(testTerms[i], term);
      assertNotNull(termsEnum.docs(null, null));
      assertNull(termsEnum.docsAndPositions(null, null)); // no pos
    }
View Full Code Here

TOP

Related Classes of org.apache.lucene.util.BytesRef$UTF8SortedAsUTF16Comparator

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.