Package org.apache.lucene.util

Examples of org.apache.lucene.util.BytesRef$UTF8SortedAsUnicodeComparator


    final Comparator<GroupDoc> docSortComp = getComparator(docSort);
    @SuppressWarnings({"unchecked","rawtypes"})
    final GroupDocs<BytesRef>[] result = new GroupDocs[limit-groupOffset];
    int totalGroupedHitCount = 0;
    for(int idx=groupOffset;idx < limit;idx++) {
      final BytesRef group = sortedGroups.get(idx);
      final List<GroupDoc> docs = groups.get(group);
      totalGroupedHitCount += docs.size();
      Collections.sort(docs, docSortComp);
      final ScoreDoc[] hits;
      if (docs.size() > docOffset) {
View Full Code Here


          // groups.
          randomValue = _TestUtil.randomRealisticUnicodeString(random());
          //randomValue = _TestUtil.randomSimpleString(random());
        } while ("".equals(randomValue));

        groups.add(new BytesRef(randomValue));
      }
      final String[] contentStrings = new String[_TestUtil.nextInt(random(), 2, 20)];
      if (VERBOSE) {
        System.out.println("TEST: create fake content");
      }
      for(int contentIDX=0;contentIDX<contentStrings.length;contentIDX++) {
        final StringBuilder sb = new StringBuilder();
        sb.append("real").append(random().nextInt(3)).append(' ');
        final int fakeCount = random().nextInt(10);
        for(int fakeIDX=0;fakeIDX<fakeCount;fakeIDX++) {
          sb.append("fake ");
        }
        contentStrings[contentIDX] = sb.toString();
        if (VERBOSE) {
          System.out.println("  content=" + sb.toString());
        }
      }

      Directory dir = newDirectory();
      RandomIndexWriter w = new RandomIndexWriter(
                                                  random(),
                                                  dir,
                                                  newIndexWriterConfig(TEST_VERSION_CURRENT,
                                                                       new MockAnalyzer(random())));
      final boolean preFlex = "Lucene3x".equals(w.w.getConfig().getCodec().getName());
      boolean canUseIDV = !preFlex;

      Document doc = new Document();
      Document docNoGroup = new Document();
      Field idvGroupField = new SortedDocValuesField("group_dv", new BytesRef());
      if (canUseIDV) {
        doc.add(idvGroupField);
        docNoGroup.add(idvGroupField);
      }

      Field group = newStringField("group", "", Field.Store.NO);
      doc.add(group);
      Field sort1 = newStringField("sort1", "", Field.Store.NO);
      doc.add(sort1);
      docNoGroup.add(sort1);
      Field sort2 = newStringField("sort2", "", Field.Store.NO);
      doc.add(sort2);
      docNoGroup.add(sort2);
      Field content = newTextField("content", "", Field.Store.NO);
      doc.add(content);
      docNoGroup.add(content);
      IntField id = new IntField("id", 0, Field.Store.NO);
      doc.add(id);
      docNoGroup.add(id);
      final GroupDoc[] groupDocs = new GroupDoc[numDocs];
      for(int i=0;i<numDocs;i++) {
        final BytesRef groupValue;
        if (random().nextInt(24) == 17) {
          // So we test the "doc doesn't have the group'd
          // field" case:
          groupValue = null;
        } else {
          groupValue = groups.get(random().nextInt(groups.size()));
        }
        final GroupDoc groupDoc = new GroupDoc(i,
                                               groupValue,
                                               groups.get(random().nextInt(groups.size())),
                                               groups.get(random().nextInt(groups.size())),
                                               contentStrings[random().nextInt(contentStrings.length)]);
        if (VERBOSE) {
          System.out.println("  doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.utf8ToString()) + " sort1=" + groupDoc.sort1.utf8ToString() + " sort2=" + groupDoc.sort2.utf8ToString());
        }

        groupDocs[i] = groupDoc;
        if (groupDoc.group != null) {
          group.setStringValue(groupDoc.group.utf8ToString());
          if (canUseIDV) {
            idvGroupField.setBytesValue(BytesRef.deepCopyOf(groupDoc.group));
          }
        } else if (canUseIDV) {
          // Must explicitly set empty string, else eg if
          // the segment has all docs missing the field then
          // we get null back instead of empty BytesRef:
          idvGroupField.setBytesValue(new BytesRef());
        }
        sort1.setStringValue(groupDoc.sort1.utf8ToString());
        sort2.setStringValue(groupDoc.sort2.utf8ToString());
        content.setStringValue(groupDoc.content);
        id.setIntValue(groupDoc.id);
View Full Code Here

        payloadSize = 1 + random.nextInt(1);
      }

      fixedPayloads = random.nextBoolean();
      byte[] payloadBytes = new byte[payloadSize];
      payload = new BytesRef(payloadBytes);
      this.options = options;
      doPositions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS.compareTo(options) <= 0;
    }
View Full Code Here

  protected BytesRef randomPayload() {
    final int len = random().nextInt(5);
    if (len == 0) {
      return null;
    }
    final BytesRef payload = new BytesRef(len);
    random().nextBytes(payload.bytes);
    payload.length = len;
    return payload;
  }
View Full Code Here

    assertEquals(ft.storeTermVectorPositions(), terms.hasPositions());
    assertEquals(ft.storeTermVectorOffsets(), terms.hasOffsets());
    assertEquals(ft.storeTermVectorPayloads() && tk.hasPayloads(), terms.hasPayloads());
    final Set<BytesRef> uniqueTerms = new HashSet<BytesRef>();
    for (String term : tk.freqs.keySet()) {
      uniqueTerms.add(new BytesRef(term));
    }
    final BytesRef[] sortedTerms = uniqueTerms.toArray(new BytesRef[0]);
    Arrays.sort(sortedTerms, terms.getComparator());
    final TermsEnum termsEnum = terms.iterator(random().nextBoolean() ? null : this.termsEnum.get());
    this.termsEnum.set(termsEnum);
    for (int i = 0; i < sortedTerms.length; ++i) {
      final BytesRef nextTerm = termsEnum.next();
      assertEquals(sortedTerms[i], nextTerm);
      assertEquals(sortedTerms[i], termsEnum.term());
      assertEquals(1, termsEnum.docFreq());

      final FixedBitSet bits = new FixedBitSet(1);
View Full Code Here

      this.fieldNames = fieldNames.toArray(new String[0]);
      terms = new String[disctinctTerms];
      termBytes = new BytesRef[disctinctTerms];
      for (int i = 0; i < disctinctTerms; ++i) {
        terms[i] = _TestUtil.randomRealisticUnicodeString(random());
        termBytes[i] = new BytesRef(terms[i]);
      }
    }
View Full Code Here

  }

  private void addField(Document doc, String field, String value, boolean canUseIDV) {
    doc.add(new StringField(field, value, Field.Store.NO));
    if (canUseIDV) {
      doc.add(new SortedDocValuesField(field + "_dv", new BytesRef(value)));
    }
  }
View Full Code Here

    Document doc = new Document();
    Document docNoGroup = new Document();
    Document docNoFacet = new Document();
    Document docNoGroupNoFacet = new Document();
    Field group = newStringField("group", "", Field.Store.NO);
    Field groupDc = new SortedDocValuesField("group_dv", new BytesRef());
    if (useDv) {
      doc.add(groupDc);
      docNoFacet.add(groupDc);
    }
    doc.add(group);
    docNoFacet.add(group);
    Field[] facetFields;
    if (useDv) {
      assert !multipleFacetValuesPerDocument;
      facetFields = new Field[2];
      facetFields[0] = newStringField("facet", "", Field.Store.NO);
      doc.add(facetFields[0]);
      docNoGroup.add(facetFields[0]);
      facetFields[1] = new SortedDocValuesField("facet_dv", new BytesRef());
      doc.add(facetFields[1]);
      docNoGroup.add(facetFields[1]);
    } else {
      facetFields = multipleFacetValuesPerDocument ? new Field[2 + random.nextInt(6)] : new Field[1];
      for (int i = 0; i < facetFields.length; i++) {
        facetFields[i] = newStringField("facet", "", Field.Store.NO);
        doc.add(facetFields[i]);
        docNoGroup.add(facetFields[i]);
      }
    }
    Field content = newStringField("content", "", Field.Store.NO);
    doc.add(content);
    docNoGroup.add(content);
    docNoFacet.add(content);
    docNoGroupNoFacet.add(content);

    // Sorted set of facet values ordered by a null-tolerant comparator:
    // null sorts before every non-null string, and non-null strings use their
    // natural String.compareTo order. A null-tolerant comparator is needed
    // because null is added to this set for documents without facet values.
    NavigableSet<String> uniqueFacetValues = new TreeSet<String>(new Comparator<String>() {

      @Override
      public int compare(String a, String b) {
        // Identity check: covers the both-null case and the same-instance case.
        if (a == b) {
          return 0;
        } else if (a == null) {
          // Only a is null: null orders first.
          return -1;
        } else if (b == null) {
          // Only b is null: a orders after it.
          return 1;
        } else {
          // Both non-null: fall back to natural string ordering.
          return a.compareTo(b);
        }
      }

    });
    Map<String, Map<String, Set<String>>> searchTermToFacetToGroups = new HashMap<String, Map<String, Set<String>>>();
    int facetWithMostGroups = 0;
    for (int i = 0; i < numDocs; i++) {
      final String groupValue;
      if (random.nextInt(24) == 17) {
        // So we test the "doc doesn't have the group'd
        // field" case:
        if (useDv) {
          groupValue = "";
        } else {
          groupValue = null;
        }
      } else {
        groupValue = groups.get(random.nextInt(groups.size()));
      }

      String contentStr = contentBrs[random.nextInt(contentBrs.length)];
      if (!searchTermToFacetToGroups.containsKey(contentStr)) {
        searchTermToFacetToGroups.put(contentStr, new HashMap<String, Set<String>>());
      }
      Map<String, Set<String>> facetToGroups = searchTermToFacetToGroups.get(contentStr);

      List<String> facetVals = new ArrayList<String>();
      if (useDv || random.nextInt(24) != 18) {
        if (useDv) {
          String facetValue = facetValues.get(random.nextInt(facetValues.size()));
          uniqueFacetValues.add(facetValue);
          if (!facetToGroups.containsKey(facetValue)) {
            facetToGroups.put(facetValue, new HashSet<String>());
          }
          Set<String> groupsInFacet = facetToGroups.get(facetValue);
          groupsInFacet.add(groupValue);
          if (groupsInFacet.size() > facetWithMostGroups) {
            facetWithMostGroups = groupsInFacet.size();
          }
          facetFields[0].setStringValue(facetValue);
          facetFields[1].setBytesValue(new BytesRef(facetValue));
          facetVals.add(facetValue);
        } else {
          for (Field facetField : facetFields) {
            String facetValue = facetValues.get(random.nextInt(facetValues.size()));
            uniqueFacetValues.add(facetValue);
            if (!facetToGroups.containsKey(facetValue)) {
              facetToGroups.put(facetValue, new HashSet<String>());
            }
            Set<String> groupsInFacet = facetToGroups.get(facetValue);
            groupsInFacet.add(groupValue);
            if (groupsInFacet.size() > facetWithMostGroups) {
              facetWithMostGroups = groupsInFacet.size();
            }
            facetField.setStringValue(facetValue);
            facetVals.add(facetValue);
          }
        }
      } else {
        uniqueFacetValues.add(null);
        if (!facetToGroups.containsKey(null)) {
          facetToGroups.put(null, new HashSet<String>());
        }
        Set<String> groupsInFacet = facetToGroups.get(null);
        groupsInFacet.add(groupValue);
        if (groupsInFacet.size() > facetWithMostGroups) {
          facetWithMostGroups = groupsInFacet.size();
        }
      }

      if (VERBOSE) {
        System.out.println("  doc content=" + contentStr + " group=" + (groupValue == null ? "null" : groupValue) + " facetVals=" + facetVals);
      }

      if (groupValue != null) {
        if (useDv) {
          groupDc.setBytesValue(new BytesRef(groupValue));
        }
        group.setStringValue(groupValue);
      } else if (useDv) {
        // DV cannot have missing values:
        groupDc.setBytesValue(new BytesRef());
      }
      content.setStringValue(contentStr);
      if (groupValue == null && facetVals.isEmpty()) {
        writer.addDocument(docNoGroupNoFacet);
      } else if (facetVals.isEmpty()) {
View Full Code Here

      }

      Set<String> groups = facetGroups.get(facetValue);
      int count = groups != null ? groups.size() : 0;
      if (count >= minCount) {
        entries.add(new TermGroupFacetCollector.FacetEntry(new BytesRef(facetValue), count));
      }
      totalCount += count;
    }

    // Only include null count when no facet prefix is specified
View Full Code Here

    }
    return new GroupedFacetResult(totalCount, totalMissCount, entriesResult);
  }

  private AbstractGroupFacetCollector createRandomCollector(String groupField, String facetField, String facetPrefix, boolean multipleFacetsPerDocument) {
    BytesRef facetPrefixBR = facetPrefix == null ? null : new BytesRef(facetPrefix);
    // DocValues cannot be multi-valued:
    assert !multipleFacetsPerDocument || !groupField.endsWith("_dv");
    return TermGroupFacetCollector.createTermGroupFacetCollector(groupField, facetField, multipleFacetsPerDocument, facetPrefixBR, random().nextInt(1024));
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.util.BytesRef$UTF8SortedAsUnicodeComparator

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.