Package org.apache.lucene.index

Examples of org.apache.lucene.index.SortedSetDocValues


    }
   
    @Override
    public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
      assert field.getDocValuesType() == FieldInfo.DocValuesType.SORTED_SET;
      SortedSetDocValues values = in.getSortedSet(field);
      assert values != null;
      return new AssertingAtomicReader.AssertingSortedSetDocValues(values, maxDoc);
    }
View Full Code Here


      // AIOOBE can happen:
      if (ReaderUtil.getTopLevelContext(matchingDocs.context).reader() != state.origReader) {
        throw new IllegalStateException("the SortedSetDocValuesReaderState provided to this class does not match the reader being searched; you must create a new SortedSetDocValuesReaderState every time you open a new IndexReader");
      }
     
      SortedSetDocValues segValues = reader.getSortedSetDocValues(field);
      if (segValues == null) {
        return;
      }

      final int[] counts = facetArrays.getIntArray();
      final int maxDoc = reader.maxDoc();
      assert maxDoc == matchingDocs.bits.length();

      if (dv instanceof MultiSortedSetDocValues) {
        MultiDocValues.OrdinalMap ordinalMap = ((MultiSortedSetDocValues) dv).mapping;
        int segOrd = matchingDocs.context.ord;

        int numSegOrds = (int) segValues.getValueCount();

        if (matchingDocs.totalHits < numSegOrds/10) {
          // Remap every ord to global ord as we iterate:
          int doc = 0;
          while (doc < maxDoc && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
            segValues.setDocument(doc);
            int term = (int) segValues.nextOrd();
            while (term != SortedSetDocValues.NO_MORE_ORDS) {
              counts[(int) ordinalMap.getGlobalOrd(segOrd, term)]++;
              term = (int) segValues.nextOrd();
            }
            ++doc;
          }
        } else {

          // First count in seg-ord space:
          final int[] segCounts = new int[numSegOrds];
          int doc = 0;
          while (doc < maxDoc && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
            segValues.setDocument(doc);
            int term = (int) segValues.nextOrd();
            while (term != SortedSetDocValues.NO_MORE_ORDS) {
              segCounts[term]++;
              term = (int) segValues.nextOrd();
            }
            ++doc;
          }

          // Then, migrate to global ords:
          for(int ord=0;ord<numSegOrds;ord++) {
            int count = segCounts[ord];
            if (count != 0) {
              counts[(int) ordinalMap.getGlobalOrd(segOrd, ord)] += count;
            }
          }
        }
      } else {
        // No ord mapping (e.g., single segment index):
        // just aggregate directly into counts:

        int doc = 0;
        while (doc < maxDoc && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
          segValues.setDocument(doc);
          int term = (int) segValues.nextOrd();
          while (term != SortedSetDocValues.NO_MORE_ORDS) {
            counts[term]++;
            term = (int) segValues.nextOrd();
          }
          ++doc;
        }
      }
    }
View Full Code Here

    this.origReader = reader;

    // We need this to create thread-safe MultiSortedSetDV
    // per collector:
    topReader = SlowCompositeReaderWrapper.wrap(reader);
    SortedSetDocValues dv = topReader.getSortedSetDocValues(field);
    if (dv == null) {
      throw new IllegalArgumentException("field \"" + field + "\" was not indexed with SortedSetDocValues");
    }
    if (dv.getValueCount() > Integer.MAX_VALUE) {
      throw new IllegalArgumentException("can only handle valueCount < Integer.MAX_VALUE; got " + dv.getValueCount());
    }
    valueCount = (int) dv.getValueCount();

    // TODO: we can make this more efficient if eg we can be
    // "involved" when OrdinalMap is being created?  Ie see
    // each term/ord it's assigning as it goes...
    String lastDim = null;
    int startOrd = -1;
    BytesRef spare = new BytesRef();

    // TODO: this approach can work for full hierarchy?;
    // TaxoReader can't do this since ords are not in
    // "sorted order" ... but we should generalize this to
    // support arbitrary hierarchy:
    for(int ord=0;ord<valueCount;ord++) {
      dv.lookupOrd(ord, spare);
      String[] components = spare.utf8ToString().split(separatorRegex, 2);
      if (components.length != 2) {
        throw new IllegalArgumentException("this class can only handle 2 level hierarchy (dim/value); got: " + spare.utf8ToString());
      }
      if (!components[0].equals(lastDim)) {
View Full Code Here

   * The default implementation calls {@link #addSortedSetField}, passing
   * an Iterable that merges ordinals and values and filters deleted documents .
   */
  public void mergeSortedSetField(FieldInfo fieldInfo, final MergeState mergeState, List<SortedSetDocValues> toMerge) throws IOException {
    final AtomicReader readers[] = mergeState.readers.toArray(new AtomicReader[toMerge.size()]);
    final SortedSetDocValues dvs[] = toMerge.toArray(new SortedSetDocValues[toMerge.size()]);
   
    // step 1: iterate thru each sub and mark terms still in use
    TermsEnum liveTerms[] = new TermsEnum[dvs.length];
    for (int sub = 0; sub < liveTerms.length; sub++) {
      AtomicReader reader = readers[sub];
      SortedSetDocValues dv = dvs[sub];
      Bits liveDocs = reader.getLiveDocs();
      if (liveDocs == null) {
        liveTerms[sub] = dv.termsEnum();
      } else {
        OpenBitSet bitset = new OpenBitSet(dv.getValueCount());
        for (int i = 0; i < reader.maxDoc(); i++) {
          if (liveDocs.get(i)) {
            dv.setDocument(i);
            long ord;
            while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
              bitset.set(ord);
            }
          }
        }
        liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
      }
    }
   
    // step 2: create ordinal map (this conceptually does the "merging")
    final OrdinalMap map = new OrdinalMap(this, liveTerms);
   
    // step 3: add field
    addSortedSetField(fieldInfo,
        // ord -> value
        new Iterable<BytesRef>() {
          @Override
          public Iterator<BytesRef> iterator() {
            return new Iterator<BytesRef>() {
              final BytesRef scratch = new BytesRef();
              long currentOrd;

              @Override
              public boolean hasNext() {
                return currentOrd < map.getValueCount();
              }

              @Override
              public BytesRef next() {
                if (!hasNext()) {
                  throw new NoSuchElementException();
                }
                int segmentNumber = map.getFirstSegmentNumber(currentOrd);
                long segmentOrd = map.getFirstSegmentOrd(currentOrd);
                dvs[segmentNumber].lookupOrd(segmentOrd, scratch);
                currentOrd++;
                return scratch;
              }

              @Override
              public void remove() {
                throw new UnsupportedOperationException();
              }
            };
          }
        },
        // doc -> ord count
        new Iterable<Number>() {
          @Override
          public Iterator<Number> iterator() {
            return new Iterator<Number>() {
              int readerUpto = -1;
              int docIDUpto;
              int nextValue;
              AtomicReader currentReader;
              Bits currentLiveDocs;
              boolean nextIsSet;

              @Override
              public boolean hasNext() {
                return nextIsSet || setNext();
              }

              @Override
              public void remove() {
                throw new UnsupportedOperationException();
              }

              @Override
              public Number next() {
                if (!hasNext()) {
                  throw new NoSuchElementException();
                }
                assert nextIsSet;
                nextIsSet = false;
                // TODO make a mutable number
                return nextValue;
              }

              private boolean setNext() {
                while (true) {
                  if (readerUpto == readers.length) {
                    return false;
                  }

                  if (currentReader == null || docIDUpto == currentReader.maxDoc()) {
                    readerUpto++;
                    if (readerUpto < readers.length) {
                      currentReader = readers[readerUpto];
                      currentLiveDocs = currentReader.getLiveDocs();
                    }
                    docIDUpto = 0;
                    continue;
                  }

                  if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
                    nextIsSet = true;
                    SortedSetDocValues dv = dvs[readerUpto];
                    dv.setDocument(docIDUpto);
                    nextValue = 0;
                    while (dv.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
                      nextValue++;
                    }
                    docIDUpto++;
                    return true;
                  }

                  docIDUpto++;
                }
              }
            };
          }
        },
        // ords
        new Iterable<Number>() {
          @Override
          public Iterator<Number> iterator() {
            return new Iterator<Number>() {
              int readerUpto = -1;
              int docIDUpto;
              long nextValue;
              AtomicReader currentReader;
              Bits currentLiveDocs;
              boolean nextIsSet;
              long ords[] = new long[8];
              int ordUpto;
              int ordLength;

              @Override
              public boolean hasNext() {
                return nextIsSet || setNext();
              }

              @Override
              public void remove() {
                throw new UnsupportedOperationException();
              }

              @Override
              public Number next() {
                if (!hasNext()) {
                  throw new NoSuchElementException();
                }
                assert nextIsSet;
                nextIsSet = false;
                // TODO make a mutable number
                return nextValue;
              }

              private boolean setNext() {
                while (true) {
                  if (readerUpto == readers.length) {
                    return false;
                  }
                 
                  if (ordUpto < ordLength) {
                    nextValue = ords[ordUpto];
                    ordUpto++;
                    nextIsSet = true;
                    return true;
                  }

                  if (currentReader == null || docIDUpto == currentReader.maxDoc()) {
                    readerUpto++;
                    if (readerUpto < readers.length) {
                      currentReader = readers[readerUpto];
                      currentLiveDocs = currentReader.getLiveDocs();
                    }
                    docIDUpto = 0;
                    continue;
                  }
                 
                  if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
                    assert docIDUpto < currentReader.maxDoc();
                    SortedSetDocValues dv = dvs[readerUpto];
                    dv.setDocument(docIDUpto);
                    ordUpto = ordLength = 0;
                    long ord;
                    while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                      if (ordLength == ords.length) {
                        ords = ArrayUtil.grow(ords, ordLength+1);
                      }
                      ords[ordLength] = map.getGlobalOrd(readerUpto, ord);
                      ordLength++;
View Full Code Here

    // test bad field
    terms = cache.getTerms(reader, "bogusfield", false);

    // getDocTermOrds
    SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
    int numEntries = cache.getCacheEntries().length;
    // ask for it again, and check that we didnt create any additional entries:
    termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
    assertEquals(numEntries, cache.getCacheEntries().length);

    for (int i = 0; i < NUM_DOCS; i++) {
      termOrds.setDocument(i);
      // This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
      List<BytesRef> values = new ArrayList<BytesRef>(new LinkedHashSet<BytesRef>(Arrays.asList(multiValued[i])));
      for (BytesRef v : values) {
        if (v == null) {
          // why does this test use null values... instead of an empty list: confusing
          break;
        }
        long ord = termOrds.nextOrd();
        assert ord != SortedSetDocValues.NO_MORE_ORDS;
        BytesRef scratch = new BytesRef();
        termOrds.lookupOrd(ord, scratch);
        assertEquals(v, scratch);
      }
      assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
    }

    // test bad field
    termOrds = cache.getDocTermOrds(reader, "bogusfield");
    assertTrue(termOrds.getValueCount() == 0);

    FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
  }
View Full Code Here

    assertEquals(0, sorted.getOrd(0));
    assertEquals(1, sorted.getValueCount());
    sorted.get(0, scratch);
    assertEquals("sorted value", scratch.utf8ToString());
   
    SortedSetDocValues sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sorted");
    sortedSet.setDocument(0);
    assertEquals(0, sortedSet.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
    assertEquals(1, sortedSet.getValueCount());
   
    bits = FieldCache.DEFAULT.getDocsWithField(ar, "sorted");
    assertTrue(bits.get(0));
   
    // Numeric type: can be retrieved via getInts() and so on
    Ints numeric = FieldCache.DEFAULT.getInts(ar, "numeric", false);
    assertEquals(42, numeric.get(0));
   
    try {
      FieldCache.DEFAULT.getTerms(ar, "numeric", true);
      fail();
    } catch (IllegalStateException expected) {}
   
    try {
      FieldCache.DEFAULT.getTermsIndex(ar, "numeric");
      fail();
    } catch (IllegalStateException expected) {}
   
    try {
      FieldCache.DEFAULT.getDocTermOrds(ar, "numeric");
      fail();
    } catch (IllegalStateException expected) {}
   
    try {
      new DocTermOrds(ar, null, "numeric");
      fail();
    } catch (IllegalStateException expected) {}
   
    bits = FieldCache.DEFAULT.getDocsWithField(ar, "numeric");
    assertTrue(bits.get(0));
   
    // SortedSet type: can be retrieved via getDocTermOrds()
    if (defaultCodecSupportsSortedSet()) {
      try {
        FieldCache.DEFAULT.getInts(ar, "sortedset", false);
        fail();
      } catch (IllegalStateException expected) {}
   
      try {
        FieldCache.DEFAULT.getTerms(ar, "sortedset", true);
        fail();
      } catch (IllegalStateException expected) {}
   
      try {
        FieldCache.DEFAULT.getTermsIndex(ar, "sortedset");
        fail();
      } catch (IllegalStateException expected) {}
     
      try {
        new DocTermOrds(ar, null, "sortedset");
        fail();
      } catch (IllegalStateException expected) {}
   
      sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sortedset");
      sortedSet.setDocument(0);
      assertEquals(0, sortedSet.nextOrd());
      assertEquals(1, sortedSet.nextOrd());
      assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
      assertEquals(2, sortedSet.getValueCount());
   
      bits = FieldCache.DEFAULT.getDocsWithField(ar, "sortedset");
      assertTrue(bits.get(0));
    }
   
View Full Code Here

    SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
    assertEquals(-1, sorted.getOrd(0));
    sorted.get(0, scratch);
    assertEquals(0, scratch.length);
   
    SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued");
    sortedSet.setDocument(0);
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
   
    Bits bits = cache.getDocsWithField(ar, "bogusbits");
    assertFalse(bits.get(0));
   
    // check that we cached nothing
View Full Code Here

    SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
    assertEquals(-1, sorted.getOrd(0));
    sorted.get(0, scratch);
    assertEquals(0, scratch.length);
   
    SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued");
    sortedSet.setDocument(0);
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
   
    Bits bits = cache.getDocsWithField(ar, "bogusbits");
    assertFalse(bits.get(0));
   
    // check that we cached nothing
View Full Code Here

    }
  }
 
  @Override
  public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
    SortedSetDocValues sortedSetDV = in.getSortedSetDocValues(field);
    if (sortedSetDV == null) {
      return null;
    } else {
      return new SortingSortedSetDocValues(sortedSetDV, docMap);
   
View Full Code Here

  }
 
  @Test
  public void testSortedSetDocValuesField() throws Exception {
    assumeTrue("default codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
    SortedSetDocValues dv = reader.getSortedSetDocValues(SORTED_SET_DV_FIELD);
    int maxDoc = reader.maxDoc();
    BytesRef bytes = new BytesRef();
    for (int i = 0; i < maxDoc; i++) {
      dv.setDocument(i);
      dv.lookupOrd(dv.nextOrd(), bytes);
      int value = sortedValues[i].intValue();
      assertEquals("incorrect sorted-set DocValues for doc " + i, Integer.valueOf(value).toString(), bytes.utf8ToString());
      dv.lookupOrd(dv.nextOrd(), bytes);
      assertEquals("incorrect sorted-set DocValues for doc " + i, Integer.valueOf(value + 1).toString(), bytes.utf8ToString());
      assertEquals(SortedSetDocValues.NO_MORE_ORDS, dv.nextOrd());
    }
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.index.SortedSetDocValues

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.