Examples of org.apache.lucene.index.SortedSetDocValues

org.apache.lucene.index.SortedSetDocValues
A per-document set of presorted byte[] values.
Per-Document values in a SortedDocValues are deduplicated, dereferenced, and sorted into a dictionary of unique values. A pointer to the dictionary value (ordinal) can be retrieved for each document. Ordinals are dense and in increasing sorted order.

        // AIOOBE can happen:
        if (ReaderUtil.getTopLevelContext(matchingDocs.context).reader() != state.origReader) {
          throw new IllegalStateException("the SortedSetDocValuesReaderState provided to this class does not match the reader being searched; you must create a new SortedSetDocValuesReaderState every time you open a new IndexReader");
        }
        
        SortedSetDocValues segValues = reader.getSortedSetDocValues(field);
        if (segValues == null) {
          return;
        }


        final int[] counts = facetArrays.getIntArray();
        final int maxDoc = reader.maxDoc();
        assert maxDoc == matchingDocs.bits.length();


        if (dv instanceof MultiSortedSetDocValues) {
          MultiDocValues.OrdinalMap ordinalMap = ((MultiSortedSetDocValues) dv).mapping;
          int segOrd = matchingDocs.context.ord;


          int numSegOrds = (int) segValues.getValueCount();


          if (matchingDocs.totalHits < numSegOrds/10) {
            // Remap every ord to global ord as we iterate:
            int doc = 0;
            while (doc < maxDoc && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
              segValues.setDocument(doc);
              int term = (int) segValues.nextOrd();
              while (term != SortedSetDocValues.NO_MORE_ORDS) {
                counts[(int) ordinalMap.getGlobalOrd(segOrd, term)]++;
                term = (int) segValues.nextOrd();
              }
              ++doc;
            }
          } else {


            // First count in seg-ord space:
            final int[] segCounts = new int[numSegOrds];
            int doc = 0;
            while (doc < maxDoc && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
              segValues.setDocument(doc);
              int term = (int) segValues.nextOrd();
              while (term != SortedSetDocValues.NO_MORE_ORDS) {
                segCounts[term]++;
                term = (int) segValues.nextOrd();
              }
              ++doc;
            }


            // Then, migrate to global ords:
            for(int ord=0;ord<numSegOrds;ord++) {
              int count = segCounts[ord];
              if (count != 0) {
                counts[(int) ordinalMap.getGlobalOrd(segOrd, ord)] += count;
              }
            }
          }
        } else {
          // No ord mapping (e.g., single segment index):
          // just aggregate directly into counts:


          int doc = 0;
          while (doc < maxDoc && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
            segValues.setDocument(doc);
            int term = (int) segValues.nextOrd();
            while (term != SortedSetDocValues.NO_MORE_ORDS) {
              counts[term]++;
              term = (int) segValues.nextOrd();
            }
            ++doc;
          }
        }
      }

View Full Code Here

    if (reader instanceof AtomicReader) {
      topReader = (AtomicReader) reader;
    } else {
      topReader = new SlowCompositeReaderWrapper((CompositeReader) reader);
    }
    SortedSetDocValues dv = topReader.getSortedSetDocValues(field);
    if (dv == null) {
      throw new IllegalArgumentException("field \"" + field + "\" was not indexed with SortedSetDocValues");
    }
    if (dv.getValueCount() > Integer.MAX_VALUE) {
      throw new IllegalArgumentException("can only handle valueCount < Integer.MAX_VALUE; got " + dv.getValueCount());
    }
    valueCount = (int) dv.getValueCount();


    // TODO: we can make this more efficient if eg we can be
    // "involved" when OrdinalMap is being created?  Ie see
    // each term/ord it's assigning as it goes...
    String lastDim = null;
    int startOrd = -1;
    BytesRef spare = new BytesRef();


    // TODO: this approach can work for full hierarchy?;
    // TaxoReader can't do this since ords are not in
    // "sorted order" ... but we should generalize this to
    // support arbitrary hierarchy:
    for(int ord=0;ord<valueCount;ord++) {
      dv.lookupOrd(ord, spare);
      String[] components = spare.utf8ToString().split(separatorRegex, 2);
      if (components.length != 2) {
        throw new IllegalArgumentException("this class can only handle 2 level hierarchy (dim/value); got: " + spare.utf8ToString());
      }
      if (!components[0].equals(lastDim)) {

View Full Code Here

  }
  
  @Test
  public void testSortedSetDocValuesField() throws Exception {
    assumeTrue("default codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
    SortedSetDocValues dv = reader.getSortedSetDocValues(SORTED_SET_DV_FIELD);
    int maxDoc = reader.maxDoc();
    BytesRef bytes = new BytesRef();
    for (int i = 0; i < maxDoc; i++) {
      dv.setDocument(i);
      dv.lookupOrd(dv.nextOrd(), bytes);
      int value = sortedValues[i].intValue();
      assertEquals("incorrect sorted-set DocValues for doc " + i, Integer.valueOf(value).toString(), bytes.utf8ToString());
      dv.lookupOrd(dv.nextOrd(), bytes);
      assertEquals("incorrect sorted-set DocValues for doc " + i, Integer.valueOf(value + 1).toString(), bytes.utf8ToString());
      assertEquals(SortedSetDocValues.NO_MORE_ORDS, dv.nextOrd());
    }
  }

View Full Code Here

  }


  // TODO: this if DocTermsIndex was already created, we
  // should share it...
  public SortedSetDocValues getDocTermOrds(AtomicReader reader, String field) throws IOException {
    SortedSetDocValues dv = reader.getSortedSetDocValues(field);
    if (dv != null) {
      return dv;
    }
    
    SortedDocValues sdv = reader.getSortedDocValues(field);

View Full Code Here

        ordIndexInstances.put(field.number, ordIndexInstance);
      }
      ordIndex = ordIndexInstance;
    }
    
    return new SortedSetDocValues() {
      long offset;
      long endOffset;
      
      @Override
      public long nextOrd() {

View Full Code Here

    final Arc<Long> scratchArc = new Arc<Long>();
    final IntsRef scratchInts = new IntsRef();
    final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst); 
    final BytesRef ref = new BytesRef();
    final ByteArrayDataInput input = new ByteArrayDataInput();
    return new SortedSetDocValues() {
      long currentOrd;


      @Override
      public long nextOrd() {
        if (input.eof()) {

View Full Code Here

    NumericEntry entry = ordIndexes.get(field.number);
    IndexInput data = this.data.clone();
    data.seek(entry.offset);
    final MonotonicBlockPackedReader ordIndex = new MonotonicBlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true);
    
    return new SortedSetDocValues() {
      long offset;
      long endOffset;
      
      @Override
      public long nextOrd() {

View Full Code Here

    }
  }
  
  @Override
  public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
    SortedSetDocValues sortedSetDV = in.getSortedSetDocValues(field);
    if (sortedSetDV == null) {
      return null;
    } else {
      return new SortingSortedSetDocValues(sortedSetDV, docMap);
    }

View Full Code Here


    final IndexInput in = data.clone();
    final BytesRef scratch = new BytesRef();
    final DecimalFormat decoder = new DecimalFormat(field.pattern, new DecimalFormatSymbols(Locale.ROOT));
    
    return new SortedSetDocValues() {
      String[] currentOrds = new String[0];
      int currentIndex = 0;
      
      @Override
      public long nextOrd() {

View Full Code Here


    final IndexInput in = data.clone();
    final BytesRef scratch = new BytesRef();
    final DecimalFormat decoder = new DecimalFormat(field.pattern, new DecimalFormatSymbols(Locale.ROOT));
    
    return new SortedSetDocValues() {
      String[] currentOrds = new String[0];
      int currentIndex = 0;
      
      @Override
      public long nextOrd() {

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.lucene.index.SortedSetDocValues

org.apache.lucene.codecs.asserting.AssertingDocValuesFormat$AssertingDocValuesProducer

org.apache.lucene.codecs.cheapbastard.CheapBastardDocValuesProducer

org.apache.lucene.codecs.diskdv.DiskDocValuesProducer

org.apache.lucene.codecs.DocValuesConsumer

org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer

org.apache.lucene.codecs.lucene45.Lucene45DocValuesProducer

org.apache.lucene.codecs.memory.DirectDocValuesProducer

org.apache.lucene.codecs.memory.MemoryDocValuesProducer

org.apache.lucene.codecs.simpletext.SimpleTextDocValuesReader

org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.