Package org.apache.lucene.util.packed

Examples of org.apache.lucene.util.packed.MonotonicAppendingLongBuffer$Iterator
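The snippets below appear to come from the Lucene 4.x sources (doc-id sorting, OrdinalMap construction, uninverting field caches, and the WAH8DocIdSet / PForDeltaDocIdSet skip indexes). Before them, a minimal sketch of the add / freeze / read cycle, including the nested Iterator class this page documents. The class name, the sample values and the printouts are invented for illustration; only the calls themselves (the PackedInts.COMPACT constructor argument, add, freeze, get, size, iterator, hasNext, next) are the ones the snippets rely on.

    import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
    import org.apache.lucene.util.packed.PackedInts;

    public class MonotonicBufferExample {
      public static void main(String[] args) {
        // COMPACT trades lookup speed for minimal memory, as the OrdinalMap snippet notes
        MonotonicAppendingLongBuffer buffer = new MonotonicAppendingLongBuffer(PackedInts.COMPACT);
        for (long value = 0; value < 3000; value += 3) {
          buffer.add(value); // cheapest when the values are (roughly) non-decreasing
        }
        buffer.freeze(); // freeze before reading, as every snippet below does

        System.out.println(buffer.get(10)); // random access by index -> 30
        System.out.println(buffer.size());  // number of values added -> 1000

        // sequential access through the nested Iterator
        MonotonicAppendingLongBuffer.Iterator it = buffer.iterator();
        long sum = 0;
        while (it.hasNext()) {
          sum += it.next();
        }
        System.out.println(sum); // 3 * (0 + 1 + ... + 999) = 1498500
      }
    }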


    sorter.sort(0, docs.length); // docs is now the newToOld mapping

    // The reason why we use MonotonicAppendingLongBuffer here is that it
    // wastes very little memory if the index is in random order but can save
    // a lot of memory if the index is already "almost" sorted
    final MonotonicAppendingLongBuffer newToOld = new MonotonicAppendingLongBuffer();
    for (int i = 0; i < maxDoc; ++i) {
      newToOld.add(docs[i]);
    }
    newToOld.freeze();

    for (int i = 0; i < maxDoc; ++i) {
      docs[(int) newToOld.get(i)] = i;
    } // docs is now the oldToNew mapping

    final MonotonicAppendingLongBuffer oldToNew = new MonotonicAppendingLongBuffer();
    for (int i = 0; i < maxDoc; ++i) {
      oldToNew.add(docs[i]);
    }
    oldToNew.freeze();
   
    return new Sorter.DocMap() {

      @Override
      public int oldToNew(int docID) {
        return (int) oldToNew.get(docID);
      }

      @Override
      public int newToOld(int docID) {
        return (int) newToOld.get(docID);


    public OrdinalMap(Object owner, TermsEnum subs[]) throws IOException {
      // create the ordinal mappings by pulling a termsenum over each sub's
      // unique terms, and walking a multitermsenum over those
      this.owner = owner;
      globalOrdDeltas = new MonotonicAppendingLongBuffer(PackedInts.COMPACT);
      firstSegments = new AppendingPackedLongBuffer(PackedInts.COMPACT);
      ordDeltas = new MonotonicAppendingLongBuffer[subs.length];
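      // one buffer per sub: ordDeltas[i] maps each of sub i's term ords to the difference
      // between its global ord and its segment ord; that delta never shrinks as ords grow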
      for (int i = 0; i < ordDeltas.length; i++) {
        ordDeltas[i] = new MonotonicAppendingLongBuffer();
      }
      long segmentOrds[] = new long[subs.length];
      ReaderSlice slices[] = new ReaderSlice[subs.length];
      TermsEnumIndex indexes[] = new TermsEnumIndex[slices.length];
      for (int i = 0; i < slices.length; i++) {

      diagnostics.put(SORTER_ID_PROP, sorter.getID());
      super.setInfo(info);
    }

    private MonotonicAppendingLongBuffer getDeletes(List<AtomicReader> readers) {
      MonotonicAppendingLongBuffer deletes = new MonotonicAppendingLongBuffer();
      int deleteCount = 0;
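      // one value per live doc: the number of deleted docs seen so far (cumulative
      // across readers), so the stored sequence never decreases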
      for (AtomicReader reader : readers) {
        final int maxDoc = reader.maxDoc();
        final Bits liveDocs = reader.getLiveDocs();
        for (int i = 0; i < maxDoc; ++i) {
          if (liveDocs != null && !liveDocs.get(i)) {
            ++deleteCount;
          } else {
            deletes.add(deleteCount);
          }
        }
      }
      deletes.freeze();
      return deletes;
    }

      }
      if (docMap == null) {
        return super.getDocMap(mergeState);
      }
      assert mergeState.docMaps.length == 1; // we returned a singleton reader
      final MonotonicAppendingLongBuffer deletes = getDeletes(unsortedReaders);
      return new MergePolicy.DocMap() {
        @Override
        public int map(int old) {
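          // 'old' counts only live docs; restore the original docID by adding back the
          // deletions before it, map it through the sort order, then the merge docMap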
          final int oldWithDeletes = old + (int) deletes.get(old);
          final int newWithDeletes = docMap.oldToNew(oldWithDeletes);
          return mergeState.docMaps[0].get(newWithDeletes);
        }
      };
    }

        }
      } else {
        startTermsBPV = 1;
      }

      MonotonicAppendingLongBuffer termOrdToBytesOffset = new MonotonicAppendingLongBuffer();
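      // termOrdToBytesOffset remembers, for each term ord, the start offset returned by
      // copyUsingLengthPrefix for that term's bytes; offsets only grow, which is the
      // ideal case for a monotonic buffer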
      final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio);

      int termOrd = 0;

      // TODO: use Uninvert?

      if (terms != null) {
        final TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docs = null;

        while(true) {
          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          if (termOrd >= termCountHardLimit) {
            break;
          }

          termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term));
          docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
          while (true) {
            final int docID = docs.nextDoc();
            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
              break;
            }
            // Store 1+ ord into packed bits
            docToTermOrd.set(docID, 1+termOrd);
          }
          termOrd++;
        }
      }
      termOrdToBytesOffset.freeze();

      // maybe an int-only impl?
      return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset, docToTermOrd.getMutable(), termOrd);
    }

      return build(maxDoc, liveDocs);
    }

    static DocMap build(final int maxDoc, final Bits liveDocs) {
      assert liveDocs != null;
      final MonotonicAppendingLongBuffer docMap = new MonotonicAppendingLongBuffer();
      int del = 0;
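      // for every doc i (deleted or not) store i minus the number of deletions seen so
      // far; only live docs are ever looked up, and the stored values never decrease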
      for (int i = 0; i < maxDoc; ++i) {
        docMap.add(i - del);
        if (!liveDocs.get(i)) {
          ++del;
        }
      }
      final int numDeletedDocs = del;
      assert docMap.size() == maxDoc;
      return new DocMap() {

        @Override
        public int get(int docID) {
          if (!liveDocs.get(docID)) {
            return -1;
          }
          return (int) docMap.get(docID);
        }

        @Override
        public int maxDoc() {
          return maxDoc;

      // create the ordinal mappings by pulling a termsenum over each sub's
      // unique terms, and walking a multitermsenum over those
      this.owner = owner;
      // even though we accept an overhead ratio, we keep these ones with COMPACT
      // since they are only used to resolve values given a global ord, which is
      // slow anyway
      globalOrdDeltas = new MonotonicAppendingLongBuffer(PackedInts.COMPACT);
      firstSegments = new AppendingPackedLongBuffer(PackedInts.COMPACT);
      final MonotonicAppendingLongBuffer[] ordDeltas = new MonotonicAppendingLongBuffer[subs.length];
      for (int i = 0; i < ordDeltas.length; i++) {
        ordDeltas[i] = new MonotonicAppendingLongBuffer(acceptableOverheadRatio);
      }
      long[] ordDeltaBits = new long[subs.length];
      long segmentOrds[] = new long[subs.length];
      ReaderSlice slices[] = new ReaderSlice[subs.length];
      TermsEnumIndex indexes[] = new TermsEnumIndex[slices.length];
      for (int i = 0; i < slices.length; i++) {
        slices[i] = new ReaderSlice(0, 0, i);
        indexes[i] = new TermsEnumIndex(subs[i], i);
      }
      MultiTermsEnum mte = new MultiTermsEnum(slices);
      mte.reset(indexes);
      long globalOrd = 0;
      while (mte.next() != null) {       
        TermsEnumWithSlice matches[] = mte.getMatchArray();
        for (int i = 0; i < mte.getMatchCount(); i++) {
          int segmentIndex = matches[i].index;
          long segmentOrd = matches[i].terms.ord();
          long delta = globalOrd - segmentOrd;
          // for each unique term, just mark the first segment index/delta where it occurs
          if (i == 0) {
            firstSegments.add(segmentIndex);
            globalOrdDeltas.add(delta);
          }
          // for each per-segment ord, map it back to the global term.
          while (segmentOrds[segmentIndex] <= segmentOrd) {
            ordDeltaBits[segmentIndex] |= delta;
            ordDeltas[segmentIndex].add(delta);
            segmentOrds[segmentIndex]++;
          }
        }
        globalOrd++;
      }
      firstSegments.freeze();
      globalOrdDeltas.freeze();
      for (int i = 0; i < ordDeltas.length; ++i) {
        ordDeltas[i].freeze();
      }
      // ordDeltas is typically the bottleneck, so let's see what we can do to make it faster
      segmentToGlobalOrds = new LongValues[subs.length];
      long ramBytesUsed = BASE_RAM_BYTES_USED + globalOrdDeltas.ramBytesUsed() + firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds);
      for (int i = 0; i < ordDeltas.length; ++i) {
        final MonotonicAppendingLongBuffer deltas = ordDeltas[i];
        if (ordDeltaBits[i] == 0L) {
          // segment ords perfectly match global ordinals
          // likely in case of low cardinalities and large segments
          segmentToGlobalOrds[i] = LongValues.IDENTITY;
        } else {
          final int bitsRequired = ordDeltaBits[i] < 0 ? 64 : PackedInts.bitsRequired(ordDeltaBits[i]);
          final long monotonicBits = deltas.ramBytesUsed() * 8;
          final long packedBits = bitsRequired * deltas.size();
          if (deltas.size() <= Integer.MAX_VALUE
              && packedBits <= monotonicBits * (1 + acceptableOverheadRatio)) {
            // monotonic compression mostly adds overhead, let's keep the mapping in plain packed ints
            final int size = (int) deltas.size();
            final PackedInts.Mutable newDeltas = PackedInts.getMutable(size, bitsRequired, acceptableOverheadRatio);
            final MonotonicAppendingLongBuffer.Iterator it = deltas.iterator();
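            // copy the deltas out through the buffer's Iterator into plain packed ints,
            // which are cheaper to read than the monotonic encoding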
            for (int ord = 0; ord < size; ++ord) {
              newDeltas.set(ord, it.next());
            }
            assert !it.hasNext();
            segmentToGlobalOrds[i] = new LongValues() {
              @Override
              public long get(long ord) {
                return ord + newDeltas.get((int) ord);
              }
            };
            ramBytesUsed += newDeltas.ramBytesUsed();
          } else {
            segmentToGlobalOrds[i] = new LongValues() {
              @Override
              public long get(long ord) {
                return ord + deltas.get(ord);
              }
            };
            ramBytesUsed += deltas.ramBytesUsed();
          }
          ramBytesUsed += RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds[i]);
        }
      }
      this.ramBytesUsed = ramBytesUsed;

      writeSequence();
      final byte[] data = Arrays.copyOf(out.bytes, out.length);

      // Now build the index
      final int valueCount = (numSequences - 1) / indexInterval + 1;
      final MonotonicAppendingLongBuffer indexPositions, indexWordNums;
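      // for every indexInterval-th sequence the index keeps its byte position in 'data'
      // and its word number (stored as wordNum + 1); both sequences only ever grow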
      if (valueCount <= 1) {
        indexPositions = indexWordNums = SINGLE_ZERO_BUFFER;
      } else {
        final int pageSize = 128;
        final int initialPageCount = (valueCount + pageSize - 1) / pageSize;
        final MonotonicAppendingLongBuffer positions = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, PackedInts.COMPACT);
        final MonotonicAppendingLongBuffer wordNums = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, PackedInts.COMPACT);

        positions.add(0L);
        wordNums.add(0L);
        final Iterator it = new Iterator(data, cardinality, Integer.MAX_VALUE, SINGLE_ZERO_BUFFER, SINGLE_ZERO_BUFFER);
        assert it.in.getPosition() == 0;
        assert it.wordNum == -1;
        for (int i = 1; i < valueCount; ++i) {
          // skip indexInterval sequences
          for (int j = 0; j < indexInterval; ++j) {
            final boolean readSequence = it.readSequence();
            assert readSequence;
            it.skipDirtyBytes();
          }
          final int position = it.in.getPosition();
          final int wordNum = it.wordNum;
          positions.add(position);
          wordNums.add(wordNum + 1);
        }
        positions.freeze();
        wordNums.freeze();
        indexPositions = positions;
        indexWordNums = wordNums;
      }

      return new WAH8DocIdSet(data, cardinality, indexInterval, indexPositions, indexWordNums);

      encodeBlock();
      final byte[] dataArr = Arrays.copyOf(data.bytes, data.length + MAX_BYTE_BLOCK_COUNT);

      final int indexSize = (numBlocks - 1) / indexInterval + 1;
      final MonotonicAppendingLongBuffer docIDs, offsets;
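      // for every indexInterval-th block the index keeps a docID checkpoint (docID + 1)
      // and the block's byte offset, so iteration can skip ahead block by block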
      if (indexSize <= 1) {
        docIDs = offsets = SINGLE_ZERO_BUFFER;
      } else {
        final int pageSize = 128;
        final int initialPageCount = (indexSize + pageSize - 1) / pageSize;
        docIDs = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, PackedInts.COMPACT);
        offsets = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, PackedInts.COMPACT);
        // Now build the index
        final Iterator it = new Iterator(dataArr, cardinality, Integer.MAX_VALUE, SINGLE_ZERO_BUFFER, SINGLE_ZERO_BUFFER);
        index:
        for (int k = 0; k < indexSize; ++k) {
          docIDs.add(it.docID() + 1);
          offsets.add(it.offset);
          for (int i = 0; i < indexInterval; ++i) {
            it.skipBlock();
            if (it.docID() == DocIdSetIterator.NO_MORE_DOCS) {
              break index;
            }
          }
        }
        docIDs.freeze();
        offsets.freeze();
      }

      return new PForDeltaDocIdSet(dataArr, cardinality, indexInterval, docIDs, offsets);
    }