Package org.apache.lucene.util

Examples of org.apache.lucene.util.PagedBytes$PagedBytesDataInput


      CodecUtil.checkHeader(index, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_IDX,
                                   Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_START,
                                   Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);

      final long totalBytes = index.readLong();
      final PagedBytes bytes = new PagedBytes(16);
      bytes.copy(data, totalBytes);
      final PagedBytes.Reader bytesReader = bytes.freeze(true);
      final PackedInts.Reader reader = PackedInts.getReader(index);
      if (data.getFilePointer() != data.length()) {
        throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")");
      }
      if (index.getFilePointer() != index.length()) {
        throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")");
      }
      ramBytesUsed.addAndGet(bytes.ramBytesUsed() + reader.ramBytesUsed());
      success = true;
      return new BinaryDocValues() {
        @Override
        public void get(int docID, BytesRef result) {
          long startAddress = reader.get(docID);
View Full Code Here


                                 Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);

    final int fixedLength = data.readInt();
    final int valueCount = index.readInt();

    PagedBytes bytes = new PagedBytes(16);
    bytes.copy(data, fixedLength * (long) valueCount);
    final PagedBytes.Reader bytesReader = bytes.freeze(true);
    final PackedInts.Reader reader = PackedInts.getReader(index);
    ramBytesUsed.addAndGet(bytes.ramBytesUsed() + reader.ramBytesUsed());

    return correctBuggyOrds(new SortedDocValues() {
      @Override
      public int getOrd(int docID) {
        return (int) reader.get(docID);
View Full Code Here

    CodecUtil.checkHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX,
                                 Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START,
                                 Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

    long maxAddress = index.readLong();
    PagedBytes bytes = new PagedBytes(16);
    bytes.copy(data, maxAddress);
    final PagedBytes.Reader bytesReader = bytes.freeze(true);
    final PackedInts.Reader addressReader = PackedInts.getReader(index);
    final PackedInts.Reader ordsReader = PackedInts.getReader(index);

    final int valueCount = addressReader.size() - 1;
    ramBytesUsed.addAndGet(bytes.ramBytesUsed() + addressReader.ramBytesUsed() + ordsReader.ramBytesUsed());

    return correctBuggyOrds(new SortedDocValues() {
      @Override
      public int getOrd(int docID) {
        return (int)ordsReader.get(docID);
View Full Code Here

  }
 
  private BinaryDocValues loadBinary(FieldInfo field) throws IOException {
    BinaryEntry entry = binaries.get(field.number);
    data.seek(entry.offset);
    PagedBytes bytes = new PagedBytes(16);
    bytes.copy(data, entry.numBytes);
    final PagedBytes.Reader bytesReader = bytes.freeze(true);
    if (entry.minLength == entry.maxLength) {
      final int fixedLength = entry.minLength;
      ramBytesUsed.addAndGet(bytes.ramBytesUsed());
      return new BinaryDocValues() {
        @Override
        public void get(int docID, BytesRef result) {
          bytesReader.fillSlice(result, fixedLength * (long)docID, fixedLength);
        }
      };
    } else {
      final MonotonicBlockPackedReader addresses = new MonotonicBlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, maxDoc, false);
      ramBytesUsed.addAndGet(bytes.ramBytesUsed() + addresses.ramBytesUsed());
      return new BinaryDocValues() {
        @Override
        public void get(int docID, BytesRef result) {
          long startAddress = docID == 0 ? 0 : addresses.get(docID-1);
          long endAddress = addresses.get(docID);
View Full Code Here

    this.totalIndexInterval = totalIndexInterval;
    indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor;
    skipInterval = indexEnum.skipInterval;
    // this is only an inital size, it will be GCed once the build is complete
    long initialSize = (long) (tiiFileLength * 1.5) / indexDivisor;
    PagedBytes dataPagedBytes = new PagedBytes(estimatePageBits(initialSize));
    PagedBytesDataOutput dataOutput = dataPagedBytes.getDataOutput();

    final int bitEstimate = 1+MathUtil.log(tiiFileLength, 2);
    GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, PackedInts.DEFAULT);

    String currentField = null;
    List<String> fieldStrs = new ArrayList<String>();
    int fieldCounter = -1;
    for (int i = 0; indexEnum.next(); i++) {
      Term term = indexEnum.term();
      if (currentField == null || !currentField.equals(term.field())) {
        currentField = term.field();
        fieldStrs.add(currentField);
        fieldCounter++;
      }
      TermInfo termInfo = indexEnum.termInfo();
      indexToTerms.set(i, dataOutput.getPosition());
      dataOutput.writeVInt(fieldCounter);
      dataOutput.writeString(term.text());
      dataOutput.writeVInt(termInfo.docFreq);
      if (termInfo.docFreq >= skipInterval) {
        dataOutput.writeVInt(termInfo.skipOffset);
      }
      dataOutput.writeVLong(termInfo.freqPointer);
      dataOutput.writeVLong(termInfo.proxPointer);
      dataOutput.writeVLong(indexEnum.indexPointer);
      for (int j = 1; j < indexDivisor; j++) {
        if (!indexEnum.next()) {
          break;
        }
      }
    }

    fields = new Term[fieldStrs.size()];
    for (int i = 0; i < fields.length; i++) {
      fields[i] = new Term(fieldStrs.get(i));
    }
   
    dataPagedBytes.freeze(true);
    dataInput = dataPagedBytes.getDataInput();
    indexToDataOffset = indexToTerms.getMutable();

    ramBytesUsed = fields.length * (RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.shallowSizeOfInstance(Term.class))
        + dataPagedBytes.ramBytesUsed() + indexToDataOffset.ramBytesUsed();
  }
View Full Code Here

    CodecUtil.checkHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX,
                                 Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START,
                                 Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

    long maxAddress = index.readLong();
    PagedBytes bytes = new PagedBytes(16);
    bytes.copy(data, maxAddress);
    final PagedBytes.Reader bytesReader = bytes.freeze(true);
    final PackedInts.Reader addressReader = PackedInts.getReader(index);
    final PackedInts.Reader ordsReader = PackedInts.getReader(index);

    final int valueCount = addressReader.size() - 1;
    ramBytesUsed.addAndGet(bytesReader.ramBytesUsed() + addressReader.ramBytesUsed() + ordsReader.ramBytesUsed());
View Full Code Here

      Terms terms = reader.terms(key.field);

      final float acceptableOverheadRatio = ((Float) key.custom).floatValue();

      final PagedBytes bytes = new PagedBytes(15);

      int startTermsBPV;

      final int termCountHardLimit;
      if (maxDoc == Integer.MAX_VALUE) {
        termCountHardLimit = Integer.MAX_VALUE;
      } else {
        termCountHardLimit = maxDoc+1;
      }

      // TODO: use Uninvert?
      if (terms != null) {
        // Try for coarse estimate for number of bits; this
        // should be an underestimate most of the time, which
        // is fine -- GrowableWriter will reallocate as needed
        long numUniqueTerms = terms.size();
        if (numUniqueTerms != -1L) {
          if (numUniqueTerms > termCountHardLimit) {
            // app is misusing the API (there is more than
            // one term per doc); in this case we make best
            // effort to load what we can (see LUCENE-2142)
            numUniqueTerms = termCountHardLimit;
          }

          startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);
        } else {
          startTermsBPV = 1;
        }
      } else {
        startTermsBPV = 1;
      }

      MonotonicAppendingLongBuffer termOrdToBytesOffset = new MonotonicAppendingLongBuffer();
      final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio);

      int termOrd = 0;

      // TODO: use Uninvert?

      if (terms != null) {
        final TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docs = null;

        while(true) {
          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          if (termOrd >= termCountHardLimit) {
            break;
          }

          termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term));
          docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
          while (true) {
            final int docID = docs.nextDoc();
            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
              break;
            }
            // Store 1+ ord into packed bits
            docToTermOrd.set(docID, 1+termOrd);
          }
          termOrd++;
        }
      }
      termOrdToBytesOffset.freeze();

      // maybe an int-only impl?
      return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset, docToTermOrd.getMutable(), termOrd);
    }
View Full Code Here

      final float acceptableOverheadRatio = ((Float) key.custom).floatValue();

      final int termCountHardLimit = maxDoc;

      // Holds the actual term data, expanded.
      final PagedBytes bytes = new PagedBytes(15);

      int startBPV;

      if (terms != null) {
        // Try for coarse estimate for number of bits; this
        // should be an underestimate most of the time, which
        // is fine -- GrowableWriter will reallocate as needed
        long numUniqueTerms = terms.size();
        if (numUniqueTerms != -1L) {
          if (numUniqueTerms > termCountHardLimit) {
            numUniqueTerms = termCountHardLimit;
          }
          startBPV = PackedInts.bitsRequired(numUniqueTerms*4);
        } else {
          startBPV = 1;
        }
      } else {
        startBPV = 1;
      }

      final GrowableWriter docToOffset = new GrowableWriter(startBPV, maxDoc, acceptableOverheadRatio);
     
      // pointer==0 means not set
      bytes.copyUsingLengthPrefix(new BytesRef());

      if (terms != null) {
        int termCount = 0;
        final TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docs = null;
        while(true) {
          if (termCount++ == termCountHardLimit) {
            // app is misusing the API (there is more than
            // one term per doc); in this case we make best
            // effort to load what we can (see LUCENE-2142)
            break;
          }

          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          final long pointer = bytes.copyUsingLengthPrefix(term);
          docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
          while (true) {
            final int docID = docs.nextDoc();
            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
              break;
            }
            docToOffset.set(docID, pointer);
          }
        }
      }

      final PackedInts.Reader offsetReader = docToOffset.getMutable();
      if (setDocsWithField) {
        wrapper.setDocsWithField(reader, key.field, new Bits() {
          @Override
          public boolean get(int index) {
            return offsetReader.get(index) != 0;
          }

          @Override
          public int length() {
            return maxDoc;
          }
        });
      }
      // maybe an int-only impl?
      return new BinaryDocValuesImpl(bytes.freeze(true), offsetReader);
    }
View Full Code Here

    try {
      CodecUtil.checkHeader(input, Lucene40DocValuesFormat.BYTES_FIXED_STRAIGHT_CODEC_NAME,
                                   Lucene40DocValuesFormat.BYTES_FIXED_STRAIGHT_VERSION_START,
                                   Lucene40DocValuesFormat.BYTES_FIXED_STRAIGHT_VERSION_CURRENT);
      final int fixedLength = input.readInt();
      PagedBytes bytes = new PagedBytes(16);
      bytes.copy(input, fixedLength * (long)state.segmentInfo.getDocCount());
      final PagedBytes.Reader bytesReader = bytes.freeze(true);
      CodecUtil.checkEOF(input);
      success = true;
      ramBytesUsed.addAndGet(bytesReader.ramBytesUsed());
      return new BinaryDocValues() {

View Full Code Here

      index = dir.openInput(indexName, state.context);
      CodecUtil.checkHeader(index, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_IDX,
                                   Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_START,
                                   Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);
      long totalBytes = index.readVLong();
      PagedBytes bytes = new PagedBytes(16);
      bytes.copy(data, totalBytes);
      final PagedBytes.Reader bytesReader = bytes.freeze(true);
      final PackedInts.Reader reader = PackedInts.getReader(index);
      CodecUtil.checkEOF(data);
      CodecUtil.checkEOF(index);
      success = true;
      ramBytesUsed.addAndGet(bytesReader.ramBytesUsed() + reader.ramBytesUsed());
View Full Code Here

TOP

Related Classes of org.apache.lucene.util.PagedBytes$PagedBytesDataInput

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.