Package org.apache.lucene.util

Examples of org.apache.lucene.util.BytesRefHash$MaxBytesLengthExceededException


  private static final BytesRef EMPTY = new BytesRef(BytesRef.EMPTY_BYTES);

  public SortedDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
    this.fieldInfo = fieldInfo;
    this.iwBytesUsed = iwBytesUsed;
    hash = new BytesRefHash(
        new ByteBlockPool(
            new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
            BytesRefHash.DEFAULT_CAPACITY,
            new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
    pending = new AppendingLongBuffer();
View Full Code Here


      if (boost <= 0.0f)
          throw new IllegalArgumentException("boost factor must be greater than 0.0");
      int numTokens = 0;
      int numOverlapTokens = 0;
      int pos = -1;
      final BytesRefHash terms;
      final SliceByteStartArray sliceArray;
      Info info = null;
      long sumTotalTermFreq = 0;
      int offset = 0;
      if ((info = fields.get(fieldName)) != null) {
        numTokens = info.numTokens;
        numOverlapTokens = info.numOverlapTokens;
        pos = info.lastPosition + positionIncrementGap;
        offset = info.lastOffset + offsetGap;
        terms = info.terms;
        boost *= info.boost;
        sliceArray = info.sliceArray;
        sumTotalTermFreq = info.sumTotalTermFreq;
      } else {
        sliceArray = new SliceByteStartArray(BytesRefHash.DEFAULT_CAPACITY);
        terms = new BytesRefHash(byteBlockPool, BytesRefHash.DEFAULT_CAPACITY, sliceArray);
      }

      if (!fieldInfos.containsKey(fieldName)) {
        fieldInfos.put(fieldName,
            new FieldInfo(fieldName, true, fieldInfos.size(), false, false, false, this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS , null, null, null));
      }
      TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
      PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
      OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
      BytesRef ref = termAtt.getBytesRef();
      stream.reset();
     
      while (stream.incrementToken()) {
        termAtt.fillBytesRef();
//        if (DEBUG) System.err.println("token='" + term + "'");
        numTokens++;
        final int posIncr = posIncrAttribute.getPositionIncrement();
        if (posIncr == 0)
          numOverlapTokens++;
        pos += posIncr;
        int ord = terms.add(ref);
        if (ord < 0) {
          ord = (-ord) - 1;
          postingsWriter.reset(sliceArray.end[ord]);
        } else {
          sliceArray.start[ord] = postingsWriter.startNewSlice();
View Full Code Here

    if (col.hasCutOff) {
      return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query);
    } else {
      final BooleanQuery bq = getTopLevelQuery();
      if (size > 0) {
        final BytesRefHash pendingTerms = col.pendingTerms;
        final int sort[] = pendingTerms.sort(col.termsEnum.getComparator());
        for(int i = 0; i < size; i++) {
          final int pos = sort[i];
          // docFreq is not used for constant score here, so we pass 1
          // to explicitly set a fake value, so it's not calculated
          addClause(bq, new Term(query.field, pendingTerms.get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]);
        }
      }
      // Strip scores
      final Query result = new ConstantScoreQuery(bq);
      result.setBoost(query.getBoost());
View Full Code Here

  private int maxCount = 0;

  public SortedSetDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
    this.fieldInfo = fieldInfo;
    this.iwBytesUsed = iwBytesUsed;
    hash = new BytesRefHash(
        new ByteBlockPool(
            new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
            BytesRefHash.DEFAULT_CAPACITY,
            new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
    pending = new AppendingPackedLongBuffer(PackedInts.COMPACT);
View Full Code Here

  private static final int EMPTY_ORD = -1;

  public SortedDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
    this.fieldInfo = fieldInfo;
    this.iwBytesUsed = iwBytesUsed;
    hash = new BytesRefHash(
        new ByteBlockPool(
            new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
            BytesRefHash.DEFAULT_CAPACITY,
            new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
    pending = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
View Full Code Here

    this.termsHash = termsHash;
    bytesUsed = termsHash.bytesUsed;
    fieldState = docInverterPerField.fieldState;
    this.consumer = termsHash.consumer.addField(this, fieldInfo);
    PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, bytesUsed);
    bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts);
    streamCount = consumer.getStreamCount();
    numPostingInt = 2*streamCount;
    this.fieldInfo = fieldInfo;
    if (nextTermsHash != null)
      nextPerField = (TermsHashPerField) nextTermsHash.addField(docInverterPerField, fieldInfo);
View Full Code Here

      // by the impersonator, but we have to give it a chance to merge them to this
      cfg.setMergePolicy(newLogMergePolicy());
    }
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, cfg);
    int numDocs = atLeast(100);
    BytesRefHash hash = new BytesRefHash();
    Map<String, String> docToString = new HashMap<>();
    int maxLength = TestUtil.nextInt(random(), 1, 50);
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      doc.add(newTextField("id", "" + i, Field.Store.YES));
      String string = TestUtil.randomRealisticUnicodeString(random(), 1, maxLength);
      BytesRef br = new BytesRef(string);
      doc.add(new SortedDocValuesField("field", br));
      hash.add(br);
      docToString.put("" + i, string);
      w.addDocument(doc);
    }
    if (rarely()) {
      w.commit();
    }
    int numDocsNoValue = atLeast(10);
    for (int i = 0; i < numDocsNoValue; i++) {
      Document doc = new Document();
      doc.add(newTextField("id", "noValue", Field.Store.YES));
      w.addDocument(doc);
    }
    if (!defaultCodecSupportsDocsWithField()) {
      BytesRef bytesRef = new BytesRef();
      hash.add(bytesRef); // add empty value for the gaps
    }
    if (rarely()) {
      w.commit();
    }
    if (!defaultCodecSupportsDocsWithField()) {
      // if the codec doesn't support missing, we expect missing to be mapped to byte[]
      // by the impersonator, but we have to give it a chance to merge them to this
      w.forceMerge(1);
    }
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      String id = "" + i + numDocs;
      doc.add(newTextField("id", id, Field.Store.YES));
      String string = TestUtil.randomRealisticUnicodeString(random(), 1, maxLength);
      BytesRef br = new BytesRef(string);
      hash.add(br);
      docToString.put(id, string);
      doc.add(new SortedDocValuesField("field", br));
      w.addDocument(doc);
    }
    w.commit();
    IndexReader reader = w.getReader();
    SortedDocValues docValues = MultiDocValues.getSortedValues(reader, "field");
    int[] sort = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
    BytesRef expected = new BytesRef();
    BytesRef actual = new BytesRef();
    assertEquals(hash.size(), docValues.getValueCount());
    for (int i = 0; i < hash.size(); i++) {
      hash.get(sort[i], expected);
      docValues.lookupOrd(i, actual);
      assertEquals(expected.utf8ToString(), actual.utf8ToString());
      int ord = docValues.lookupTerm(expected);
      assertEquals(i, ord);
    }
View Full Code Here

 
  public void testRaw() throws Exception {
    int num = atLeast(10000);
   
    Comparator<BytesRef> comparator = BytesRef.getUTF8SortedAsUnicodeComparator();
    BytesRefHash sorted = new BytesRefHash();
    TermFreq[] unsorted = new TermFreq[num];
    byte[] buffer = new byte[0];
    ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);

    for (int i = 0; i < num; i++) {
      BytesRef spare;
      long weight;
      do {
        spare = new BytesRef(_TestUtil.randomUnicodeString(random));
        if (spare.length + 8 >= buffer.length) {
          buffer = ArrayUtil.grow(buffer, spare.length + 8);
        }
        output.reset(buffer);
        output.writeBytes(spare.bytes, spare.offset, spare.length);
        weight = random.nextLong();
        output.writeLong(weight);
       
      } while (sorted.add(new BytesRef(buffer, 0, output.getPosition())) < 0);
      unsorted[i] = new TermFreq(spare, weight);
    }
   
    // test the sorted iterator wrapper
    TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted), comparator, true);
    int[] sort = sorted.sort(comparator);
    int size = sorted.size();
    BytesRef spare = new BytesRef();
    for (int i = 0; i < size; i++) {
      sorted.get(sort[i], spare);
      spare.length -= 8; // sub the long value
      assertEquals(spare, wrapper.next());
      spare.offset = spare.offset + spare.length;
      spare.length = 8;
      assertEquals(asLong(spare), wrapper.weight());
View Full Code Here

        if(tippingPoint == 0) {
            _counter = null;
            _estimator = builder.build();
            _tipped = true;
        } else {
            _counter = new BytesRefHash();
        }
    }
View Full Code Here

     * @param in the StreamInput
     * @return a new BytesRefHash
     * @throws IOException
     */
    public static BytesRefHash deserialize(final StreamInput in) throws IOException {
        final BytesRefHash output = new BytesRefHash();
        final int entries = in.readVInt();
        byte[] scratch = null;
        for(int i = 0; i < entries; i++) {
            final int length = in.readVInt();
            // Reuse previous byte array if long enough, otherwise create new one
            if(scratch == null || scratch.length < length) {
                scratch = new byte[length];
            }
            in.readBytes(scratch, 0, length);
            output.add(new BytesRef(scratch, 0, length));
        }
        return output;
    }
View Full Code Here

TOP

Related Classes of org.apache.lucene.util.BytesRefHash$MaxBytesLengthExceededException

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.