Package org.apache.lucene.util

Examples of org.apache.lucene.util.BytesRefHash$MaxBytesLengthExceededException


      if (boost <= 0.0f)
          throw new IllegalArgumentException("boost factor must be greater than 0.0");
      int numTokens = 0;
      int numOverlapTokens = 0;
      int pos = -1;
      final BytesRefHash terms;
      final SliceByteStartArray sliceArray;
      Info info = null;
      long sumTotalTermFreq = 0;
      int offset = 0;
      if ((info = fields.get(fieldName)) != null) {
        numTokens = info.numTokens;
        numOverlapTokens = info.numOverlapTokens;
        pos = info.lastPosition + positionIncrementGap;
        offset = info.lastOffset + offsetGap;
        terms = info.terms;
        boost *= info.boost;
        sliceArray = info.sliceArray;
        sumTotalTermFreq = info.sumTotalTermFreq;
      } else {
        sliceArray = new SliceByteStartArray(BytesRefHash.DEFAULT_CAPACITY);
        terms = new BytesRefHash(byteBlockPool, BytesRefHash.DEFAULT_CAPACITY, sliceArray);
      }

      if (!fieldInfos.containsKey(fieldName)) {
        fieldInfos.put(fieldName,
            new FieldInfo(fieldName, true, fieldInfos.size(), false, false, false, this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS , null, null, -1, null));
      }
      TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
      PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
      OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
      BytesRef ref = termAtt.getBytesRef();
      stream.reset();
     
      while (stream.incrementToken()) {
        termAtt.fillBytesRef();
//        if (DEBUG) System.err.println("token='" + term + "'");
        numTokens++;
        final int posIncr = posIncrAttribute.getPositionIncrement();
        if (posIncr == 0)
          numOverlapTokens++;
        pos += posIncr;
        int ord = terms.add(ref);
        if (ord < 0) {
          ord = (-ord) - 1;
          postingsWriter.reset(sliceArray.end[ord]);
        } else {
          sliceArray.start[ord] = postingsWriter.startNewSlice();
View Full Code Here


      // by the impersonator, but we have to give it a chance to merge them to this
      cfg.setMergePolicy(newLogMergePolicy());
    }
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, cfg);
    int numDocs = atLeast(100);
    BytesRefHash hash = new BytesRefHash();
    Map<String, String> docToString = new HashMap<>();
    int maxLength = TestUtil.nextInt(random(), 1, 50);
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      doc.add(newTextField("id", "" + i, Field.Store.YES));
      String string = TestUtil.randomRealisticUnicodeString(random(), 1, maxLength);
      BytesRef br = new BytesRef(string);
      doc.add(new SortedDocValuesField("field", br));
      hash.add(br);
      docToString.put("" + i, string);
      w.addDocument(doc);
    }
    if (rarely()) {
      w.commit();
    }
    int numDocsNoValue = atLeast(10);
    for (int i = 0; i < numDocsNoValue; i++) {
      Document doc = new Document();
      doc.add(newTextField("id", "noValue", Field.Store.YES));
      w.addDocument(doc);
    }
    if (!defaultCodecSupportsDocsWithField()) {
      BytesRef bytesRef = new BytesRef();
      hash.add(bytesRef); // add empty value for the gaps
    }
    if (rarely()) {
      w.commit();
    }
    if (!defaultCodecSupportsDocsWithField()) {
      // if the codec doesn't support missing, we expect missing to be mapped to byte[]
      // by the impersonator, but we have to give it a chance to merge them to this
      w.forceMerge(1);
    }
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      String id = "" + i + numDocs;
      doc.add(newTextField("id", id, Field.Store.YES));
      String string = TestUtil.randomRealisticUnicodeString(random(), 1, maxLength);
      BytesRef br = new BytesRef(string);
      hash.add(br);
      docToString.put(id, string);
      doc.add(new SortedDocValuesField("field", br));
      w.addDocument(doc);
    }
    w.commit();
    IndexReader reader = w.getReader();
    SortedDocValues docValues = MultiDocValues.getSortedValues(reader, "field");
    int[] sort = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
    BytesRef expected = new BytesRef();
    assertEquals(hash.size(), docValues.getValueCount());
    for (int i = 0; i < hash.size(); i++) {
      hash.get(sort[i], expected);
      final BytesRef actual = docValues.lookupOrd(i);
      assertEquals(expected.utf8ToString(), actual.utf8ToString());
      int ord = docValues.lookupTerm(expected);
      assertEquals(i, ord);
    }
View Full Code Here

      if (boost <= 0.0f)
          throw new IllegalArgumentException("boost factor must be greater than 0.0");
      int numTokens = 0;
      int numOverlapTokens = 0;
      int pos = -1;
      final BytesRefHash terms;
      final SliceByteStartArray sliceArray;
      Info info = null;
      long sumTotalTermFreq = 0;
      if ((info = fields.get(fieldName)) != null) {
        numTokens = info.numTokens;
        numOverlapTokens = info.numOverlapTokens;
        pos = info.lastPosition + positionIncrementGap;
        terms = info.terms;
        boost *= info.boost;
        sliceArray = info.sliceArray;
        sumTotalTermFreq = info.sumTotalTermFreq;
      } else {
        sliceArray = new SliceByteStartArray(BytesRefHash.DEFAULT_CAPACITY);
        terms = new BytesRefHash(byteBlockPool, BytesRefHash.DEFAULT_CAPACITY, sliceArray);
      }

      if (!fieldInfos.containsKey(fieldName)) {
        fieldInfos.put(fieldName,
            new FieldInfo(fieldName, true, fieldInfos.size(), false, false, false, this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS , null, null, null));
      }
      TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
      PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
      OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
      BytesRef ref = termAtt.getBytesRef();
      stream.reset();
     
      while (stream.incrementToken()) {
        termAtt.fillBytesRef();
//        if (DEBUG) System.err.println("token='" + term + "'");
        numTokens++;
        final int posIncr = posIncrAttribute.getPositionIncrement();
        if (posIncr == 0)
          numOverlapTokens++;
        pos += posIncr;
        int ord = terms.add(ref);
        if (ord < 0) {
          ord = (-ord) - 1;
          postingsWriter.reset(sliceArray.end[ord]);
        } else {
          sliceArray.start[ord] = postingsWriter.startNewSlice();
View Full Code Here

  public void testRandomSortedBytes() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, cfg);
    int numDocs = atLeast(100);
    BytesRefHash hash = new BytesRefHash();
    Map<String, String> docToString = new HashMap<String, String>();
    int maxLength = _TestUtil.nextInt(random(), 1, 50);
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      doc.add(newTextField("id", "" + i, Field.Store.YES));
      String string = _TestUtil.randomRealisticUnicodeString(random(), 1, maxLength);
      BytesRef br = new BytesRef(string);
      doc.add(new SortedDocValuesField("field", br));
      hash.add(br);
      docToString.put("" + i, string);
      w.addDocument(doc);
    }
    if (rarely()) {
      w.commit();
    }
    int numDocsNoValue = atLeast(10);
    for (int i = 0; i < numDocsNoValue; i++) {
      Document doc = new Document();
      doc.add(newTextField("id", "noValue", Field.Store.YES));
      w.addDocument(doc);
    }
    BytesRef bytesRef = new BytesRef();
    hash.add(bytesRef); // add empty value for the gaps
    if (rarely()) {
      w.commit();
    }
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      String id = "" + i + numDocs;
      doc.add(newTextField("id", id, Field.Store.YES));
      String string = _TestUtil.randomRealisticUnicodeString(random(), 1, maxLength);
      BytesRef br = new BytesRef(string);
      hash.add(br);
      docToString.put(id, string);
      doc.add(new SortedDocValuesField("field", br));
      w.addDocument(doc);
    }
    w.commit();
    IndexReader reader = w.getReader();
    SortedDocValues docValues = MultiDocValues.getSortedValues(reader, "field");
    int[] sort = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
    BytesRef expected = new BytesRef();
    BytesRef actual = new BytesRef();
    assertEquals(hash.size(), docValues.getValueCount());
    for (int i = 0; i < hash.size(); i++) {
      hash.get(sort[i], expected);
      docValues.lookupOrd(i, actual);
      assertEquals(expected.utf8ToString(), actual.utf8ToString());
      int ord = docValues.lookupTerm(expected);
      assertEquals(i, ord);
    }
View Full Code Here

      if (boost <= 0.0f)
          throw new IllegalArgumentException("boost factor must be greater than 0.0");
      int numTokens = 0;
      int numOverlapTokens = 0;
      int pos = -1;
      final BytesRefHash terms;
      final SliceByteStartArray sliceArray;
      Info info = null;
      long sumTotalTermFreq = 0;
      if ((info = fields.get(fieldName)) != null) {
        numTokens = info.numTokens;
        numOverlapTokens = info.numOverlapTokens;
        pos = info.lastPosition + positionIncrementGap;
        terms = info.terms;
        boost *= info.boost;
        sliceArray = info.sliceArray;
        sumTotalTermFreq = info.sumTotalTermFreq;
      } else {
        sliceArray = new SliceByteStartArray(BytesRefHash.DEFAULT_CAPACITY);
        terms = new BytesRefHash(byteBlockPool, BytesRefHash.DEFAULT_CAPACITY, sliceArray);
      }

      if (!fieldInfos.containsKey(fieldName)) {
        fieldInfos.put(fieldName,
            new FieldInfo(fieldName, true, fieldInfos.size(), false, false, false, this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS , null, null, null));
      }
      TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
      PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
      OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
      BytesRef ref = termAtt.getBytesRef();
      stream.reset();
     
      while (stream.incrementToken()) {
        termAtt.fillBytesRef();
//        if (DEBUG) System.err.println("token='" + term + "'");
        numTokens++;
        final int posIncr = posIncrAttribute.getPositionIncrement();
        if (posIncr == 0)
          numOverlapTokens++;
        pos += posIncr;
        int ord = terms.add(ref);
        if (ord < 0) {
          ord = (-ord) - 1;
          postingsWriter.reset(sliceArray.end[ord]);
        } else {
          sliceArray.start[ord] = postingsWriter.startNewSlice();
View Full Code Here

      // by the impersonator, but we have to give it a chance to merge them to this
      cfg.setMergePolicy(newLogMergePolicy());
    }
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, cfg);
    int numDocs = atLeast(100);
    BytesRefHash hash = new BytesRefHash();
    Map<String, String> docToString = new HashMap<String, String>();
    int maxLength = _TestUtil.nextInt(random(), 1, 50);
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      doc.add(newTextField("id", "" + i, Field.Store.YES));
      String string = _TestUtil.randomRealisticUnicodeString(random(), 1, maxLength);
      BytesRef br = new BytesRef(string);
      doc.add(new SortedDocValuesField("field", br));
      hash.add(br);
      docToString.put("" + i, string);
      w.addDocument(doc);
    }
    if (rarely()) {
      w.commit();
    }
    int numDocsNoValue = atLeast(10);
    for (int i = 0; i < numDocsNoValue; i++) {
      Document doc = new Document();
      doc.add(newTextField("id", "noValue", Field.Store.YES));
      w.addDocument(doc);
    }
    if (!defaultCodecSupportsDocsWithField()) {
      BytesRef bytesRef = new BytesRef();
      hash.add(bytesRef); // add empty value for the gaps
    }
    if (rarely()) {
      w.commit();
    }
    if (!defaultCodecSupportsDocsWithField()) {
      // if the codec doesn't support missing, we expect missing to be mapped to byte[]
      // by the impersonator, but we have to give it a chance to merge them to this
      w.forceMerge(1);
    }
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      String id = "" + i + numDocs;
      doc.add(newTextField("id", id, Field.Store.YES));
      String string = _TestUtil.randomRealisticUnicodeString(random(), 1, maxLength);
      BytesRef br = new BytesRef(string);
      hash.add(br);
      docToString.put(id, string);
      doc.add(new SortedDocValuesField("field", br));
      w.addDocument(doc);
    }
    w.commit();
    IndexReader reader = w.getReader();
    SortedDocValues docValues = MultiDocValues.getSortedValues(reader, "field");
    int[] sort = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
    BytesRef expected = new BytesRef();
    BytesRef actual = new BytesRef();
    assertEquals(hash.size(), docValues.getValueCount());
    for (int i = 0; i < hash.size(); i++) {
      hash.get(sort[i], expected);
      docValues.lookupOrd(i, actual);
      assertEquals(expected.utf8ToString(), actual.utf8ToString());
      int ord = docValues.lookupTerm(expected);
      assertEquals(i, ord);
    }
View Full Code Here

  private int maxCount = 0;

  public SortedSetDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
    this.fieldInfo = fieldInfo;
    this.iwBytesUsed = iwBytesUsed;
    hash = new BytesRefHash(
        new ByteBlockPool(
            new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
            BytesRefHash.DEFAULT_CAPACITY,
            new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
    pending = new AppendingLongBuffer();
View Full Code Here

  public void testRandomSortedBytes() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, cfg);
    int numDocs = atLeast(100);
    BytesRefHash hash = new BytesRefHash();
    Map<String, String> docToString = new HashMap<String, String>();
    int maxLength = _TestUtil.nextInt(random(), 1, 50);
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      doc.add(newTextField("id", "" + i, Field.Store.YES));
      String string = _TestUtil.randomRealisticUnicodeString(random(), 1, maxLength);
      BytesRef br = new BytesRef(string);
      doc.add(new SortedDocValuesField("field", br));
      hash.add(br);
      docToString.put("" + i, string);
      w.addDocument(doc);
    }
    if (rarely()) {
      w.commit();
    }
    int numDocsNoValue = atLeast(10);
    for (int i = 0; i < numDocsNoValue; i++) {
      Document doc = new Document();
      doc.add(newTextField("id", "noValue", Field.Store.YES));
      w.addDocument(doc);
    }
    BytesRef bytesRef = new BytesRef();
    hash.add(bytesRef); // add empty value for the gaps
    if (rarely()) {
      w.commit();
    }
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      String id = "" + i + numDocs;
      doc.add(newTextField("id", id, Field.Store.YES));
      String string = _TestUtil.randomRealisticUnicodeString(random(), 1, maxLength);
      BytesRef br = new BytesRef(string);
      hash.add(br);
      docToString.put(id, string);
      doc.add(new SortedDocValuesField("field", br));
      w.addDocument(doc);
    }
    w.commit();
    IndexReader reader = w.getReader();
    SortedDocValues docValues = MultiDocValues.getSortedValues(reader, "field");
    int[] sort = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
    BytesRef expected = new BytesRef();
    BytesRef actual = new BytesRef();
    assertEquals(hash.size(), docValues.getValueCount());
    for (int i = 0; i < hash.size(); i++) {
      hash.get(sort[i], expected);
      docValues.lookupOrd(i, actual);
      assertEquals(expected.utf8ToString(), actual.utf8ToString());
      int ord = docValues.lookupTerm(expected);
      assertEquals(i, ord);
    }
View Full Code Here

      if (boost <= 0.0f)
          throw new IllegalArgumentException("boost factor must be greater than 0.0");
      int numTokens = 0;
      int numOverlapTokens = 0;
      int pos = -1;
      final BytesRefHash terms;
      final SliceByteStartArray sliceArray;
      Info info = null;
      long sumTotalTermFreq = 0;
      if ((info = fields.get(fieldName)) != null) {
        numTokens = info.numTokens;
        numOverlapTokens = info.numOverlapTokens;
        pos = info.lastPosition + positionIncrementGap;
        terms = info.terms;
        boost *= info.boost;
        sliceArray = info.sliceArray;
        sumTotalTermFreq = info.sumTotalTermFreq;
      } else {
        sliceArray = new SliceByteStartArray(BytesRefHash.DEFAULT_CAPACITY);
        terms = new BytesRefHash(byteBlockPool, BytesRefHash.DEFAULT_CAPACITY, sliceArray);
      }

      if (!fieldInfos.containsKey(fieldName)) {
        fieldInfos.put(fieldName,
            new FieldInfo(fieldName, true, fieldInfos.size(), false, false, false, this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS , null, null, null));
      }
      TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
      PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
      OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
      BytesRef ref = termAtt.getBytesRef();
      stream.reset();
     
      while (stream.incrementToken()) {
        termAtt.fillBytesRef();
//        if (DEBUG) System.err.println("token='" + term + "'");
        numTokens++;
        final int posIncr = posIncrAttribute.getPositionIncrement();
        if (posIncr == 0)
          numOverlapTokens++;
        pos += posIncr;
        int ord = terms.add(ref);
        if (ord < 0) {
          ord = (-ord) - 1;
          postingsWriter.reset(sliceArray.end[ord]);
        } else {
          sliceArray.start[ord] = postingsWriter.startNewSlice();
View Full Code Here

      return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query);
    } else if (size == 0) {
      return getTopLevelQuery();
    } else {
      final BooleanQuery bq = getTopLevelQuery();
      final BytesRefHash pendingTerms = col.pendingTerms;
      final int sort[] = pendingTerms.sort(col.termsEnum.getComparator());
      for(int i = 0; i < size; i++) {
        final int pos = sort[i];
        // docFreq is not used for constant score here, we pass 1
        // to explicitly set a fake value, so it's not calculated
        addClause(bq, new Term(query.field, pendingTerms.get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]);
      }
      // Strip scores
      final Query result = new ConstantScoreQuery(bq);
      result.setBoost(query.getBoost());
      return result;
View Full Code Here

TOP

Related Classes of org.apache.lucene.util.BytesRefHash$MaxBytesLengthExceededException

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.