Package org.apache.lucene.util

Examples of org.apache.lucene.util.OpenBitSet


 
  /** Bit sets: one per repeating pp, in which the bit at the ordinal of each of its repeating terms is set. */
  private ArrayList<OpenBitSet> ppTermsBitSets(PhrasePositions[] rpp, HashMap<Term,Integer> tord) {
    ArrayList<OpenBitSet> bb = new ArrayList<OpenBitSet>(rpp.length);
    for (PhrasePositions pp : rpp) {
      OpenBitSet b = new OpenBitSet(tord.size());
      Integer ord;
      for (Term t: pp.terms) {
        if ((ord=tord.get(t))!=null) {
          b.set(ord);
        }
      }
      bb.add(b);
    }
    return bb;
View Full Code Here


     */
    @Override
    public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
      final SortedSetDocValues docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), query.field);
      // Cannot use FixedBitSet because we require long index (ord):
      final OpenBitSet termSet = new OpenBitSet(docTermOrds.getValueCount());
      TermsEnum termsEnum = query.getTermsEnum(new Terms() {
       
        @Override
        public Comparator<BytesRef> getComparator() {
          return BytesRef.getUTF8SortedAsUnicodeComparator();
        }
       
        @Override
        public TermsEnum iterator(TermsEnum reuse) {
          return docTermOrds.termsEnum();
        }

        @Override
        public long getSumTotalTermFreq() {
          return -1;
        }

        @Override
        public long getSumDocFreq() {
          return -1;
        }

        @Override
        public int getDocCount() {
          return -1;
        }

        @Override
        public long size() {
          return -1;
        }

        @Override
        public boolean hasFreqs() {
          return false;
        }

        @Override
        public boolean hasOffsets() {
          return false;
        }

        @Override
        public boolean hasPositions() {
          return false;
        }
       
        @Override
        public boolean hasPayloads() {
          return false;
        }
      });
     
      assert termsEnum != null;
      if (termsEnum.next() != null) {
        // fill into a OpenBitSet
        do {
          termSet.set(termsEnum.ord());
        } while (termsEnum.next() != null);
      } else {
        return null;
      }
     
      return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
        @Override
        protected final boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException {
          docTermOrds.setDocument(doc);
          long ord;
          // TODO: we could track max bit set and early terminate (since they come in sorted order)
          while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
            if (termSet.get(ord)) {
              return true;
            }
          }
          return false;
        }
View Full Code Here

    this.fieldInfo = fieldInfo;
    this.bytes = new PagedBytes(BLOCK_BITS);
    this.bytesOut = bytes.getDataOutput();
    this.lengths = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
    this.iwBytesUsed = iwBytesUsed;
    this.docsWithField = new OpenBitSet();
    this.bytesUsed = docsWithFieldBytesUsed();
    iwBytesUsed.addAndGet(bytesUsed);
  }
View Full Code Here

  private final FieldInfo fieldInfo;
  private final boolean trackDocsWithField;

  public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed, boolean trackDocsWithField) {
    pending = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
    docsWithField = new OpenBitSet();
    bytesUsed = pending.ramBytesUsed() + docsWithFieldBytesUsed();
    this.fieldInfo = fieldInfo;
    this.iwBytesUsed = iwBytesUsed;
    iwBytesUsed.addAndGet(bytesUsed);
    this.trackDocsWithField = trackDocsWithField;
View Full Code Here

     */
    @Override
    public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
      final SortedDocValues fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader(), query.field);
      // Cannot use FixedBitSet because we require long index (ord):
      final OpenBitSet termSet = new OpenBitSet(fcsi.getValueCount());
      TermsEnum termsEnum = query.getTermsEnum(new Terms() {
       
        @Override
        public Comparator<BytesRef> getComparator() {
          return BytesRef.getUTF8SortedAsUnicodeComparator();
        }
       
        @Override
        public TermsEnum iterator(TermsEnum reuse) {
          return fcsi.termsEnum();
        }

        @Override
        public long getSumTotalTermFreq() {
          return -1;
        }

        @Override
        public long getSumDocFreq() {
          return -1;
        }

        @Override
        public int getDocCount() {
          return -1;
        }

        @Override
        public long size() {
          return -1;
        }

        @Override
        public boolean hasFreqs() {
          return false;
        }

        @Override
        public boolean hasOffsets() {
          return false;
        }

        @Override
        public boolean hasPositions() {
          return false;
        }
       
        @Override
        public boolean hasPayloads() {
          return false;
        }
      });
     
      assert termsEnum != null;
      if (termsEnum.next() != null) {
        // fill into a OpenBitSet
        do {
          long ord = termsEnum.ord();
          if (ord >= 0) {
            termSet.set(ord);
          }
        } while (termsEnum.next() != null);
      } else {
        return null;
      }
     
      return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
        @Override
        protected final boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException {
          int ord = fcsi.getOrd(doc);
          if (ord == -1) {
            return false;
          }
          return termSet.get(ord);
        }
      };
    }
View Full Code Here

    }
  }
 
  private static void checkSortedSetDocValues(String fieldName, AtomicReader reader, SortedSetDocValues dv, Bits docsWithField) {
    final long maxOrd = dv.getValueCount()-1;
    OpenBitSet seenOrds = new OpenBitSet(dv.getValueCount());
    long maxOrd2 = -1;
    for (int i = 0; i < reader.maxDoc(); i++) {
      dv.setDocument(i);
      long lastOrd = -1;
      long ord;
      if (docsWithField.get(i)) {
        int ordCount = 0;
        while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
          ordCount++;
          if (ord <= lastOrd) {
            throw new RuntimeException("ords out of order: " + ord + " <= " + lastOrd + " for doc: " + i);
          }
          if (ord < 0 || ord > maxOrd) {
            throw new RuntimeException("ord out of bounds: " + ord);
          }
          lastOrd = ord;
          maxOrd2 = Math.max(maxOrd2, ord);
          seenOrds.set(ord);
        }
        if (ordCount == 0) {
          throw new RuntimeException("dv for field: " + fieldName + " has no ordinals but is not marked missing for doc: " + i);
        }
      } else {
        long o = dv.nextOrd();
        if (o != SortedSetDocValues.NO_MORE_ORDS) {
          throw new RuntimeException("dv for field: " + fieldName + " is marked missing but has ord=" + o + " for doc: " + i);
        }
      }
    }
    if (maxOrd != maxOrd2) {
      throw new RuntimeException("dv for field: " + fieldName + " reports wrong maxOrd=" + maxOrd + " but this is not the case: " + maxOrd2);
    }
    if (seenOrds.cardinality() != dv.getValueCount()) {
      throw new RuntimeException("dv for field: " + fieldName + " has holes in its ords, valueCount=" + dv.getValueCount() + " but only used: " + seenOrds.cardinality());
    }
   
    BytesRef lastValue = null;
    BytesRef scratch = new BytesRef();
    for (long i = 0; i <= maxOrd; i++) {
View Full Code Here

      in.seek(termsStart);
      final BytesRef lastTerm = new BytesRef(10);
      long lastDocsStart = -1;
      int docFreq = 0;
      long totalTermFreq = 0;
      OpenBitSet visitedDocs = new OpenBitSet();
      final IntsRef scratchIntsRef = new IntsRef();
      while(true) {
        SimpleTextUtil.readLine(in, scratch);
        if (scratch.equals(END) || StringHelper.startsWith(scratch, FIELD)) {
          if (lastDocsStart != -1) {
            b.add(Util.toIntsRef(lastTerm, scratchIntsRef),
                  outputs.newPair(lastDocsStart,
                                  outputsInner.newPair((long) docFreq, totalTermFreq)));
            sumTotalTermFreq += totalTermFreq;
          }
          break;
        } else if (StringHelper.startsWith(scratch, DOC)) {
          docFreq++;
          sumDocFreq++;
          UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
          int docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
          visitedDocs.set(docID);
        } else if (StringHelper.startsWith(scratch, FREQ)) {
          UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+FREQ.length, scratch.length-FREQ.length, scratchUTF16);
          totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
        } else if (StringHelper.startsWith(scratch, TERM)) {
          if (lastDocsStart != -1) {
            b.add(Util.toIntsRef(lastTerm, scratchIntsRef), outputs.newPair(lastDocsStart,
                                                                            outputsInner.newPair((long) docFreq, totalTermFreq)));
          }
          lastDocsStart = in.getFilePointer();
          final int len = scratch.length - TERM.length;
          if (len > lastTerm.length) {
            lastTerm.grow(len);
          }
          System.arraycopy(scratch.bytes, TERM.length, lastTerm.bytes, 0, len);
          lastTerm.length = len;
          docFreq = 0;
          sumTotalTermFreq += totalTermFreq;
          totalTermFreq = 0;
          termCount++;
        }
      }
      docCount = (int) visitedDocs.cardinality();
      fst = b.finish();
      /*
      PrintStream ps = new PrintStream("out.dot");
      fst.toDot(ps);
      ps.close();
View Full Code Here

    OpenBitSet dels;
    OpenBitSet oldDels = null;

    public FakeDeleteIndexReader(IndexReader in) {
      super(in);
      dels = new OpenBitSet(in.maxDoc());
      if (in.hasDeletions()) {
        oldDels = new OpenBitSet(in.maxDoc());
        for (int i = 0; i < in.maxDoc(); i++) {
          if (in.isDeleted(i)) oldDels.set(i);
        }
        dels.or(oldDels);
      }
View Full Code Here

     * Just removes our overlaid deletions - does not undelete the original
     * deletions.
     */
    @Override
    protected void doUndeleteAll() throws CorruptIndexException, IOException {
      dels = new OpenBitSet(in.maxDoc());
      if (oldDels != null) {
        dels.or(oldDels);
      }
    }
View Full Code Here

  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    if ( quadTreeCellsIds.size() == 0 ) {
      return null;
    }

    OpenBitSet matchedDocumentsIds = new OpenBitSet( reader.maxDoc() );
    Boolean found = false;
    for ( int i = 0; i < quadTreeCellsIds.size(); i++ ) {
      Term quadTreeCellTerm = new Term( fieldName, quadTreeCellsIds.get( i ) );
      TermDocs quadTreeCellsDocs = reader.termDocs( quadTreeCellTerm );
      if ( quadTreeCellsDocs != null ) {
        while ( quadTreeCellsDocs.next() ) {
          matchedDocumentsIds.fastSet( quadTreeCellsDocs.doc() );
          found = true;
        }
      }
    }
View Full Code Here

TOP

Related Classes of org.apache.lucene.util.OpenBitSet

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Corporation. Contact coftware#gmail.com.