Package org.apache.lucene.util

Examples of org.apache.lucene.util.FixedBitSet


  }
 
  private static void checkSortedDocValues(String fieldName, AtomicReader reader, SortedDocValues dv, Bits docsWithField) {
    checkBinaryDocValues(fieldName, reader, dv, docsWithField);
    final int maxOrd = dv.getValueCount()-1;
    FixedBitSet seenOrds = new FixedBitSet(dv.getValueCount());
    int maxOrd2 = -1;
    for (int i = 0; i < reader.maxDoc(); i++) {
      int ord = dv.getOrd(i);
      if (ord == -1) {
        if (docsWithField.get(i)) {
          throw new RuntimeException("dv for field: " + fieldName + " has -1 ord but is not marked missing for doc: " + i);
        }
      } else if (ord < -1 || ord > maxOrd) {
        throw new RuntimeException("ord out of bounds: " + ord);
      } else {
        if (!docsWithField.get(i)) {
          throw new RuntimeException("dv for field: " + fieldName + " is missing but has ord=" + ord + " for doc: " + i);
        }
        maxOrd2 = Math.max(maxOrd2, ord);
        seenOrds.set(ord);
      }
    }
    if (maxOrd != maxOrd2) {
      throw new RuntimeException("dv for field: " + fieldName + " reports wrong maxOrd=" + maxOrd + " but this is not the case: " + maxOrd2);
    }
    if (seenOrds.cardinality() != dv.getValueCount()) {
      throw new RuntimeException("dv for field: " + fieldName + " has holes in its ords, valueCount=" + dv.getValueCount() + " but only used: " + seenOrds.cardinality());
    }
    BytesRef lastValue = null;
    BytesRef scratch = new BytesRef();
    for (int i = 0; i <= maxOrd; i++) {
      dv.lookupOrd(i, scratch);
View Full Code Here


   * @lucene.experimental
   */
  public static Status.TermVectorStatus testTermVectors(AtomicReader reader, PrintStream infoStream, boolean verbose, boolean crossCheckTermVectors) {
    final Status.TermVectorStatus status = new Status.TermVectorStatus();
    final FieldInfos fieldInfos = reader.getFieldInfos();
    final Bits onlyDocIsDeleted = new FixedBitSet(1);
   
    try {
      if (infoStream != null) {
        infoStream.print("    test: term vectors........");
      }
View Full Code Here

    /**
     * Return the new doc ID according to its old value.
     *
     * @param old the old doc ID
     * @return the new doc ID that {@code old} maps to
     */
    public abstract int map(int old);

    /** Useful from an assert. */
    boolean isConsistent(int maxDoc) {
      final FixedBitSet targets = new FixedBitSet(maxDoc);
      for (int i = 0; i < maxDoc; ++i) {
        final int target = map(i);
        if (target < 0 || target >= maxDoc) {
          assert false : "out of range: " + target + " not in [0-" + maxDoc + "[";
          return false;
        } else if (targets.get(target)) {
          assert false : target + " is already taken (" + i + ")";
          return false;
        }
      }
      return true;
View Full Code Here

  @Test
  public void test() throws IOException {
    Random random = new Random(_seed);
    int numBits = random.nextInt(10000000);
    FixedBitSet fixedBitSet = new FixedBitSet(numBits);
    populate(random, numBits, fixedBitSet);
    String id = "id";
    String segmentName = "seg1";
    RAMDirectory directory = new RAMDirectory();
    IndexFileBitSet indexFileBitSet = new IndexFileBitSet(numBits, id, segmentName, directory);
    assertFalse(indexFileBitSet.exists());
    indexFileBitSet.create(fixedBitSet.iterator());
    indexFileBitSet.load();
    checkEquals(fixedBitSet.iterator(), indexFileBitSet.iterator(), numBits);
    indexFileBitSet.close();
   
    String[] listAll = directory.listAll();
    for (String s : listAll) {
      System.out.println(s + " " + directory.fileLength(s));
View Full Code Here

      }
    }

    @Override
    public DocIdSet getDocIdSet(IndexReader reader) {
      final FixedBitSet set = new FixedBitSet(reader.maxDoc());
      final int docBase = docBasePerSub.get(reader);
      final int limit = docBase+reader.maxDoc();
      for (;index < docs.length; index++) {
        final int docId = docs[index];
        if (docId > limit)
          break;
        if (docId >= docBase) {
          set.set(docId-docBase);
        }
      }
      return set.cardinality() == 0 ? null:set;
    }
View Full Code Here

    try {
      // if current term in enum is null, the enum is empty -> shortcut
      if (enumerator.term() == null)
        return DocIdSet.EMPTY_DOCIDSET;
      // else fill into a FixedBitSet
      final FixedBitSet bitSet = new FixedBitSet(reader.maxDoc());
      final int[] docs = new int[32];
      final int[] freqs = new int[32];
      TermDocs termDocs = reader.termDocs();
      try {
        int termCount = 0;
        do {
          Term term = enumerator.term();
          if (term == null)
            break;
          termCount++;
          termDocs.seek(term);
          while (true) {
            final int count = termDocs.read(docs, freqs);
            if (count != 0) {
              for(int i=0;i<count;i++) {
                bitSet.set(docs[i]);
              }
            } else {
              break;
            }
          }
View Full Code Here

   * @see org.apache.lucene.search.Filter#getDocIdSet(org.apache.lucene.index.IndexReader)
   */
  @Override
  public DocIdSet getDocIdSet(IndexReader reader) throws IOException
  {
    FixedBitSet result=new FixedBitSet(reader.maxDoc());
        TermDocs td = reader.termDocs();
        try
        {
            for (Iterator<Term> iter = terms.iterator(); iter.hasNext();)
            {
                Term term = iter.next();
                td.seek(term);
                while (td.next())
                {
                    result.set(td.doc());
                }
            }
        }
        finally
        {
View Full Code Here

  }
 
  private FixedBitSet correctBits(IndexReader reader) throws IOException
  {
   
    FixedBitSet bits=new FixedBitSet(reader.maxDoc()); //assume all are INvalid
    Term startTerm=new Term(fieldName);
    TermEnum te = reader.terms(startTerm);
    if(te!=null)
    {
      Term currTerm=te.term();
      while((currTerm!=null)&&(currTerm.field()==startTerm.field())) //term fieldnames are interned
      {
        int lastDoc=-1;
        //set non duplicates
        TermDocs td = reader.termDocs(currTerm);
        if(td.next())
        {
          if(keepMode==KM_USE_FIRST_OCCURRENCE)
          {
            bits.set(td.doc());
          }
          else
          {
            do
            {
              lastDoc=td.doc();
            }while(td.next());
            bits.set(lastDoc);
          }
        }
        if(!te.next())
        {
          break;
View Full Code Here

  }
 
  private FixedBitSet fastBits(IndexReader reader) throws IOException
  {
   
    FixedBitSet bits=new FixedBitSet(reader.maxDoc());
    bits.set(0,reader.maxDoc()); //assume all are valid
    Term startTerm=new Term(fieldName);
    TermEnum te = reader.terms(startTerm);
    if(te!=null)
    {
      Term currTerm=te.term();
     
      while((currTerm!=null)&&(currTerm.field()==startTerm.field())) //term fieldnames are interned
      {
        if(te.docFreq()>1)
        {
          int lastDoc=-1;
          //unset potential duplicates
          TermDocs td = reader.termDocs(currTerm);
          td.next();
          if(keepMode==KM_USE_FIRST_OCCURRENCE)
          {
            td.next();
          }
          do
          {
            lastDoc=td.doc();
            bits.clear(lastDoc);
          }while(td.next());
          if(keepMode==KM_USE_LAST_OCCURRENCE)
          {
            //restore the last bit
            bits.set(lastDoc);
          }         
        }
        if(!te.next())
        {
          break;
View Full Code Here

  private boolean openBitSetContains(int[] expectedDocs, FixedBitSet actual, int maxDoc) throws IOException {
    if (expectedDocs.length != actual.cardinality()) {
      return false;
    }

    FixedBitSet expected = new FixedBitSet(maxDoc);
    for (int expectedDoc : expectedDocs) {
      expected.set(expectedDoc);
    }

    int docId;
    DocIdSetIterator iterator = expected.iterator();
    while ((docId = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      if (!actual.get(docId)) {
        return false;
      }
    }
View Full Code Here

TOP

Related Classes of org.apache.lucene.util.FixedBitSet

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Inc. Contact coftware#gmail.com.