Package it.unimi.dsi.mg4j.index

Examples of it.unimi.dsi.mg4j.index.IndexReader


    do {
      int frequency;
      int left = 0; // The left extreme of the current block
      long count = 0; // Number of documents/occurrences in the current block

      final IndexReader indexReader = index.getReader();
      long blockSize = total / blockSizeDivisor++; // The approximate size of a block
      IndexIterator indexIterator;
     
      for ( int i = k = 0; i < terms; i++ ) {
        indexIterator = indexReader.nextIterator();
        frequency = indexIterator.frequency();
        if ( ! index.hasPositions ) count += frequency;
        for ( int j = frequency; j-- != 0; ) {
          indexIterator.nextDocument();
          if ( index.hasPositions ) count += indexIterator.count();
        }
       
        if ( i == terms - 1 ) i++; // To fool the next
        if ( count >= blockSize && k < numberOfLocalIndices - 1 || i == terms ) {
          LOGGER.info( "New term interval [" + left + ".." + i + "] (" + termMap.list().get( left ) + " -> " + ( i == terms ? "" : termMap.list().get( i ) ) + ")" );
          cutPoint[ ++k ] = i;
          if ( i != terms ) cutPointTerm[ k ] = termMap.list().get( i );
          left = i;
          count = 0;
        }
      }
      indexReader.close();
      // In case we did not generate enough blocks, we try again with a smaller block size.
    } while( k < numberOfLocalIndices );
   
    return new ContiguousLexicalStrategy( cutPoint, cutPointTerm );
  }
View Full Code Here


  private static String firstBaseName;
  private static String secondBaseName;
  private static boolean textTerm =false;

  public static void testIndexIterator() throws IOException{
    IndexReader firstIndexReader =firstIndex.getReader();
    IndexReader secondIndexReader =secondIndex.getReader(1000);
    IndexIterator firstIterator = null;
    IndexIterator secondIterator = null;
   
    for(int i = 0;i<firstIndex.numberOfTerms;i++){
      try{
        System.out.println("term: " + i);
        firstIterator = firstIndexReader.documents(i);       
        secondIterator = !textTerm?secondIndexReader.documents(i):secondIndexReader.documents(firstIndex.termMap.list().get(i));       
       
        /** Compare hasNext*/
        Assert.assertEquals(firstIterator.hasNext(), secondIterator.hasNext());
       
        /** Compare frequency*/
        Assert.assertEquals(firstIterator.frequency(), secondIterator.frequency());               
       
        /** Compare positions & count*/     
        while(firstIterator.hasNext()){
          int fr = firstIterator.nextDocument();
          int sr = secondIterator.nextDocument();   
          Assert.assertEquals(fr,sr);               
          /** Compare count*/
          Assert.assertEquals(firstIterator.count(), secondIterator.count());
         
         
         
         
          int[] firstPos = new int[1000];
          int[] secondPos = new int[1000];
          int fRet = firstIterator.positions(firstPos);
          int sRet = secondIterator.positions(secondPos);
          System.out.println(fRet + "  " + sRet);
          Assert.assertTrue(fRet == sRet);
         
          for(int j = 0;j<fRet;j++)
            Assert.assertEquals(firstPos[j],secondPos[j]);       
        }
       
        /** Compare positions int[] positionArray()*/
        while(firstIterator.hasNext()){
          secondIterator.next();
          int []firstPos = firstIterator.positionArray();
          int []secondPos = secondIterator.positionArray();
          Assert.assertTrue(firstPos.length == secondPos.length);
          for(int j = 0;j<firstPos.length;j++)
            Assert.assertTrue(firstPos[j] == secondPos[j]);       
        }

        /** Compare IntIterator from positions() method */
        firstIterator = firstIndexReader.documents(i);
        secondIterator = !textTerm?secondIndexReader.documents(i):secondIndexReader.documents(firstIndex.termMap.list().get(i));       
        while(firstIterator.hasNext()){
          firstIterator.next();
          secondIterator.next();
 
          IntIterator firstIntIt = firstIterator.positions();
          IntIterator secondIntIt = secondIterator.positions();       
          while(firstIntIt.hasNext()){                 
            Assert.assertEquals(firstIntIt.nextInt(),secondIntIt.nextInt());
          }       
          Assert.assertEquals(firstIntIt.skip(2),secondIntIt.skip(2));       
          if(firstIntIt.hasNext()){
            Assert.assertEquals(firstIntIt.nextInt(),secondIntIt.nextInt());
          }       
          Assert.assertEquals(firstIntIt.skip(9999999),secondIntIt.skip(9999999));       
          if(firstIntIt.hasNext()){       
            Assert.assertEquals(firstIntIt.nextInt(),secondIntIt.nextInt());
          }
         
        }
       
        /** Compare IntervalIterator from Interval() method */
        firstIterator = firstIndexReader.documents(i);
        secondIterator = !textTerm?secondIndexReader.documents(i):secondIndexReader.documents(firstIndex.termMap.list().get(i));       
        while(firstIterator.hasNext()){
          firstIterator.next();
          secondIterator.next()
          /** Compare position IntIterator*/
          IntervalIterator firstIntervalIt = firstIterator.intervalIterator(firstIndex);
View Full Code Here

      }
     }
   

    if ( ! jsapResult.getBoolean( "noComp" ) ) {
      IndexReader additionalReader;
      IntLinkedOpenHashSet s0 = new IntLinkedOpenHashSet();
      IntOpenHashSet s1 = new IntOpenHashSet();
      IntAVLTreeSet s2 = new IntAVLTreeSet();
      IntIterator it;
      IndexIterator indexIterator, additionalIterator;
      it.unimi.dsi.mg4j.search.DocumentIterator documentIterator;
      int u = 0;
     
      try {
        for (i = 0; i < index.length; i++) {
          pl.expectedUpdates = numberOfTerms[ i ];
          pl.start("Verifying composite iterators in " + index[i] + "...");
          additionalReader = index[ i ].getReader();
         
          for (t = 0; t < numberOfTerms[ i ]; t++) {
            for (u = 0; u < numberOfTerms[ i ]; u++) {
              s0.clear();
              s1.clear();
              // TODO: in case we have positions, we should check them, too
              IntIterators.pour( termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t ), s0 );
              IntIterators.pour( termLists ? indexReader[ i ].documents( terms[ i ].get( u ) ) : indexReader[ i ].documents( u ), s1 );
              s0.retainAll( s1 );
              indexIterator =  termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t );
              additionalIterator = termLists ? additionalReader.documents( terms[ i ].get( u ) ) : additionalReader.documents( u );
              it = s0.iterator();
              documentIterator = AndDocumentIterator.getInstance( indexIterator, additionalIterator );
              for( int j = s0.size(); j-- != 0; ) if ( it.nextInt() != documentIterator.nextDocument() ) throw new AssertionError();
              if ( documentIterator.hasNext() ) throw new AssertionError();

              s2.clear();
              IntIterators.pour( termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t ), s2 );
              IntIterators.pour( termLists ? indexReader[ i ].documents( terms[ i ].get( u ) ) : indexReader[ i ].documents( u ), s2 );

              indexIterator =  termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t );
              additionalIterator = termLists ? additionalReader.documents( terms[ i ].get( u ) ) : additionalReader.documents( u );

              it = s2.iterator();
              documentIterator = OrDocumentIterator.getInstance( indexIterator, additionalIterator );
              for( int j = s2.size(); j-- != 0; ) if ( it.nextInt() != documentIterator.nextDocument() ) throw new AssertionError();
              if ( documentIterator.hasNext() ) throw new AssertionError();
           
            pl.update();
          }
          pl.done();
          additionalReader.close();
        }
      }
      catch( Throwable e  ) {
        System.err.println( "Exception during composite iterator test (index=" + index[ i ] + ", first term=" + t + ", second term =" + u + ")" );
        throw e;
View Full Code Here

    final int numTerms = index0.numberOfTerms;
    int document;
    int[] p0 = IntArrays.EMPTY_ARRAY, p1 = IntArrays.EMPTY_ARRAY;
    boolean hasCounts = index0.hasCounts, hasPositions = index0.hasPositions;
    final IndexReader reader0 = index0.getReader(), reader1 = index1.getReader();
    IndexIterator i0, i1;
    for ( int i = 0; i < numTerms; i++ ) {
      if ( terms != null ) {
        final CharSequence term = terms.next();
        i0 = reader0.documents( term );
        i1 = reader1.documents( term );
      }
      else {
        i0 = reader0.documents( i );
        i1 = reader1.documents( i );
      }

      while ( i0.hasNext() && i1.hasNext() ) {
        assertEquals( "term " + i, document = i0.nextDocument(), i1.nextDocument() );
        if ( hasCounts ) {
          assertEquals( "term " + i + ", document " + document, i0.count(), i1.count() );
          if ( i0.count() > p0.length ) p0 = new int[ i0.count() ];
          if ( i1.count() > p1.length ) p1 = new int[ i1.count() ];
          if ( hasPositions ) for ( int p = i0.count(); p-- != 0; )
            assertEquals( "term " + i + ", document " + document + ", position " + p, p0[ p ], p1[ p ] );
        }
      }

      assertEquals( "term " + i, i0.hasNext(), i1.hasNext() );
    }
    reader0.close();
    reader1.close();
  }
View Full Code Here

          for ( int[] p : l )
            occurrences += p.length - 1;
        }
        assertEquals( index[ i ].toString(), postings, index[ i ].numberOfPostings );
        assertEquals( occurrences, index[ i ].numberOfOccurrences );
        // Reads back every posting list of index[ i ] and compares it with the expected
        // postings stored in termMap[ i ].
        IndexReader indexReader = index[ i ].getReader();
        // The terms are copied from the key set into an ObjectRBTreeSet and then into an array
        // (presumably to visit them in sorted order -- confirm).
        for ( MutableString term : new ObjectRBTreeSet<MutableString>( termMap[ i ].keySet() ).toArray( new MutableString[ termMap[ i ].size() ] ) ) {
          // Message attached to every assertion below: index and term under examination.
          String msg = index[ i ] + ":" + term;
          IndexIterator indexIterator = indexReader.documents( term );
          // Expected postings: element 0 of each array is compared with the document pointer,
          // the remaining elements with the positions.
          ObjectArrayList<int[]> list = termMap[ i ].get( term );
          int k = 0; // Number of postings read so far for this term
          while ( indexIterator.hasNext() ) {
            // Document pointer
            assertEquals( msg, list.get( k )[ 0 ], indexIterator.nextDocument() );
            // Count: the expected array holds the pointer plus one entry per position
            if ( index[ i ].hasCounts ) assertEquals( msg, list.get( k ).length - 1, indexIterator.count() );
            if ( index[ i ].hasPositions ) {
              final int[] position = indexIterator.positionArray();
              // Only the first count() entries of the returned array are compared.
              for ( int p = 0; p < indexIterator.count(); p++ )
                assertEquals( msg, list.get( k )[ p + 1 ], position[ p ] ); // Positions
            }
            k++;
          }
          assertEquals( k, list.size() ); // This implicitly checks the frequency
        }
        indexReader.close();
        break;
      case INT:
      case DATE:
        assertEquals( index[ i ].toString(), payloadPointers[ i ].size(), index[ i ].numberOfPostings );
        assertEquals( index[ i ].toString(), documentIndex != 0 ? 1 : 0, index[ i ].numberOfTerms );
View Full Code Here

TOP

Related Classes of it.unimi.dsi.mg4j.index.IndexReader

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.