Examples of it.unimi.dsi.mg4j.index.IndexIterator

it.unimi.dsi.mg4j.index.IndexIterator
An iterator over an inverted list.
An index iterator scans the inverted list of an indexed term. Each integer returned by {@link DocumentIterator#nextDocument() nextDocument()} is the index of a document containing the term. If the index contains counts, they can be obtained after each call to {@link #nextDocument()} using {@link #count()}. Then, if the index contains positions they can be obtained as an array using {@link #positionArray()}, as an iterator using {@link #positions()}, or stored into an array using {@link #positions(int[])}.
Note that this interface extends {@link it.unimi.dsi.mg4j.search.DocumentIterator}. The intervals returned for a document are exactly length-one intervals corresponding to the positions returned by {@link #positions()}. If the index to which an instance of this class refers does not contain positions, an {@link UnsupportedOperationException}will be thrown.
Additionally, this interface strengthens {@link DocumentIterator#weight(double)} so thatit {@linkplain #weight(double) returns an index iterator}.

          pl.start( "Verifying sequentially index " + index[ i ] + "..." );


          if ( allBitStreamIndices ) {
            for ( t = 0; t < numberOfTerms[ i ]; t++ ) {
              pl.update();
              IndexIterator indexIterator = indexReader[ i ].nextIterator();
              indexFrequency = indexIterator.frequency();
              numberOfPostings += indexFrequency;
              if ( frequencies[ i ] != null && indexFrequency != ( f = frequencies[ i ].readGamma() ) ) {
                System.err.println( "Error in frequency for term " + t + ": expected " + f + " documents, found " + indexFrequency );
                return;
              }


              while ( indexFrequency-- != 0 ) {
                p = indexIterator.nextDocument();
                if (index[i].hasCounts) count[i][p] += indexIterator.count();
                if (index[i].hasPositions) indexIterator.positionArray(); // Just to force reading in high-performance indices
              }
              if ( indexIterator.nextDocument() != -1 ) throw new AssertionError( "nextDocument() is not -1 after exhaustive iteration" );
            }
            
            // Check document sizes
            if ( ! isVirtual && ( (BitStreamIndex) index[ i ] ).sizes != null && index[ i ].hasCounts )
              for ( p = 0; p < index[ i ].numberOfDocuments; p++ )
                if ( index[ i ].sizes.getInt( p ) != count[ i ][ p ] )
                  System.err.println( "Document " + p + " has size " + ( (BitStreamIndex) index[ i ] ).sizes.getInt( p ) + " but " + count[ i ][ p ] + " occurrences have been stored." );
            
          }
          else { // Non-bitstream indices
            for (t = 0; t < numberOfTerms[ i ]; t++) {
              pl.update();
              IndexIterator indexIterator = termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t );
              indexFrequency = indexIterator.frequency();
              numberOfPostings += indexFrequency;
              if (frequencies[i] != null && indexFrequency != (f = frequencies[i].readGamma())) {
                System.err.println("Error in frequency for term " + t
                    + ": expected " + f + " documents, found "
                    + indexFrequency);
                return;
              }
              
              int prevp = -1;
              while (indexFrequency-- != 0) {
                p = indexIterator.nextDocument();
                if ( prevp >= p ) throw new AssertionError( "previous pointer: " + prevp + "; current pointer: " + p );
                prevp = p;
                if (index[i].hasCounts) count[i][p] += indexIterator.count();
              }
            }
          }
          pl.done();
          
          if ( ! isVirtual && numberOfPostings != index[ i ].numberOfPostings ) System.err.println( "Index declares " + index[ i ].numberOfPostings + " postings, but we found " + numberOfPostings );
          long numberOfOccurrences = 0;
          if ( index[ i ].hasCounts ) {
            for ( p = 0; p < index[ i ].numberOfDocuments; p++ ) numberOfOccurrences += count[ i ][ p ];
            if ( numberOfOccurrences != index[ i ].numberOfOccurrences ) System.err.println( "Index declares " + index[ i ].numberOfOccurrences + " occurrences, but we found " + numberOfOccurrences );
          }
        }
      } catch ( Exception e ) {
        System.err.println( "Exception while scanning sequentially term " + t + " of index " + index[ i ] );
        System.err.println( "Term frequency was " + f + " and position " + ( f - indexFrequency - 1 ) );
        throw e;
      }
    }
  
    IntArrayList l = new IntArrayList();
    ObjectArrayList<int[]> positions = new ObjectArrayList<int[]>();
    
    if ( ! jsapResult.getBoolean( "noSkip" ) ) {
      int start = 0, end = 0, result;
      try {
        for (i = 0; i < index.length; i++) {
          
          pl.expectedUpdates = numberOfTerms[ i ];
          pl.start("Verifying all skips in " + index[i] + "...");


          for (t = 0; t < numberOfTerms[ i ]; t++) {
            l.clear();
            positions.clear();
            IndexIterator documents = termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t );
            int d;
            while( ( d = documents.nextDocument() ) != -1 ) {
              l.add( d );
              if ( index[ i ].hasPositions ) positions.add( ArrayUtils.subarray( documents.positionArray(), 0, documents.count() ) );
            }
            
            for( start = 0; start < l.size(); start++ ) {
              for( end = start + 1; end < l.size(); end++ ) {
                IndexIterator indexIterator = termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t );
                
                result = indexIterator.skipTo( l.getInt( start ) );
                if ( indexIterator.document() != l.getInt( start ) || result != l.getInt( start ) ) throw new AssertionError( "Trying to skip to document " + l.getInt( start ) + " (term " + t + ") moved to " + indexIterator.document() + "(skipTo() returned " + result + ")" );
                result = indexIterator.skipTo( l.getInt( end ) );
                if ( indexIterator.document() != l.getInt( end ) || result != l.getInt( end ) ) throw new AssertionError( "Trying to skip to document " + l.getInt( end ) + " (term " + t + ") after a skip to " + start + " moved to " + indexIterator.document() + "(skipTo() returned " + result + ")" );
                
                if ( index[ i ].hasPositions ) {
                  // This catches wrong state reconstruction after skips.
                  indexIterator = termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t );
                  indexIterator.skipTo( l.getInt( start ) );
                  if ( indexIterator.document() != l.getInt( start ) ) throw new AssertionError(indexIterator.document() + " != " + l.getInt( start ) );
                  if ( indexIterator.count() != positions.get( start ).length ) throw new AssertionError(indexIterator.count() + " != " + positions.get( start ).length );
                  if ( ! Arrays.equals( positions.get( start ), ArrayUtils.subarray( indexIterator.positionArray(), 0, indexIterator.count() ) )
                     ) throw new AssertionError(Arrays.toString( positions.get( start ) ) + "!=" + Arrays.toString( ArrayUtils.subarray( indexIterator.positionArray(), 0, indexIterator.count() ) ) );
                  indexIterator.skipTo( l.getInt( end ) );
                  if ( indexIterator.document() != l.getInt( end )  ) throw new AssertionError(indexIterator.document() + " != " + l.getInt( end ) );
                  if ( indexIterator.count() != positions.get( end ).length ) throw new AssertionError(indexIterator.count() + " != " + positions.get( end ).length );
                  if ( ! Arrays.equals( positions.get( end ), ArrayUtils.subarray( indexIterator.positionArray(), 0, indexIterator.count() ) )
                     ) throw new AssertionError(Arrays.toString( positions.get( end ) ) + "!=" + Arrays.toString( ArrayUtils.subarray( indexIterator.positionArray(), 0, indexIterator.count() ) ) );
                }
                
              }
              
              IndexIterator indexIterator = termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t );
              
              result = indexIterator.skipTo( l.getInt( start ) );
              if ( indexIterator.document() != l.getInt( start ) || result != l.getInt( start ) ) throw new AssertionError("Trying to skip to document " + l.getInt( start ) + " (term " + t + ") moved to " + indexIterator.document() + "(skipTo() returned " + result + ")" );
              result = indexIterator.skipTo( Integer.MAX_VALUE );
              if ( indexIterator.hasNext() || result != Integer.MAX_VALUE ) throw new AssertionError("Trying to skip beyond end of list (term " + t + ") after a skip to " + start + " returned " + result + " (hasNext()=" + indexIterator.hasNext() + ")" );
              
              
            }
            pl.update();
          }
          pl.done();
        }
      }
      catch( Throwable e  ) {
        System.err.println( "Exception during all-skip test (index=" + index[ i ] + ", term=" + t + ", start=" + start + ", end=" + end + ")" );
        throw e;
      }
     }
    


    if ( ! jsapResult.getBoolean( "noComp" ) ) {
      IndexReader additionalReader;
      IntLinkedOpenHashSet s0 = new IntLinkedOpenHashSet();
      IntOpenHashSet s1 = new IntOpenHashSet();
      IntAVLTreeSet s2 = new IntAVLTreeSet();
      IntIterator it;
      IndexIterator indexIterator, additionalIterator;
      it.unimi.dsi.mg4j.search.DocumentIterator documentIterator;
      int u = 0;
      
      try {
        for (i = 0; i < index.length; i++) {
          pl.expectedUpdates = numberOfTerms[ i ];
          pl.start("Verifying composite iterators in " + index[i] + "...");
          additionalReader = index[ i ].getReader();
          
          for (t = 0; t < numberOfTerms[ i ]; t++) {
            for (u = 0; u < numberOfTerms[ i ]; u++) {
              s0.clear();
              s1.clear();
              // TODO: in case we have positions, we should check them, too
              IntIterators.pour( termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t ), s0 );
              IntIterators.pour( termLists ? indexReader[ i ].documents( terms[ i ].get( u ) ) : indexReader[ i ].documents( u ), s1 );
              s0.retainAll( s1 );
              indexIterator =  termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t );
              additionalIterator = termLists ? additionalReader.documents( terms[ i ].get( u ) ) : additionalReader.documents( u );
              it = s0.iterator();
              documentIterator = AndDocumentIterator.getInstance( indexIterator, additionalIterator );
              for( int j = s0.size(); j-- != 0; ) if ( it.nextInt() != documentIterator.nextDocument() ) throw new AssertionError();
              if ( documentIterator.hasNext() ) throw new AssertionError();


              s2.clear();
              IntIterators.pour( termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t ), s2 );
              IntIterators.pour( termLists ? indexReader[ i ].documents( terms[ i ].get( u ) ) : indexReader[ i ].documents( u ), s2 );


              indexIterator =  termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t );
              additionalIterator = termLists ? additionalReader.documents( terms[ i ].get( u ) ) : additionalReader.documents( u );


              it = s2.iterator();
              documentIterator = OrDocumentIterator.getInstance( indexIterator, additionalIterator ); 
              for( int j = s2.size(); j-- != 0; ) if ( it.nextInt() != documentIterator.nextDocument() ) throw new AssertionError();
              if ( documentIterator.hasNext() ) throw new AssertionError();
            }  
            pl.update();
          }
          pl.done();
          additionalReader.close();
        }
      }
      catch( Throwable e  ) {
        System.err.println( "Exception during composite iterator test (index=" + index[ i ] + ", first term=" + t + ", second term =" + u + ")" );
        throw e;
      }  
    }
    
    if ( ! isVirtual && jsapResult.getBoolean( "random" ) ) {
      
      // Random access scan
      pl.expectedUpdates = index[ 0 ].numberOfDocuments;
      pl.itemsName = "documents";
      pl.start( "Verifying random access..." );


      if ( allBitStreamIndices ) {
        it.unimi.dsi.mg4j.document.DocumentIterator documentIterator = documentSequence.iterator();
        Document document;
        Reader reader;
        WordReader wordReader;
        
        final MutableString word = new MutableString(), nonWord = new MutableString();
        
        int docCounter = 0;
        
        while( ( document = documentIterator.nextDocument() ) != null ) {
          currDoc = permutation != null ? permutation[ docCounter ] : docCounter;


          for( i = 0; i < index.length; i++ ) {
            Object content = document.content( stem || index[ i ].field == null ? indexedField[ i ] : factory.fieldIndex( index[ i ].field ) );
            if ( index[ i ].hasPayloads ) {
              // TODO: write tests for the other case
              if ( allBitStreamIndices ) {
                IndexIterator indexIterator = indexReader[ i ].documents( 0 );
                int pointer = indexIterator.skipTo( currDoc );
                if ( pointer == currDoc ) {
                  Payload payload = indexIterator.payload();
                  if ( ! payload.get().equals( content ) ) LOGGER.error( index[ i ] + ": Document " + currDoc + " has payload " + content + " but the index says " + payload );  
                }
                else LOGGER.error( index[ i ] + ": Document " + currDoc + " does not appear in the inverted list of term " + t );
              }
              else {
                IndexIterator indexIterator = indexReader[ i ].documents(  0  );
                if ( indexIterator.skipTo( currDoc ) == currDoc ) {
                  if ( ! indexIterator.payload().get().equals( content ) )
                    LOGGER.error( index[ i ] + ": Document " + currDoc + " has payload " + content + " but the index says " + indexIterator.payload().get() );
                } 
                else LOGGER.error( index[ i ] + ": Document " + currDoc + " does not appear in the inverted list of term " + t );
              }
            }
            else {
              // text index
              pos = 0;
              termsInDoc[ i ].clear();
              reader = (Reader)content;
              wordReader = document.wordReader( stem || index[ i ].field == null ? indexedField[ i ] : factory.fieldIndex( index[ i ].field ) );
              wordReader.setReader( reader );
              while( wordReader.next( word, nonWord ) ) {
                if ( word.length() == 0 || index[ i ].termProcessor != null && ! index[ i ].termProcessor.processTerm( word ) ) continue;
                if ( ( t = (int)( (BitStreamIndex)index[ i ] ).termMap.getLong( word ) ) == -1 ) LOGGER.error( index[ i ] + ": Could not find term " + word + " in term index" );
                else {
                  if ( index[ i ].hasCounts ) termsInDoc[ i ].put( t, termsInDoc[ i ].get( t ) + 1 );
                  if ( index[ i ].hasPositions ) wordInPos[ i ][ pos++ ] = t;
                }
              }


              if ( allBitStreamIndices ) {
                for( IntIterator x = termsInDoc[ i ].keySet().iterator(); x.hasNext(); ) {
                  t = x.nextInt();


                  IndexIterator indexIterator = indexReader[ i ].documents( t );


                  int pointer = indexIterator.skipTo( currDoc );
                  if ( pointer == currDoc ) {
                    if ( index[ i ].hasCounts ) {
                      int c = indexIterator.count();
                      if ( termsInDoc[ i ].get( t ) !=  c ) 
                        LOGGER.error( index[ i ] + ": The count for term " + t + " in document " + currDoc + " is " + c + " instead of " + termsInDoc[ i ].get( t ) );
                      else {
                        if ( index[ i ].hasPositions ) {
                          indexIterator.positions( occ[ i ] );


                          for( int j = 0; j < c; j++ ) 
                            if ( wordInPos[ i ][ occ[ i ][ j ] ] != t )  
                              LOGGER.error( index[ i ] + ": The occurrence of index " + i + " of term " + t + " (position " + occ[ i ] +") in document " + currDoc + " is occupied instead by term " + wordInPos[ i ][ occ[ i ][ j ] ] );
                        }
                      }
                    } 
                  }
                  else LOGGER.error( index[ i ] + ": Document " + currDoc + " does not appear in the inverted list of term " + t + "(skipTo returned " + pointer + ")" );
                }
              }
              else {
                for( IntIterator x = termsInDoc[ i ].keySet().iterator(); x.hasNext(); ) {
                  t = x.nextInt();
                  IndexIterator indexIterator = termLists ? indexReader[ i ].documents( terms[ i ].get( t ) ) : indexReader[ i ].documents( t );


                  if ( indexIterator.skipTo( currDoc ) == currDoc ) {
                    if ( index[ i ].hasCounts ) {
                      int c = indexIterator.count();
                      if ( termsInDoc[ i ].get( t ) !=  c ) 
                        LOGGER.error( index[ i ] + ": The count for term " + t + " in document " + currDoc + " is " + c + " instead of " + termsInDoc[ i ].get( t ) );
                      else {
                        if ( index[ i ].hasPositions ) {
                          indexIterator.positions( occ[ i ] );


                          for( int j = 0; j < c; j++ ) 
                            if ( wordInPos[ i ][ occ[ i ][ j ] ] != t )  
                              LOGGER.error( index[ i ] + ": The occurrence of index " + i + " of term " + t + " (position " + occ[ i ] +") in document " + currDoc + " is occupied instead by term " + wordInPos[ i ][ occ[ i ][ j ] ] );
                        }
                      }
                    }
                  } 
                  else LOGGER.error( index[ i ] + ": Document " + currDoc + " does not appear in the inverted list of term " + t );
                }
              }
            }
          }
          docCounter++;
          document.close();
          pl.update();
        }
      }
      else {
        LOGGER.warn( "Random access tests require very slow single-term scanning as not all indices are disk based" );


        it.unimi.dsi.mg4j.document.DocumentIterator documentIterator = documentSequence.iterator();
        Document document;
        Reader reader;
        WordReader wordReader;
        
        final MutableString word = new MutableString(), nonWord = new MutableString();
        
        int docCounter = 0;
        
        while( ( document = documentIterator.nextDocument() ) != null ) {
          currDoc = permutation != null ? permutation[ docCounter ] : docCounter;


          for( i = 0; i < index.length; i++ ) {
            Object content = document.content( stem || index[ i ].field == null ? indexedField[ i ] : factory.fieldIndex( index[ i ].field ) );
            if ( index[ i ].hasPayloads ) {
              if ( allBitStreamIndices ) {
                IndexIterator indexIterator = indexReader[ i ].documents( 0 );
                int pointer = indexIterator.skipTo( currDoc );
                if ( pointer == currDoc ) {
                  Payload payload = indexIterator.payload();
                  if ( ! payload.get().equals( content ) ) LOGGER.error( index[ i ] + ": Document " + currDoc + " has payload " + content + " but the index says " + payload );  
                }
                else LOGGER.error( index[ i ] + ": Document " + currDoc + " does not appear in the inverted list of term " + t );
              }
              else {
                IndexIterator indexIterator = indexReader[ i ].documents( "#" );
                if ( indexIterator.skipTo( currDoc ) == currDoc ) {
                  if ( ! indexIterator.payload().get().equals( content ) )
                    LOGGER.error( index[ i ] + ": Document " + currDoc + " has payload " + content + " but the index says " + indexIterator.payload().get() );
                } 
                else LOGGER.error( index[ i ] + ": Document " + currDoc + " does not appear in the inverted list of term " + t );
              }
            }
            else {
              pos = 0;
              reader = (Reader)content;
              wordReader = document.wordReader( stem || index[ i ].field == null ? indexedField[ i ] : factory.fieldIndex( index[ i ].field ) );
              wordReader.setReader( reader );
              while( wordReader.next( word, nonWord ) ) {
                if ( word.length() == 0 || index[ i ].termProcessor != null && ! index[ i ].termProcessor.processTerm( word ) ) continue;
                IndexIterator indexIterator = indexReader[ i ].documents( word );
                if ( currDoc != indexIterator.skipTo( currDoc ) )
                  LOGGER.error( index[ i ] + ": Document " + currDoc + " does not appear in the inverted list of term " + word );
                else if ( index[ i ].hasPositions ) {
                  indexIterator.positions( occ[ i ] );
                  if ( IntArrayList.wrap( occ[ i ], indexIterator.count() ).indexOf( pos ) == -1 )
                    LOGGER.error( index[ i ] + ": Position " + pos + " does not appear in the position list of term " + word + " in document " + currDoc );
                }
                pos++;
              }
            }

View Full Code Here

    final InputBitStream[] direct = new InputBitStream[ numIndices ];
    final InputBitStream[] indirect = new InputBitStream[ numIndices ];
    final BloomFilter[] bloomFilter = bloomFilterPrecision != 0 ? new BloomFilter[ numIndices ] : null;
    final File[] tempFile = new File[ numIndices ];
    final CachingOutputBitStream[] temp = new CachingOutputBitStream[ numIndices ];
    IndexIterator indexIterator;
    
    for ( int i = 0; i < numIndices; i++ ) {
      tempFile[ i ] = new File( localBasename[ i ] + ".temp" );
      temp[ i ] = new CachingOutputBitStream( tempFile[ i ], bufferSize );
      direct[ i ] = new InputBitStream( temp[ i ].buffer() );
      indirect[ i ] = new InputBitStream( tempFile[ i ] );
      if ( bloomFilterPrecision != 0 ) bloomFilter[ i ] = new BloomFilter( globalIndex.numberOfTerms, bloomFilterPrecision );
    }
    int usedIndices;
    MutableString currentTerm = new MutableString();
    Payload payload = null;
    int frequency, globalPointer, localIndex, localPointer, count = -1;


    pl.expectedUpdates = globalIndex.numberOfPostings;
    pl.itemsName = "postings";
    pl.logInterval = logInterval;
    pl.start( "Partitioning index..." );


    for ( int t = 0; t < globalIndex.numberOfTerms; t++ ) {
      terms.readLine( currentTerm );
      indexIterator = indexReader.nextIterator();
      usedIndices = 0;
      frequency = indexIterator.frequency();
      
      for ( int j = 0; j < frequency; j++ ) {
        globalPointer = indexIterator.nextDocument();                
        localIndex = strategy.localIndex( globalPointer );  


        if ( localFrequency[ localIndex ] == 0 ) {
          // First time we see a document for this index.
          currentTerm.println( localTerms[ localIndex ] );
          numTerms[ localIndex ]++;
          usedIndex[ usedIndices++ ] = localIndex;
          if ( bloomFilterPrecision != 0 ) bloomFilter[ localIndex ].add( currentTerm );
        }
        
        /* Store temporarily posting data; note that we save the global pointer as we
         * will have to access the size list. */
        
        localFrequency[ localIndex ]++;
        numPostings[ localIndex ]++;
        temp[ localIndex ].writeGamma( globalPointer );


        if ( globalIndex.hasPayloads ) payload = indexIterator.payload();
        if ( havePayloads ) payload.write( temp[ localIndex ] );
        
        if ( haveCounts ) {
          count = indexIterator.count();
          temp[ localIndex ].writeGamma( count );
          globCount[ localIndex ] += count;        
          if ( maxDocPos[ localIndex ] < count ) maxDocPos[ localIndex ] = count;         
          if ( havePositions ) {
            final int[] pos = indexIterator.positionArray();
            // TODO: compress this stuff
            for( int p = 0; p < count; p++ ) temp[ localIndex ].writeGamma( pos[ p ] ); 
          }
        }
      }

View Full Code Here

    final int numTerms = index0.numberOfTerms;
    int document;
    int[] p0 = IntArrays.EMPTY_ARRAY, p1 = IntArrays.EMPTY_ARRAY;
    boolean hasCounts = index0.hasCounts, hasPositions = index0.hasPositions;
    final IndexReader reader0 = index0.getReader(), reader1 = index1.getReader();
    IndexIterator i0, i1;
    for ( int i = 0; i < numTerms; i++ ) {
      if ( terms != null ) {
        final CharSequence term = terms.next();
        i0 = reader0.documents( term );
        i1 = reader1.documents( term );
      }
      else {
        i0 = reader0.documents( i );
        i1 = reader1.documents( i );
      }


      while ( i0.hasNext() && i1.hasNext() ) {
        assertEquals( "term " + i, document = i0.nextDocument(), i1.nextDocument() );
        if ( hasCounts ) {
          assertEquals( "term " + i + ", document " + document, i0.count(), i1.count() );
          if ( i0.count() > p0.length ) p0 = new int[ i0.count() ];
          if ( i1.count() > p1.length ) p1 = new int[ i1.count() ];
          if ( hasPositions ) for ( int p = i0.count(); p-- != 0; )
            assertEquals( "term " + i + ", document " + document + ", position " + p, p0[ p ], p1[ p ] );
        }
      }


      assertEquals( "term " + i, i0.hasNext(), i1.hasNext() );
    }
    reader0.close();
    reader1.close();
  }

View Full Code Here

        assertEquals( index[ i ].toString(), postings, index[ i ].numberOfPostings );
        assertEquals( occurrences, index[ i ].numberOfOccurrences );
        IndexReader indexReader = index[ i ].getReader();
        for ( MutableString term : new ObjectRBTreeSet<MutableString>( termMap[ i ].keySet() ).toArray( new MutableString[ termMap[ i ].size() ] ) ) {
          String msg = index[ i ] + ":" + term;
          IndexIterator indexIterator = indexReader.documents( term );
          ObjectArrayList<int[]> list = termMap[ i ].get( term );
          int k = 0;
          while ( indexIterator.hasNext() ) {
            assertEquals( msg, list.get( k )[ 0 ], indexIterator.nextDocument() ); // Document
                                                // pointer
            if ( index[ i ].hasCounts ) assertEquals( msg, list.get( k ).length - 1, indexIterator.count() ); // Count
            if ( index[ i ].hasPositions ) {
              final int[] position = indexIterator.positionArray();
              for ( int p = 0; p < indexIterator.count(); p++ )
                assertEquals( msg, list.get( k )[ p + 1 ], position[ p ] ); // Positions
            }
            k++;
          }
          assertEquals( k, list.size() ); // This implicitly checks the frequency
        }
        indexReader.close();
        break;
      case INT:
      case DATE:
        assertEquals( index[ i ].toString(), payloadPointers[ i ].size(), index[ i ].numberOfPostings );
        assertEquals( index[ i ].toString(), documentIndex != 0 ? 1 : 0, index[ i ].numberOfTerms );
        assertEquals( index[ i ].toString(), -1, index[ i ].numberOfOccurrences );
        if ( documentIndex != 0 ) {
          IndexIterator indexIterator = index[ i ].documents( 0 );
          int k = 0;
          while ( indexIterator.hasNext() ) {
            assertEquals( payloadPointers[ i ].getInt( k ), indexIterator.nextDocument() );
            if ( factory.fieldType( field[ i ] ) == FieldType.INT ) assertEquals( ( (Number)payloadContent[ i ].get( k ) ).longValue(), ( (Number)indexIterator.payload().get() )
                .longValue() );
            else assertEquals( payloadContent[ i ].get( k ), indexIterator.payload().get() );
            k++;
          }
          indexIterator.dispose();
          assertEquals( k, payloadContent[ i ].size() );
        }
      }
    }
  }

View Full Code Here

    documentIterator.dispose();
  }
  


  public void test() throws IOException {
    IndexIterator i0 = new IntArrayIndexIterator( new int[] { 0, 1, 2 }, 
        new int[][] { 
        { 0, 3 }, 
        { 0 }, 
        { 0 }, 
        } );
    IndexIterator i1 = new IntArrayIndexIterator( new int[] { 0, 2 }, 
        new int[][] { 
        { 1 },
        { 1 },
        } );
    IndexIterator i2 = new IntArrayIndexIterator( new int[] { 0, 1, 3 }, 
        new int[][] { 
        { 2 },
        { 2 },
        { 0 },
        } );

View Full Code Here


  public void testEverywhereTerms() throws IOException, ConfigurationException, SecurityException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
    
    String basename = File.createTempFile( getClass().getSimpleName(), "everywhereTerms" ).getCanonicalPath();
    new IndexBuilder( basename, new StringArrayDocumentCollection( "a a" ) ).keepBatches( true ).run();
    IndexIterator indexIterator = Index.getInstance( basename + "-text@0" ).documents( 0 );
    indexIterator.next();
    assertEquals( 2, indexIterator.count() );
    int[] position = new int[ 2 ];
    indexIterator.positions( position );
    assertEquals( 0, position[ 0 ] );
    assertEquals( 1, position[ 1 ] );
  }

View Full Code Here

  public void testSkipToEndOfList() throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
    String basename = File.createTempFile( getClass().getSimpleName(), "test" ).getCanonicalPath();
    new IndexBuilder( basename, new StringArrayDocumentCollection( new String[] { "a", "a", "c" } ) ).run();


    Index index = DiskBasedIndex.getInstance( basename + "-text", true, true );
    IndexIterator indexIterator = index.documents( "a" );
    assertEquals( Integer.MAX_VALUE, indexIterator.skipTo( Integer.MAX_VALUE ) );
    indexIterator.dispose();


    indexIterator = index.documents( "a" );
    assertEquals( 0, indexIterator.skipTo( 0 ) );
    assertEquals( 1, indexIterator.skipTo( 1 ) );
    assertEquals( Integer.MAX_VALUE, indexIterator.skipTo( 2 ) );
    indexIterator.dispose();
}

View Full Code Here

0 1

TOP

Related Classes of it.unimi.dsi.mg4j.index.IndexIterator

it.unimi.dsi.mg4j.index.cluster.DocumentalCluster

it.unimi.dsi.mg4j.index.cluster.DocumentalClusterIndexReader

it.unimi.dsi.mg4j.index.cluster.LexicalStrategies

it.unimi.dsi.mg4j.index.MultiTermIndexIteratorTest

it.unimi.dsi.mg4j.search.AbstractIntersectionDocumentIterator

it.unimi.dsi.mg4j.search.BitStreamIndexReaderTest

it.unimi.dsi.mg4j.search.DocumentIteratorBuilderVisitor

it.unimi.dsi.mg4j.test.RemoteIndexIteratorTest

it.unimi.dsi.mg4j.test.Verifier

it.unimi.dsi.mg4j.tool.Concatenate

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.