Package it.unimi.dsi.mg4j.index

Examples of it.unimi.dsi.mg4j.index.IndexIterator
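The snippets below exercise IndexIterator from several angles: concatenating and merging posting lists through an IndexWriter, partitioning an index, unit-testing two indices against each other, and wrapping per-cluster iterators. As a common reference point, here is a minimal sketch of the basic read loop they all build on: get an IndexReader from an Index, ask it for the posting list of a term, and walk documents, counts and positions. The class and its dumpPostings helper are illustrative assumptions, not taken from the snippets, and they presume the index was built with counts and positions; the calls they use all appear in the examples below.

import java.io.IOException;

import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.index.IndexIterator;
import it.unimi.dsi.mg4j.index.IndexReader;

public class IndexIteratorSketch {
  /** Prints every posting of <code>term</code>: document pointer, count and positions.
   * A sketch only; it assumes the index stores counts and positions. */
  public static void dumpPostings( final Index index, final CharSequence term ) throws IOException {
    final IndexReader indexReader = index.getReader();
    try {
      final IndexIterator indexIterator = indexReader.documents( term );
      System.out.println( term + " occurs in " + indexIterator.frequency() + " documents" );
      while ( indexIterator.hasNext() ) {
        final int document = indexIterator.nextDocument();    // next document containing the term
        final int count = indexIterator.count();              // occurrences within that document
        final int[] position = indexIterator.positionArray(); // only the first count entries are valid
        System.out.print( document + " (" + count + "):" );
        for ( int j = 0; j < count; j++ ) System.out.print( " " + position[ j ] );
        System.out.println();
      }
    }
    finally {
      indexReader.close();
    }
  }
}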


  }
 
  public DocumentIterator visitPost( final MultiTerm node, final DocumentIterator subNode[] ) throws QueryBuilderVisitorException {
    final IndexIterator[] indexIterator = new IndexIterator[ subNode.length ];
    System.arraycopy( subNode, 0, indexIterator, 0, indexIterator.length );
    IndexIterator result;
    try {
      result = MultiTermIndexIterator.getInstance( curr.top(), indexIterator ).weight( weights.popDouble() );
    }
    catch ( IOException e ) {
      throw new QueryBuilderVisitorException( e );
    }
    result.term( node.toString() );
    return result;
  }
View Full Code Here


    if ( ! metadataOnly ) {
      int currIndex, numPrevDocs = 0, currDoc, count;
      OutputBitStream obs;
      Index i;
      IndexIterator ii;

      if ( p != 0 ) variableQuantumIndexWriter.newInvertedList(totalFrequency, p, predictedSize, predictedLengthNumBits );
      else indexWriter.newInvertedList();
      
      indexWriter.writeFrequency( totalFrequency );

      for( int k = currIndex = 0; k < numUsedIndices; k++ ) { // We can just concatenate posting lists.

        // We must update the number of previously seen documents, possibly adding those in skipped indices.
        while( currIndex < usedIndex[ k ] ) numPrevDocs += index[ currIndex++ ].numberOfDocuments;

        i = index[ currIndex ];
        ii = indexIterator[ currIndex ];

        for( int j = frequency[ currIndex ]; j-- != 0; ) {
          obs = indexWriter.newDocumentRecord();
          currDoc = ii.nextDocument() + numPrevDocs;
          indexWriter.writeDocumentPointer( obs, currDoc );

          if ( i.hasPayloads ) indexWriter.writePayload( obs, ii.payload() );

          if ( i.hasCounts ) {
            count = ii.count();
            if ( hasCounts ) indexWriter.writePositionCount( obs, count );
            if ( hasPositions ) indexWriter.writeDocumentPositions( obs, ii.positionArray(), 0, count, size != null ? size[ currDoc ] : -1 );
          }   
        }
      }
    }
   
View Full Code Here

      indexWriter.writeFrequency( totalFrequency );

      int currDoc = -1, count;
      OutputBitStream obs;
      Index i;
      IndexIterator ir;

      while( ! documentQueue.isEmpty() ) {
        // We extract the smallest document pointer, and enqueue it in the new index.
        if ( currDoc == doc[ currIndex = documentQueue.first() ] ) throw new IllegalStateException( "The indices to be merged contain document " + currDoc + " at least twice (once in index " + inputBasename[ lastIndex ] + " and once in index " + inputBasename[ currIndex ] + ")" );
        currDoc = doc[ currIndex ];

        obs = indexWriter.newDocumentRecord();
        indexWriter.writeDocumentPointer( obs, currDoc );
        i = index[ currIndex ];
        ir = indexIterator[ currIndex ];

        if ( i.hasPayloads ) indexWriter.writePayload( obs, ir.payload() );

        if ( i.hasCounts ) {
          count = ir.count();
          if ( hasCounts ) indexWriter.writePositionCount( obs, count );
          if ( hasPositions ) indexWriter.writeDocumentPositions( obs, ir.positionArray(), 0, count, size == null ? -1 : size[ currDoc ] );
        }

        // If we just wrote the last document pointer of this term in index j, we dequeue it.
        if ( --frequency[ currIndex ] == 0 ) documentQueue.dequeue();
        else {
          doc[ currIndex ] = ir.nextDocument();
          documentQueue.changed();
        }
        lastIndex = currIndex;
      }
    }
View Full Code Here

       
        if ( p0 != null && p1 != null ) return 0;
        if ( p0 != null ) return -1;
        if ( p1 != null ) return 1;
       
        final IndexIterator i0 = d0 instanceof IndexIterator ? (IndexIterator)d0 : null;
        final IndexIterator i1 = d1 instanceof IndexIterator ? (IndexIterator)d1 : null;
        if ( i0 == null && i1 == null ) return 0;
        if ( ( i0 != null ) != ( i1 != null ) ) return ( i0 != null ) ? 1 : -1;
        try {
          return i1.frequency() - i0.frequency();
        }
        catch ( IOException e ) {
          throw new RuntimeException( e );
        }
      }
View Full Code Here

     * the maximum over all indices. */
    int currIndex, prevDoc = -1, currDoc, count;
    int temp[];
    OutputBitStream obs;
    Index i;
    IndexIterator ii;
 
    // Note that the total frequency can be computed only during the merge.
    for( int k = numUsedIndices; k-- != 0; ) {
      currIndex = usedIndex[ k ];
      frequency[ currIndex ] = indexIterator[ currIndex ].frequency();
      doc[ currIndex ] = indexIterator[ currIndex ].nextDocument();
      documentQueue.enqueue( currIndex );
    }
   
    // First phase: we write the inverted list using a quick-and-dirty format in the cache.
    cacheBitStreamOut.position( 0 );
    int  totalFrequency = 0, increment, prevIndex, totalCount;
   
    while( ! documentQueue.isEmpty() ) {
      // We extract the smallest document pointer, and enqueue it in the new index.
      currDoc = doc[ currIndex = documentQueue.firstInt() ];
      totalFrequency++;
      if ( ! metadataOnly ) cacheBitStreamOut.writeDelta( currDoc - prevDoc - 1 );
     
      totalCount = prevIndex = increment = 0;
     
      do {
        if ( incremental )
          while( prevIndex < currIndex ) {
            /* Note that some virtual documents could not exist at all in some index (in which
             * case we extend the size list with zeroes). */
            if ( sizesSize[ prevIndex ] > currDoc ) increment += index[ prevIndex ].sizes.getInt( currDoc );
            prevIndex++;
          }
        i = index[ currIndex ];
        ii = indexIterator[ currIndex ];
     
        if ( ! metadataOnly && i.hasCounts ) {
          count = ii.count();
          if ( i.hasPositions ) {
            temp = ii.positionArray();
            if ( ! incremental && totalCount > 0 && temp[ 0 ] <= position[ totalCount - 1 ] ) throw new IllegalStateException( "Positions in document " + currDoc + " are not increasing; you probably need to require an incremental pasting" );
            for( int k = count; k-- != 0; ) position[ totalCount + k ] = temp[ k ] + increment;
          }
          totalCount += count;
        }
       
        // If we just wrote the last document pointer of this term in index j, we dequeue it.
        if ( --frequency[ currIndex ] == 0 ) documentQueue.dequeue();
        else {
          doc[ currIndex ] = ii.nextDocument();
          documentQueue.changed();
        }
      } while( ! documentQueue.isEmpty() && doc[ currIndex = documentQueue.firstInt() ] == currDoc );
 
      if ( totalCount > maxCount ) maxCount = totalCount;
View Full Code Here

      int left = 0; // The left extreme of the current block
      long count = 0; // Number of documents/occurrences in the current block

      final IndexReader indexReader = index.getReader();
      long blockSize = total / blockSizeDivisor++; // The approximate size of a block
      IndexIterator indexIterator;
     
      for ( int i = k = 0; i < terms; i++ ) {
        indexIterator = indexReader.nextIterator();
        frequency = indexIterator.frequency();
        if ( ! index.hasPositions ) count += frequency;
        for ( int j = frequency; j-- != 0; ) {
          indexIterator.nextDocument();
          if ( index.hasPositions ) count += indexIterator.count();
        }
       
        if ( i == terms - 1 ) i++; // To fool the next check.
        if ( count >= blockSize && k < numberOfLocalIndices - 1 || i == terms ) {
          LOGGER.info( "New term interval [" + left + ".." + i + "] (" + termMap.list().get( left ) + " -> " + ( i == terms ? "" : termMap.list().get( i ) ) + ")" );
View Full Code Here

  private static boolean textTerm = false;

  public static void testIndexIterator() throws IOException {
    IndexReader firstIndexReader = firstIndex.getReader();
    IndexReader secondIndexReader = secondIndex.getReader(1000);
    IndexIterator firstIterator = null;
    IndexIterator secondIterator = null;
   
    for(int i = 0;i<firstIndex.numberOfTerms;i++){
      try{
        System.out.println("term: " + i);
        firstIterator = firstIndexReader.documents(i);       
        secondIterator = !textTerm?secondIndexReader.documents(i):secondIndexReader.documents(firstIndex.termMap.list().get(i));       
       
        /** Compare hasNext*/
        Assert.assertEquals(firstIterator.hasNext(), secondIterator.hasNext());
       
        /** Compare frequency*/
        Assert.assertEquals(firstIterator.frequency(), secondIterator.frequency());               
       
        /** Compare positions & count*/     
        while(firstIterator.hasNext()){
          int fr = firstIterator.nextDocument();
          int sr = secondIterator.nextDocument();   
          Assert.assertEquals(fr,sr);               
          /** Compare count*/
          Assert.assertEquals(firstIterator.count(), secondIterator.count());
          int[] firstPos = new int[1000];
          int[] secondPos = new int[1000];
          int fRet = firstIterator.positions(firstPos);
          int sRet = secondIterator.positions(secondPos);
          System.out.println(fRet + "  " + sRet);
          Assert.assertTrue(fRet == sRet);
         
          for(int j = 0;j<fRet;j++)
            Assert.assertEquals(firstPos[j],secondPos[j]);       
        }
       
        /** Compare positions via int[] positionArray() */
        firstIterator = firstIndexReader.documents(i);
        secondIterator = !textTerm?secondIndexReader.documents(i):secondIndexReader.documents(firstIndex.termMap.list().get(i));
        while(firstIterator.hasNext()){
          firstIterator.next();
          secondIterator.next();
          int[] firstPos = firstIterator.positionArray();
          int[] secondPos = secondIterator.positionArray();
          Assert.assertTrue(firstPos.length == secondPos.length);
          for(int j = 0;j<firstPos.length;j++)
            Assert.assertTrue(firstPos[j] == secondPos[j]);
        }

        /** Compare IntIterator from positions() method */
        firstIterator = firstIndexReader.documents(i);
        secondIterator = !textTerm?secondIndexReader.documents(i):secondIndexReader.documents(firstIndex.termMap.list().get(i));       
        while(firstIterator.hasNext()){
          firstIterator.next();
          secondIterator.next();
 
          IntIterator firstIntIt = firstIterator.positions();
          IntIterator secondIntIt = secondIterator.positions();       
          while(firstIntIt.hasNext()){                 
            Assert.assertEquals(firstIntIt.nextInt(),secondIntIt.nextInt());
          }       
          Assert.assertEquals(firstIntIt.skip(2),secondIntIt.skip(2));       
          if(firstIntIt.hasNext()){
            Assert.assertEquals(firstIntIt.nextInt(),secondIntIt.nextInt());
          }       
          Assert.assertEquals(firstIntIt.skip(9999999),secondIntIt.skip(9999999));       
          if(firstIntIt.hasNext()){       
            Assert.assertEquals(firstIntIt.nextInt(),secondIntIt.nextInt());
          }
         
        }
       
        /** Compare IntervalIterator from Interval() method */
        firstIterator = firstIndexReader.documents(i);
        secondIterator = !textTerm?secondIndexReader.documents(i):secondIndexReader.documents(firstIndex.termMap.list().get(i));       
        while(firstIterator.hasNext()){
          firstIterator.next();
          secondIterator.next();
          /** Compare interval iterators */
          IntervalIterator firstIntervalIt = firstIterator.intervalIterator(firstIndex);
          IntervalIterator secondIntervalIt = secondIterator.intervalIterator(secondIndex);
          while(firstIntervalIt.hasNext()){             
            Interval firstIntv = firstIntervalIt.nextInterval();
            Interval secondIntv = secondIntervalIt.nextInterval();
           
            System.out.println("left:" +  firstIntv.left + "   " + "right:" + firstIntv.right);
View Full Code Here

  @Override
  public IndexIterator documents( final CharSequence prefix, final int limit ) throws IOException, TooManyTermsException {
    final ArrayList<DocumentIterator> iterators = new ArrayList<DocumentIterator>( localIndex.length );
    final IntArrayList usedIndices = new IntArrayList();

    IndexIterator documentIterator;
    for ( int i = 0; i < localIndex.length; i++ ) {
      // TODO: check for limit globally
      documentIterator = localIndex[ i ].documents( prefix, limit );
      if ( documentIterator.hasNext() ) {
        iterators.add( documentIterator );
        usedIndices.add( i );
      }
    }
    // TODO: test that this clustered multiterm does work
    final IndexIterator result = concatenated ?
        new DocumentalConcatenatedClusterIndexIterator( (DocumentalClusterIndexReader)getReader(), iterators.toArray( IndexIterators.EMPTY_ARRAY ), usedIndices.toIntArray() ) :
          new DocumentalMergedClusterIndexIterator( (DocumentalClusterIndexReader)getReader(), iterators.toArray( IndexIterators.EMPTY_ARRAY ), usedIndices.toIntArray() );
    result.term( prefix );
    return result;
   
  }
View Full Code Here

    if ( ! index.flat ) throw new UnsupportedOperationException( "Only flat documental clusters allow access by term number" );

    final IndexIterator[] iterator = new IndexIterator[ indexReader.length ];
    for ( int i = 0; i < indexReader.length; i++ ) iterator[ i ] = indexReader[ i ].documents( term );

    final IndexIterator indexIterator =
      index.concatenated ?
          new DocumentalConcatenatedClusterIndexIterator( this, iterator, index.allIndices ) :
            new DocumentalMergedClusterIndexIterator( this, iterator, index.allIndices ) ;
         
    return indexIterator;
View Full Code Here

  public IndexIterator documents( final CharSequence term ) throws IOException {
    final ArrayList<IndexIterator> iterators = new ArrayList<IndexIterator>( indexReader.length );
    final IntArrayList usedIndices = new IntArrayList();
    for ( int i = 0; i < indexReader.length; i++ ) {
      if ( index.termFilter == null || index.termFilter[ i ].contains( term ) ) {
        IndexIterator it = indexReader[ i ].documents( term );
        if ( it.hasNext() ) {
          iterators.add( it );
          usedIndices.add( i );
        }
      }
    }

    if ( DEBUG ) LOGGER.debug( "Indices used for " + term + ": " + usedIndices );

    if ( iterators.isEmpty() ) return index.getEmptyIndexIterator( term );
    final IndexIterator indexIterator =
      index.concatenated ?
          new DocumentalConcatenatedClusterIndexIterator( this, iterators.toArray( IndexIterators.EMPTY_ARRAY ), usedIndices.toIntArray() ) :
            new DocumentalMergedClusterIndexIterator( this, iterators.toArray( IndexIterators.EMPTY_ARRAY ), usedIndices.toIntArray() ) ;
         
    indexIterator.term( term );
    return indexIterator;
  }
View Full Code Here
