// Source code of it.unimi.dsi.mg4j.index.cluster.DocumentalMergedClusterDocumentIterator

package it.unimi.dsi.mg4j.index.cluster;


/*     
 * MG4J: Managing Gigabytes for Java
 *
 * Copyright (C) 2006-2010 Sebastiano Vigna 
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */


import it.unimi.dsi.fastutil.ints.IntHeapSemiIndirectPriorityQueue;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.ReferenceArraySet;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.search.AbstractDocumentIterator;
import it.unimi.dsi.mg4j.search.DocumentIterator;
import it.unimi.dsi.mg4j.search.IntervalIterator;
import it.unimi.dsi.mg4j.search.IntervalIterators;
import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;


import java.io.IOException;


/** A document iterator merging iterators from local indices.
 * 
 * @author Sebastiano Vigna
 */


public class DocumentalMergedClusterDocumentIterator extends AbstractDocumentIterator implements DocumentIterator {
  /** The component document iterators. */
  final protected DocumentIterator[] documentIterator;
  /** The number of component iterators. */
  final protected int n;
  /** The indices corresponding to each underlying document iterator. */
  protected final int[] usedIndex;
  /** The cached strategy of the index we refer to. */
  protected final DocumentalClusteringStrategy strategy;
  /** The queue of document iterator indices (offsets into {@link #documentIterator} and {@link #usedIndex}). */
  protected final IntHeapSemiIndirectPriorityQueue queue;
  /** The reference array for the queue (containing <em>global</em> document pointers). */
  protected final int[] globalDocumentPointer;
  /** The set of indices involved in this iterator. */
  protected final ReferenceSet<Index> indices = new ReferenceArraySet<Index>();


  /** The underlying index reader. */
  private final DocumentalClusterIndexReader indexReader;


  /** The current iterator. */
  protected int currentIterator = -1;
  /** Whether there are no more documents to be returned. */
  protected boolean exhausted;
  
  /** Creates a new document iterator for a documental cluster.
   * 
   * <p>This constructor uses an array of document iterators that it is not required to be full.
   * This is very useful with rare terms.
   * 
   * @param indexReader the underlying index reader.
   * @param documentIterator an array of document iterators.
   * @param usedIndex an array parallel to <code>documentIterator</code> containing the ordinal numbers
   * of the indices corresponding to the iterators.
   */
  
  public DocumentalMergedClusterDocumentIterator( final DocumentalClusterIndexReader indexReader, final DocumentIterator[] documentIterator, int[] usedIndex ) throws IOException {
    this.documentIterator = documentIterator;
    this.n = documentIterator.length;
    this.indexReader = indexReader;
    this.usedIndex = usedIndex;
    
    strategy = indexReader.index.strategy;
    globalDocumentPointer = new int[ n ];
    queue = new IntHeapSemiIndirectPriorityQueue( globalDocumentPointer, n );
    
    int result;
    for( int i = n; i-- != 0; ) {
      if ( ( result = documentIterator[ i ].nextDocument() ) != -1 ) {
        indices.addAll( documentIterator[ i ].indices() );
        globalDocumentPointer[ i ] = strategy.globalPointer( usedIndex[ i ], result );
        queue.enqueue( i );
      }
    }
    
    if ( queue.isEmpty() ) exhausted = true;
    else {
      currentIterator = queue.first();
      next = globalDocumentPointer[ currentIterator ];
    }
  }


  public IntervalIterator intervalIterator() throws IOException {
    if ( last == -1 ) throw new IllegalStateException();
    return documentIterator[ currentIterator ].intervalIterator();
  }
  
  public IntervalIterator intervalIterator( Index index ) throws IOException {
    if ( last == -1 ) throw new IllegalStateException();
    if ( ! indices.contains( index ) ) return IntervalIterators.TRUE;
    return documentIterator[ currentIterator ].intervalIterator( index );
  }


  public Reference2ReferenceMap<Index,IntervalIterator> intervalIterators() throws IOException {
    if ( last == -1 ) throw new IllegalStateException();
    return documentIterator[ currentIterator ].intervalIterators();
  }


  public ReferenceSet<Index> indices() {
    return indices;
  }


  // TODO: this needs tests
  public int skipTo( final int p ) throws IOException {
    int i, d;


    if ( p <= last ) return last;
    
    //System.err.println( "Advancing to " + n  + " doc: " + Arrays.toString( doc ) + " first: " + queue.first() );
    next = -1;
    while( ! queue.isEmpty() && globalDocumentPointer[ i = queue.first() ] < p ) {
      d = documentIterator[ i ].skipTo( strategy.localPointer( p ) );
      if ( d == Integer.MAX_VALUE ) queue.dequeue();
      else {
        globalDocumentPointer[ i ] = strategy.globalPointer( usedIndex[ i ], d );
        if ( globalDocumentPointer[ i ] < p ) queue.dequeue(); // This covers the case of getting to the end of list without finding p 
        else queue.changed();
      }
    }
    
    if ( queue.isEmpty() ) {
      exhausted = true;
      last = -1;
      return Integer.MAX_VALUE;
    }
    
    return last = globalDocumentPointer[ currentIterator = queue.first() ];
  }


  public int nextDocument() throws IOException {
    if ( next >= 0 ) {
      last = next;
      next = -1;
      return last;
    }
    
    if ( exhausted ) return last = -1;
    
    final int result;
    if ( ( result = documentIterator[ currentIterator ].nextDocument() ) != -1 ) {
      globalDocumentPointer[ currentIterator ] = strategy.globalPointer( usedIndex[ currentIterator ], result );
      queue.changed();
    }
    else queue.dequeue();


    if ( queue.isEmpty() ) {
      exhausted = true;
      return last = -1;
    }


    currentIterator = queue.first();
    return last = globalDocumentPointer[ currentIterator ];
  }
  
  public <T> T accept( DocumentIteratorVisitor<T> visitor ) throws IOException {
    return documentIterator[ currentIterator ].accept( visitor );
  }


  public <T> T acceptOnTruePaths( DocumentIteratorVisitor<T> visitor ) throws IOException {
    return documentIterator[ currentIterator ].acceptOnTruePaths( visitor );
  }


  public void dispose() throws IOException {      
    indexReader.close();
  }
}
// Source code of it.unimi.dsi.mg4j.index.cluster.DocumentalMergedClusterDocumentIterator
//
// Related classes of it.unimi.dsi.mg4j.index.cluster.DocumentalMergedClusterDocumentIterator