Package it.unimi.dsi.mg4j.index.cluster

Source Code of it.unimi.dsi.mg4j.index.cluster.DocumentalMergedClusterDocumentIterator

package it.unimi.dsi.mg4j.index.cluster;

/*    
* MG4J: Managing Gigabytes for Java
*
* Copyright (C) 2006-2010 Sebastiano Vigna
*
*  This library is free software; you can redistribute it and/or modify it
*  under the terms of the GNU Lesser General Public License as published by the Free
*  Software Foundation; either version 3 of the License, or (at your option)
*  any later version.
*
*  This library is distributed in the hope that it will be useful, but
*  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
*  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
*  for more details.
*
*  You should have received a copy of the GNU Lesser General Public License
*  along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/

import it.unimi.dsi.fastutil.ints.IntHeapSemiIndirectPriorityQueue;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.ReferenceArraySet;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.search.AbstractDocumentIterator;
import it.unimi.dsi.mg4j.search.DocumentIterator;
import it.unimi.dsi.mg4j.search.IntervalIterator;
import it.unimi.dsi.mg4j.search.IntervalIterators;
import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;

import java.io.IOException;

/** A document iterator merging iterators from local indices.
*
* @author Sebastiano Vigna
*/

public class DocumentalMergedClusterDocumentIterator extends AbstractDocumentIterator implements DocumentIterator {
  /** The component document iterators. */
  final protected DocumentIterator[] documentIterator;
  /** The number of component iterators. */
  final protected int n;
  /** The indices corresponding to each underlying document iterator. */
  protected final int[] usedIndex;
  /** The cached strategy of the index we refer to. */
  protected final DocumentalClusteringStrategy strategy;
  /** The queue of document iterator indices (offsets into {@link #documentIterator} and {@link #usedIndex}). */
  protected final IntHeapSemiIndirectPriorityQueue queue;
  /** The reference array for the queue (containing <em>global</em> document pointers). */
  protected final int[] globalDocumentPointer;
  /** The set of indices involved in this iterator. */
  protected final ReferenceSet<Index> indices = new ReferenceArraySet<Index>();

  /** The underlying index reader. */
  private final DocumentalClusterIndexReader indexReader;

  /** The current iterator. */
  protected int currentIterator = -1;
  /** Whether there are no more documents to be returned. */
  protected boolean exhausted;
 
  /** Creates a new document iterator for a documental cluster.
   *
   * <p>This constructor uses an array of document iterators that it is not required to be full.
   * This is very useful with rare terms.
   *
   * @param indexReader the underlying index reader.
   * @param documentIterator an array of document iterators.
   * @param usedIndex an array parallel to <code>documentIterator</code> containing the ordinal numbers
   * of the indices corresponding to the iterators.
   */
 
  public DocumentalMergedClusterDocumentIterator( final DocumentalClusterIndexReader indexReader, final DocumentIterator[] documentIterator, int[] usedIndex ) throws IOException {
    this.documentIterator = documentIterator;
    this.n = documentIterator.length;
    this.indexReader = indexReader;
    this.usedIndex = usedIndex;
   
    strategy = indexReader.index.strategy;
    globalDocumentPointer = new int[ n ];
    queue = new IntHeapSemiIndirectPriorityQueue( globalDocumentPointer, n );
   
    int result;
    for( int i = n; i-- != 0; ) {
      if ( ( result = documentIterator[ i ].nextDocument() ) != -1 ) {
        indices.addAll( documentIterator[ i ].indices() );
        globalDocumentPointer[ i ] = strategy.globalPointer( usedIndex[ i ], result );
        queue.enqueue( i );
      }
    }
   
    if ( queue.isEmpty() ) exhausted = true;
    else {
      currentIterator = queue.first();
      next = globalDocumentPointer[ currentIterator ];
    }
  }

  public IntervalIterator intervalIterator() throws IOException {
    if ( last == -1 ) throw new IllegalStateException();
    return documentIterator[ currentIterator ].intervalIterator();
  }
 
  public IntervalIterator intervalIterator( Index index ) throws IOException {
    if ( last == -1 ) throw new IllegalStateException();
    if ( ! indices.contains( index ) ) return IntervalIterators.TRUE;
    return documentIterator[ currentIterator ].intervalIterator( index );
  }

  public Reference2ReferenceMap<Index,IntervalIterator> intervalIterators() throws IOException {
    if ( last == -1 ) throw new IllegalStateException();
    return documentIterator[ currentIterator ].intervalIterators();
  }

  public ReferenceSet<Index> indices() {
    return indices;
  }

  // TODO: this needs tests
  public int skipTo( final int p ) throws IOException {
    int i, d;

    if ( p <= last ) return last;
   
    //System.err.println( "Advancing to " + n  + " doc: " + Arrays.toString( doc ) + " first: " + queue.first() );
    next = -1;
    while( ! queue.isEmpty() && globalDocumentPointer[ i = queue.first() ] < p ) {
      d = documentIterator[ i ].skipTo( strategy.localPointer( p ) );
      if ( d == Integer.MAX_VALUE ) queue.dequeue();
      else {
        globalDocumentPointer[ i ] = strategy.globalPointer( usedIndex[ i ], d );
        if ( globalDocumentPointer[ i ] < p ) queue.dequeue(); // This covers the case of getting to the end of list without finding p
        else queue.changed();
      }
    }
   
    if ( queue.isEmpty() ) {
      exhausted = true;
      last = -1;
      return Integer.MAX_VALUE;
    }
   
    return last = globalDocumentPointer[ currentIterator = queue.first() ];
  }

  public int nextDocument() throws IOException {
    if ( next >= 0 ) {
      last = next;
      next = -1;
      return last;
    }
   
    if ( exhausted ) return last = -1;
   
    final int result;
    if ( ( result = documentIterator[ currentIterator ].nextDocument() ) != -1 ) {
      globalDocumentPointer[ currentIterator ] = strategy.globalPointer( usedIndex[ currentIterator ], result );
      queue.changed();
    }
    else queue.dequeue();

    if ( queue.isEmpty() ) {
      exhausted = true;
      return last = -1;
    }

    currentIterator = queue.first();
    return last = globalDocumentPointer[ currentIterator ];
  }
 
  public <T> T accept( DocumentIteratorVisitor<T> visitor ) throws IOException {
    return documentIterator[ currentIterator ].accept( visitor );
  }

  public <T> T acceptOnTruePaths( DocumentIteratorVisitor<T> visitor ) throws IOException {
    return documentIterator[ currentIterator ].acceptOnTruePaths( visitor );
  }

  public void dispose() throws IOException {     
    indexReader.close();
  }
}
TOP

Related Classes of it.unimi.dsi.mg4j.index.cluster.DocumentalMergedClusterDocumentIterator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.