package it.unimi.dsi.mg4j.index.cluster;
/*
* MG4J: Managing Gigabytes for Java
*
* Copyright (C) 2006-2010 Sebastiano Vigna
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/
import it.unimi.dsi.fastutil.ints.IntHeapSemiIndirectPriorityQueue;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.ReferenceArraySet;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.search.AbstractDocumentIterator;
import it.unimi.dsi.mg4j.search.DocumentIterator;
import it.unimi.dsi.mg4j.search.IntervalIterator;
import it.unimi.dsi.mg4j.search.IntervalIterators;
import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;
import java.io.IOException;
/**
 * Merges the document iterators returned by the local indices of a documental cluster
 * into a single stream of <em>global</em> document pointers.
 *
 * <p>Each component iterator yields local pointers; they are translated into global
 * pointers through the cluster strategy and kept in an indirect priority queue, so that
 * the component positioned on the smallest global pointer is always at the front.
 *
 * @author Sebastiano Vigna
 */
public class DocumentalMergedClusterDocumentIterator extends AbstractDocumentIterator implements DocumentIterator {
    /** The component document iterators. */
    protected final DocumentIterator[] documentIterator;
    /** The number of component iterators. */
    protected final int n;
    /** For each component iterator, the ordinal number of the local index it comes from. */
    protected final int[] usedIndex;
    /** The cached strategy of the index we refer to. */
    protected final DocumentalClusteringStrategy strategy;
    /** The queue of component positions (offsets into {@link #documentIterator} and {@link #usedIndex}). */
    protected final IntHeapSemiIndirectPriorityQueue queue;
    /** The reference array for the queue (containing <em>global</em> document pointers). */
    protected final int[] globalDocumentPointer;
    /** The set of indices involved in this iterator. */
    protected final ReferenceSet<Index> indices = new ReferenceArraySet<Index>();
    /** The underlying index reader. */
    private final DocumentalClusterIndexReader indexReader;
    /** The position of the component iterator currently at the front of the queue, or -1 if there is none yet. */
    protected int currentIterator = -1;
    /** Whether there are no more documents to be returned. */
    protected boolean exhausted;

    /**
     * Creates a new document iterator for a documental cluster.
     *
     * <p>The array of component iterators is not required to contain one iterator per
     * local index: <code>usedIndex</code> tells which local index each component belongs to.
     * This is very useful with rare terms.
     *
     * <p>Every component is advanced once; those that return a document are enqueued,
     * and the smallest resulting global pointer is stored as the pending next document.
     *
     * @param indexReader the underlying index reader.
     * @param documentIterator an array of document iterators.
     * @param usedIndex an array parallel to <code>documentIterator</code> containing the ordinal numbers
     * of the indices corresponding to the iterators.
     * @throws IOException if a component iterator fails while being advanced.
     */
    public DocumentalMergedClusterDocumentIterator( final DocumentalClusterIndexReader indexReader, final DocumentIterator[] documentIterator, int[] usedIndex ) throws IOException {
        this.documentIterator = documentIterator;
        this.n = documentIterator.length;
        this.indexReader = indexReader;
        this.usedIndex = usedIndex;
        this.strategy = indexReader.index.strategy;
        this.globalDocumentPointer = new int[ n ];
        this.queue = new IntHeapSemiIndirectPriorityQueue( globalDocumentPointer, n );

        // Components are visited from the last to the first.
        for ( int slot = n - 1; slot >= 0; slot-- ) {
            final DocumentIterator component = documentIterator[ slot ];
            final int localPointer = component.nextDocument();
            if ( localPointer == -1 ) continue; // Empty component: it never enters the queue.
            indices.addAll( component.indices() );
            globalDocumentPointer[ slot ] = toGlobalPointer( slot, localPointer );
            queue.enqueue( slot );
        }

        if ( queue.isEmpty() ) {
            exhausted = true;
            return;
        }
        currentIterator = queue.first();
        // The first document has already been fetched: park it until nextDocument() is called.
        next = globalDocumentPointer[ currentIterator ];
    }

    /** Translates a local pointer of the component in the given position into a global pointer. */
    private int toGlobalPointer( final int slot, final int localPointer ) {
        return strategy.globalPointer( usedIndex[ slot ], localPointer );
    }

    /** Throws an {@link IllegalStateException} if this iterator is not positioned on a document. */
    private void ensureOnDocument() {
        if ( last == -1 ) throw new IllegalStateException();
    }

    /**
     * Returns the interval iterator of the current component iterator.
     *
     * @throws IllegalStateException if this iterator is not positioned on a document.
     */
    public IntervalIterator intervalIterator() throws IOException {
        ensureOnDocument();
        final DocumentIterator current = documentIterator[ currentIterator ];
        return current.intervalIterator();
    }

    /**
     * Returns the interval iterator of the current component iterator for the given index,
     * or {@link IntervalIterators#TRUE} if the index is not among {@link #indices()}.
     *
     * @param index an index.
     * @throws IllegalStateException if this iterator is not positioned on a document.
     */
    public IntervalIterator intervalIterator( Index index ) throws IOException {
        ensureOnDocument();
        if ( indices.contains( index ) ) {
            final DocumentIterator current = documentIterator[ currentIterator ];
            return current.intervalIterator( index );
        }
        return IntervalIterators.TRUE;
    }

    /**
     * Returns the interval iterators of the current component iterator.
     *
     * @throws IllegalStateException if this iterator is not positioned on a document.
     */
    public Reference2ReferenceMap<Index,IntervalIterator> intervalIterators() throws IOException {
        ensureOnDocument();
        final DocumentIterator current = documentIterator[ currentIterator ];
        return current.intervalIterators();
    }

    /** Returns the set of indices collected from the component iterators enqueued at construction time. */
    public ReferenceSet<Index> indices() {
        return indices;
    }

    // TODO: this needs tests
    /**
     * Skips to the first document whose global pointer is at least <code>p</code>.
     *
     * @param p a global document pointer.
     * @return the last returned document if <code>p</code> does not exceed it; otherwise the
     * global pointer now at the front of the queue, or {@link Integer#MAX_VALUE} if no
     * component is left.
     */
    public int skipTo( final int p ) throws IOException {
        if ( p <= last ) return last;

        // Any document parked by the constructor is superseded by the skip.
        next = -1;

        while ( ! queue.isEmpty() ) {
            final int top = queue.first();
            if ( globalDocumentPointer[ top ] >= p ) break;

            final int localPointer = documentIterator[ top ].skipTo( strategy.localPointer( p ) );
            if ( localPointer != Integer.MAX_VALUE ) {
                final int globalPointer = toGlobalPointer( top, localPointer );
                globalDocumentPointer[ top ] = globalPointer;
                if ( globalPointer >= p ) {
                    queue.changed();
                    continue;
                }
                // Still below p: this covers the case of getting to the end of list without finding p.
            }
            queue.dequeue();
        }

        if ( queue.isEmpty() ) {
            exhausted = true;
            last = -1;
            return Integer.MAX_VALUE;
        }

        currentIterator = queue.first();
        last = globalDocumentPointer[ currentIterator ];
        return last;
    }

    /**
     * Returns the next global document pointer.
     *
     * @return the next global document pointer, or -1 if no more documents are available.
     */
    public int nextDocument() throws IOException {
        if ( next >= 0 ) {
            // A document is already pending: hand it out and clear the slot.
            last = next;
            next = -1;
            return last;
        }

        if ( exhausted ) {
            last = -1;
            return last;
        }

        // Advance the component that produced the previous document and fix the queue accordingly.
        final int localPointer = documentIterator[ currentIterator ].nextDocument();
        if ( localPointer == -1 ) queue.dequeue();
        else {
            globalDocumentPointer[ currentIterator ] = toGlobalPointer( currentIterator, localPointer );
            queue.changed();
        }

        if ( queue.isEmpty() ) {
            exhausted = true;
            last = -1;
            return last;
        }

        currentIterator = queue.first();
        last = globalDocumentPointer[ currentIterator ];
        return last;
    }

    /** Delegates the visit to the current component iterator. */
    public <T> T accept( DocumentIteratorVisitor<T> visitor ) throws IOException {
        final DocumentIterator current = documentIterator[ currentIterator ];
        return current.accept( visitor );
    }

    /** Delegates the true-path visit to the current component iterator. */
    public <T> T acceptOnTruePaths( DocumentIteratorVisitor<T> visitor ) throws IOException {
        final DocumentIterator current = documentIterator[ currentIterator ];
        return current.acceptOnTruePaths( visitor );
    }

    /** Closes the underlying index reader. */
    public void dispose() throws IOException {
        indexReader.close();
    }
}