package it.unimi.dsi.mg4j.search;
/*
* MG4J: Managing Gigabytes for Java
*
* Copyright (C) 2003-2010 Paolo Boldi and Sebastiano Vigna
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/
import it.unimi.dsi.fastutil.ints.IntHeapSemiIndirectPriorityQueue;
import it.unimi.dsi.fastutil.ints.IntSet;
import it.unimi.dsi.fastutil.objects.ObjectHeapSemiIndirectPriorityQueue;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.index.IndexIterator;
import it.unimi.dsi.util.Interval;
import it.unimi.dsi.util.Intervals;
import java.io.IOException;
/** An iterator on documents that returns the OR of a number of document iterators.
*
* <P>This class adds to {@link it.unimi.dsi.mg4j.search.AbstractUnionDocumentIterator}
* an interval iterator generating the OR of the intervals returned for each of the documents involved.
*/
public class OrDocumentIterator extends AbstractUnionDocumentIterator implements DocumentIterator {
@SuppressWarnings("unused")
private final static boolean DEBUG = false;
/** Returns a document iterator that computes the OR of the given array of iterators.
*
* <P>Note that the special case of the empty and of the singleton arrays
* are handled efficiently.
*
* @param index the default index; relevant only if <code>it</code> has zero length.
* @param documentIterator the iterators to be joined.
* @return a document iterator that computes the OR of <code>it</code>.
* @throws IOException
*/
public static DocumentIterator getInstance( final Index index, DocumentIterator... documentIterator ) throws IOException {
if ( documentIterator.length == 0 ) return FalseDocumentIterator.getInstance( index );
if ( documentIterator.length == 1 ) return documentIterator[ 0 ];
return new OrDocumentIterator( documentIterator );
}
/** Returns a document iterator that computes the OR of the given nonzero-length array of iterators.
*
* <P>Note that the special case of the singleton array is handled efficiently.
*
* @param documentIterator the iterators to be joined.
* @return a document iterator that computes the OR of <code>it</code>.
* @throws IOException
*/
public static DocumentIterator getInstance( DocumentIterator... documentIterator ) throws IOException {
if ( documentIterator.length == 0 ) throw new IllegalArgumentException();
return getInstance( null, documentIterator );
}
/** Creates a new document iterator that computes the OR of the given array of iterators.
* @param documentIterator the iterators to be joined.
* @throws IOException
*/
protected OrDocumentIterator( final DocumentIterator... documentIterator ) throws IOException {
super( documentIterator );
}
protected IntervalIterator getComposedIntervalIterator( final Index index ) {
return indexIterator == null ? new OrIntervalIterator( index ) : new OrIndexIntervalIterator( index );
}
/** An interval iterator of nondecreasing disjoint intervals
* obtained ORing the output of a set of iterators with
* the same property.
*/
private class OrIntervalIterator extends AbstractCompositeIntervalIterator {
/** The index of this iterator. */
final Index index;
/** A heap-based indirect priority queue used to keep track of the currently scanned intervals. */
private ObjectHeapSemiIndirectPriorityQueue<Interval> intervalQueue;
/** The left extreme of the last returned interval, or {@link Integer#MIN_VALUE} after a {@link #reset()}. */
private int lastLeft;
/** An array to hold the front of the interval queue. */
private final int[] intervalFront;
/** Creates a new OR interval iterator.
*
* @param index the index of the iterator.
*/
public OrIntervalIterator( final Index index ) {
super( n );
// We just set up some internal data, but we perform no initialisation.
this.index = index;
intervalQueue = new ObjectHeapSemiIndirectPriorityQueue<Interval>( curr, Intervals.ENDS_BEFORE_OR_IS_SUFFIX );
intervalFront = new int[ n ];
}
public void reset() throws IOException {
lastLeft = Integer.MIN_VALUE;
next = null;
intervalQueue.clear();
for ( int i = computeFront(), k; i-- != 0; ) {
k = front[ i ];
intervalIterator[ k ] = documentIterator[ k ].intervalIterator( index );
if ( intervalIterator[ k ] != IntervalIterators.TRUE && ( curr[ k ] = intervalIterator[ k ].nextInterval() ) != null ) intervalQueue.enqueue( k );
}
}
public void intervalTerms( final IntSet terms ) {
final int frontSize = intervalQueue.front( intervalFront );
final int[] intervalFront = this.intervalFront;
for( int i = frontSize; i-- != 0; ) intervalIterator[ intervalFront[ i ] ].intervalTerms( terms );
}
public Interval nextInterval () throws IOException {
if ( next != null ) {
final Interval result = next;
next = null;
return result;
}
if ( intervalQueue.isEmpty() ) return null;
int first;
while ( curr[ first = intervalQueue.first() ].left <= lastLeft ) {
if ( ( curr[ first ] = intervalIterator[ first ].nextInterval() ) != null ) intervalQueue.changed();
else {
intervalQueue.dequeue();
if ( intervalQueue.isEmpty() ) return null;
}
}
lastLeft = curr[ first ].left;
return curr[ first ];
}
public int extent() {
int e, s;
s = Integer.MAX_VALUE;
for ( int i = computeFront(), k; i-- != 0; ) {
k = front[ i ];
e = curr[ k ] != null ? intervalIterator[ k ].extent() : Integer.MAX_VALUE;
if ( e < s ) s = e;
}
return s;
}
}
/** An optimised interval iterator with the same semantics as that implemented
* by {@link OrDocumentIterator}, but using just {@link IndexIterator#positionArray()}.
*/
protected class OrIndexIntervalIterator extends AbstractCompositeIndexIntervalIterator implements IntervalIterator {
@SuppressWarnings({ "unused", "hiding" })
private final static boolean DEBUG = false;
private final static boolean ASSERTS = false;
/** The index of this iterator. */
final Index index;
/** A heap-based semi-indirect priority queue used to keep track of the currently scanned positions. */
private final IntHeapSemiIndirectPriorityQueue positionQueue;
/** An array to hold the front of the position queue. */
private final int[] positionFront;
public OrIndexIntervalIterator( final Index index ) {
super( n );
this.index = index;
positionQueue = new IntHeapSemiIndirectPriorityQueue( curr );
positionFront = new int[ n ];
}
public void reset() throws IOException {
positionQueue.clear();
for ( int i = computeFront(), k; i-- != 0; ) {
k = front[ i ];
if ( indexIterator[ k ].index() == index ) {
position[ k ] = indexIterator[ k ].positionArray();
count[ k ] = indexIterator[ k ].count();
curr[ k ] = position[ k ][ currPos[ k ] = 0 ];
positionQueue.enqueue( k );
}
}
if ( ASSERTS ) assert ! positionQueue.isEmpty();
next = Interval.valueOf( curr[ positionQueue.first() ] );
}
public void intervalTerms( final IntSet terms ) {
final int frontSize = positionQueue.front( positionFront );
final int[] positionFront = this.positionFront;
for( int i = frontSize; i-- != 0; ) terms.add( indexIterator[ positionFront[ i ] ].termNumber() );
}
public Interval nextInterval() {
if ( next != null ) {
final Interval result = next;
next = null;
return result;
}
if ( positionQueue.isEmpty() ) return null;
int first = positionQueue.first();
final int previous = curr[ first ];
do
if ( ++currPos[ first ] == count[ first ] ) {
positionQueue.dequeue();
if ( positionQueue.isEmpty() ) return null;
}
else {
curr[ first ] = position[ first ][ currPos[ first ] ];
positionQueue.changed();
}
while ( curr[ first = positionQueue.first() ] == previous );
return Interval.valueOf( curr[ first ] );
}
public int extent() {
return 1;
}
}
}