package it.unimi.dsi.mg4j.search;
/*
* MG4J: Managing Gigabytes for Java
*
* Copyright (C) 2003-2010 Paolo Boldi and Sebastiano Vigna
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/
import it.unimi.dsi.fastutil.IndirectPriorityQueue;
import it.unimi.dsi.fastutil.ints.IntHeapSemiIndirectPriorityQueue;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceArrayMap;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;
import java.io.IOException;
import java.util.Iterator;
/** A document iterator on documents, generating the union of the documents returned
* by a number of document iterators.
*
* <p>The pattern of this class is the same as that of {@link AbstractIntersectionDocumentIterator}.
* Additionally, this class provides a mechanism that makes accessible the set of component
* document iterators that are {@linkplain #computeFront() positioned on the current document}.
*/
public abstract class AbstractUnionDocumentIterator extends AbstractCompositeDocumentIterator {
private final static boolean DEBUG = false;
//private final static boolean ASSERTS = false;
/** A heap-based semi-indirect priority queue used to keep track of the currently scanned integers. */
final protected IntHeapSemiIndirectPriorityQueue queue;
/** The {@link IndirectPriorityQueue#front(int[])} of {@link #queue}, if {@link #frontSize} is not -1. */
final protected int[] front;
/** The reference array used for the queue. */
final protected int[] curr;
/** A map from indices to interval iterators. */
final private Reference2ReferenceArrayMap<Index,IntervalIterator> intervalIterators;
/** A map from indices to the iterators returned for the current document. The key set may
* not contain an index because the related iterator has never been requested. Moreover,
* the iterator in this map for a given index may differ from the one in {@link #intervalIterators}
* because it could be {@link IntervalIterators#TRUE} (in fact, in that case it may even
* happen that {@link #intervalIterators} does not contain the index). */
final private Reference2ReferenceArrayMap<Index,IntervalIterator> currentIterators;
/** An unmodifiable wrapper around {@link #currentIterators}. */
final private Reference2ReferenceMap<Index,IntervalIterator> unmodifiableCurrentIterators;
/** The number of valid entries in {@link #front}, or -1 if the front has not been computed for the current document. */
protected int frontSize = -1;
/** Creates a new document iterator that computes the OR of the given array of iterators.
* @param documentIterator the iterators to be joined.
* @throws IOException
*/
protected AbstractUnionDocumentIterator( final DocumentIterator... documentIterator ) throws IOException {
super( documentIterator );
this.curr = new int[ n ];
queue = new IntHeapSemiIndirectPriorityQueue( curr );
intervalIterators = new Reference2ReferenceArrayMap<Index,IntervalIterator>( indices.size() );
currentIterators = new Reference2ReferenceArrayMap<Index,IntervalIterator>( indices.size() );
unmodifiableCurrentIterators = Reference2ReferenceMaps.unmodifiable( currentIterators );
// Only add to the queue nonempty iterators...
for ( int i = 0; i < n; i++ ) if ( ( curr[ i ] = documentIterator[ i ].nextDocument() ) != -1 ) queue.enqueue( i );
// If queue is empty, the process is over
if ( ! queue.isEmpty() ) next = curr[ queue.first() ];
front = new int[ queue.size() ];
}
public int skipTo( final int n ) throws IOException {
if ( last >= n ) return last;
if ( next >= n ) return nextDocument();
if ( queue.isEmpty() ) return Integer.MAX_VALUE;
last = next = -1;
currentIterators.clear();
frontSize = -1; // Invalidate front
int first, res;
while( curr[ first = queue.first() ] < n ) {
res = documentIterator[ first ].skipTo( n );
// Cannot advance the minimum
if ( res == Integer.MAX_VALUE ) {
// Remove it
queue.dequeue();
// If nothing else remains, we are done
if ( queue.isEmpty() ) return Integer.MAX_VALUE;
}
else {
// Advance the top element, and signal this fact to the queue
curr[ first ] = res;
queue.changed();
}
}
return last = curr[ first ];
}
public int nextDocument() throws IOException {
if ( next != -1 ) {
// We already know what to return
last = next;
next = -1;
return last;
}
currentIterators.clear();
frontSize = -1; // Invalidate front
if ( queue.isEmpty() ) return last = -1;
// The least element
int first, c = curr[ queue.first() ];
// Advance all elements equal to the least one
while( curr[ first = queue.first() ] == c ) {
if ( ( curr[ first ] = documentIterator[ first ].nextDocument() ) != - 1 ) queue.changed();
else {
// Remove it
queue.dequeue();
// If nothing else remains, we are done
if ( queue.isEmpty() ) return last = -1;
}
}
return last = curr[ first ];
}
/** Forces computation of the current front, returning the number of indices it contains.
*
* <p>After a call to this method,
* the first elements of {@link #front} contain
* the indices of the {@linkplain AbstractCompositeDocumentIterator#documentIterator component document iterators}
* that are positioned on the current document. If the front has already been
* computed for the current document, this method has no side effects.
*
* @return the size of the current front (the number of valid entries in {@link #front}).
*/
protected int computeFront() {
if ( frontSize == -1 ) frontSize = queue.front( front );
return frontSize;
}
public Reference2ReferenceMap<Index,IntervalIterator> intervalIterators() throws IOException {
final Iterator<Index> i = indices.iterator();
while( i.hasNext() ) intervalIterator( i.next() );
return unmodifiableCurrentIterators;
}
public IntervalIterator intervalIterator( final Index index ) throws IOException {
if ( DEBUG ) System.err.println( this + ".intervalIterator(" + index + ")" );
if ( last == -1 ) throw new IllegalStateException();
if ( ! indices.contains( index ) ) return IntervalIterators.TRUE;
IntervalIterator intervalIterator;
// If the iterator has been created and it's ready, we just return it.
if ( ( intervalIterator = currentIterators.get( index ) ) != null ) return intervalIterator;
boolean atLeastOneIsTrue = false;
/* If all iterators are TRUE, we return TRUE. Otherwise,
* we skip all FALSE iterators and all iterators whose document
* is not last, and merge the remaining ones. If
* all iterators are FALSE, we return FALSE.
*/
// ALERT: this logic should be in OrDocumentIterator
int c = 0; // This counts non-TRUE, non-FALSE interval iterators
IntervalIterator oneNotTrue = null;
for( int i = computeFront(); i -- != 0; ) {
intervalIterator = documentIterator[ front[ i ] ].intervalIterator( index );
if ( intervalIterator == IntervalIterators.TRUE ) atLeastOneIsTrue = true;
else {
oneNotTrue = intervalIterator;
if ( intervalIterator != IntervalIterators.FALSE ) c++;
}
}
// If c == 0, all component iterators are either TRUE or FALSE. We return TRUE if at least one is TRUE.
if ( c == 0 ) intervalIterator = atLeastOneIsTrue? IntervalIterators.TRUE : IntervalIterators.FALSE;
else if ( c == 1 ) intervalIterator = oneNotTrue; // If we exclude TRUE and FALSE iterators, just oneNotTrue remains: we pass it upwards.
else {
intervalIterator = intervalIterators.get( index );
if ( intervalIterator == null ) intervalIterators.put( index, intervalIterator = getComposedIntervalIterator( index ) );
intervalIterator.reset();
}
currentIterators.put( index, intervalIterator );
return intervalIterator;
}
abstract protected IntervalIterator getComposedIntervalIterator( Index index );
/** Invokes {@link #acceptOnTruePaths(DocumentIteratorVisitor)} only on component
* iterators positioned on the current document.
*
* @param visitor a visitor.
* @return true if the visit should continue.
* @throws IOException
*/
@Override
public <T> T acceptOnTruePaths( DocumentIteratorVisitor<T> visitor ) throws IOException {
if ( ! visitor.visitPre( this ) ) return null;
final int s = computeFront();
final T[] a = visitor.newArray( s );
if ( a == null ) {
for( int i = 0; i < s; i++ ) if ( documentIterator[ front[ i ] ].acceptOnTruePaths( visitor ) == null ) return null;
}
else {
for( int i = 0; i < s; i++ ) if ( ( a[ i ] = documentIterator[ front[ i ] ].acceptOnTruePaths( visitor ) ) == null ) return null;
}
return visitor.visitPost( this, a );
}
}