package it.unimi.dsi.mg4j.search;
/*
* MG4J: Managing Gigabytes for Java
*
* Copyright (C) 2006-2010 Sebastiano Vigna
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.objects.ReferenceArraySet;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.index.IndexIterator;
import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;
import it.unimi.dsi.util.Interval;
import java.io.IOException;
/** An abstract iterator on documents, based on a list of component iterators.
*
* <p>The {@linkplain #AbstractCompositeDocumentIterator(DocumentIterator[]) constructor} caches
* into {@link #documentIterator} the component iterators, and sets up a number of protected
* fields that can be useful to implementors. It also provides abstract member classes that make it
* easier to implement interval iterators.
*
* <p>Note that this class implements both {@link #accept(DocumentIteratorVisitor)}
* and {@link #acceptOnTruePaths(DocumentIteratorVisitor)} with a series of recursive
* calls on <em>all</em> component iterators. If you desire a different behaviour
* for {@link #acceptOnTruePaths(DocumentIteratorVisitor)} (see, e.g.,
* {@link it.unimi.dsi.mg4j.search.AbstractUnionDocumentIterator}), please override it.
*/
public abstract class AbstractCompositeDocumentIterator extends AbstractDocumentIterator implements DocumentIterator {
	/** The number of component iterators. */
	public final int n;
	/** The component document iterators (the array passed to the constructor is stored as is, without copying). */
	protected final DocumentIterator[] documentIterator;
	/** A cached copy of {@link #documentIterator}, if all
	 * underlying iterators are {@linkplain IndexIterator index iterators}; <code>null</code>, otherwise. */
	protected final IndexIterator[] indexIterator;
	/** The set of indices involved in this iterator. */
	protected final ReferenceArraySet<Index> indices = new ReferenceArraySet<Index>();
	/** If not <code>null</code>, the sole index involved in this iterator. */
	protected final Index soleIndex;

	/** Creates a new composite document iterator using a given list of component document iterators and
	 * a specified index.
	 *
	 * @param index an index that will constitute the only index for which this iterator will return intervals,
	 * or <code>null</code> to require the computation of the set of indices as the union of the indices
	 * of all component iterators.
	 * @param documentIterator the component iterators.
	 */
	protected AbstractCompositeDocumentIterator( final Index index, final DocumentIterator... documentIterator ) {
		this.documentIterator = documentIterator;
		this.n = documentIterator.length;

		if ( index != null ) {
			soleIndex = index;
			indices.add( index );
		}
		else {
			/* We gather the indices of all components, scanning them from the last to the first.
			 * Note that the set indices() may contain indices from empty subqueries, too. */
			for( int i = n; i-- != 0; ) indices.addAll( documentIterator[ i ].indices() );
			soleIndex = indices.size() == 1 ? indices.iterator().next() : null;
		}

		// The typed copy is built only if every component is an index iterator.
		boolean allIndexIterators = true;
		for( int i = n; i-- != 0; ) {
			if ( ! ( documentIterator[ i ] instanceof IndexIterator ) ) {
				allIndexIterators = false;
				break;
			}
		}

		if ( allIndexIterators ) {
			indexIterator = new IndexIterator[ n ];
			System.arraycopy( documentIterator, 0, indexIterator, 0, n );
		}
		else indexIterator = null;
	}

	/** Creates a new composite document iterator using a given list of component document iterators.
	 *
	 * <p>The set of indices is computed as the union of the indices of all component iterators.
	 *
	 * @param documentIterator the component iterators.
	 */
	protected AbstractCompositeDocumentIterator( final DocumentIterator... documentIterator ) {
		this( null, documentIterator );
	}

	/** Visits this iterator and, recursively, every non-<code>null</code> component iterator.
	 *
	 * <p>The results of the components are stored in the array returned by
	 * {@link DocumentIteratorVisitor#newArray(int)}, if that array is not <code>null</code>.
	 *
	 * @param visitor the visitor.
	 * @return <code>null</code> if <code>visitPre()</code> returns false or if any component
	 * returns <code>null</code>; the result of <code>visitPost()</code>, otherwise.
	 */
	public <T> T accept( final DocumentIteratorVisitor<T> visitor ) throws IOException {
		if ( ! visitor.visitPre( this ) ) return null;
		final T[] a = visitor.newArray( n );

		for( int i = 0; i < n; i++ ) {
			final DocumentIterator component = documentIterator[ i ];
			if ( component == null ) continue;
			final T result = component.accept( visitor );
			// The result is stored before the check, so the array content is the same also on early exit.
			if ( a != null ) a[ i ] = result;
			if ( result == null ) return null;
		}

		return visitor.visitPost( this, a );
	}

	/** Visits this iterator and, recursively, every non-<code>null</code> component iterator
	 * using <code>acceptOnTruePaths()</code>.
	 *
	 * <p>This implementation descends into <em>all</em> component iterators; subclasses for which
	 * only some components lie on a true path should override this method.
	 *
	 * @param visitor the visitor.
	 * @return <code>null</code> if <code>visitPre()</code> returns false or if any component
	 * returns <code>null</code>; the result of <code>visitPost()</code>, otherwise.
	 */
	public <T> T acceptOnTruePaths( final DocumentIteratorVisitor<T> visitor ) throws IOException {
		if ( ! visitor.visitPre( this ) ) return null;
		final T[] a = visitor.newArray( n );

		for( int i = 0; i < n; i++ ) {
			final DocumentIterator component = documentIterator[ i ];
			if ( component == null ) continue;
			final T result = component.acceptOnTruePaths( visitor );
			// The result is stored before the check, so the array content is the same also on early exit.
			if ( a != null ) a[ i ] = result;
			if ( result == null ) return null;
		}

		return visitor.visitPost( this, a );
	}

	/** Returns the set of indices involved in this iterator.
	 *
	 * @return the (live) content of {@link #indices}.
	 */
	public ReferenceSet<Index> indices() {
		return indices;
	}

	/** Returns the interval iterator for the {@linkplain #soleIndex sole index} of this iterator.
	 *
	 * @return the result of <code>intervalIterator( soleIndex )</code>.
	 * @throws IllegalStateException if {@link #soleIndex} is <code>null</code>.
	 */
	public IntervalIterator intervalIterator() throws IOException {
		if ( soleIndex == null ) throw new IllegalStateException( "This iterator involves " + indices.size() + " indices: an index must be specified explicitly" );
		return intervalIterator( soleIndex );
	}

	/** Disposes all component iterators, from the last to the first.
	 *
	 * <p>A failure in disposing a component does not prevent the disposal of the remaining ones:
	 * the first {@link IOException} caught is rethrown after all components have been processed.
	 * Components that are <code>null</code> are skipped, as {@link #accept(DocumentIteratorVisitor)} does.
	 *
	 * @throws IOException the first exception thrown by a component, if any.
	 */
	public void dispose() throws IOException {
		IOException firstFailure = null;

		for( int i = n; i-- != 0; ) {
			final DocumentIterator component = documentIterator[ i ];
			if ( component == null ) continue;
			try {
				component.dispose();
			}
			catch( IOException e ) {
				// We keep going so that the remaining components are released; only the first failure is reported.
				if ( firstFailure == null ) firstFailure = e;
			}
		}

		if ( firstFailure != null ) throw firstFailure;
	}

	/** Returns the simple name of this class followed by the comma-separated list of component
	 * iterators in parentheses and, if the weight is not 1, by the weight in braces. */
	public String toString() {
		final StringBuilder res = new StringBuilder();
		res.append( this.getClass().getSimpleName() ).append( "(" );
		for( int i = 0; i < n; i++ ) {
			if ( i > 0 ) res.append( "," );
			res.append( documentIterator[ i ] );
		}
		res.append( ")" );
		if ( weight != 1 ) res.append( '{' ).append( weight ).append( '}' );
		return res.toString();
	}

	/** An abstract interval iterator. Provides mainly storage for the {@linkplain #intervalIterator component interval iterators},
	 * place for {@linkplain #curr the last interval returned by each iterator} and {@link #toString()}. */
	protected abstract static class AbstractCompositeIntervalIterator extends AbstractIntervalIterator {
		/** The underlying iterators. */
		protected IntervalIterator[] intervalIterator;
		/** The last interval returned by each iterator. */
		protected Interval[] curr;

		/** Allocates the storage for a given number of component iterators.
		 *
		 * @param n the number of component iterators.
		 */
		public AbstractCompositeIntervalIterator( final int n ) {
			// We just set up some internal data, but we perform no initialisation.
			curr = new Interval[ n ];
			intervalIterator = new IntervalIterator[ n ];
		}

		/** Returns the name of this class (without package) followed by the comma-separated
		 * list of component interval iterators in parentheses. */
		public String toString() {
			final String className = this.getClass().getName();
			final MutableString res = new MutableString();
			// We strip the package; for a class in the default package lastIndexOf() returns -1 and the whole name is kept.
			res.append( className.substring( className.lastIndexOf( '.' ) + 1 ) ).append( "(" );
			for( int i = 0; i < intervalIterator.length; i++ ) res.append( i > 0 ? "," : "" ).append( intervalIterator[ i ] );
			return res.append( ")" ).toString();
		}
	}

	/** An abstract {@link IndexIterator}-based interval iterator. The difference with {@link AbstractCompositeIntervalIterator}
	 * is that this class assumes that all document iterators are actually index iterators.
	 * The algorithms in this (very common) case can be significantly simplified, obtaining
	 * a large gain in performance. */
	protected abstract static class AbstractCompositeIndexIntervalIterator extends AbstractIntervalIterator {
		/** The position arrays returned by each index iterator. */
		protected int[][] position;
		/** The index of current element of {@link #position} for each index iterator. */
		protected int[] currPos;
		/** At any time, <code>curr[ i ]</code> contains <code>position[ i ][ currPos[ i ] ]</code>. */
		protected int[] curr;
		/** The number of elements of {@link #position} for each index iterator. */
		protected int[] count;

		/** Allocates the storage for a given number of component iterators.
		 *
		 * @param n the number of component iterators.
		 */
		public AbstractCompositeIndexIntervalIterator( final int n ) {
			// We just set up some internal data, but we perform no initialisation.
			position = new int[ n ][];
			count = new int[ n ];
			currPos = new int[ n ];
			curr = new int[ n ];
		}

		/** Returns the name of this class (without package) followed by the comma-separated
		 * list of the position lists of each component in parentheses. A component whose
		 * position array has not been set yet is rendered as <code>null</code>. */
		public String toString() {
			final String className = this.getClass().getName();
			final MutableString res = new MutableString();
			// We strip the package; for a class in the default package lastIndexOf() returns -1 and the whole name is kept.
			res.append( className.substring( className.lastIndexOf( '.' ) + 1 ) ).append( "(" );
			for( int i = 0; i < position.length; i++ ) {
				if ( i > 0 ) res.append( "," );
				// The constructor leaves the position arrays unset, so we must not wrap a null array.
				if ( position[ i ] == null ) res.append( "null" );
				else res.append( IntArrayList.wrap( position[ i ], count[ i ] ) );
			}
			return res.append( ")" ).toString();
		}
	}
}