/*
 * MG4J: Managing Gigabytes for Java
 *
 * Copyright (C) 2008-2010 Sebastiano Vigna
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 3 of the License, or (at your option)
 * any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
import it.unimi.dsi.fastutil.ints.IntSet;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceArrayMap;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.index.IndexIterator;
import it.unimi.dsi.util.Interval;
/** A document iterator that aligns the results of a number of document iterators over
 * different indices.
 *
 * <p>This class is an example of cross-index computation. As in the case of an
 * {@link AndDocumentIterator}, we intersect the posting lists. However, once
 * we get to the index level, we actually return just intervals that appear in
 * <em>all</em> component iterators. Of course, this is meaningful only if all
 * indices represent different views on the same data, a typical example being
 * semantic tagging.
 *
 * <p>An instance of this class exposes a single interval iterator associated to
 * the index of the <em>first</em> component iterator, as all interval iterators
 * are exhausted during the computation of their intersection.
 * Correspondingly, a call to {@link IntervalIterator#intervalTerms(IntSet)} just
 * returns the terms related to the <em>first</em> component iterator.
 */
public class AlignDocumentIterator extends AbstractDocumentIterator {
	/** Whether {@link #intervalIterator(Index)} traces its invocations on standard error. */
	private static final boolean DEBUG = false;

	/** The first operand: the iterator being aligned. */
	private final DocumentIterator aligneeIterator;
	/** The second operand: the iterator the first operand is aligned on. */
	private final DocumentIterator alignerIterator;
	/** {@link #aligneeIterator} cast to {@link IndexIterator}, or <code>null</code> unless <em>both</em> operands are index iterators. */
	private final IndexIterator aligneeIndexIterator;
	/** {@link #alignerIterator} cast to {@link IndexIterator}, or <code>null</code> unless <em>both</em> operands are index iterators. */
	private final IndexIterator alignerIndexIterator;
	/** The only index of this iterator, that is, the single index of {@link #aligneeIterator}. */
	private final Index index;
	/** A map with room for one entry, associating {@link #index} with the current interval iterator. */
	private final Reference2ReferenceMap<Index,IntervalIterator> currentIterators;
	/** An unmodifiable wrapper around {@link #currentIterators}. */
	private final Reference2ReferenceMap<Index,IntervalIterator> unmodifiableCurrentIterators;

	/** The lazily created aligning interval iterator, reused across documents, or <code>null</code> if none has been created yet. */
	private IntervalIterator intervalIterator;
	/** The interval iterator already handed out for the current document, or <code>null</code> if none has been requested yet. */
	private IntervalIterator currentIterator;
/** Returns a document iterator that aligns the first iterator to the second.
* @param aligneeIterator the iterator to be aligned.
* @param alignerIterator the iterator used to align <code>aligneeIterator</code>.
* @return a document iterator that computes the alignment of <code>aligneeIterator</code> on <code>alignerIterator</code>.
public static DocumentIterator getInstance( final DocumentIterator aligneeIterator, final DocumentIterator alignerIterator ) {
return new AlignDocumentIterator( aligneeIterator, alignerIterator );
protected AlignDocumentIterator( final DocumentIterator aligneeIterator, final DocumentIterator alignerIterator ) {
this.aligneeIterator = aligneeIterator;
this.alignerIterator = alignerIterator;
if ( aligneeIterator instanceof IndexIterator && alignerIterator instanceof IndexIterator ) {
aligneeIndexIterator = (IndexIterator)aligneeIterator;
alignerIndexIterator = (IndexIterator)alignerIterator;
else aligneeIndexIterator = alignerIndexIterator = null;
if ( aligneeIterator.indices().size() != 1 || alignerIterator.indices().size() != 1 ) throw new IllegalArgumentException( "You can align single-index iterators only" );
index = aligneeIterator.indices().iterator().next();
currentIterators = new Reference2ReferenceArrayMap<Index,IntervalIterator>( 1 );
unmodifiableCurrentIterators = Reference2ReferenceMaps.unmodifiable( currentIterators );
public ReferenceSet<Index> indices() {
return aligneeIterator.indices();
public int nextDocument() throws IOException {
if ( next >= 0 ) {
last = next;
next = -1;
return last;
currentIterator = null;
int alignee, aligner;
alignee = aligneeIterator.nextDocument();
aligner = alignerIterator.nextDocument();
while( alignee != -1 && aligner != -1 ) {
currentIterator = null;
if ( alignee < aligner ) alignee = aligneeIterator.nextDocument();
else if ( aligner < alignee ) aligner = alignerIterator.nextDocument();
else {
last = alignee;
if ( intervalIterator().hasNext() ) return last;
alignee = aligneeIterator.nextDocument();
aligner = alignerIterator.nextDocument();
return last = next = -1;
public int skipTo( final int n ) throws IOException {
// The easy case.
if ( last >= n ) return last;
if ( next >= n ) return nextDocument();
last = next = -1;
currentIterator = null;
int alignee, aligner;
alignee = aligneeIterator.skipTo( n );
aligner = alignerIterator.skipTo( n );
while( alignee != Integer.MAX_VALUE && aligner != Integer.MAX_VALUE ) {
currentIterator = null;
if ( alignee < aligner ) alignee = aligneeIterator.skipTo( aligner );
else if ( aligner < alignee ) aligner = alignerIterator.skipTo( alignee );
else {
last = alignee;
if ( intervalIterator().hasNext() ) {
next = -1;
return last;
alignee = aligneeIterator.nextDocument();
aligner = alignerIterator.nextDocument();
if ( aligner == -1 || alignee == -1 ) break;
last = -1;
return Integer.MAX_VALUE;
public IntervalIterator intervalIterator() throws IOException {
return intervalIterator( index );
public Reference2ReferenceMap<Index, IntervalIterator> intervalIterators() throws IOException {
currentIterators.put( index, intervalIterator() );
return unmodifiableCurrentIterators;
public IntervalIterator intervalIterator( final Index index ) throws IOException {
if ( DEBUG ) System.err.println( this + ".intervalIterator(" + index + ")" );
if ( last == -1 ) throw new IllegalStateException();
if ( index != this.index ) return IntervalIterators.TRUE;
// If the iterator has been created and it's ready, we just return it.
if ( currentIterator != null ) return currentIterator;
final IntervalIterator aligneeIntervalIterator = aligneeIterator.intervalIterator(), alignerIntervalIterator = alignerIterator.intervalIterator();
if ( aligneeIntervalIterator == IntervalIterators.TRUE || alignerIntervalIterator == IntervalIterators.TRUE )
return currentIterator = aligneeIntervalIterator == alignerIntervalIterator ? IntervalIterators.TRUE : IntervalIterators.FALSE;
if ( intervalIterator == null ) intervalIterator = aligneeIndexIterator == null ? new AlignIntervalIterator() : new AlignIndexIntervalIterator();
return currentIterator = intervalIterator;
public void dispose() throws IOException {
public <T> T accept( final DocumentIteratorVisitor<T> visitor ) throws IOException {
if ( ! visitor.visitPre( this ) ) return null;
final T[] a = visitor.newArray( 2 );
if ( a == null ) {
if ( aligneeIterator.accept( visitor ) == null ) return null;
if ( alignerIterator.accept( visitor ) == null ) return null;
else {
if ( ( a[ 0 ] = aligneeIterator.accept( visitor ) ) == null ) return null;
if ( ( a[ 1 ] = alignerIterator.accept( visitor ) ) == null ) return null;
return visitor.visitPost( this, a );
public <T> T acceptOnTruePaths( final DocumentIteratorVisitor<T> visitor ) throws IOException {
if ( ! visitor.visitPre( this ) ) return null;
final T[] a = visitor.newArray( 1 );
if ( a == null ) {
if ( aligneeIterator.acceptOnTruePaths( visitor ) == null ) return null;
else {
if ( ( a[ 0 ] = aligneeIterator.acceptOnTruePaths( visitor ) ) == null ) return null;
return visitor.visitPost( this, a );
/** An interval iterator returning the intersection of the component interval iterators. */
private class AlignIntervalIterator extends AbstractIntervalIterator implements IntervalIterator {
/** The interval iterator of the alignee iterator. */
private IntervalIterator aligneeIntervalIterator;
/** The interval iterator of the aligner iterator. */
private IntervalIterator alignerIntervalIterator;
/** Whether the scan is over. */
private boolean endOfProcess;
public void reset() throws IOException {
next = null;
endOfProcess = false;
aligneeIntervalIterator = aligneeIterator.intervalIterator();
alignerIntervalIterator = alignerIterator.intervalIterator();
if ( aligneeIntervalIterator == IntervalIterators.TRUE || alignerIntervalIterator == IntervalIterators.TRUE ) {
// If by any chance we meet a TRUE iterator we are just false
endOfProcess = true;
public void intervalTerms( final IntSet terms ) {
aligneeIntervalIterator.intervalTerms( terms );
public Interval nextInterval() throws IOException {
if ( next != null ) {
final Interval result = next;
next = null;
return result;
if ( endOfProcess ) return null;
Interval aligneeInterval = null, alignerInterval = null;
aligneeInterval = aligneeIntervalIterator.nextInterval();
alignerInterval = alignerIntervalIterator.nextInterval();
if ( aligneeInterval == null || alignerInterval == null ) {
endOfProcess = true;
return null;
while ( ! aligneeInterval.equals( alignerInterval ) ) {
if ( aligneeInterval.left <= alignerInterval.left ) {
if ( ( aligneeInterval = aligneeIntervalIterator.nextInterval() ) == null ) {
endOfProcess = true;
return null;
else {
if ( ( alignerInterval = alignerIntervalIterator.nextInterval() ) == null ) {
endOfProcess = true;
return null;
return aligneeInterval;
public int extent() {
return aligneeIntervalIterator.extent();
/** An interval iterator returning the intersection of the component interval iterators. */
private class AlignIndexIntervalIterator extends AbstractIntervalIterator implements IntervalIterator {
/** Whether the scan is over. */
private boolean endOfProcess;
/** The positions of the alignee iterator. */
private int[] aligneePosition;
/** The positions of the aligner iterator. */
private int[] alignerPosition;
/** The count of the alignee iterator. */
private int aligneeCount;
/** The count of the aligner iterator. */
private int alignerCount;
/** The position of the alignee iterator. */
private int aligneeCurr;
/** The position of the aligner iterator. */
private int alignerCurr;
public void reset() throws IOException {
next = null;
endOfProcess = false;
aligneePosition = aligneeIndexIterator.positionArray();
alignerPosition = alignerIndexIterator.positionArray();
aligneeCount = aligneeIndexIterator.count();
alignerCount = alignerIndexIterator.count();
aligneeCurr = alignerCurr = -1;
public void intervalTerms( final IntSet terms ) {
terms.add( aligneeIndexIterator.termNumber() );
public Interval nextInterval() {
if ( next != null ) {
final Interval result = next;
next = null;
return result;
if ( endOfProcess ) return null;
final int[] aligneePosition = this.aligneePosition, alignerPosition = this.alignerPosition;
if ( ++aligneeCurr == aligneeCount || ++alignerCurr == alignerCount ) {
endOfProcess = true;
return null;
while ( aligneePosition[ aligneeCurr ] != alignerPosition[ alignerCurr ] ) {
if ( aligneePosition[ aligneeCurr ] < alignerPosition[ alignerCurr ] ) {
if ( ++aligneeCurr == aligneeCount ) {
endOfProcess = true;
return null;
else {
if ( ++alignerCurr == alignerCount ) {
endOfProcess = true;
return null;
return Interval.valueOf( alignerPosition[ alignerCurr ] );
public int extent() {
return 1;