Package it.unimi.dsi.mg4j.search

Source Code of it.unimi.dsi.mg4j.search.OrderedAndDocumentIterator$OrderedAndIndexIntervalIterator

package it.unimi.dsi.mg4j.search;

/*    
* MG4J: Managing Gigabytes for Java
*
* Copyright (C) 2003-2010 Paolo Boldi and Sebastiano Vigna
*
*  This library is free software; you can redistribute it and/or modify it
*  under the terms of the GNU Lesser General Public License as published by the Free
*  Software Foundation; either version 3 of the License, or (at your option)
*  any later version.
*
*  This library is distributed in the hope that it will be useful, but
*  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
*  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
*  for more details.
*
*  You should have received a copy of the GNU Lesser General Public License
*  along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/

import it.unimi.dsi.fastutil.ints.IntArrays;
import it.unimi.dsi.fastutil.ints.IntSet;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.util.Interval;
import it.unimi.dsi.util.Intervals;

import java.io.IOException;

/** An iterator returning documents containing nonoverlapping intervals in query order
* satisfying the underlying queries.
*
* <p>In practice, this iterator implements <em>strictly ordered AND</em>, which is
* satisfied when the subqueries are satisfied by nonoverlapping intervals in query order.
*/

public class OrderedAndDocumentIterator extends AbstractOrderedIntervalDocumentIterator {

  @SuppressWarnings("hiding")
  private final static boolean ASSERTS = false;
 
  /** Returns a document iterator that computes the ordered AND of the given array of iterators.
   *
   * <P>Note that the special case of the empty and of the singleton arrays
   * are handled efficiently.
   *
    * @param index the default index; relevant only if <code>it</code> has zero length.
   * @param documentIterator the iterators to be joined.
   * @return a document iterator that computes the ordered AND of <code>it</code>.
   * @throws IOException
   */
  public static DocumentIterator getInstance( final Index index, final DocumentIterator... documentIterator ) throws IOException {
    if ( documentIterator.length == 0 ) return TrueDocumentIterator.getInstance( index );
    if ( documentIterator.length == 1 ) return documentIterator[ 0 ];
    return new OrderedAndDocumentIterator( documentIterator );
  }
 
  /** Returns a document iterator that computes the ordered AND of the given nonzero-length array of iterators.
   *
   * <P>Note that the special case of the singleton array is handled efficiently.
   *
   * @param documentIterator the iterators to be joined (at least one).
   * @return a document iterator that computes the ordered AND of <code>it</code>.
   * @throws IOException
   */
  public static DocumentIterator getInstance( final DocumentIterator... documentIterator ) throws IOException {
    if ( documentIterator.length == 0 ) throw new IllegalArgumentException();
    return getInstance( null, documentIterator );
  }
 
  /** Creates a new ordered-AND document iterator over the given component iterators.
   *
   * <p>The array is handed unchanged to the superclass constructor; client code
   * is expected to go through one of the <code>getInstance()</code> factory methods instead.
   *
   * @param documentIterator the iterators to be joined.
   * @throws IOException if the superclass constructor throws it.
   */
  protected OrderedAndDocumentIterator( final DocumentIterator[] documentIterator ) throws IOException {
    super( documentIterator );
  }

  protected IntervalIterator getComposedIntervalIterator( final Index unused ) {
    if ( ASSERTS ) assert unused == soleIndex;
    return indexIterator == null ? new OrderedAndIntervalIterator() : new OrderedAndIndexIntervalIterator();
  }

  /** An interval iterator returning the ordered AND of the component iterators
   * (i.e., intervals made of sequences of intervals
   * of the component iterator, in the given order).
   *
   * <p>In this implementation, {@link #advanced} can be true
   * even when {@link AbstractOrderedIntervalIterator#endOfProcess} is true, as a candidate
   * can be ready to be returned even if the do-while loop in {@link #hasNext()} has
   * set {@link AbstractOrderedIntervalIterator#endOfProcess}.
   */
 
  private class OrderedAndIntervalIterator extends AbstractOrderedIntervalIterator {
    @SuppressWarnings("hiding")
    private final static boolean DEBUG = false;
    /** Whether the scan is over. */
    private boolean endOfProcess;
    /** The index of the next list to be aligned (from 0 to {@link #m}). */
    private int toBeAligned;
    /** The number of non-{@link IntervalIterators#TRUE} interval iterator. Only
     * elements with index smaller than this value are valid in {@link AbstractCompositeIntervalIterator#intervalIterator}. */
    private int m;

    /** Loads {@link #curr} with the first interval from each non-{@link IntervalIterators#TRUE} iterator, leaving
     * in {@link #m} the number of non-{@link IntervalIterators#TRUE} iterators.
     */
   
    public void reset() throws IOException {
      m = 0;
      next = null;
      toBeAligned = 1;
      endOfProcess = false;

      for( int i = 0; i < n; i++ ) {
        intervalIterator[ m ] = documentIterator[ i ].intervalIterator();
        if ( intervalIterator[ m ] != IntervalIterators.TRUE ) {
          if ( ASSERTS ) assert intervalIterator[ m ].hasNext();
          curr[ m++ ] = Intervals.MINUS_INFINITY;
        }
      }

      if ( m == 0 ) throw new IllegalStateException();
      endOfProcess = ( curr[ 0 ] = intervalIterator[ 0 ].nextInterval() ) == null;
    }

    public void intervalTerms( final IntSet terms ) {
      for( int i = n; i-- != 0; ) intervalIterator[ i ].intervalTerms( terms );
    }

    /** Returns the next interval of the ordered AND, or <code>null</code> if there are no more intervals.
     *
     * <p>If an interval has been cached in <code>next</code> it is returned (and the cache cleared).
     * Otherwise, the component lists are aligned from left to right, starting from list {@link #toBeAligned},
     * so that each current interval starts strictly after the end of the current interval of the previous list.
     *
     * @return the next interval, or <code>null</code>.
     * @throws IOException if a component interval iterator throws it.
     */
    public Interval nextInterval() throws IOException {
      // A previously computed interval is waiting: hand it out.
      // NOTE(review): next is never set to a non-null value in this class; presumably the superclass does it -- confirm.
      if ( next != null ) {
        final Interval result = next;
        next = null;
        return result;
      }

      if ( endOfProcess ) return null;
     
      // Local copies of the fields used in the loops below.
      final Interval[] curr = this.curr;
      final IntervalIterator[] intervalIterator = this.intervalIterator;
      final int m = this.m;
      // We have to decrease leftOfLast to avoid overflows. Do not test it against Integer.MAX_VALUE.
      // nextLeft == Integer.MAX_VALUE means that no candidate has been found yet in this call.
      int nextLeft = Integer.MAX_VALUE, nextRight = Integer.MAX_VALUE, leftOfLast = Integer.MAX_VALUE - 1;

      // Resume the alignment from the list at which the previous call stopped.
      int i = toBeAligned;

      for(;;) {
        if ( DEBUG ) System.err.println( "Current candidate: " + Interval.valueOf( nextLeft, nextRight ) );

        // Alignment loop: move right through the lists, advancing each one past the end of the previous one.
        for(;;) {

          if ( curr[ i - 1 ].right >= leftOfLast - ( m - i - 1 ) ) {
            // If we're here the last interval we obtained is aligned, but it cannot be completed to an alignment smaller than [nextLeft..nextRight]
            toBeAligned = i;
            if ( ASSERTS ) assert nextLeft != Integer.MAX_VALUE;
            return Interval.valueOf( nextLeft, nextRight );
          }

          // Leave the alignment loop if there are no more lists, or if list i already starts after the end of list i - 1.
          if ( i == m || curr[ i ].left > curr[ i - 1 ].right ) break;

          // Advance list i until its current interval starts strictly after the end of the one of list i - 1.
          do {
            // Stop if list i is already too far right to improve on the candidate, or if it is exhausted
            // (in the latter case curr[ i ] is null and the scan is over).
            if ( curr[ i ].right >= leftOfLast - ( m - i - 2 ) || ( curr[ i ] = intervalIterator[ i ].nextInterval() ) == null ) {
              toBeAligned = i;
              endOfProcess = curr[ i ] == null;
              return nextLeft == Integer.MAX_VALUE ? null : Interval.valueOf( nextLeft, nextRight );
            }
          } while ( curr[ i ].left <= curr[ i - 1 ].right );
         
          i++;
        }
       
        // Record the current alignment as the new candidate.
        nextLeft = curr[ 0 ].left;
        nextRight = curr[ m - 1 ].right;
        leftOfLast = curr[ m - 1 ].left;
        i = 1;
       
        // Advance the first list and look for a further alignment; if it is exhausted, the candidate is final.
        if ( ( curr[ 0 ] = intervalIterator[ 0 ].nextInterval() ) == null ) {
          endOfProcess = true;
          toBeAligned = 1;
          return Interval.valueOf( nextLeft, nextRight );
        }
      }
    }
   
    public int extent() {
      int s = 0;
      for ( int i = m; i-- != 0; ) s += intervalIterator[ i ].extent();
      return s;
    }
  }
 
  /** An interval iterator returning the ordered AND of the component index iterators
   * (i.e., intervals spanning a sequence of strictly increasing positions, one for each
   * component iterator, in the given order).
   *
   * <p>In this implementation, {@link #advanced} is
   * never true when {@link AbstractOrderedIntervalIterator#endOfProcess} is true.
   */
 
  private class OrderedAndIndexIntervalIterator extends AbstractOrderedIndexIntervalIterator {
    /** Whether the scan is over. */
    private boolean endOfProcess;
    /** The index of the next list to be aligned. */
    private int toBeAligned;
   
    public void reset() throws IOException {
      final int[][] position = this.position;
      final int[] curr = this.curr;
      final int[] count = this.count;

      IntArrays.fill( currPos, -1 );
      for( int i = n; i-- != 0; ) {
        count[ i ] = indexIterator[ i ].count();
        position[ i ] = indexIterator[ i ].positionArray();
        curr[ i ] = Integer.MIN_VALUE;
      }
      next = null;
      toBeAligned = 1;
      endOfProcess = false;
      curr[ 0 ] = position[ 0 ][ currPos[ 0 ] = 0 ];
    }
 
    public void intervalTerms( final IntSet terms ) {
      for( int i = n; i-- != 0; ) terms.add( indexIterator[ i ].termNumber() );
    }

    /** Returns the next interval of the ordered AND, or <code>null</code> if there are no more intervals.
     *
     * <p>If an interval has been cached in <code>next</code> it is returned (and the cache cleared).
     * Otherwise, the position lists are aligned from left to right, starting from list {@link #toBeAligned},
     * so that the current position of each list is strictly greater than the current position of the previous list.
     *
     * @return the next interval, or <code>null</code>.
     */
    public Interval nextInterval() {
      // A previously computed interval is waiting: hand it out.
      // NOTE(review): next is never set to a non-null value in this class; presumably the superclass does it -- confirm.
      if ( next != null ) {
        final Interval result = next;
        next = null;
        return result;
      }

      if ( endOfProcess ) return null;
     
      // We have to decrease nextRight to avoid overflows. Do not test it against Integer.MAX_VALUE.
      // nextLeft == Integer.MAX_VALUE means that no candidate has been found yet in this call.
      int nextLeft = Integer.MAX_VALUE, nextRight = Integer.MAX_VALUE - 1;
      // Local copies of the fields used in the loops below.
      final int[][] position = this.position;
      final int[] currPos = this.currPos;
      final int[] count = this.count;
      final int[] curr = this.curr;
      final int n = OrderedAndDocumentIterator.this.n;
     
      // Resume the alignment from the list at which the previous call stopped.
      int i = toBeAligned;

      for(;;) {
        if ( DEBUG ) System.err.println( "Current candidate: " + Interval.valueOf( nextLeft, nextRight ) );
        // Alignment loop: move right through the lists, advancing each one past the position of the previous one.
        for(;;) {
          if ( curr[ i - 1 ] >= nextRight - ( n - i - 1 ) ) {
            // If we're here the last position we obtained is aligned, but it cannot be completed to an alignment smaller than [nextLeft..nextRight]
            toBeAligned = i;
            if ( ASSERTS ) assert nextLeft != Integer.MAX_VALUE;
            return Interval.valueOf( nextLeft, nextRight );
          }

          // Note that in this particular case we must check that this is not the first iteration of the external loop
          // NOTE(review): no such first-iteration check is visible in the condition below; the note above may be stale -- confirm.
          // Leave the alignment loop if there are no more lists, or if list i is already past the position of list i - 1.
          if ( i == n || curr[ i ] > curr[ i - 1 ] ) break;
         
          // Advance list i until its current position is strictly greater than the one of list i - 1.
          do {
            // For singletons, curr[ i ] >= nextRight - ( n - i - 2 ) is always false here.
            if ( ASSERTS ) assert curr[ i ] < nextRight - ( n - i - 2 );
            // List i is exhausted: the scan is over, and we return the candidate found so far, if any.
            if ( ++currPos[ i ] == count[ i ] ) {
              endOfProcess = true;
              return nextLeft == Integer.MAX_VALUE ? null : Interval.valueOf( nextLeft, nextRight );
            }
            else curr[ i ] = position[ i ][ currPos[ i ] ];
          } while ( curr[ i ] <= curr[ i - 1 ] );
         
          i++;
        }
       
        // Record the current alignment as the new candidate.
        nextLeft = curr[ 0 ];
        nextRight = curr[ n - 1 ];
        i = 1;
       
        // Advance the first list and look for a further alignment; if it is exhausted, the candidate is final.
        if ( ++currPos[ 0 ] == count[ 0 ]  ) {
          endOfProcess = true;
          return Interval.valueOf( nextLeft, nextRight );
        }

        curr[ 0 ] = position[ 0 ][ currPos[ 0 ] ];
      }
    }
  }
}
TOP

Related Classes of it.unimi.dsi.mg4j.search.OrderedAndDocumentIterator$OrderedAndIndexIntervalIterator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.