Package it.unimi.dsi.mg4j.search

Source Code of it.unimi.dsi.mg4j.search.AbstractUnionDocumentIterator

package it.unimi.dsi.mg4j.search;

/*    
* MG4J: Managing Gigabytes for Java
*
* Copyright (C) 2003-2010 Paolo Boldi and Sebastiano Vigna
*
*  This library is free software; you can redistribute it and/or modify it
*  under the terms of the GNU Lesser General Public License as published by the Free
*  Software Foundation; either version 3 of the License, or (at your option)
*  any later version.
*
*  This library is distributed in the hope that it will be useful, but
*  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
*  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
*  for more details.
*
*  You should have received a copy of the GNU Lesser General Public License
*  along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/

import it.unimi.dsi.fastutil.IndirectPriorityQueue;
import it.unimi.dsi.fastutil.ints.IntHeapSemiIndirectPriorityQueue;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceArrayMap;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;

import java.io.IOException;
import java.util.Iterator;

/**  A document iterator on documents, generating the union of the documents returned
* by a number of document iterators.
*
* <p>The pattern of this class is the same as that of {@link AbstractIntersectionDocumentIterator}.
* Additionally, this class provides a mechanism that makes accessible the set of component
* document iterators that are {@linkplain #computeFront() positioned on the current document}.
*/

public abstract class AbstractUnionDocumentIterator extends AbstractCompositeDocumentIterator {
  private final static boolean DEBUG = false;
  //private final static boolean ASSERTS = false;

  /** A heap-based semi-indirect priority queue used to keep track of the currently scanned integers. */
  final protected IntHeapSemiIndirectPriorityQueue queue;
  /** The {@link IndirectPriorityQueue#front(int[])} of {@link #queue}, if {@link #frontSize} is not -1. */
  final protected int[] front;
  /** The reference array used for the queue. */
  final protected int[] curr;
  /** A map from indices to interval iterators. */
  final private Reference2ReferenceArrayMap<Index,IntervalIterator> intervalIterators;
  /** A map from indices to the iterators returned for the current document. The key set may
   * not contain an index because the related iterator has never been requested. Moreover,
   * the iterator in this map for a given index may differ from the one in {@link #intervalIterators}
   * because it could be {@link IntervalIterators#TRUE} (in fact, in that case it may even
   * happen that {@link #intervalIterators} does not contain the index). */
  final private Reference2ReferenceArrayMap<Index,IntervalIterator> currentIterators;
  /** An unmodifiable wrapper around {@link #currentIterators}. */
  final private Reference2ReferenceMap<Index,IntervalIterator> unmodifiableCurrentIterators;

  /** The number of valid entries in {@link #front}, or -1 if the front has not been computed for the current document. */
  protected int frontSize = -1;

  /** Creates a new document iterator that computes the OR of the given array of iterators.
   *  @param documentIterator the iterators to be joined.
   * @throws IOException
   */
  protected AbstractUnionDocumentIterator( final DocumentIterator... documentIterator ) throws IOException {
    super( documentIterator );
    this.curr = new int[ n ];

    queue = new IntHeapSemiIndirectPriorityQueue( curr );

    intervalIterators = new Reference2ReferenceArrayMap<Index,IntervalIterator>( indices.size() );
    currentIterators = new Reference2ReferenceArrayMap<Index,IntervalIterator>( indices.size() );
    unmodifiableCurrentIterators = Reference2ReferenceMaps.unmodifiable( currentIterators );

    // Only add to the queue nonempty iterators...
    for ( int i = 0; i < n; i++ ) if ( ( curr[ i ] = documentIterator[ i ].nextDocument() ) != -1 ) queue.enqueue( i );
    // If queue is empty, the process is over
   
    if ( ! queue.isEmpty() ) next = curr[ queue.first() ];
    front = new int[ queue.size() ];
  }


  public int skipTo( final int n ) throws IOException {
    if ( last >= n ) return last;
    if ( next >= n ) return nextDocument();
    if ( queue.isEmpty() ) return Integer.MAX_VALUE;
    last = next = -1;
    currentIterators.clear();
    frontSize = -1; // Invalidate front

    int first, res;
    while( curr[ first = queue.first() ] < n ) {
      res = documentIterator[ first ].skipTo( n );

      // Cannot advance the minimum
      if ( res == Integer.MAX_VALUE ) {
        // Remove it
        queue.dequeue();
        // If nothing else remains, we are done
        if ( queue.isEmpty() ) return Integer.MAX_VALUE;
      }
      else {
        // Advance the top element, and signal this fact to the queue
        curr[ first ] = res;
        queue.changed();
      }
    }
   
    return last = curr[ first ];
  }

 
  public int nextDocument() throws IOException {
    if ( next != -1 ) {
      // We already know what to return
      last = next;
      next = -1;
      return last;
    }

    currentIterators.clear();
    frontSize = -1; // Invalidate front
    if ( queue.isEmpty() ) return last = -1;
   
    // The least element
    int first, c = curr[ queue.first() ];
 
    // Advance all elements equal to the least one
    while( curr[ first = queue.first() ] == c ) {
      if ( ( curr[ first ] = documentIterator[ first ].nextDocument() ) != - 1 ) queue.changed();
      else {
        // Remove it
        queue.dequeue();
        // If nothing else remains, we are done
        if ( queue.isEmpty() ) return last = -1;
      }
    }

    return last = curr[ first ];
  }
 
  /** Forces computation of the current front, returning the number of indices it contains.
   *
   * <p>After a call to this method,
   * the first elements of {@link #front} contain
   * the indices of the {@linkplain AbstractCompositeDocumentIterator#documentIterator component document iterators}
   * that are positioned on the current document. If the front has already been
   * computed for the current document, this method has no side effects.
   *
   * @return the size of the current front (the number of valid entries in {@link #front}).
   */
 
  protected int computeFront() {
    if ( frontSize == -1 ) frontSize = queue.front( front );
    return frontSize;
  }

  public Reference2ReferenceMap<Index,IntervalIterator> intervalIterators() throws IOException {
    final Iterator<Index> i = indices.iterator();
    while( i.hasNext() ) intervalIterator( i.next() );
    return unmodifiableCurrentIterators;
  }

  public IntervalIterator intervalIterator( final Index index ) throws IOException {
    if ( DEBUG ) System.err.println( this + ".intervalIterator(" + index + ")" );
    if ( last == -1 ) throw new IllegalStateException();
    if ( ! indices.contains( index ) ) return IntervalIterators.TRUE;

    IntervalIterator intervalIterator;

    // If the iterator has been created and it's ready, we just return it.   
    if ( ( intervalIterator = currentIterators.get( index ) ) != null ) return intervalIterator;

    boolean atLeastOneIsTrue = false;
 
    /* If all iterators are TRUE, we return TRUE. Otherwise,
     * we skip all FALSE iterators and all iterators whose document
     * is not last, and merge the remaining ones. If
     * all iterators are FALSE, we return FALSE.
     */

    // ALERT: this logic should be in OrDocumentIterator
    int c = 0; // This counts non-TRUE, non-FALSE interval iterators
    IntervalIterator oneNotTrue = null;
    for( int i = computeFront(); i -- != 0; ) {
      intervalIterator = documentIterator[ front[ i ] ].intervalIterator( index );
      if ( intervalIterator == IntervalIterators.TRUE ) atLeastOneIsTrue = true;
      else {
        oneNotTrue = intervalIterator;
        if ( intervalIterator != IntervalIterators.FALSE ) c++;
      }
    }

    // If c == 0, all component iterators are either TRUE or FALSE. We return TRUE if at least one is TRUE.
    if ( c == 0 ) intervalIterator = atLeastOneIsTrue? IntervalIterators.TRUE : IntervalIterators.FALSE;
    else if ( c == 1 ) intervalIterator = oneNotTrue; // If we exclude TRUE and FALSE iterators, just oneNotTrue remains: we pass it upwards.
    else {
      intervalIterator = intervalIterators.get( index );
      if ( intervalIterator == null ) intervalIterators.put( index, intervalIterator = getComposedIntervalIterator( index ) );
      intervalIterator.reset();
    }
       
    currentIterators.put( index, intervalIterator )
    return intervalIterator;
  }

  abstract protected IntervalIterator getComposedIntervalIterator( Index index );

  /** Invokes {@link #acceptOnTruePaths(DocumentIteratorVisitor)} only on component
   * iterators positioned on the current document.
   *
   * @param visitor a visitor.
   * @return true if the visit should continue.
   * @throws IOException
   */
 
  @Override
  public <T> T acceptOnTruePaths( DocumentIteratorVisitor<T> visitor ) throws IOException {
    if ( ! visitor.visitPre( this ) ) return null;
    final int s  = computeFront();
    final T[] a = visitor.newArray( s );
    if ( a == null ) {
      for( int i = 0; i < s; i++ ) if ( documentIterator[ front[ i ] ].acceptOnTruePaths( visitor ) == null ) return null;
    }
    else {
      for( int i = 0; i < s; i++ ) if ( ( a[ i ] = documentIterator[ front[ i ] ].acceptOnTruePaths( visitor ) ) == null ) return null;
    }
    return visitor.visitPost( this, a );
  }
}
TOP

Related Classes of it.unimi.dsi.mg4j.search.AbstractUnionDocumentIterator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.