// Source code of it.unimi.dsi.mg4j.search.AlignDocumentIterator (package it.unimi.dsi.mg4j.search).

package it.unimi.dsi.mg4j.search;

/*    
* MG4J: Managing Gigabytes for Java
*
* Copyright (C) 2008-2010 Sebastiano Vigna
*
*  This library is free software; you can redistribute it and/or modify it
*  under the terms of the GNU Lesser General Public License as published by the Free
*  Software Foundation; either version 3 of the License, or (at your option)
*  any later version.
*
*  This library is distributed in the hope that it will be useful, but
*  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
*  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
*  for more details.
*
*  You should have received a copy of the GNU Lesser General Public License
*  along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/

import it.unimi.dsi.fastutil.ints.IntSet;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceArrayMap;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.index.IndexIterator;
import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;
import it.unimi.dsi.util.Interval;

import java.io.IOException;


/** A document iterator that aligns the results of a number of document iterators over
* different indices.
*
* <p>This class is an example of cross-index computation. As in the case of an
* {@link AndDocumentIterator}, we intersect the posting lists. However, once
* we get to the index level, we actually return just intervals that appear in
* <em>all</em> component iterators. Of course, this is meaningful only if all
* indices represent different views on the same data, a typical example being
* semantic tagging.
*
* <p>An instance of this class exposes a single interval iterator associated to
* the index of the <em>first</em> component iterator, as all interval iterators
* are exhausted during the computation of their intersection.
* Correspondingly, a call to {@link IntervalIterator#intervalTerms(IntSet)} just
* returns the terms related to the <em>first</em> component iterator.
*/

public class AlignDocumentIterator extends AbstractDocumentIterator {
  private final static boolean DEBUG = false;

  /** The first operand, to be aligned. */
  final private DocumentIterator aligneeIterator;
  /** The second operand, to be used to align the first operand. */
  final private DocumentIterator alignerIterator;
  /** {@link #aligneeIterator}, if it is an {@link IndexIterator}. */
  final private IndexIterator aligneeIndexIterator;
  /** {@link #alignerIterator}, if it is an {@link IndexIterator}. */
  final private IndexIterator alignerIndexIterator;
  /** The sole index involved in this iterator. */
  final private Index index;
  /** A singleton containing {@link #currentIterator}. */
  final private Reference2ReferenceMap<Index,IntervalIterator> currentIterators;
  /** An unmodifiable wrapper around {@link #currentIterator}. */
  final private Reference2ReferenceMap<Index,IntervalIterator> unmodifiableCurrentIterators;
  /** The interval iterator associated to this document iterator, or <code>null</code>. */
  private IntervalIterator intervalIterator;
  /** The iterator returned for the current document, if any, or <code>null</code>. */
  private IntervalIterator currentIterator;

  /** Returns a document iterator that aligns the first iterator to the second.
   *
   * @param aligneeIterator the iterator to be aligned.
   * @param alignerIterator the iterator used to align <code>aligneeIterator</code>.
   *
   * @return a document iterator that computes the alignment of <code>aligneeIterator</code> on <code>alignerIterator</code>.
   */
  public static DocumentIterator getInstance( final DocumentIterator aligneeIterator, final DocumentIterator alignerIterator ) {
    return new AlignDocumentIterator( aligneeIterator, alignerIterator );
  }

  protected AlignDocumentIterator( final DocumentIterator aligneeIterator, final DocumentIterator alignerIterator ) {
    this.aligneeIterator = aligneeIterator;
    this.alignerIterator = alignerIterator;
    if ( aligneeIterator instanceof IndexIterator && alignerIterator instanceof IndexIterator ) {
      aligneeIndexIterator = (IndexIterator)aligneeIterator;
      alignerIndexIterator = (IndexIterator)alignerIterator;
    }
    else aligneeIndexIterator = alignerIndexIterator = null;
    if ( aligneeIterator.indices().size() != 1 || alignerIterator.indices().size() != 1 ) throw new IllegalArgumentException( "You can align single-index iterators only" );
    index = aligneeIterator.indices().iterator().next();
    currentIterators = new Reference2ReferenceArrayMap<Index,IntervalIterator>( 1 );
    unmodifiableCurrentIterators = Reference2ReferenceMaps.unmodifiable( currentIterators );
  }

  public ReferenceSet<Index> indices() {
    return aligneeIterator.indices();
  }
 
  public int nextDocument() throws IOException {
    if ( next >= 0 ) {
      last = next;
      next = -1;
      return last;
    }
   
    currentIterators.clear();
    currentIterator = null;

    int alignee, aligner;
    alignee = aligneeIterator.nextDocument();
    aligner = alignerIterator.nextDocument();
   
    while( alignee != -1 && aligner != -1 ) {
      currentIterator = null;

      if ( alignee < aligner ) alignee = aligneeIterator.nextDocument();
      else if ( aligner < alignee ) aligner = alignerIterator.nextDocument();
      else {
        last = alignee;
        if ( intervalIterator().hasNext() ) return last;
        alignee = aligneeIterator.nextDocument();
        aligner = alignerIterator.nextDocument();
      }
    }

    return last = next = -1;
 
 
  public int skipTo( final int n ) throws IOException {
    // The easy case.
    if ( last >= n ) return last;
    if ( next >= n ) return nextDocument();
    last = next = -1;
   
    currentIterators.clear();
    currentIterator = null;

    int alignee, aligner;
    alignee = aligneeIterator.skipTo( n );
    aligner = alignerIterator.skipTo( n );
   
    while( alignee != Integer.MAX_VALUE && aligner != Integer.MAX_VALUE ) {
      currentIterator = null;

      if ( alignee < aligner ) alignee = aligneeIterator.skipTo( aligner );
      else if ( aligner < alignee ) aligner = alignerIterator.skipTo( alignee );
      else {
        last = alignee;
        if ( intervalIterator().hasNext() ) {
          next = -1;
          return last;
        }
        alignee = aligneeIterator.nextDocument();
        aligner = alignerIterator.nextDocument();
        if ( aligner == -1 || alignee == -1 ) break;
      }
    }
   
    last = -1;
    return Integer.MAX_VALUE;
  }

  public IntervalIterator intervalIterator() throws IOException {
    return intervalIterator( index );
  }

  public Reference2ReferenceMap<Index, IntervalIterator> intervalIterators() throws IOException {
    currentIterators.put( index, intervalIterator() );
    return unmodifiableCurrentIterators;
  }

  public IntervalIterator intervalIterator( final Index index ) throws IOException {
    if ( DEBUG ) System.err.println( this + ".intervalIterator(" + index + ")" );
    if ( last == -1 ) throw new IllegalStateException();
    if ( index != this.index ) return IntervalIterators.TRUE;

    // If the iterator has been created and it's ready, we just return it.   
    if ( currentIterator != null ) return currentIterator;
     
    final IntervalIterator aligneeIntervalIterator = aligneeIterator.intervalIterator(), alignerIntervalIterator = alignerIterator.intervalIterator();
   
    if ( aligneeIntervalIterator == IntervalIterators.TRUE || alignerIntervalIterator == IntervalIterators.TRUE )
      return currentIterator = aligneeIntervalIterator == alignerIntervalIterator ? IntervalIterators.TRUE : IntervalIterators.FALSE;

    if ( intervalIterator == null ) intervalIterator = aligneeIndexIterator == null ? new AlignIntervalIterator() : new AlignIndexIntervalIterator();

    intervalIterator.reset();
    return currentIterator = intervalIterator;
  }
 
  public void dispose() throws IOException {
    aligneeIterator.dispose();
    alignerIterator.dispose();
  }
 
  public <T> T accept( final DocumentIteratorVisitor<T> visitor ) throws IOException {
    if ( ! visitor.visitPre( this ) ) return null;
    final T[] a = visitor.newArray( 2 );
    if ( a == null ) {
      if ( aligneeIterator.accept( visitor ) == null ) return null;
      if ( alignerIterator.accept( visitor ) == null ) return null;
    }
    else {
      if ( ( a[ 0 ] = aligneeIterator.accept( visitor ) ) == null ) return null;
      if ( ( a[ 1 ] = alignerIterator.accept( visitor ) ) == null ) return null;
    }
    return visitor.visitPost( this, a );
  }

  public <T> T acceptOnTruePaths( final DocumentIteratorVisitor<T> visitor ) throws IOException {
    if ( ! visitor.visitPre( this ) ) return null;
    final T[] a = visitor.newArray( 1 );
    if ( a == null ) {
      if ( aligneeIterator.acceptOnTruePaths( visitor ) == null ) return null;
    }
    else {
      if ( ( a[ 0 ] = aligneeIterator.acceptOnTruePaths( visitor ) ) == null ) return null;
    }
    return visitor.visitPost( this, a );
  }
 
  /** An interval iterator returning the intersection of the component interval iterators. */
 
  private class AlignIntervalIterator extends AbstractIntervalIterator implements IntervalIterator {
    /** The interval iterator of the alignee iterator. */
    private IntervalIterator aligneeIntervalIterator;
    /** The interval iterator of the aligner iterator. */
    private IntervalIterator alignerIntervalIterator;
    /** Whether the scan is over. */
    private boolean endOfProcess;

    public void reset() throws IOException {
      next = null;
      endOfProcess = false;

      aligneeIntervalIterator = aligneeIterator.intervalIterator();
      alignerIntervalIterator = alignerIterator.intervalIterator();
      if ( aligneeIntervalIterator == IntervalIterators.TRUE || alignerIntervalIterator == IntervalIterators.TRUE ) {
        // If by any chance we meet a TRUE iterator we are just false
        endOfProcess = true;
        return;
      }
    }

    public void intervalTerms( final IntSet terms ) {
      aligneeIntervalIterator.intervalTerms( terms );
    }

    public Interval nextInterval() throws IOException {
      if ( next != null ) {
        final Interval result = next;
        next = null;
        return result;
      }

      if ( endOfProcess ) return null;
     
      Interval aligneeInterval = null, alignerInterval = null;
     
      aligneeInterval = aligneeIntervalIterator.nextInterval();
      alignerInterval = alignerIntervalIterator.nextInterval();
      if ( aligneeInterval == null || alignerInterval == null ) {
        endOfProcess = true;
        return null;
      }
     
      while ( ! aligneeInterval.equals( alignerInterval ) ) {
        if ( aligneeInterval.left <= alignerInterval.left ) {
          if ( ( aligneeInterval = aligneeIntervalIterator.nextInterval() ) == null ) {
            endOfProcess = true;
            return null;
          }
        }
        else {
          if ( ( alignerInterval = alignerIntervalIterator.nextInterval() ) == null ) {
            endOfProcess = true;
            return null;
          }
        }
      }

      return aligneeInterval;
    }
   
    public int extent() {
      return aligneeIntervalIterator.extent();
    }
  }


  /** An interval iterator returning the intersection of the component interval iterators. */
 
  private class AlignIndexIntervalIterator extends AbstractIntervalIterator implements IntervalIterator {
    /** Whether the scan is over. */
    private boolean endOfProcess;
    /** The positions of the alignee iterator. */
    private int[] aligneePosition;
    /** The positions of the aligner iterator. */
    private int[] alignerPosition;
    /** The count of the alignee iterator. */
    private int aligneeCount;
    /** The count of the aligner iterator. */
    private int alignerCount;
    /** The position of the alignee iterator. */
    private int aligneeCurr;
    /** The position of the aligner iterator. */
    private int alignerCurr;

    public void reset() throws IOException {
      next = null;
      endOfProcess = false;
     
      aligneePosition = aligneeIndexIterator.positionArray();
      alignerPosition = alignerIndexIterator.positionArray();
      aligneeCount = aligneeIndexIterator.count();
      alignerCount = alignerIndexIterator.count();
      aligneeCurr = alignerCurr = -1;
    }
   
    public void intervalTerms( final IntSet terms ) {
      terms.add( aligneeIndexIterator.termNumber() );
    }

    public Interval nextInterval() {
      if ( next != null ) {
        final Interval result = next;
        next = null;
        return result;
      }
     
      if ( endOfProcess ) return null;

      final int[] aligneePosition = this.aligneePosition, alignerPosition = this.alignerPosition;
     
      if ( ++aligneeCurr == aligneeCount || ++alignerCurr == alignerCount ) {
        endOfProcess = true;
        return null;
      }
     
      while ( aligneePosition[ aligneeCurr ] != alignerPosition[ alignerCurr ] ) {
        if ( aligneePosition[ aligneeCurr ] < alignerPosition[ alignerCurr ] ) {
          if ( ++aligneeCurr == aligneeCount ) {
            endOfProcess = true;
            return null;
          }
        }
        else {
          if ( ++alignerCurr == alignerCount ) {
            endOfProcess = true;
            return null;
          }
        }
      }

      return Interval.valueOf( alignerPosition[ alignerCurr ] );
    }
   
    public int extent() {
      return 1;
    }
  }
}
// Page retrieved from www.massapi.com.
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by Oracle Inc. Contact coftware#gmail.com.