Package it.unimi.dsi.mg4j.search

Source Code of it.unimi.dsi.mg4j.search.DocumentIteratorBuilderVisitor

package it.unimi.dsi.mg4j.search;

/*    
* MG4J: Managing Gigabytes for Java
*
* Copyright (C) 2006-2010 Sebastiano Vigna
*
*  This library is free software; you can redistribute it and/or modify it
*  under the terms of the GNU Lesser General Public License as published by the Free
*  Software Foundation; either version 3 of the License, or (at your option)
*  any later version.
*
*  This library is distributed in the hope that it will be useful, but
*  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
*  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
*  for more details.
*
*  You should have received a copy of the GNU Lesser General Public License
*  along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/

import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
import it.unimi.dsi.fastutil.objects.Object2ReferenceMap;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceArrayMap;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.index.IndexIterator;
import it.unimi.dsi.mg4j.index.MultiTermIndexIterator;
import it.unimi.dsi.mg4j.index.TooManyTermsException;
import it.unimi.dsi.mg4j.index.payload.Payload;
import it.unimi.dsi.mg4j.query.nodes.AbstractQueryBuilderVisitor;
import it.unimi.dsi.mg4j.query.nodes.Align;
import it.unimi.dsi.mg4j.query.nodes.And;
import it.unimi.dsi.mg4j.query.nodes.Consecutive;
import it.unimi.dsi.mg4j.query.nodes.Difference;
import it.unimi.dsi.mg4j.query.nodes.False;
import it.unimi.dsi.mg4j.query.nodes.LowPass;
import it.unimi.dsi.mg4j.query.nodes.MultiTerm;
import it.unimi.dsi.mg4j.query.nodes.Not;
import it.unimi.dsi.mg4j.query.nodes.Or;
import it.unimi.dsi.mg4j.query.nodes.OrderedAnd;
import it.unimi.dsi.mg4j.query.nodes.Prefix;
import it.unimi.dsi.mg4j.query.nodes.QueryBuilderVisitorException;
import it.unimi.dsi.mg4j.query.nodes.Range;
import it.unimi.dsi.mg4j.query.nodes.Remap;
import it.unimi.dsi.mg4j.query.nodes.Select;
import it.unimi.dsi.mg4j.query.nodes.Term;
import it.unimi.dsi.mg4j.query.nodes.True;
import it.unimi.dsi.mg4j.query.nodes.Weight;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.Map;
import java.util.NoSuchElementException;

/** A {@link it.unimi.dsi.mg4j.query.nodes.QueryBuilderVisitor}
* that builds a {@link it.unimi.dsi.mg4j.search.DocumentIterator}
* resolving the queries using the objects in {@link it.unimi.dsi.mg4j.search}.
*
* <p>This elementary builder visitor invokes {@link it.unimi.dsi.mg4j.index.Index#documents(CharSequence)}
* to build the leaf {@linkplain it.unimi.dsi.mg4j.index.IndexIterator index iterators}. Thus, the
* resulting {@link it.unimi.dsi.mg4j.search.DocumentIterator} should be carefully
* {@linkplain it.unimi.dsi.mg4j.search.DocumentIterator#dispose() disposed} after usage (every
* index iterator may open a file or a socket).
*
* <p>{@link Prefix} and {@link MultiTerm} nodes cause the creation of a {@link MultiTermIndexIterator},
* in the first case by calling {@link it.unimi.dsi.mg4j.index.Index#documents(CharSequence,int)} and
* in the second case by creating a {@link MultiTermIndexIterator} with the name and frequency equal to the
* maximum frequency over all terms. Other implementations might choose differently.
*
* <p>At construction time, you must provide a map from strings to indices that will be used to resolve
* {@link it.unimi.dsi.mg4j.query.nodes.Select} nodes. The map may be <code>null</code>, in which case
* such nodes will cause an {@link java.lang.IllegalArgumentException}.
* If a {@link it.unimi.dsi.mg4j.query.nodes.Select}
* node contains an index name that does not appear in the map a {@link NoSuchElementException}
* will be thrown instead.
*
* <p>A production site will likely substitute this builder visitor with one that reuses
* {@linkplain it.unimi.dsi.mg4j.index.IndexReader index readers} out of a pool.
*
* <p>Instances of this class may be safely reused by calling {@link #prepare()}.
*/

public class DocumentIteratorBuilderVisitor extends AbstractQueryBuilderVisitor<DocumentIterator> {
 
  /** A map associating a textual key to indices. */
  private final Object2ReferenceMap<String, Index> indexMap;
  /** A map associating an object with a <code>parse(String)</code> method to each payload-based index. */
  private final Reference2ReferenceMap<Index, Object> index2Parser;
  /** The default index. */
  private final Index defaultIndex;
  /** The number of documents (fetched from the default index). */
  private final int numberOfDocuments;
  /** The limit on prefix queries provided in the constructor. */
  private final int limit;
  /** The stack of selected indices (changed by {@link Select} nodes). */
  private ObjectArrayList<Index> curr;
  /** The stack of weights. */
  private DoubleArrayList weights;
  /** The last seen, but still not consumed, weight, or {@link Double#NaN}. */
  private double weight;
 
  /** Creates a new builder visitor.
   *
   * @param indexMap a map from index names to indices, to be used in {@link Select} nodes, or <code>null</code>
   * if the only used index is the default index.
   * @param defaultIndex the default index.
   * @param limit a limit that will be used with {@link Prefix} nodes.
   */
  @SuppressWarnings("unchecked")
  public DocumentIteratorBuilderVisitor( final Object2ReferenceMap<String,Index> indexMap, final Index defaultIndex, final int limit ) {
    this( indexMap, Reference2ReferenceMaps.EMPTY_MAP, defaultIndex, limit );
  }
 
  /** Creates a new builder visitor with additional parsers for payload-based indices.
   *
   * @param indexMap a map from index names to indices, to be used in {@link Select} nodes, or <code>null</code>
   * if the only used index is the default index.
   * @param defaultIndex the default index.
   * @param limit a limit that will be used with {@link Prefix} nodes.
   */
  public DocumentIteratorBuilderVisitor( final Object2ReferenceMap<String,Index> indexMap, final Reference2ReferenceMap<Index,Object> index2Parser, final Index defaultIndex, final int limit ) {
    this.indexMap = indexMap;
    this.defaultIndex = defaultIndex;
    this.index2Parser = index2Parser;
    this.limit = limit;
    weights = new DoubleArrayList();
    weight = Double.NaN;
    curr = new ObjectArrayList<Index>();
    curr.push( defaultIndex );
    this.numberOfDocuments = defaultIndex.numberOfDocuments;
  }
 
  /** Pushes {@link #weight}, if it is not {@link Double#NaN}, or 1, otherwise, on the {@linkplain #weights stack of weights}; in either case, sets {@link #weight} to {@link Double#NaN}.
   */

  private void pushWeight() {
    weights.push( Double.isNaN( weight ) ? 1 : weight );
    weight = Double.NaN;
  }

  /** Returns {@link #weight}, if it is not {@link Double#NaN}, or 1, otherwise; in either case, sets {@link #weight} to {@link Double#NaN}.
   *
   * @return {@link #weight}, if it is not {@link Double#NaN}, or 1, otherwise.
   */

  private double weight() {
    final double result = Double.isNaN( weight ) ? 1 : weight;
    weight = Double.NaN;
    return result;
  }

  public DocumentIteratorBuilderVisitor copy() {
    return new DocumentIteratorBuilderVisitor( indexMap, defaultIndex, limit );
  }
 
  public DocumentIteratorBuilderVisitor prepare() {
    curr.size( 1 );
    weights.size( 0 );
    weight = Double.NaN;
    return this;
  }

  public DocumentIterator[] newArray( final int len ) { return new DocumentIterator[ len ]; }

  public DocumentIterator visit( final Term node ) throws QueryBuilderVisitorException {
    try {
      if ( node.termNumber != -1 ) return curr.top().documents( node.termNumber ).weight( weight() );
      return curr.top().documents( node.term ).weight( weight() );
    }
    catch ( IOException e ) {
      throw new QueryBuilderVisitorException( e );
    }
  }
 
  public DocumentIterator visit( final Prefix node ) throws QueryBuilderVisitorException {
    try {
      return curr.top().documents( node.prefix, limit ).weight( weight() );
    }
    catch ( IOException e ) {
      throw new QueryBuilderVisitorException( e );
    }
    catch ( TooManyTermsException e ) {
      throw new QueryBuilderVisitorException( e );
    }
  }
   
  public DocumentIterator visit( Range node ) throws QueryBuilderVisitorException {
    final Index index = curr.top();
    if ( ! index.hasPayloads ) throw new IllegalStateException( "Index " + index + " does not have payloads" );
    try {
      final Object parser = index2Parser.containsKey( index ) ? index2Parser.get( index ) : index.payload;
      final Method method = parser.getClass().getMethod( "parse", String.class );
      final Payload left = index.payload.copy(), right = index.payload.copy();
      if ( node.left != null ) left.set( method.invoke( parser, node.left.toString() ) );
      if ( node.right != null ) right.set( method.invoke( parser, node.right.toString() ) );
      return PayloadPredicateDocumentIterator.getInstance( index.documents( 0 ),
          index.payload.rangeFilter( node.left == null ? null : left, node.right == null ? null : right ) ).weight( weight() );
    }
    catch( InvocationTargetException e ) {
      throw new QueryBuilderVisitorException( e.getCause() );
    }
    catch ( Exception e ) {
      throw new QueryBuilderVisitorException( e );
    }
  }
 
  public boolean visitPre( final And node ) throws QueryBuilderVisitorException {
    pushWeight();
    return true;
  }
 
  public DocumentIterator visitPost( final And node, final DocumentIterator[] subNode ) throws QueryBuilderVisitorException {
    try {
      return AndDocumentIterator.getInstance( curr.top(), subNode ).weight( weights.popDouble() );
    }
    catch ( IOException e ) {
      throw new QueryBuilderVisitorException( e );
    }
  }

  public boolean visitPre( final Consecutive node ) throws QueryBuilderVisitorException {
    pushWeight();
    return true;
  }
 
  public DocumentIterator visitPost( final Consecutive node, final DocumentIterator[] subNode ) throws QueryBuilderVisitorException {
    try {
      return ConsecutiveDocumentIterator.getInstance( subNode, node.gap ).weight( weights.popDouble() );
    }
    catch ( IOException e ) {
      throw new QueryBuilderVisitorException( e );
    }
  }

  public boolean visitPre( final LowPass node ) throws QueryBuilderVisitorException {
    pushWeight();
    return true;
  }
 
  public DocumentIterator visitPost( final LowPass node, final DocumentIterator subNode ) {
    return LowPassDocumentIterator.getInstance( subNode, node.k ).weight( weights.popDouble() );
  }

  public boolean visitPre( final Not node ) throws QueryBuilderVisitorException {
    pushWeight();
    return true;
  }
 

  public DocumentIterator visitPost( final Not node, final DocumentIterator subNode ) throws QueryBuilderVisitorException {
    try {
      return NotDocumentIterator.getInstance( subNode, numberOfDocuments ).weight( weights.popDouble() );
    }
    catch ( IOException e ) {
      throw new QueryBuilderVisitorException( e );
    }
  }

  public boolean visitPre( final Or node ) throws QueryBuilderVisitorException {
    pushWeight();
    return true;
  }
 
  public DocumentIterator visitPost( final Or node, final DocumentIterator[] subNode ) throws QueryBuilderVisitorException {
    try {
      return OrDocumentIterator.getInstance( subNode ).weight( weights.popDouble() );
    }
    catch ( IOException e ) {
      throw new QueryBuilderVisitorException( e );
    }
  }
 
  public boolean visitPre( final OrderedAnd node ) throws QueryBuilderVisitorException {
    pushWeight();
    return true;
  }
 
  public DocumentIterator visitPost( final OrderedAnd node, final DocumentIterator[] subNode ) throws QueryBuilderVisitorException {
    try {
      return OrderedAndDocumentIterator.getInstance( curr.top(), subNode ).weight( weights.popDouble() );
    }
    catch ( IOException e ) {
      throw new QueryBuilderVisitorException( e );
    }
  }
 
  public boolean visitPre( final Align node ) throws QueryBuilderVisitorException {
    pushWeight();
    return true;
  }

  public DocumentIterator visitPost( final Align node, final DocumentIterator[] subNode ) throws QueryBuilderVisitorException {
    return AlignDocumentIterator.getInstance( subNode[ 0 ], subNode[ 1 ] ).weight( weights.popDouble() );
  }

  public boolean visitPre( final Difference node ) throws QueryBuilderVisitorException {
    pushWeight();
    return true;
  }

  public DocumentIterator visitPost( final Difference node, final DocumentIterator[] subNode ) {
    return DifferenceDocumentIterator.getInstance( subNode[ 0 ], subNode[ 1 ], node.leftMargin, node.rightMargin ).weight( weights.popDouble() );
  }

  public boolean visitPre( final MultiTerm node ) throws QueryBuilderVisitorException {
    pushWeight();
    return true;
  }
 
  public DocumentIterator visitPost( final MultiTerm node, final DocumentIterator subNode[] ) throws QueryBuilderVisitorException {
    final IndexIterator[] indexIterator = new IndexIterator[ subNode.length ];
    System.arraycopy( subNode, 0, indexIterator, 0, indexIterator.length );
    IndexIterator result;
    try {
      result = MultiTermIndexIterator.getInstance( curr.top(), indexIterator ).weight( weights.popDouble() );
    }
    catch ( IOException e ) {
      throw new QueryBuilderVisitorException( e );
    }
    result.term( node.toString() );
    return result;
  }

  public boolean visitPre( final Select node ) throws QueryBuilderVisitorException {
    if ( indexMap == null ) throw new IllegalArgumentException( "You cannot use Select nodes without an index map" );
    final Index index = indexMap.get( node.index.toString() );
    if ( index == null ) throw new NoSuchElementException( "The selected index (" + node.index + ")" + " does not appear in the index map (" + indexMap + ")" );
    curr.push( indexMap.get( node.index.toString() ) );
    return true;
  }

  public DocumentIterator visitPost( final Select node, final DocumentIterator subNode ) {
    curr.pop();
    return subNode;
  }

  public boolean visitPre( final Remap node ) throws QueryBuilderVisitorException {
    pushWeight();
    return true;
  }

  public DocumentIterator visitPost( final Remap node, final DocumentIterator subNode ) {
    if ( indexMap == null ) throw new IllegalArgumentException( "You cannot use Remap nodes without an index map" );
    final Reference2ReferenceArrayMap<Index, Index> indexInverseRemapping = new Reference2ReferenceArrayMap<Index, Index>( node.indexInverseRemapping.size() );
    for( Map.Entry<String,String> e: node.indexInverseRemapping.entrySet() ) {
      final Index externalIndex = indexMap.get( e.getKey() );
      final Index internalIndex = indexMap.get( e.getValue() );
      if ( internalIndex == null ) throw new NoSuchElementException( "The internal index \"" + e.getValue() + "\" does not appear in the index map (" + indexMap + ")" );
      if ( externalIndex == null ) throw new NoSuchElementException( "The external index \"" + e.getKey() + "\" does not appear in the index map (" + indexMap + ")" );
      indexInverseRemapping.put( externalIndex, internalIndex );
    }
    return new RemappingDocumentIterator( subNode, indexInverseRemapping );
  }

  public boolean visitPre( final Weight node ) throws QueryBuilderVisitorException {
    weight = node.weight;
    return true;
  }

  public DocumentIterator visitPost( final Weight node, final DocumentIterator subNode ) {
    return subNode;
  }

  public DocumentIterator visit( True node ) throws QueryBuilderVisitorException {
    return TrueDocumentIterator.getInstance( curr.top() );
  }

  public DocumentIterator visit( False node ) throws QueryBuilderVisitorException {
    return FalseDocumentIterator.getInstance( curr.top() );
  }
}
TOP

Related Classes of it.unimi.dsi.mg4j.search.DocumentIteratorBuilderVisitor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.