
package it.unimi.dsi.mg4j.index;

/*    
* MG4J: Managing Gigabytes for Java
*
* Copyright (C) 2004-2010 Sebastiano Vigna
*
*  This library is free software; you can redistribute it and/or modify it
*  under the terms of the GNU Lesser General Public License as published by the Free
*  Software Foundation; either version 3 of the License, or (at your option)
*  any later version.
*
*  This library is distributed in the hope that it will be useful, but
*  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
*  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
*  for more details.
*
*  You should have received a copy of the GNU Lesser General Public License
*  along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/

import it.unimi.dsi.Util;
import it.unimi.dsi.fastutil.ints.AbstractIntIterator;
import it.unimi.dsi.fastutil.ints.AbstractIntList;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntIterable;
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.ints.IntList;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongList;
import it.unimi.dsi.fastutil.longs.LongLists;
import it.unimi.dsi.io.ByteBufferInputStream;
import it.unimi.dsi.io.InputBitStream;
import it.unimi.dsi.mg4j.index.CompressionFlags.Coding;
import it.unimi.dsi.mg4j.index.CompressionFlags.Component;
import it.unimi.dsi.mg4j.index.Index.UriKeys;
import it.unimi.dsi.mg4j.index.payload.Payload;
import it.unimi.dsi.mg4j.util.SemiExternalOffsetList;
import it.unimi.dsi.sux4j.util.EliasFanoLongBigList;
import it.unimi.dsi.util.PrefixMap;
import it.unimi.dsi.util.Properties;
import it.unimi.dsi.util.StringMap;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.channels.FileChannel.MapMode;
import java.util.EnumMap;
import java.util.Map;
import java.util.NoSuchElementException;

import org.apache.commons.configuration.ConfigurationException;
import org.apache.log4j.Logger;


/** A static container providing facilities to load an index based on data stored on disk.
*
* <P>This class contains several useful static methods
* such as {@link #readOffsets(InputBitStream, int)} and {@link #readSizes(CharSequence, int)},
* and static factory methods such as {@link #getInstance(CharSequence, boolean, boolean, boolean, EnumMap)}
* that take care of reading the properties associated with the index, identifying
* the correct {@link it.unimi.dsi.mg4j.index.Index} implementation that
* should be used to load the index, and loading the necessary data into memory.
*
* <p>As an option, a disk-based index can be <em>loaded</em> into main memory (key: {@link Index.UriKeys#INMEMORY}), returning
* an {@link it.unimi.dsi.mg4j.index.InMemoryIndex}/{@link InMemoryHPIndex}, or <em>mapped</em> into main memory (key: {@link Index.UriKeys#MAPPED}),
* returning a {@link MemoryMappedIndex}/{@link MemoryMappedHPIndex} (note that the value assigned to the keys is irrelevant).
* In both cases some insurmountable Java problems
* prevent using indices whose size exceeds two gigabytes (but see {@link MemoryMappedIndex} for
* some elaboration on this topic).
* <p>Moreover, by default the
* term-offset list is accessed using a {@link it.unimi.dsi.mg4j.util.SemiExternalOffsetList}
* with a step of {@link #DEFAULT_OFFSET_STEP}. This behaviour can be changed using
* the URI key {@link UriKeys#OFFSETSTEP}.
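*
* <p>For example, a minimal sketch of loading a memory-mapped index with a custom offset step
* (the basename <samp>index/text</samp> is just a placeholder):
* <pre>
* EnumMap&lt;Index.UriKeys,String&gt; queryProperties = new EnumMap&lt;Index.UriKeys,String&gt;( Index.UriKeys.class );
* queryProperties.put( Index.UriKeys.MAPPED, "1" );       // the value is irrelevant
* queryProperties.put( Index.UriKeys.OFFSETSTEP, "512" );
* BitStreamIndex index = DiskBasedIndex.getInstance( "index/text", true, false, true, queryProperties );
* </pre>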
*
* <p>Disk-based indices are the workhorse of MG4J. All other indices (clustered,
* remote, etc.) ultimately rely on disk-based indices to provide results.
*
* <p>Note that not all data produced by {@link it.unimi.dsi.mg4j.tool.Scan} and
* by the other indexing utilities are actually necessary to run a disk-based
* index. Usually the property file and the index file (plus the positions file,
* for {@linkplain BitStreamHPIndex high-performance indices}) are sufficient: if
* random access is needed, the offsets file must also be present, and if the
* compression method requires document sizes or sizes are requested explicitly,
* the sizes file must also be present. A {@link StringMap}
* and possibly a {@link PrefixMap} will be fetched
* automatically by {@link #getInstance(CharSequence, boolean, boolean)}
* using standard extensions.
*
* <h2>Thread safety</h2>
*
* <p>A disk-based index is thread safe as long as the offset list, the size list and
* the term/prefix map are. The static factory methods provided by this class load
* offsets and sizes using thread-safe data structures. If, instead, you invoke
* a constructor directly, it is your responsibility to pass thread-safe data structures.
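* For instance, a semi-external offset list built by hand can be synchronized along the lines of
* what the factory methods do (the names here are placeholders):
* <pre>
* LongList offsets = LongLists.synchronize( new SemiExternalOffsetList(
*     new InputBitStream( basename + DiskBasedIndex.OFFSETS_EXTENSION, 1024 ),
*     DiskBasedIndex.DEFAULT_OFFSET_STEP, numberOfTerms + 1 ) );
* </pre>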
*
* @author Sebastiano Vigna
* @since 1.1
*/

public class DiskBasedIndex {
  private static final Logger LOGGER = Util.getLogger( DiskBasedIndex.class );
  private static final long serialVersionUID = 0;

  /** The default value for the query parameter {@link Index.UriKeys#OFFSETSTEP}. */
  public final static int DEFAULT_OFFSET_STEP = 256;

  /** Standard extension for the index bitstream. */
  public static final String INDEX_EXTENSION = ".index";
  /** Standard extension for the positions bitstream of a {@linkplain BitStreamHPIndexWriter high-performance index}. */
  public static final String POSITIONS_EXTENSION = ".positions";
  /** Standard extension for the index properties. */
  public static final String PROPERTIES_EXTENSION = ".properties";
  /** Standard extension for the file of sizes. */
  public static final String SIZES_EXTENSION = ".sizes";
  /** Standard extension for the file of offsets. */
  public static final String OFFSETS_EXTENSION = ".offsets";
  /** Standard extension for the file of lengths of positions. */
  public static final String POSITIONS_NUMBER_OF_BITS_EXTENSION = ".posnumbits";
  /** Standard extension for the file of global counts. */
  public static final String GLOBCOUNTS_EXTENSION = ".globcounts";
  /** Standard extension for the file of frequencies. */
  public static final String FREQUENCIES_EXTENSION = ".frequencies";
  /** Standard extension for the file of terms. */
  public static final String TERMS_EXTENSION = ".terms";
  /** Standard extension for the file of terms, unsorted. */
  public static final String UNSORTED_TERMS_EXTENSION = ".terms.unsorted";
  /** Standard extension for the term map. */
  public static final String TERMMAP_EXTENSION = ".termmap";
  /** Standard extension for the prefix map. */
  public static final String PREFIXMAP_EXTENSION = ".prefixmap";
  /** Standard extension for the stats file. */
  public static final String STATS_EXTENSION = ".stats";
 
  private DiskBasedIndex() {}
 
  /** Utility method to load a compressed offset file into a list.
   *
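   * <p>A minimal usage sketch (the variable names are placeholders); the returned list contains one
   * entry per term plus a final entry giving the overall size of the index file:
   * <pre>
   * InputBitStream in = new InputBitStream( basename + DiskBasedIndex.OFFSETS_EXTENSION );
   * LongList offsets = DiskBasedIndex.readOffsets( in, numberOfTerms );
   * in.close();
   * long offsetOfTerm = offsets.getLong( t );             // where the t-th inverted list starts
   * long indexLength = offsets.getLong( numberOfTerms );  // size of the whole index file
   * </pre>
   *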
   * @param in the input bit stream providing the offsets (see {@link BitStreamIndexWriter}).
   * @param T the number of terms indexed.
   * @return a list of longs backed by an array; the list has
   * an additional final element of index <code>T</code> that gives the number
   * of bytes of the index file.
   */

  public static LongList readOffsets( final InputBitStream in, final int T ) throws IOException {
    final long[] offset = new long[ T + 1 ];
    LOGGER.debug( "Loading offsets..." );
    offset[ 0 ] = in.readLongGamma();
    for( int i = 0; i < T; i++ ) offset[ i + 1 ] = in.readLongGamma() + offset[ i ];
    LOGGER.debug( "Completed." );
    return LongArrayList.wrap( offset );
  }

  /** Utility method to load a compressed offset file into a list.
   *
   * @param filename the file containing the offsets (see {@link BitStreamIndexWriter}).
   * @param T the number of terms indexed.
   * @return a list of longs backed by an array; the list has
   * an additional final element of index <code>T</code> that gives the number
   * of bytes of the index file.
   */

  public static LongList readOffsets( final CharSequence filename, final int T ) throws IOException {
    final InputBitStream in = new InputBitStream( filename.toString() );
    final long[] offset = new long[ T + 1 ];
    LOGGER.debug( "Loading offsets..." );
    offset[ 0 ] = in.readLongGamma();
    for( int i = 0; i < T; i++ ) offset[ i + 1 ] = in.readLongGamma() + offset[ i ];
    LOGGER.debug( "Completed." );
    in.close();
    return LongArrayList.wrap( offset );
  }

  /** Utility method to load a compressed size file into a list.
   *
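   * <p>A minimal usage sketch (the variable names are placeholders):
   * <pre>
   * IntList sizes = DiskBasedIndex.readSizes( basename + DiskBasedIndex.SIZES_EXTENSION, numberOfDocuments );
   * int sizeOfDocument = sizes.getInt( d );
   * </pre>
   *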
   * @param filename the file containing the &gamma;-coded sizes (see {@link BitStreamIndexWriter}).
   * @param N the number of documents.
   * @return a list of integers backed by an array.
   */

  public static IntList readSizes( final CharSequence filename, final int N ) throws IOException {
    final int[] size = new int[ N ];
    final InputBitStream in = new InputBitStream( filename.toString() );
    LOGGER.debug( "Loading sizes..." );
    in.readGammas( size, N );     
    LOGGER.debug( "Completed." );
    in.close();
    return IntArrayList.wrap( size );
  }

  /** Utility method to load a compressed size file into an {@linkplain EliasFanoLongBigList Elias&ndash;Fano compressed list}.
   *
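   * <p>This variant keeps the sizes in an {@linkplain EliasFanoLongBigList Elias&ndash;Fano compressed list},
   * trading some access speed for a smaller memory footprint; the factory methods of this class select it
   * when the URI key {@link UriKeys#SUCCINCTSIZES} is set. A minimal usage sketch (names are placeholders):
   * <pre>
   * IntList sizes = DiskBasedIndex.readSizesSuccinct( basename + DiskBasedIndex.SIZES_EXTENSION, numberOfDocuments );
   * int sizeOfDocument = sizes.getInt( d );
   * </pre>
   *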
   * @param filename the file containing the &gamma;-coded sizes (see {@link BitStreamIndexWriter}).
   * @param N the number of documents indexed.
   * @return a list of integers backed by an {@linkplain EliasFanoLongBigList Elias&ndash;Fano compressed list}.
   * @throws IOException
   */

  public static IntList readSizesSuccinct( final CharSequence filename, final int N ) throws IOException {
    LOGGER.debug( "Loading sizes..." );
    final IntList sizes = new AbstractIntList() {
      final EliasFanoLongBigList list = new EliasFanoLongBigList( new GammaCodedIterableList( BinIO.loadBytes( filename ), N ) );

      public int getInt( int index ) {
        return (int)list.getLong( index );
      }

      public int size() {
        return list.size();
      }
    };
    LOGGER.debug( "Completed." );
    return sizes;
  }

  // TODO: replace this with a general-purpose class
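  /** An {@link IntIterable} view over <code>n</code> natural numbers stored as consecutive &gamma; codes in a byte array. */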
  private static class GammaCodedIterableList implements IntIterable {
    private final int n;
    private final byte[] array;

    public GammaCodedIterableList( final byte []array, final int n ) {
      this.array = array;
      this.n = n;
    }

    public IntIterator iterator() {
      return new AbstractIntIterator() {
        final InputBitStream ibs = new InputBitStream( array );
        int pos;
       
        public boolean hasNext() {
          return pos < n;
        }
       
        public int nextInt() {
          if ( ! hasNext() ) throw new NoSuchElementException();
          pos++;
          try {
            return ibs.readGamma();
          }
          catch ( IOException e ) {
            throw new RuntimeException( e );
          }
        }
      };
    }
  }
 
 
  /** Utility static method that loads a term map.
   *
   * @param filename the name of the file containing the term map.
   * @return the map, or <code>null</code> if the file did not exist.
   * @throws IOException if some IOException (other than {@link FileNotFoundException}) occurred.
   */
  @SuppressWarnings("unchecked")
  public static StringMap<? extends CharSequence> loadStringMap( final String filename ) throws IOException {
    try {
      return (StringMap<? extends CharSequence>) BinIO.loadObject( filename );
    } catch ( FileNotFoundException e ) {
      return null;
    } catch ( ClassNotFoundException e ) {
      throw new RuntimeException( e );
    }
  }

  /** Utility static method that loads a prefix map.
   *
   * @param filename the name of the file containing the prefix map.
   * @return the map, or <code>null</code> if the file did not exist.
   * @throws IOException if some IOException (other than {@link FileNotFoundException}) occurred.
   */
  @SuppressWarnings("unchecked")
  public static PrefixMap<? extends CharSequence> loadPrefixMap( final String filename ) throws IOException {
    try {
      return  (PrefixMap<? extends CharSequence>) BinIO.loadObject( filename );
    } catch ( FileNotFoundException e ) {
      return null;
    } catch ( ClassNotFoundException e ) {
      throw new RuntimeException( e );
    }
  }

  /** Returns a new disk-based index, loading exactly the specified parts and using preloaded {@link Properties}.
   *
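   * <p>A minimal calling sketch, assuming the properties have already been loaded and no term or
   * prefix map is needed (the basename is a placeholder):
   * <pre>
   * Properties p = new Properties( basename + DiskBasedIndex.PROPERTIES_EXTENSION );
   * BitStreamIndex index = DiskBasedIndex.getInstance( basename, p, null, null, true, false, null );
   * </pre>
   *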
   * @param basename the basename of the index.
   * @param properties the properties obtained from the given basename.
   * @param termMap the term map for this index, or <code>null</code> for no term map.
   * @param prefixMap the prefix map for this index, or <code>null</code> for no prefix map.
   * @param randomAccess whether the index should be accessible randomly (e.g., if it will
   * be possible to call {@link IndexReader#documents(int)} on the index readers returned by the index).
   * @param documentSizes if true, document sizes will be loaded (note that sometimes document sizes
   * might be loaded anyway because the compression method for positions requires it).
   * @param queryProperties a map containing associations between {@link Index.UriKeys} and values, or <code>null</code>.
   */
  public static BitStreamIndex getInstance( final CharSequence basename, Properties properties, final StringMap<? extends CharSequence> termMap, final PrefixMap<? extends CharSequence> prefixMap, final boolean randomAccess, final boolean documentSizes, final EnumMap<UriKeys,String> queryProperties ) throws ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {

    // This could be null if old indices contain SkipIndex
    Class<?> indexClass = null;
    try {
      indexClass = Class.forName( properties.getString( Index.PropertyKeys.INDEXCLASS, "(missing index class)" ));
    }
    catch( Exception ignore ) {}

    File indexFile = new File( basename + INDEX_EXTENSION );
    if ( ! indexFile.exists() ) throw new FileNotFoundException( "Cannot find index file " + indexFile.getName() );
   
    final Map<Component,Coding> flags = CompressionFlags.valueOf( properties.getStringArray( Index.PropertyKeys.CODING ), null );

    final int numberOfDocuments = properties.getInt( Index.PropertyKeys.DOCUMENTS );
    final int numberOfTerms = properties.getInt( Index.PropertyKeys.TERMS );
    final long numberOfPostings = properties.getLong( Index.PropertyKeys.POSTINGS );
    final long numberOfOccurrences = properties.getLong( Index.PropertyKeys.OCCURRENCES, -1 );
    final int maxCount = properties.getInt( Index.PropertyKeys.MAXCOUNT, -1 );
    final String field = properties.getString( Index.PropertyKeys.FIELD, new File( basename.toString() ).getName() );

    if ( termMap != null && termMap.size() != numberOfTerms ) throw new IllegalArgumentException( "The size of the term map (" + termMap.size() + ") is not equal to the number of terms (" + numberOfTerms + ")" );
    if ( prefixMap != null && prefixMap.size() != numberOfTerms ) throw new IllegalArgumentException( "The size of the prefix map (" + prefixMap.size() + ") is not equal to the number of terms (" + numberOfTerms + ")" );

    final Payload payload = (Payload)( properties.containsKey( Index.PropertyKeys.PAYLOADCLASS ) ? Class.forName( properties.getString( Index.PropertyKeys.PAYLOADCLASS ) ).newInstance() : null );
    final Coding frequencyCoding = flags.get( Component.FREQUENCIES );
    final Coding pointerCoding = flags.get( Component.POINTERS );
    final Coding countCoding = flags.get( Component.COUNTS );
    final Coding positionCoding = flags.get( Component.POSITIONS );
   
    if ( countCoding == null && positionCoding != null ) throw new IllegalArgumentException( "Index " + basename + " has positions but no counts (this can't happen)" );
   
    // Load document sizes if forced to do so, or if the pointer/position compression methods make it necessary.
    IntList sizes = null;
    // TODO: quick patch to avoid loading sizes in case of payloads.
    if ( payload == null && ( documentSizes || positionCoding == Coding.GOLOMB || positionCoding == Coding.INTERPOLATIVE ) ) {
      sizes = queryProperties != null && queryProperties.containsKey( UriKeys.SUCCINCTSIZES ) ? readSizesSuccinct( basename + DiskBasedIndex.SIZES_EXTENSION, numberOfDocuments ) : readSizes( basename + DiskBasedIndex.SIZES_EXTENSION, numberOfDocuments );
      if ( sizes.size() != numberOfDocuments ) throw new IllegalStateException( "The length of the size list (" + sizes.size() + ") is not equal to the number of documents (" + numberOfDocuments + ")" );
    }
   
    // Load offsets if forced to do so. Depending on a property, we use the core-memory or the semi-external version.
    final LongList offsets;
    // TODO: quick patch to avoid loading sizes in case of payloads.
    if ( payload == null && randomAccess ) {
      int offsetStep = queryProperties != null && queryProperties.get( UriKeys.OFFSETSTEP ) != null ? Integer.parseInt( queryProperties.get( UriKeys.OFFSETSTEP ) ) : DEFAULT_OFFSET_STEP;
     
      if ( offsetStep < 0 ) { // Memory-mapped
        offsetStep  = -offsetStep;
        offsets = LongLists.synchronize( new SemiExternalOffsetList(
            new InputBitStream( ByteBufferInputStream.map( new FileInputStream( basename + DiskBasedIndex.OFFSETS_EXTENSION ).getChannel(), MapMode.READ_ONLY ) ),
            offsetStep, numberOfTerms + 1 ) );
      }
      else {
        offsets = offsetStep == 0?
            DiskBasedIndex.readOffsets( basename + DiskBasedIndex.OFFSETS_EXTENSION, numberOfTerms ) :
              LongLists.synchronize( new SemiExternalOffsetList( new InputBitStream( basename + DiskBasedIndex.OFFSETS_EXTENSION, 1024 ), offsetStep, numberOfTerms + 1 ) );
      }
      if ( offsets.size() != numberOfTerms + 1 ) throw new IllegalStateException( "The length of the offset list (" + offsets.size() + ") is not equal to the number of terms plus one (" + numberOfTerms + " + 1)" );
    }
    else offsets = null;
   
   
    final int quantum = properties.getInt( BitStreamIndex.PropertyKeys.SKIPQUANTUM, -1 );
    final int height = properties.getInt( BitStreamIndex.PropertyKeys.SKIPHEIGHT, -1 );
    final int bufferSize = properties.getInt( BitStreamIndex.PropertyKeys.BUFFERSIZE, BitStreamIndex.DEFAULT_BUFFER_SIZE );

    final TermProcessor termProcessor = Index.getTermProcessor( properties );
    final boolean highPerformance = indexClass != null && FileHPIndex.class.isAssignableFrom( indexClass );
   
    if ( queryProperties != null && queryProperties.containsKey( UriKeys.INMEMORY ) ) {
      /*if ( SqrtSkipIndex.class.isAssignableFrom( indexClass ) )
        return new SqrtSkipInMemoryIndex( BinIO.loadBytes( indexFile.toString() ),
            numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount,
            frequencyCoding, pointerCoding, countCoding, positionCoding,
            termProcessor,
            field, properties, termMap, prefixMap, sizes, offsets );*/
      return highPerformance
      ? new InMemoryHPIndex( BinIO.loadBytes( indexFile.toString() ), BinIO.loadBytes( basename + POSITIONS_EXTENSION ),
          numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount,
          payload, frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height,
          termProcessor,
          field, properties, termMap, prefixMap, sizes, offsets )
      : new InMemoryIndex( BinIO.loadBytes( indexFile.toString() ),
          numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount,
          payload, frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height,
          termProcessor,
          field, properties, termMap, prefixMap, sizes, offsets );
    }
    else if ( queryProperties != null && queryProperties.containsKey( UriKeys.MAPPED ) ) {
      final File positionsFile = new File( basename + POSITIONS_EXTENSION );
      final ByteBufferInputStream index = ByteBufferInputStream.map( new FileInputStream( indexFile ).getChannel(), MapMode.READ_ONLY );
      return highPerformance
          ? new MemoryMappedHPIndex( index, ByteBufferInputStream.map( new FileInputStream( positionsFile ).getChannel(), MapMode.READ_ONLY ),
          numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount,
          payload, frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height,
          termProcessor,
          field, properties, termMap, prefixMap, sizes, offsets )
          : new MemoryMappedIndex( index,
              numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount,
              payload, frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height,
              termProcessor,
              field, properties, termMap, prefixMap, sizes, offsets );
     
    }
    /*if ( SqrtSkipIndex.class.isAssignableFrom( indexClass ) )
      return new SqrtSkipFileIndex( basename.toString(),
        numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount,
        frequencyCoding, pointerCoding, countCoding, positionCoding,
        termProcessor,
        field, properties, termMap, prefixMap, sizes, offsets, indexFile );*/
   
    return highPerformance 
        ? new FileHPIndex( basename.toString(),
            numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount,
            payload, frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height, bufferSize,
            termProcessor,
            field, properties, termMap, prefixMap, sizes, offsets )
        : new FileIndex( basename.toString(),
        numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount,
        payload, frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height, bufferSize,
        termProcessor,
        field, properties, termMap, prefixMap, sizes, offsets );
    
  }

  /** Returns a new disk-based index, using preloaded {@link Properties} and possibly guessing reasonable term and prefix maps from the basename.
   *
   * @param basename the basename of the index.
   * @param properties the properties obtained by stemming <code>basename</code>.
   * @param randomAccess whether the index should be accessible randomly.
   * @param documentSizes if true, document sizes will be loaded.
   * @param maps if true, {@linkplain StringMap term} and {@linkplain PrefixMap prefix} maps will be guessed and loaded.
   * @param queryProperties a map containing associations between {@link Index.UriKeys} and values, or <code>null</code>.
   * @throws IllegalAccessException
   * @throws InstantiationException
   *
   * @see #getInstance(CharSequence, Properties, StringMap, PrefixMap, boolean, boolean, EnumMap)
   */
  public static BitStreamIndex getInstance( final CharSequence basename, final Properties properties, final boolean randomAccess, final boolean documentSizes, final boolean maps, final EnumMap<UriKeys,String> queryProperties ) throws ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
    StringMap<? extends CharSequence> termMap = null;
    PrefixMap<? extends CharSequence> prefixMap = null;
    if ( maps ) {
      // TODO: check this logic
      termMap = DiskBasedIndex.loadStringMap( basename + DiskBasedIndex.TERMMAP_EXTENSION );
      if ( termMap != null && termMap instanceof PrefixMap ) return getInstance( basename, properties, termMap, (PrefixMap<?>)termMap, randomAccess, documentSizes, queryProperties );
      prefixMap = DiskBasedIndex.loadPrefixMap( basename + DiskBasedIndex.PREFIXMAP_EXTENSION );
      if ( termMap != null ) return getInstance( basename, properties, termMap, prefixMap, randomAccess, documentSizes, queryProperties );
      if ( prefixMap != null ) return getInstance( basename, properties, prefixMap, prefixMap, randomAccess, documentSizes, queryProperties );
    }
    return getInstance( basename, properties, null, prefixMap, randomAccess, documentSizes, queryProperties );
  }


  /** Returns a new disk-based index, possibly guessing reasonable term and prefix maps from the basename.
   *
   * <p>If there is a term map file (basename stemmed with <samp>.termmap</samp>), it is used as term map and,
   * in case it implements {@link PrefixMap}, as prefix map as well. Otherwise, we search for a prefix map (basename stemmed with <samp>.prefixmap</samp>)
   * and, if it implements {@link StringMap} and no term map has been found, we use it as term map, too.
   *
   * @param basename the basename of the index.
   * @param randomAccess whether the index should be accessible randomly (e.g., if it will
   * be possible to call {@link IndexReader#documents(int)} on the index readers returned by the index).
   * @param documentSizes if true, document sizes will be loaded (note that sometimes document sizes
   * might be loaded anyway because the compression method for positions requires it).
   * @param maps if true, {@linkplain StringMap term} and {@linkplain PrefixMap prefix} maps will be guessed and loaded (this
   * feature might not be available with some kind of index).
   * @param queryProperties a map containing associations between {@link Index.UriKeys} and values, or <code>null</code>.
   */
  public static BitStreamIndex getInstance( final CharSequence basename, final boolean randomAccess, final boolean documentSizes, final boolean maps, final EnumMap<UriKeys,String> queryProperties ) throws ConfigurationException, ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
    return getInstance( basename, new Properties( basename + DiskBasedIndex.PROPERTIES_EXTENSION ), randomAccess, documentSizes, maps, queryProperties );
  }


  /** Returns a new disk-based index with no additional query properties, possibly guessing reasonable term and prefix maps from the basename.
   *
   * <p>If there is a term map file (basename stemmed with <samp>.termmap</samp>), it is used as term map and,
   * in case it implements {@link PrefixMap}, as prefix map as well. Otherwise, we search for a prefix map (basename stemmed with <samp>.prefixmap</samp>)
   * and, if it implements {@link StringMap} and no term map has been found, we use it as term map, too.
   *
   * @param basename the basename of the index.
   * @param randomAccess whether the index should be accessible randomly (e.g., if it will
   * be possible to call {@link IndexReader#documents(int)} on the index readers returned by the index).
   * @param documentSizes if true, document sizes will be loaded (note that sometimes document sizes
   * might be loaded anyway because the compression method for positions requires it).
   * @param maps if true, {@linkplain StringMap term} and {@linkplain PrefixMap prefix} maps will be guessed and loaded (this
   * feature might not be available with some kind of index).
   * @see #getInstance(CharSequence, boolean, boolean, boolean, EnumMap)
   */
  public static BitStreamIndex getInstance( final CharSequence basename, final boolean randomAccess, final boolean documentSizes, final boolean maps ) throws ConfigurationException, ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
    return getInstance( basename, new Properties( basename + DiskBasedIndex.PROPERTIES_EXTENSION ), randomAccess, documentSizes, maps, null );
  }

 
  /** Returns a new disk-based index, guessing reasonable term and prefix maps from the basename.
   *
   * @param basename the basename of the index.
   * @param randomAccess whether the index should be accessible randomly (e.g., if it will
   * be possible to call {@link IndexReader#documents(int)} on the index readers returned by the index).
   * @param documentSizes if true, document sizes will be loaded (note that sometimes document sizes
   * might be loaded anyway because the compression method for positions requires it).
   */
  public static BitStreamIndex getInstance( final CharSequence basename, final boolean randomAccess, final boolean documentSizes ) throws ConfigurationException, ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
    return getInstance( basename, randomAccess, documentSizes, true );
  }

  /** Returns a new local index, trying to guess reasonable term and prefix maps from the basename,
   * and loading document sizes only if it is necessary.
   *
   * @param basename the basename of the index.
   * @param randomAccess whether the index should be accessible randomly (e.g., if it will
   * be possible to call {@link IndexReader#documents(int)} on the index readers returned by the index).
   */
  public static BitStreamIndex getInstance( final CharSequence basename, final boolean randomAccess ) throws ConfigurationException, ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
    return getInstance( basename, randomAccess, false );
  }

  /** Returns a new local index, trying to guess reasonable term and prefix maps from the basename,
   *  loading offsets but loading document sizes only if it is necessary.
   *
   * @param basename the basename of the index.
   */
  public static BitStreamIndex getInstance( final CharSequence basename ) throws ConfigurationException, ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
    return getInstance( basename, true );
  }
}