package it.unimi.dsi.mg4j.index;
/*
* MG4J: Managing Gigabytes for Java
*
* Copyright (C) 2004-2010 Sebastiano Vigna
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/
import it.unimi.dsi.Util;
import it.unimi.dsi.fastutil.ints.AbstractIntIterator;
import it.unimi.dsi.fastutil.ints.AbstractIntList;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntIterable;
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.ints.IntList;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongList;
import it.unimi.dsi.fastutil.longs.LongLists;
import it.unimi.dsi.io.ByteBufferInputStream;
import it.unimi.dsi.io.InputBitStream;
import it.unimi.dsi.mg4j.index.CompressionFlags.Coding;
import it.unimi.dsi.mg4j.index.CompressionFlags.Component;
import it.unimi.dsi.mg4j.index.Index.UriKeys;
import it.unimi.dsi.mg4j.index.payload.Payload;
import it.unimi.dsi.mg4j.util.SemiExternalOffsetList;
import it.unimi.dsi.sux4j.util.EliasFanoLongBigList;
import it.unimi.dsi.util.PrefixMap;
import it.unimi.dsi.util.Properties;
import it.unimi.dsi.util.StringMap;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.channels.FileChannel.MapMode;
import java.util.EnumMap;
import java.util.Map;
import java.util.NoSuchElementException;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.log4j.Logger;
/** A static container providing facilities to load an index based on data stored on disk.
*
* <P>This class contains several useful static methods
* such as {@link #readOffsets(InputBitStream, int)} and {@link #readSizes(CharSequence, int)},
* and static factory methods such as {@link #getInstance(CharSequence, boolean, boolean, boolean, EnumMap)}
* that take care of reading the properties associated to the index, identify
* the correct {@link it.unimi.dsi.mg4j.index.Index} implementation that
* should be used to load the index, and load the necessary data into memory.
*
* <p>As an option, a disk-based index can be <em>loaded</em> into main memory (key: {@link Index.UriKeys#INMEMORY}), returning
* an {@link it.unimi.dsi.mg4j.index.InMemoryIndex}/{@link InMemoryHPIndex}, or <em>mapped</em> into main memory (key: {@link Index.UriKeys#MAPPED}),
* returning a {@link MemoryMappedIndex}/{@link MemoryMappedHPIndex} (note that the value assigned to the keys is irrelevant).
* In both cases some insurmountable Java problems
* prevent using indices whose size exceeds two gigabytes (but see {@link MemoryMappedIndex} for
* some elaboration on this topic).
*
* <p>Moreover, by default the
* term-offset list is accessed using a {@link it.unimi.dsi.mg4j.util.SemiExternalOffsetList}
* with a step of {@link #DEFAULT_OFFSET_STEP}. This behaviour can be changed using
* the URI key {@link UriKeys#OFFSETSTEP}.
*
* <p>Disk-based indices are the workhorse of MG4J. All other indices (clustered,
* remote, etc.) ultimately rely on disk-based indices to provide results.
*
* <p>Note that not all data produced by {@link it.unimi.dsi.mg4j.tool.Scan} and
* by the other indexing utilities are actually necessary to run a disk-based
* index. Usually the property file and the index file (plus the positions file,
* for {@linkplain BitStreamHPIndex high-performance indices}) are sufficient: if one
* needs random access, also the offsets file must be present, and if the
* compression method requires document sizes or if sizes are requested explicitly,
* also the sizes file must be present. A {@link StringMap}
* and possibly a {@link PrefixMap} will be fetched
* automatically by {@link #getInstance(CharSequence, boolean, boolean)}
* using standard extensions.
*
* <h2>Thread safety</h2>
*
* <p>A disk-based index is thread safe as long as the offset list, the size list and
* the term/prefix map are. The static factory methods provided by this class load
* offsets and sizes using data structures that are thread safe. If you use directly
* a constructor, instead, it is your responsibility to pass thread-safe data structures.
*
* @author Sebastiano Vigna
* @since 1.1
*/
public class DiskBasedIndex {
/** The logger used by the static utility methods of this class. */
private static final Logger LOGGER = Util.getLogger( DiskBasedIndex.class );
// NOTE(review): the class declaration visible in this file names no serializable supertype,
// so this constant looks unused; confirm before removing.
private static final long serialVersionUID = 0;
/** The default value for the query parameter {@link Index.UriKeys#OFFSETSTEP}. */
public final static int DEFAULT_OFFSET_STEP = 256;
/** Standard extension for the index bitstream. */
public static final String INDEX_EXTENSION = ".index";
/** Standard extension for the positions bitstream of a {@linkplain BitStreamHPIndexWriter high-performance index}. */
public static final String POSITIONS_EXTENSION = ".positions";
/** Standard extension for the index properties. */
public static final String PROPERTIES_EXTENSION = ".properties";
/** Standard extension for the file of sizes. */
public static final String SIZES_EXTENSION = ".sizes";
/** Standard extension for the file of offsets. */
public static final String OFFSETS_EXTENSION = ".offsets";
/** Standard extension for the file of lengths of positions. */
public static final String POSITIONS_NUMBER_OF_BITS_EXTENSION = ".posnumbits";
/** Standard extension for the file of global counts. */
public static final String GLOBCOUNTS_EXTENSION = ".globcounts";
/** Standard extension for the file of frequencies. */
public static final String FREQUENCIES_EXTENSION = ".frequencies";
/** Standard extension for the file of terms. */
public static final String TERMS_EXTENSION = ".terms";
/** Standard extension for the file of terms, unsorted. */
public static final String UNSORTED_TERMS_EXTENSION = ".terms.unsorted";
/** Standard extension for the term map. */
public static final String TERMMAP_EXTENSION = ".termmap";
/** Standard extension for the prefix map. */
public static final String PREFIXMAP_EXTENSION = ".prefixmap";
/** Standard extension for the stats file. */
public static final String STATS_EXTENSION = ".stats";
/** Private constructor: the members declared in this class are all static, so it is not meant to be instantiated. */
private DiskBasedIndex() {}
/** Decodes a compressed offset stream into an array-backed list of cumulative offsets.
 *
 * <p>The first γ-coded value is stored as is; each of the following <code>T</code> values
 * is a gap that is added to the previous entry. The stream is left open: closing it is
 * up to the caller.
 *
 * @param in the input bit stream providing the offsets (see {@link BitStreamIndexWriter}).
 * @param T the number of terms indexed.
 * @return a list of <code>T</code>&nbsp;+&nbsp;1 longs backed by an array; the additional
 * final element of index <code>T</code> marks the end of the index file.
 * @throws IOException if reading from <code>in</code> fails.
 */
public static LongList readOffsets( final InputBitStream in, final int T ) throws IOException {
	final long[] cumulative = new long[ T + 1 ];
	LOGGER.debug( "Loading offsets..." );
	// Keep a running sum instead of reading back the previous array entry.
	long current = in.readLongGamma();
	cumulative[ 0 ] = current;
	for( int term = 1; term <= T; term++ ) {
		current += in.readLongGamma();
		cumulative[ term ] = current;
	}
	LOGGER.debug( "Completed." );
	return LongArrayList.wrap( cumulative );
}
/** Utility method to load a compressed offset file into a list.
 *
 * <p>The file is opened, decoded as a sequence of γ-coded gaps (the first value being
 * the first offset) and closed again before returning. The file is closed even if
 * decoding fails, so that no file handle is leaked on a truncated or corrupted file.
 *
 * @param filename the file containing the offsets (see {@link BitStreamIndexWriter}).
 * @param T the number of terms indexed.
 * @return a list of longs backed by an array; the list has
 * an additional final element of index <code>T</code> that marks the end of the index file.
 * @throws IOException if the file cannot be opened or read.
 */
public static LongList readOffsets( final CharSequence filename, final int T ) throws IOException {
	final InputBitStream in = new InputBitStream( filename.toString() );
	try {
		final long[] offset = new long[ T + 1 ];
		LOGGER.debug( "Loading offsets..." );
		offset[ 0 ] = in.readLongGamma();
		// Each subsequent value is a gap from the previous offset.
		for( int i = 0; i < T; i++ ) offset[ i + 1 ] = in.readLongGamma() + offset[ i ];
		LOGGER.debug( "Completed." );
		return LongArrayList.wrap( offset );
	}
	finally {
		// Release the underlying file whether or not decoding succeeded.
		in.close();
	}
}
/** Utility method to load a compressed size file into a list.
 *
 * <p>The file is closed before returning, and also when decoding fails, so that
 * no file handle is leaked on a truncated or corrupted file.
 *
 * @param filename the file containing the γ-coded sizes (see {@link BitStreamIndexWriter}).
 * @param N the number of documents.
 * @return a list of <code>N</code> integers backed by an array.
 * @throws IOException if the file cannot be opened or read.
 */
public static IntList readSizes( final CharSequence filename, final int N ) throws IOException {
	final int[] size = new int[ N ];
	final InputBitStream in = new InputBitStream( filename.toString() );
	try {
		LOGGER.debug( "Loading sizes..." );
		in.readGammas( size, N );
		LOGGER.debug( "Completed." );
	}
	finally {
		// Release the underlying file whether or not decoding succeeded.
		in.close();
	}
	return IntArrayList.wrap( size );
}
/** Loads a compressed size file and exposes it as a list of integers stored in an
 * {@linkplain EliasFanoLongBigList Elias–Fano compressed list}.
 *
 * <p>The whole file is loaded into a byte array, its γ-coded content is scanned to build
 * the compressed list, and the result is wrapped in a read-only view that narrows
 * each stored long to an <code>int</code>.
 *
 * @param filename the filename containing the γ-coded sizes (see {@link BitStreamIndexWriter}).
 * @param N the number of documents indexed.
 * @return a list of integers backed by an {@linkplain EliasFanoLongBigList Elias–Fano compressed list}.
 * @throws IOException if the file cannot be loaded.
 */
public static IntList readSizesSuccinct( final CharSequence filename, final int N ) throws IOException {
	LOGGER.debug( "Loading sizes..." );
	final EliasFanoLongBigList compressed = new EliasFanoLongBigList( new GammaCodedIterableList( BinIO.loadBytes( filename ), N ) );
	final IntList view = new AbstractIntList() {
		public int size() {
			return compressed.size();
		}
		public int getInt( final int position ) {
			return (int)compressed.getLong( position );
		}
	};
	LOGGER.debug( "Completed." );
	return view;
}
// TODO: replace this with a general-purpose class
/** An iterable over the first <code>n</code> γ-coded integers stored in a byte array.
 *
 * <p>Every call to {@link #iterator()} starts decoding again from the beginning of the array.
 */
private static class GammaCodedIterableList implements IntIterable {
	/** The bytes containing the γ-coded values. */
	private final byte[] array;
	/** The number of values to be returned by each iterator. */
	private final int n;

	public GammaCodedIterableList( final byte[] array, final int n ) {
		this.n = n;
		this.array = array;
	}

	public IntIterator iterator() {
		return new AbstractIntIterator() {
			/** The bit stream from which values are decoded. */
			final InputBitStream bits = new InputBitStream( array );
			/** The number of values requested so far. */
			int requested;

			public boolean hasNext() {
				return requested < n;
			}

			public int nextInt() {
				if ( requested >= n ) throw new NoSuchElementException();
				// The counter advances before decoding, so a failed read still consumes a slot.
				requested++;
				try {
					return bits.readGamma();
				}
				catch ( IOException e ) {
					throw new RuntimeException( e );
				}
			}
		};
	}
}
/** Deserialises a term map from a file, treating a missing file as "no map".
 *
 * @param filename the name of the file containing the term map.
 * @return the map, or <code>null</code> if the file did not exist.
 * @throws IOException if some IOException (other than {@link FileNotFoundException}) occurred.
 * @throws RuntimeException wrapping a {@link ClassNotFoundException} if the class of the
 * stored object cannot be found.
 */
@SuppressWarnings("unchecked")
public static StringMap<? extends CharSequence> loadStringMap( final String filename ) throws IOException {
	final Object loaded;
	try {
		loaded = BinIO.loadObject( filename );
	}
	catch ( FileNotFoundException e ) {
		// A missing file is not an error: the caller gets no map.
		return null;
	}
	catch ( ClassNotFoundException e ) {
		throw new RuntimeException( e );
	}
	return (StringMap<? extends CharSequence>) loaded;
}
/** Deserialises a prefix map from a file, treating a missing file as "no map".
 *
 * @param filename the name of the file containing the prefix map.
 * @return the map, or <code>null</code> if the file did not exist.
 * @throws IOException if some IOException (other than {@link FileNotFoundException}) occurred.
 * @throws RuntimeException wrapping a {@link ClassNotFoundException} if the class of the
 * stored object cannot be found.
 */
@SuppressWarnings("unchecked")
public static PrefixMap<? extends CharSequence> loadPrefixMap( final String filename ) throws IOException {
	final Object loaded;
	try {
		loaded = BinIO.loadObject( filename );
	}
	catch ( FileNotFoundException e ) {
		// A missing file is not an error: the caller gets no map.
		return null;
	}
	catch ( ClassNotFoundException e ) {
		throw new RuntimeException( e );
	}
	return (PrefixMap<? extends CharSequence>) loaded;
}
/** Returns a new disk-based index, loading exactly the specified parts and using preloaded {@link Properties}.
 *
 * <p>Depending on the index class recorded in the properties and on the keys present in
 * <code>queryProperties</code>, the returned index is loaded in memory ({@link Index.UriKeys#INMEMORY}),
 * memory mapped ({@link Index.UriKeys#MAPPED}) or accessed from the file. Files opened just
 * to establish a memory mapping are closed as soon as the mapping has been created.
 *
 * @param basename the basename of the index.
 * @param properties the properties obtained from the given basename.
 * @param termMap the term map for this index, or <code>null</code> for no term map.
 * @param prefixMap the prefix map for this index, or <code>null</code> for no prefix map.
 * @param randomAccess whether the index should be accessible randomly (e.g., if it will
 * be possible to call {@link IndexReader#documents(int)} on the index readers returned by the index).
 * @param documentSizes if true, document sizes will be loaded (note that sometimes document sizes
 * might be loaded anyway because the compression method for positions requires it).
 * @param queryProperties a map containing associations between {@link Index.UriKeys} and values, or <code>null</code>.
 * @return a new index of the kind selected by the properties and by <code>queryProperties</code>.
 * @throws FileNotFoundException if the index file does not exist.
 * @throws IllegalArgumentException if the size of a provided map differs from the number of terms,
 * or if the index declares positions but no counts.
 * @throws IllegalStateException if the loaded sizes or offsets have an unexpected length.
 */
public static BitStreamIndex getInstance( final CharSequence basename, Properties properties, final StringMap<? extends CharSequence> termMap, final PrefixMap<? extends CharSequence> prefixMap, final boolean randomAccess, final boolean documentSizes, final EnumMap<UriKeys,String> queryProperties ) throws ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
	// This could be null if old indices contain SkipIndex: failing to resolve the class
	// is deliberately tolerated, and the index is then treated as not high performance.
	Class<?> indexClass = null;
	try {
		indexClass = Class.forName( properties.getString( Index.PropertyKeys.INDEXCLASS, "(missing index class)" ));
	}
	catch( Exception ignored ) {}

	File indexFile = new File( basename + INDEX_EXTENSION );
	if ( ! indexFile.exists() ) throw new FileNotFoundException( "Cannot find index file " + indexFile.getName() );

	final Map<Component,Coding> flags = CompressionFlags.valueOf( properties.getStringArray( Index.PropertyKeys.CODING ), null );

	final int numberOfDocuments = properties.getInt( Index.PropertyKeys.DOCUMENTS );
	final int numberOfTerms = properties.getInt( Index.PropertyKeys.TERMS );
	final long numberOfPostings= properties.getLong( Index.PropertyKeys.POSTINGS );
	final long numberOfOccurrences = properties.getLong( Index.PropertyKeys.OCCURRENCES, -1 );
	final int maxCount = properties.getInt( Index.PropertyKeys.MAXCOUNT, -1 );
	final String field = properties.getString( Index.PropertyKeys.FIELD, new File( basename.toString() ).getName() );

	if ( termMap != null && termMap.size() != numberOfTerms ) throw new IllegalArgumentException( "The size of the term map (" + termMap.size() + ") is not equal to the number of terms (" + numberOfTerms + ")" );
	if ( prefixMap != null && prefixMap.size() != numberOfTerms ) throw new IllegalArgumentException( "The size of the prefix map (" + prefixMap.size() + ") is not equal to the number of terms (" + numberOfTerms + ")" );

	final Payload payload = (Payload)( properties.containsKey( Index.PropertyKeys.PAYLOADCLASS ) ? Class.forName( properties.getString( Index.PropertyKeys.PAYLOADCLASS ) ).newInstance() : null );
	final Coding frequencyCoding = flags.get( Component.FREQUENCIES );
	final Coding pointerCoding = flags.get( Component.POINTERS );
	final Coding countCoding = flags.get( Component.COUNTS );
	final Coding positionCoding = flags.get( Component.POSITIONS );
	if ( countCoding == null && positionCoding != null ) throw new IllegalArgumentException( "Index " + basename + " has positions but no counts (this can't happen)" );

	// Load document sizes if forced to do so, or if the pointer/position compression methods make it necessary.
	IntList sizes = null;
	// TODO: quick patch to avoid loading sizes in case of payloads.
	if ( payload == null && ( documentSizes || positionCoding == Coding.GOLOMB || positionCoding == Coding.INTERPOLATIVE ) ) {
		sizes = queryProperties != null && queryProperties.containsKey( UriKeys.SUCCINCTSIZES ) ? readSizesSuccinct( basename + DiskBasedIndex.SIZES_EXTENSION, numberOfDocuments ) : readSizes( basename + DiskBasedIndex.SIZES_EXTENSION, numberOfDocuments );
		if ( sizes.size() != numberOfDocuments ) throw new IllegalStateException( "The length of the size list (" + sizes.size() + ") is not equal to the number of documents (" + numberOfDocuments + ")" );
	}

	// Load offsets if forced to do so. Depending on a property, we use the core-memory or the semi-external version.
	final LongList offsets;
	// TODO: quick patch to avoid loading offsets in case of payloads.
	if ( payload == null && randomAccess ) {
		int offsetStep = queryProperties != null && queryProperties.get( UriKeys.OFFSETSTEP ) != null ? Integer.parseInt( queryProperties.get( UriKeys.OFFSETSTEP ) ) : DEFAULT_OFFSET_STEP;
		if ( offsetStep < 0 ) { // Memory-mapped
			offsetStep = -offsetStep;
			offsets = LongLists.synchronize( new SemiExternalOffsetList(
					new InputBitStream( mapReadOnly( new File( basename + DiskBasedIndex.OFFSETS_EXTENSION ) ) ),
					offsetStep, numberOfTerms + 1 ) );
		}
		else {
			// A zero step means that offsets are entirely loaded into core memory.
			offsets = offsetStep == 0?
					DiskBasedIndex.readOffsets( basename + DiskBasedIndex.OFFSETS_EXTENSION, numberOfTerms ) :
						LongLists.synchronize( new SemiExternalOffsetList( new InputBitStream( basename + DiskBasedIndex.OFFSETS_EXTENSION, 1024 ), offsetStep, numberOfTerms + 1 ) );
		}
		if ( offsets.size() != numberOfTerms + 1 ) throw new IllegalStateException( "The length of the offset list (" + offsets.size() + ") is not equal to the number of terms plus one (" + numberOfTerms + " + 1)" );
	}
	else offsets = null;

	final int quantum = properties.getInt( BitStreamIndex.PropertyKeys.SKIPQUANTUM, -1 );
	final int height = properties.getInt( BitStreamIndex.PropertyKeys.SKIPHEIGHT, -1 );
	final int bufferSize = properties.getInt( BitStreamIndex.PropertyKeys.BUFFERSIZE, BitStreamIndex.DEFAULT_BUFFER_SIZE );
	final TermProcessor termProcessor = Index.getTermProcessor( properties );
	final boolean highPerformance = indexClass != null && FileHPIndex.class.isAssignableFrom( indexClass );

	if ( queryProperties != null && queryProperties.containsKey( UriKeys.INMEMORY ) ) {
		return highPerformance
				? new InMemoryHPIndex( BinIO.loadBytes( indexFile.toString() ), BinIO.loadBytes( basename + POSITIONS_EXTENSION ),
						numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount,
						payload, frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height,
						termProcessor,
						field, properties, termMap, prefixMap, sizes, offsets )
				: new InMemoryIndex( BinIO.loadBytes( indexFile.toString() ),
						numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount,
						payload, frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height,
						termProcessor,
						field, properties, termMap, prefixMap, sizes, offsets );
	}
	else if ( queryProperties != null && queryProperties.containsKey( UriKeys.MAPPED ) ) {
		final File positionsFile = new File( basename + POSITIONS_EXTENSION );
		final ByteBufferInputStream index = mapReadOnly( indexFile );
		// The positions file is mapped only for high-performance indices.
		return highPerformance
				? new MemoryMappedHPIndex( index, mapReadOnly( positionsFile ),
						numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount,
						payload, frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height,
						termProcessor,
						field, properties, termMap, prefixMap, sizes, offsets )
				: new MemoryMappedIndex( index,
						numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount,
						payload, frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height,
						termProcessor,
						field, properties, termMap, prefixMap, sizes, offsets );
	}

	return highPerformance
			? new FileHPIndex( basename.toString(),
					numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount,
					payload, frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height, bufferSize,
					termProcessor,
					field, properties, termMap, prefixMap, sizes, offsets )
			: new FileIndex( basename.toString(),
					numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount,
					payload, frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height, bufferSize,
					termProcessor,
					field, properties, termMap, prefixMap, sizes, offsets );
}

/** Maps a file into memory in read-only mode, closing the file as soon as the mapping exists.
 *
 * <p>A mapping established by a file channel does not depend on the channel
 * that created it, so the stream can be closed right after mapping.
 *
 * @param file the file to be mapped.
 * @return a byte-buffer input stream over the content of <code>file</code>.
 * @throws IOException if the file cannot be opened or mapped.
 */
private static ByteBufferInputStream mapReadOnly( final File file ) throws IOException {
	final FileInputStream stream = new FileInputStream( file );
	try {
		return ByteBufferInputStream.map( stream.getChannel(), MapMode.READ_ONLY );
	}
	finally {
		stream.close();
	}
}
/** Returns a new disk-based index, using preloaded {@link Properties} and possibly guessing term and prefix maps from the basename.
 *
 * <p>When <code>maps</code> is true, the term map is loaded from the basename stemmed with
 * {@link #TERMMAP_EXTENSION}. If it also implements {@link PrefixMap} it is used as prefix map, too.
 * Otherwise, a prefix map is loaded from the basename stemmed with {@link #PREFIXMAP_EXTENSION} and,
 * if no term map was found, the prefix map is used as term map as well. Missing files result in
 * <code>null</code> maps.
 *
 * @param basename the basename of the index.
 * @param properties the properties associated with <code>basename</code>.
 * @param randomAccess whether the index should be accessible randomly.
 * @param documentSizes if true, document sizes will be loaded.
 * @param maps if true, {@linkplain StringMap term} and {@linkplain PrefixMap prefix} maps will be guessed and loaded.
 * @param queryProperties a map containing associations between {@link Index.UriKeys} and values, or <code>null</code>.
 * @return the index built from the given properties and the maps that were found.
 * @throws IllegalAccessException
 * @throws InstantiationException
 *
 * @see #getInstance(CharSequence, Properties, StringMap, PrefixMap, boolean, boolean, EnumMap)
 */
public static BitStreamIndex getInstance( final CharSequence basename, final Properties properties, final boolean randomAccess, final boolean documentSizes, final boolean maps, final EnumMap<UriKeys,String> queryProperties ) throws ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
	StringMap<? extends CharSequence> terms = null;
	PrefixMap<? extends CharSequence> prefixes = null;

	if ( maps ) {
		// TODO: check this logic
		terms = DiskBasedIndex.loadStringMap( basename + DiskBasedIndex.TERMMAP_EXTENSION );
		if ( terms instanceof PrefixMap ) {
			// The term map doubles as prefix map: the prefix-map file is not even looked up.
			prefixes = (PrefixMap<?>)terms;
		}
		else {
			prefixes = DiskBasedIndex.loadPrefixMap( basename + DiskBasedIndex.PREFIXMAP_EXTENSION );
			// Without a term map, the prefix map (if any) plays both roles.
			if ( terms == null ) terms = prefixes;
		}
	}

	return getInstance( basename, properties, terms, prefixes, randomAccess, documentSizes, queryProperties );
}
/** Returns a new disk-based index, reading its properties from disk and possibly guessing term and prefix maps from the basename.
 *
 * <p>The properties are read from the basename stemmed with {@link #PROPERTIES_EXTENSION}.
 * If <code>maps</code> is true and there is a term map file (basename stemmed with <samp>.termmap</samp>),
 * it is used as term map and, in case it implements {@link PrefixMap}, also as prefix map.
 * Otherwise, we search for a prefix map (basename stemmed with <samp>.prefixmap</samp>)
 * and, if no term map has been found, we use it as term map, too.
 *
 * @param basename the basename of the index.
 * @param randomAccess whether the index should be accessible randomly (e.g., if it will
 * be possible to call {@link IndexReader#documents(int)} on the index readers returned by the index).
 * @param documentSizes if true, document sizes will be loaded (note that sometimes document sizes
 * might be loaded anyway because the compression method for positions requires it).
 * @param maps if true, {@linkplain StringMap term} and {@linkplain PrefixMap prefix} maps will be guessed and loaded (this
 * feature might not be available with some kind of index).
 * @param queryProperties a map containing associations between {@link Index.UriKeys} and values, or <code>null</code>.
 * @return the index associated with <code>basename</code>.
 */
public static BitStreamIndex getInstance( final CharSequence basename, final boolean randomAccess, final boolean documentSizes, final boolean maps, final EnumMap<UriKeys,String> queryProperties ) throws ConfigurationException, ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
	final String propertiesFilename = basename + DiskBasedIndex.PROPERTIES_EXTENSION;
	final Properties properties = new Properties( propertiesFilename );
	return getInstance( basename, properties, randomAccess, documentSizes, maps, queryProperties );
}
/** Returns a new disk-based index with no query properties, reading its properties from disk and possibly
 * guessing term and prefix maps from the basename.
 *
 * <p>The properties are read from the basename stemmed with {@link #PROPERTIES_EXTENSION}.
 * If <code>maps</code> is true and there is a term map file (basename stemmed with <samp>.termmap</samp>),
 * it is used as term map and, in case it implements {@link PrefixMap}, also as prefix map.
 * Otherwise, we search for a prefix map (basename stemmed with <samp>.prefixmap</samp>)
 * and, if no term map has been found, we use it as term map, too.
 *
 * @param basename the basename of the index.
 * @param randomAccess whether the index should be accessible randomly (e.g., if it will
 * be possible to call {@link IndexReader#documents(int)} on the index readers returned by the index).
 * @param documentSizes if true, document sizes will be loaded (note that sometimes document sizes
 * might be loaded anyway because the compression method for positions requires it).
 * @param maps if true, {@linkplain StringMap term} and {@linkplain PrefixMap prefix} maps will be guessed and loaded (this
 * feature might not be available with some kind of index).
 * @return the index associated with <code>basename</code>.
 * @see #getInstance(CharSequence, boolean, boolean, boolean, EnumMap)
 */
public static BitStreamIndex getInstance( final CharSequence basename, final boolean randomAccess, final boolean documentSizes, final boolean maps ) throws ConfigurationException, ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
	// No query properties: every URI-key-driven option keeps its default.
	final EnumMap<UriKeys,String> noQueryProperties = null;
	final Properties properties = new Properties( basename + DiskBasedIndex.PROPERTIES_EXTENSION );
	return getInstance( basename, properties, randomAccess, documentSizes, maps, noQueryProperties );
}
/** Returns a new disk-based index, always trying to guess term and prefix maps from the basename.
 *
 * @param basename the basename of the index.
 * @param randomAccess whether the index should be accessible randomly (e.g., if it will
 * be possible to call {@link IndexReader#documents(int)} on the index readers returned by the index).
 * @param documentSizes if true, document sizes will be loaded (note that sometimes document sizes
 * might be loaded anyway because the compression method for positions requires it).
 * @return the index associated with <code>basename</code>.
 * @see #getInstance(CharSequence, boolean, boolean, boolean)
 */
public static BitStreamIndex getInstance( final CharSequence basename, final boolean randomAccess, final boolean documentSizes ) throws ConfigurationException, ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
	// This overload always asks for term and prefix maps to be guessed.
	final boolean guessMaps = true;
	return getInstance( basename, randomAccess, documentSizes, guessMaps );
}
/** Returns a new local index, trying to guess term and prefix maps from the basename
 * and without explicitly requesting document sizes (they are loaded only if necessary).
 *
 * @param basename the basename of the index.
 * @param randomAccess whether the index should be accessible randomly (e.g., if it will
 * be possible to call {@link IndexReader#documents(int)} on the index readers returned by the index).
 * @return the index associated with <code>basename</code>.
 * @see #getInstance(CharSequence, boolean, boolean)
 */
public static BitStreamIndex getInstance( final CharSequence basename, final boolean randomAccess ) throws ConfigurationException, ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
	// Sizes are not requested explicitly by this overload.
	final boolean forceDocumentSizes = false;
	return getInstance( basename, randomAccess, forceDocumentSizes );
}
/** Returns a new local index with random access enabled, trying to guess term and prefix maps
 * from the basename and without explicitly requesting document sizes.
 *
 * @param basename the basename of the index.
 * @return the index associated with <code>basename</code>.
 * @see #getInstance(CharSequence, boolean)
 */
public static BitStreamIndex getInstance( final CharSequence basename ) throws ConfigurationException, ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
	// Random access (and thus offset loading) is requested by default.
	final boolean withRandomAccess = true;
	return getInstance( basename, withRandomAccess );
}
}