// Open every file of the input (global) index that has to be split.

// The input index is "high performance" when the class named by the INDEXCLASS property
// of its properties file is BitStreamHPIndex or a subclass of it.
final boolean isHighPerformance = BitStreamHPIndex.class.isAssignableFrom( Class.forName( new Properties( inputBasename + DiskBasedIndex.PROPERTIES_EXTENSION ).getString( Index.PropertyKeys.INDEXCLASS ) ) );
final InputBitStream globalIndex = new InputBitStream( inputBasename + DiskBasedIndex.INDEX_EXTENSION, bufferSize );
// Size in bytes of the positions file (File.length() yields 0 if the file does not exist).
final long globalPositionsLength = new File( inputBasename + DiskBasedIndex.POSITIONS_EXTENSION ).length();
// The positions stream is opened only for high-performance indices; it is null otherwise.
final InputBitStream globalPositions = isHighPerformance ? new InputBitStream( inputBasename + DiskBasedIndex.POSITIONS_EXTENSION, bufferSize ) : null;
// Terms are read line by line, decoded as UTF-8.
final FastBufferedReader terms = new FastBufferedReader( new InputStreamReader( new FileInputStream( inputBasename + DiskBasedIndex.TERMS_EXTENSION ), "UTF-8" ) );
final InputBitStream offsets = new InputBitStream( inputBasename + DiskBasedIndex.OFFSETS_EXTENSION );
// The positions-number-of-bits file is optional: its stream is null when the file is absent.
final File posNumBitsFile = new File( inputBasename + DiskBasedIndex.POSITIONS_NUMBER_OF_BITS_EXTENSION );
final InputBitStream posNumBits = posNumBitsFile.exists() ? new InputBitStream( inputBasename + DiskBasedIndex.POSITIONS_NUMBER_OF_BITS_EXTENSION ) : null;
final InputBitStream frequencies = new InputBitStream( inputBasename + DiskBasedIndex.FREQUENCIES_EXTENSION );
final InputBitStream globCounts = new InputBitStream( inputBasename + DiskBasedIndex.GLOBCOUNTS_EXTENSION );
// Read and discard the first gamma-coded value of the offsets stream.
// NOTE(review): presumably this is the initial 0 that offsets files start with
// (each local offsets file is started with writeGamma( 0 ) below) -- confirm.
offsets.readGamma();
// Open the output files of each of the numIndices local indices, named after localBasename[ i ].
for( int i = 0; i < numIndices; i++ ) {
localIndexStream[ i ] = new OutputBitStream( localBasename[ i ] + DiskBasedIndex.INDEX_EXTENSION, bufferSize );
// A local positions file is created only when the input index is high performance.
if ( isHighPerformance ) localPositionsStream[ i ] = new OutputBitStream( localBasename[ i ] + DiskBasedIndex.POSITIONS_EXTENSION, bufferSize );
localFrequencies[ i ] = new OutputBitStream( localBasename[ i ] + DiskBasedIndex.FREQUENCIES_EXTENSION );
localGlobCounts[ i ] = new OutputBitStream( localBasename[ i ] + DiskBasedIndex.GLOBCOUNTS_EXTENSION );
// Local term lists are written as UTF-8 text.
localTerms[ i ] = new PrintWriter( new OutputStreamWriter( new FastBufferedOutputStream( new FileOutputStream( localBasename[ i ] + DiskBasedIndex.TERMS_EXTENSION ) ), "UTF-8" ) );
localOffsets[ i ] = new OutputBitStream( localBasename[ i ] + DiskBasedIndex.OFFSETS_EXTENSION );
// A local positions-number-of-bits file is created only when the input index has one.
if ( posNumBits != null ) localPosNumBits[ i ] = new OutputBitStream( localBasename[ i ] + DiskBasedIndex.POSITIONS_NUMBER_OF_BITS_EXTENSION );
// Every local offsets file starts with a gamma-coded 0.
localOffsets[ i ].writeGamma( 0 );
}
// The current term
final MutableString currTerm = new MutableString();
// Progress is measured in bits: the size of the index file plus, for high-performance
// indices, the size of the positions file. The latter reuses globalPositionsLength
// (already obtained from the same file) instead of querying the file system again,
// so the expected total matches the length used when the last positions are poured.
pl.expectedUpdates = ( new File( inputBasename + DiskBasedIndex.INDEX_EXTENSION ).length() + ( isHighPerformance ? globalPositionsLength : 0 ) ) * 8;
pl.itemsName = "bits";
pl.logInterval = logInterval;
pl.start( "Partitioning index..." );
// termNumber: index of the current term in the global term list;
// k / prevK: local index of the current / previous term (prevK is -1 before the first term);
// previousHeaderLength / newHeaderLength: bit lengths of the per-term header in the global and
// local index, respectively (both remain 0 unless the index is high performance).
int termNumber = 0, k, prevK = -1, previousHeaderLength = 0, newHeaderLength = 0;
long length, count, positionsOffset = 0;
int res, frequency;
// Main loop: one iteration per term of the global index. Each term's frequency, global count,
// term string, offset and posting bits are sent to the local index chosen by the strategy.
while( terms.readLine( currTerm ) != null ) {
k = strategy.localIndex( termNumber ); // The local index for this term
// Sanity check: the local number the strategy assigns to this term must equal the number
// of terms already sent to local index k.
if ( numTerms[ k ] != strategy.localNumber( termNumber ) ) throw new IllegalStateException();
numTerms[ k ]++;
if ( isHighPerformance ) {
// In a high-performance index each term's data starts with a delta-coded offset into
// the positions file. Its length in bits is measured as the difference between the
// values of readBits() taken before and after reading it.
final long temp = globalIndex.readBits();
positionsOffset = globalIndex.readLongDelta();
previousHeaderLength = (int)( globalIndex.readBits() - temp );
if ( prevK != -1 ) {
// Copy the bits of the global positions stream between its current read position and
// positionsOffset to the positions stream of the local index of the previous term.
length = positionsOffset - globalPositions.readBits();
pl.count += length;
while( length > 0 ) {
// At most bufferSize * 8 bits (one full buffer) are moved per pass.
// NOTE(review): if bufferSize is an int, bufferSize * 8 is computed in int arithmetic.
res = (int)Math.min( bufferSize * 8, length );
globalPositions.read( buffer, res );
localPositionsStream[ prevK ].write( buffer, res );
length -= res;
}
}
// Write the new header: the number of bits written so far to the local positions stream,
// delta coded. NOTE(review): the returned value is used as the header length in bits --
// confirm against OutputBitStream.writeLongDelta().
newHeaderLength = localIndexStream[ k ].writeLongDelta( localPositionsStream[ k ].writtenBits() );
}
// Copy the frequency and accumulate it into the number of postings of the local index.
frequency = frequencies.readGamma();
localFrequencies[ k ].writeGamma( frequency );
numberOfPostings[ k ] += frequency;
if ( posNumBits != null ) localPosNumBits[ k ].writeGamma( posNumBits.readGamma() );
// Copy the global count and accumulate it into the number of occurrences of the local index.
count = globCounts.readLongGamma();
numberOfOccurrences[ k ] += count;
localGlobCounts[ k ].writeLongGamma( count );
currTerm.println( localTerms[ k ] );
// The value read from the global offsets stream includes the old header, which has already
// been consumed above: length is the number of bits still to be copied. The value written
// to the local offsets stream accounts for the new header instead.
length = offsets.readLongGamma() - previousHeaderLength;
localOffsets[ k ].writeLongGamma( length + newHeaderLength );
// NOTE(review): one is subtracted here, presumably because pl.update() below adds one -- confirm.
pl.count += length + previousHeaderLength - 1;
// Copy the remaining bits of this term from the global to the local index, one buffer at a time.
while( length > 0 ) {
res = (int)Math.min( bufferSize * 8, length );
globalIndex.read( buffer, res );
localIndexStream[ k ].write( buffer, res );
length -= res;
}
pl.update();
prevK = k;
termNumber++;
}
// We pour the last piece of positions: everything in the global positions file past the
// current read position is appended to the positions stream of the local index that
// received the last term. Nothing is done if the index is not high performance or no
// term was read (prevK is still -1).
if ( isHighPerformance && prevK != -1 ) {
	// globalPositionsLength is in bytes, readBits() in bits.
	length = globalPositionsLength * 8 - globalPositions.readBits();
	while( length > 0 ) {
		// At most bufferSize * 8 bits (one full buffer) are moved per pass.
		res = (int)Math.min( bufferSize * 8, length );
		globalPositions.read( buffer, res );
		localPositionsStream[ prevK ].write( buffer, res );
		length -= res;
	}
}
// Partitioning is complete: stop the progress logger and close the input streams.
pl.done();
terms.close();
offsets.close();
frequencies.close();
globCounts.close();
globalIndex.close();
// posNumBits is null when the input index has no positions-number-of-bits file.
if ( posNumBits != null ) posNumBits.close();
// NOTE(review): globalPositions (non-null for high-performance indices) is not closed in
// the lines above -- confirm that it is closed further on.