// Per-local-index state, indexed by local index number: the writer of the local term list and the
// running totals of terms, occurrences and postings assigned to that local index.
final PrintWriter[] localTerms = new PrintWriter[ numIndices ];
final int numTerms[] = new int[ numIndices ];
final long numberOfOccurrences[] = new long[ numIndices ];
final long numberOfPostings[] = new long[ numIndices ];
// True if the index class named in the property file of the input index is BitStreamHPIndex or one of its subclasses.
final boolean isHighPerformance = BitStreamHPIndex.class.isAssignableFrom( Class.forName( new Properties( inputBasename + DiskBasedIndex.PROPERTIES_EXTENSION ).getString( Index.PropertyKeys.INDEXCLASS ) ) );
// The global index, read as a bit stream with the configured buffer size.
final InputBitStream globalIndex = new InputBitStream( inputBasename + DiskBasedIndex.INDEX_EXTENSION, bufferSize );
// Length in bytes of the global positions file (File.length() returns 0 if the file does not exist).
final long globalPositionsLength = new File( inputBasename + DiskBasedIndex.POSITIONS_EXTENSION ).length();
// The positions stream is opened only for high-performance indices; otherwise it is null.
final InputBitStream globalPositions = isHighPerformance ? new InputBitStream( inputBasename + DiskBasedIndex.POSITIONS_EXTENSION, bufferSize ) : null;
// The global term list, decoded as UTF-8 and read one term per line.
final FastBufferedReader terms = new FastBufferedReader( new InputStreamReader( new FileInputStream( inputBasename + DiskBasedIndex.TERMS_EXTENSION ), "UTF-8" ) );
final InputBitStream offsets = new InputBitStream( inputBasename + DiskBasedIndex.OFFSETS_EXTENSION );
// The positions-number-of-bits file is optional: its stream is null when the file is absent.
final File posNumBitsFile = new File( inputBasename + DiskBasedIndex.POSITIONS_NUMBER_OF_BITS_EXTENSION );
final InputBitStream posNumBits = posNumBitsFile.exists() ? new InputBitStream( inputBasename + DiskBasedIndex.POSITIONS_NUMBER_OF_BITS_EXTENSION ) : null;
final InputBitStream frequencies = new InputBitStream( inputBasename + DiskBasedIndex.FREQUENCIES_EXTENSION );
final InputBitStream globCounts = new InputBitStream( inputBasename + DiskBasedIndex.GLOBCOUNTS_EXTENSION );
// Read and discard the first value of the global offsets stream.
// NOTE(review): presumably the initial offset (each local offsets stream gets its own initial 0) -- confirm.
offsets.readGamma();
for( int i = 0; i < numIndices; i++ ) {
localIndexStream[ i ] = new OutputBitStream( localBasename[ i ] + DiskBasedIndex.INDEX_EXTENSION, bufferSize );
if ( isHighPerformance ) localPositionsStream[ i ] = new OutputBitStream( localBasename[ i ] + DiskBasedIndex.POSITIONS_EXTENSION, bufferSize );
localFrequencies[ i ] = new OutputBitStream( localBasename[ i ] + DiskBasedIndex.FREQUENCIES_EXTENSION );
localGlobCounts[ i ] = new OutputBitStream( localBasename[ i ] + DiskBasedIndex.GLOBCOUNTS_EXTENSION );
localTerms[ i ] = new PrintWriter( new OutputStreamWriter( new FastBufferedOutputStream( new FileOutputStream( localBasename[ i ] + DiskBasedIndex.TERMS_EXTENSION ) ), "UTF-8" ) );
localOffsets[ i ] = new OutputBitStream( localBasename[ i ] + DiskBasedIndex.OFFSETS_EXTENSION );
if ( posNumBits != null ) localPosNumBits[ i ] = new OutputBitStream( localBasename[ i ] + DiskBasedIndex.POSITIONS_NUMBER_OF_BITS_EXTENSION );
localOffsets[ i ].writeGamma( 0 );
}
// The current term
final MutableString currTerm = new MutableString();
// Progress is measured in bits: the size of the index file plus, for high-performance indices,
// the size of the positions file.
pl.expectedUpdates = ( new File( inputBasename + DiskBasedIndex.INDEX_EXTENSION ).length() + ( isHighPerformance ? new File( inputBasename + DiskBasedIndex.POSITIONS_EXTENSION ).length() : 0 ) ) * 8;
pl.itemsName = "bits";
pl.logInterval = logInterval;
pl.start( "Partitioning index..." );
// termNumber: number of the current term in the global term list; k: local index chosen for it;
// prevK: local index of the previous term (-1 before the first term);
// previousHeaderLength / newHeaderLength: bits taken by the positions offset at the start of the
// global / local posting list (both stay 0 unless the index is high-performance).
int termNumber = 0, k, prevK = -1, previousHeaderLength = 0, newHeaderLength = 0;
long length, count, positionsOffset = 0;
int res, frequency;
// One iteration per line of the global term list.
while( terms.readLine( currTerm ) != null ) {
k = strategy.localIndex( termNumber ); // The local index for this term
// The strategy must number the terms of each local index consecutively, in the order in which they are met here.
if ( numTerms[ k ] != strategy.localNumber( termNumber ) ) throw new IllegalStateException();
numTerms[ k ]++;
if ( isHighPerformance ) {
// Read the positions offset stored at the start of this posting list and measure how many bits it took.
final long temp = globalIndex.readBits();
positionsOffset = globalIndex.readLongDelta();
previousHeaderLength = (int)( globalIndex.readBits() - temp );
if ( prevK != -1 ) {
// Everything between the current read position of the global positions stream and this term's
// positions offset is copied to the positions stream of the local index that received the previous term.
length = positionsOffset - globalPositions.readBits();
pl.count += length;
while( length > 0 ) {
res = (int)Math.min( bufferSize * 8, length );
globalPositions.read( buffer, res );
localPositionsStream[ prevK ].write( buffer, res );
length -= res;
}
}
// The local posting list starts with the current length in bits of the local positions stream; this is
// written after the copy above, so the value is up to date also when prevK == k.
newHeaderLength = localIndexStream[ k ].writeLongDelta( localPositionsStream[ k ].writtenBits() );
}
// Frequency, (optional) positions number of bits and global count are copied to the streams of
// local index k, accumulating the per-index posting and occurrence totals on the way.
frequency = frequencies.readGamma();
localFrequencies[ k ].writeGamma( frequency );
numberOfPostings[ k ] += frequency;
if ( posNumBits != null ) localPosNumBits[ k ].writeGamma( posNumBits.readGamma() );
count = globCounts.readLongGamma();
numberOfOccurrences[ k ] += count;
localGlobCounts[ k ].writeLongGamma( count );
// The term itself goes to the term list of local index k.
currTerm.println( localTerms[ k ] );
// The value read from the offsets stream, minus the header bits already consumed above, is the
// number of bits copied below from the global index.
length = offsets.readLongGamma() - previousHeaderLength;
// Locally the copied bits are preceded by the newly written header, whose length may differ.
localOffsets[ k ].writeLongGamma( length + newHeaderLength );
// NOTE(review): the -1 presumably compensates for the increment performed by pl.update() below -- confirm against ProgressLogger.
pl.count += length + previousHeaderLength - 1;
// Copy in chunks of at most bufferSize * 8 bits through buffer.
while( length > 0 ) {
res = (int)Math.min( bufferSize * 8, length );
globalIndex.read( buffer, res );
localIndexStream[ k ].write( buffer, res );
length -= res;
}
pl.update();
prevK = k;
termNumber++;
}
// We pour the last piece of positions: whatever is left in the global positions file after the
// position reached by the main loop is copied to the positions stream of the local index that
// received the last term. Nothing is done if no term was read (prevK == -1).
if ( isHighPerformance && prevK != -1 ) {
	length = globalPositionsLength * 8 - globalPositions.readBits();
	// Diagnostic output goes through the logger rather than straight to standard error.
	LOGGER.debug( "Pouring the last " + length + " bits of positions" );
	// These bits are included in pl.expectedUpdates, so they are counted like every other piece of positions.
	pl.count += length;
	// Copy in chunks of at most bufferSize * 8 bits through buffer.
	while( length > 0 ) {
		res = (int)Math.min( bufferSize * 8, length );
		globalPositions.read( buffer, res );
		localPositionsStream[ prevK ].write( buffer, res );
		length -= res;
	}
}
pl.done();

// The global input streams are no longer needed; the optional ones are closed only if they were opened.
terms.close();
offsets.close();
frequencies.close();
globCounts.close();
globalIndex.close();
if ( posNumBits != null ) posNumBits.close();
if ( isHighPerformance ) globalPositions.close();
// We copy the relevant properties from the original
// The property file of the input index is loaded here; the cluster properties are built from scratch.
Properties properties = new Properties( inputBasename + DiskBasedIndex.PROPERTIES_EXTENSION );
Properties globalProperties = new Properties();
// The strategy filename is recorded only when one is available.
if ( strategyFilename != null ) globalProperties.setProperty( IndexCluster.PropertyKeys.STRATEGY, strategyFilename );
// The resulting index is declared to be a LexicalCluster, with the bloom property set to false.
globalProperties.setProperty( DocumentalCluster.PropertyKeys.BLOOM, false );
globalProperties.setProperty( Index.PropertyKeys.INDEXCLASS, LexicalCluster.class.getName() );
// One LOCALINDEX entry per local index, in local-index order.
for( int i = 0; i < numIndices; i++ ) globalProperties.addProperty( IndexCluster.PropertyKeys.LOCALINDEX, localBasename[ i ] );
// The following values are taken unchanged from the input index.
globalProperties.setProperty( Index.PropertyKeys.FIELD, properties.getProperty( Index.PropertyKeys.FIELD ) );
globalProperties.setProperty( Index.PropertyKeys.POSTINGS, properties.getProperty( Index.PropertyKeys.POSTINGS ) );
globalProperties.setProperty( Index.PropertyKeys.OCCURRENCES, properties.getProperty( Index.PropertyKeys.OCCURRENCES ) );
globalProperties.setProperty( Index.PropertyKeys.DOCUMENTS, properties.getProperty( Index.PropertyKeys.DOCUMENTS ) );
globalProperties.setProperty( Index.PropertyKeys.TERMS, properties.getProperty( Index.PropertyKeys.TERMS ) );
globalProperties.setProperty( Index.PropertyKeys.TERMPROCESSOR, properties.getProperty( Index.PropertyKeys.TERMPROCESSOR ) );
globalProperties.setProperty( Index.PropertyKeys.MAXCOUNT, properties.getProperty( Index.PropertyKeys.MAXCOUNT ) );
globalProperties.setProperty( Index.PropertyKeys.MAXDOCSIZE, properties.getProperty( Index.PropertyKeys.MAXDOCSIZE ) );
// The cluster properties are saved under the output basename and logged at debug level.
globalProperties.save( outputBasename + DiskBasedIndex.PROPERTIES_EXTENSION );
LOGGER.debug( "Properties for clustered index " + outputBasename + ": " + new ConfigurationMap( globalProperties ) );
// Finalise every local index: close its output streams, give it a copy of the sizes file of the
// input index and save its property file.
for( int i = 0; i < numIndices; i++ ) {
	localIndexStream[ i ].close();
	if ( isHighPerformance ) localPositionsStream[ i ].close();
	localOffsets[ i ].close();
	if ( posNumBits != null ) localPosNumBits[ i ].close();
	localFrequencies[ i ].close();
	localGlobCounts[ i ].close();
	localTerms[ i ].close();

	// The sizes file is copied verbatim; both streams are closed even if the copy fails.
	final InputStream input = new FileInputStream( inputBasename + DiskBasedIndex.SIZES_EXTENSION );
	try {
		final OutputStream output = new FileOutputStream( localBasename[ i ] + DiskBasedIndex.SIZES_EXTENSION );
		try {
			IOUtils.copy( input, output );
		}
		finally {
			output.close();
		}
	}
	finally {
		input.close();
	}

	// The local properties start as a copy of the cluster properties...
	final Properties localProperties = new Properties();
	localProperties.addAll( globalProperties );
	// ...then the term, occurrence and posting counts are replaced by the totals of this local index...
	localProperties.setProperty( Index.PropertyKeys.TERMS, numTerms[ i ] );
	localProperties.setProperty( Index.PropertyKeys.OCCURRENCES, numberOfOccurrences[ i ] );
	localProperties.setProperty( Index.PropertyKeys.POSTINGS, numberOfPostings[ i ] );
	// ...and the index class, codings and skip parameters are taken from the input index.
	localProperties.setProperty( Index.PropertyKeys.INDEXCLASS, properties.getProperty( Index.PropertyKeys.INDEXCLASS ) );
	localProperties.addProperties( Index.PropertyKeys.CODING, properties.getStringArray( Index.PropertyKeys.CODING ) );
	localProperties.setProperty( BitStreamIndex.PropertyKeys.SKIPQUANTUM, properties.getProperty( BitStreamIndex.PropertyKeys.SKIPQUANTUM ) );
	localProperties.setProperty( BitStreamIndex.PropertyKeys.SKIPHEIGHT, properties.getProperty( BitStreamIndex.PropertyKeys.SKIPHEIGHT ) );
	// Strategy-specific properties, when present, are added last.
	if ( strategyProperties[ i ] != null ) localProperties.addAll( strategyProperties[ i ] );
	localProperties.save( localBasename[ i ] + DiskBasedIndex.PROPERTIES_EXTENSION );
	LOGGER.debug( "Post-partitioning properties for index " + localBasename[ i ] + ": " + new ConfigurationMap( localProperties ) );
}
}