}
}
public void run() throws Exception {
final ProgressLogger pl = new ProgressLogger( LOGGER, logInterval );
final IntList sizeList = globalIndex.sizes;
partitionSizes();
final int[] position = new int[ globalIndex.maxCount ];
final int[] localFrequency = new int[ numIndices ];
final int[] usedIndex = new int[ numIndices ];
final InputBitStream[] direct = new InputBitStream[ numIndices ];
final InputBitStream[] indirect = new InputBitStream[ numIndices ];
final BloomFilter[] bloomFilter = bloomFilterPrecision != 0 ? new BloomFilter[ numIndices ] : null;
final File[] tempFile = new File[ numIndices ];
final CachingOutputBitStream[] temp = new CachingOutputBitStream[ numIndices ];
IndexIterator indexIterator;
for ( int i = 0; i < numIndices; i++ ) {
tempFile[ i ] = new File( localBasename[ i ] + ".temp" );
temp[ i ] = new CachingOutputBitStream( tempFile[ i ], bufferSize );
direct[ i ] = new InputBitStream( temp[ i ].buffer() );
indirect[ i ] = new InputBitStream( tempFile[ i ] );
if ( bloomFilterPrecision != 0 ) bloomFilter[ i ] = new BloomFilter( globalIndex.numberOfTerms, bloomFilterPrecision );
}
int usedIndices;
MutableString currentTerm = new MutableString();
Payload payload = null;
int frequency, globalPointer, localIndex, localPointer, count = -1;
pl.expectedUpdates = globalIndex.numberOfPostings;
pl.itemsName = "postings";
pl.logInterval = logInterval;
pl.start( "Partitioning index..." );
for ( int t = 0; t < globalIndex.numberOfTerms; t++ ) {
terms.readLine( currentTerm );
indexIterator = indexReader.nextIterator();
usedIndices = 0;
frequency = indexIterator.frequency();
for ( int j = 0; j < frequency; j++ ) {
globalPointer = indexIterator.nextDocument();
localIndex = strategy.localIndex( globalPointer );
if ( localFrequency[ localIndex ] == 0 ) {
// First time we see a document for this index.
currentTerm.println( localTerms[ localIndex ] );
numTerms[ localIndex ]++;
usedIndex[ usedIndices++ ] = localIndex;
if ( bloomFilterPrecision != 0 ) bloomFilter[ localIndex ].add( currentTerm );
}
/* Temporarily store the posting data; note that we save the global pointer (rather than
* the local one) because we will need it later to access the size list. */
localFrequency[ localIndex ]++;
numPostings[ localIndex ]++;
temp[ localIndex ].writeGamma( globalPointer );
if ( globalIndex.hasPayloads ) payload = indexIterator.payload();
if ( havePayloads ) payload.write( temp[ localIndex ] );
if ( haveCounts ) {
count = indexIterator.count();
temp[ localIndex ].writeGamma( count );
globCount[ localIndex ] += count;
if ( maxDocPos[ localIndex ] < count ) maxDocPos[ localIndex ] = count;
if ( havePositions ) {
final int[] pos = indexIterator.positionArray();
// TODO: compress this stuff
for( int p = 0; p < count; p++ ) temp[ localIndex ].writeGamma( pos[ p ] );
}
}
}
// We now run through the indices used by this term and copy from the temporary buffer.
OutputBitStream obs;
for( int k = 0; k < usedIndices; k++ ) {
final int i = usedIndex[ k ];
localFrequencies[ i ].writeGamma( localFrequency[ i ] );
if ( haveCounts ) numOccurrences[ i ] += globCount[ i ];
if ( localGlobCounts[ i ] != null ) localGlobCounts[ i ].writeLongGamma( globCount[ i ] );
globCount[ i ] = 0;
InputBitStream ibs;
indexWriter[ i ].newInvertedList();
temp[ i ].align();
if ( temp[ i ].buffer() != null ) ibs = direct[ i ];
else {
// We cannot read directly from the internal buffer.
ibs = indirect[ i ];
ibs.flush();
temp[ i ].flush();
}
ibs.position( 0 );
indexWriter[ i ].writeFrequency( localFrequency[ i ] );
for( int j = 0; j < localFrequency[ i ]; j++ ) {
obs = indexWriter[ i ].newDocumentRecord();
globalPointer = ibs.readGamma();
localPointer = strategy.localPointer( globalPointer );
indexWriter[ i ].writeDocumentPointer( obs, localPointer );
if ( havePayloads ) {
payload.read( ibs );
indexWriter[ i ].writePayload( obs, payload );
}
if ( haveCounts ) indexWriter[ i ].writePositionCount( obs, count = ibs.readGamma() );
if ( havePositions ) {
for( int p = 0; p < count; p++ ) position[ p ] = ibs.readGamma();
indexWriter[ i ].writeDocumentPositions( obs, position, 0, count, sizeList != null ? sizeList.getInt( globalPointer ) : -1 );
}
}
temp[ i ].position( 0 );
temp[ i ].writtenBits( 0 );