if ( building ) builder.open( "@0" ); // First batch
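// Set up the progress logger: display free memory alongside progress and start timing the indexing pass.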
pl.displayFreeMemory = true;
pl.start( "Indexing documents..." );
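// Working state, reused across documents.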
DocumentIterator iterator = documentSequence.iterator();
Reader reader;
WordReader wordReader;
ObjectList<VirtualDocumentFragment> fragments;
Document document;
int documentPointer = 0, documentsInBatch = 0;
long batchStartTime = System.currentTimeMillis();
boolean outOfMemoryError = false;
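// Main scanning loop: process one document at a time until the sequence is exhausted.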
while ( ( document = iterator.nextDocument() ) != null ) {
long overallTerms = 0;
if ( building ) builder.startDocument( document.title(), document.uri() );
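// Dispatch each indexed field on its type.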
for ( int i = 0; i < numberOfIndexedFields; i++ ) {
switch ( factory.fieldType( indexedField[ i ] ) ) {
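// Text fields: wrap the field content in the document's word reader and feed it to the per-field scanner.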
case TEXT:
reader = (Reader)document.content( indexedField[ i ] );
wordReader = document.wordReader( indexedField[ i ] );
wordReader.setReader( reader );
if ( building ) builder.startTextField();
scan[ i ].processDocument( map != null ? map[ documentPointer ] : documentPointer, wordReader );
if ( building ) builder.endTextField();
overallTerms += scan[ i ].numTerms;
break;
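// Virtual fields (typically anchor text): each fragment carries a document specifier that must be resolved to the document it refers to.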
case VIRTUAL:
fragments = (ObjectList<VirtualDocumentFragment>)document.content( indexedField[ i ] );
wordReader = document.wordReader( indexedField[ i ] );
virtualDocumentResolver[ i ].context( document );
for ( VirtualDocumentFragment fragment: fragments ) {
int virtualDocumentPointer = virtualDocumentResolver[ i ].resolve( fragment.documentSpecifier() );
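// Fragments whose specifier cannot be resolved are skipped.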
if ( virtualDocumentPointer < 0 ) continue;
if ( map != null ) virtualDocumentPointer = map[ virtualDocumentPointer ];
wordReader.setReader( new FastBufferedReader( fragment.text() ) );
scan[ i ].processDocument( virtualDocumentPointer, wordReader );
}
if ( building ) builder.virtualField( fragments );
overallTerms += scan[ i ].numTerms;
break;
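// Non-text fields (e.g., dates or integers): hand the raw content to the per-field accumulator.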
default:
Object o = document.content( indexedField[ i ] );
accumulator[ i ].processData( map != null ? map[ documentPointer ] : documentPointer, o );
if ( building ) builder.nonTextField( o );
break;
}
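// Remember whether any scanner failed a buffer reallocation; this forces a batch dump below.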
if ( scan[ i ] != null && scan[ i ].outOfMemoryError ) outOfMemoryError = true;
}
if ( building ) builder.endDocument();
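// Per-document bookkeeping: advance the pointers, close the document and update the progress logger.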
documentPointer++;
documentsInBatch++;
document.close();
pl.update();
// We attempt compaction if less than PERC_AVAILABLE_MEMORY_CHECK percent of memory is available.
long percAvailableMemory = Util.percAvailableMemory();
boolean compacted = false;
if ( ! outOfMemoryError && percAvailableMemory < PERC_AVAILABLE_MEMORY_CHECK ) {
LOGGER.info( "Starting compaction... (" + percAvailableMemory + "% available)" );
compacted = true;
Util.compactMemory();
percAvailableMemory = Util.percAvailableMemory();
LOGGER.info( "Compaction completed (" + percAvailableMemory + "% available)" );
}
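// Dump a batch if a scanner ran out of memory, too many terms accumulated, the batch is full, or memory is still scarce after compaction.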
if ( outOfMemoryError || overallTerms >= maxTerms || documentsInBatch == documentsPerBatch || ( compacted && percAvailableMemory < PERC_AVAILABLE_MEMORY_DUMP ) ) {
if ( outOfMemoryError ) LOGGER.warn( "OutOfMemoryError during buffer reallocation: writing a batch of " + documentsInBatch + " documents" );
else if ( overallTerms >= maxTerms ) LOGGER.warn( "Too many terms (" + overallTerms + "): writing a batch of " + documentsInBatch + " documents" );
else if ( compacted && percAvailableMemory < PERC_AVAILABLE_MEMORY_DUMP ) LOGGER.warn( "Available memory below " + PERC_AVAILABLE_MEMORY_DUMP + "%: writing a batch of " + documentsInBatch + " documents" );
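// Flush every field: text and virtual scanners dump their batch and open a new size bit stream, accumulators write their data.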
long occurrences = 0;
for ( int i = 0; i < numberOfIndexedFields; i++ ) {
switch ( factory.fieldType( indexedField[ i ] ) ) {
case TEXT:
case VIRTUAL:
occurrences += scan[ i ].dumpBatch();
scan[ i ].openSizeBitStream();
break;
default:
accumulator[ i ].writeData();
}
}
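// Rotate the collection builder: close the current collection and open a new one suffixed with the next batch number.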
if ( building ) {
builder.close();
builder.open( "@" + scan[ 0 ].batch );
}
LOGGER.info( "Last set of batches indexed at " + Util.format( ( 1000. * occurrences ) / ( System.currentTimeMillis() - batchStartTime ) ) + " occurrences/s" );
batchStartTime = System.currentTimeMillis();
documentsInBatch = 0;
outOfMemoryError = false;
}
}
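// The sequence is exhausted: close the iterator and the builder, then dump the final (possibly partial) batch of each field.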
iterator.close();
if ( builder != null ) builder.close();
for ( int i = 0; i < numberOfIndexedFields; i++ ) {
switch ( factory.fieldType( indexedField[ i ] ) ) {
case TEXT: