// Scans the stream `is` line by line and appends one TRECDocumentDescriptor to
// `descriptors` for every DOC_OPEN ... DOC_CLOSE block it finds. Each descriptor
// carries three stream positions taken from fbis.position():
//   currStart - position recorded before the DOC_OPEN line was read,
//   currInter - position recorded right after the DOCHDR_CLOSE line was read,
//   currStop  - position recorded before the DOC_CLOSE line was read.
// (Enclosing method header is not visible here; `is`, `buffer`, `bufferSize`,
// `fileIndex`, `file` and `descriptors` come from the surrounding scope.)
long currStart, currStop, currInter, oldPos;
// pastHeader: DOCHDR_CLOSE already seen in the current block.
// startedBlock: DOC_OPEN seen and the matching DOC_CLOSE not yet seen.
boolean pastHeader = false, startedBlock = false;
LOGGER.debug( "Processing file " + fileIndex + " (" + file[ fileIndex ] + ")" );
FastBufferedInputStream fbis = new FastBufferedInputStream( is, bufferSize );
// Initialised only for definite assignment: currStart is set in the DOC_OPEN
// branch but read in the DOC_CLOSE branch.
currStart = 0;
// NOTE(review): currInter is never reset between documents, so a block without a
// DOCHDR_CLOSE line gets the previous block's value (or 0 for the first) - confirm
// whether that is intended.
currInter = 0;
// oldPos holds the position recorded after the previously processed (non-skipped) line.
oldPos = 0;
// Length reported by the most recent readLine call.
int l;
// Loop until readLine reports -1 (end of stream in fastutil's readLine contract).
while ( ( l = fbis.readLine( buffer ) ) != -1 ) {
if ( l == buffer.length ) {
// The buffer was filled completely, so this line is longer than the buffer.
// Keep reading chunks until one does not fill the buffer; that last chunk (the
// tail of the long line, or -1) is discarded too, since nothing inspects it
// before the outer loop reads again.
// NOTE(review): oldPos is not updated on this path, so a marker line that directly
// follows a skipped long line sees the position from before the long line - verify
// that this is acceptable.
while ( ( l = fbis.readLine( buffer ) ) == buffer.length );
}
else {
// equals( buffer, l, X ) presumably compares the first l bytes of buffer with
// the marker X - helper is defined elsewhere, confirm.
if ( !startedBlock && equals( buffer, l, DOC_OPEN ) ) {
// Start of the current block (includes <DOC> marker): oldPos was recorded
// before this line was read.
currStart = oldPos;
startedBlock = true;
}
else if ( startedBlock && equals( buffer, l, DOC_CLOSE ) ) {
// End of the current block: oldPos was recorded before the closing marker
// line was read, so the stop position precedes that line.
currStop = oldPos;
if ( DEBUG ) LOGGER.debug( "Setting markers <" + currStart + "," + currInter + ", " + currStop + ">" );
descriptors.add( new TRECDocumentDescriptor( fileIndex, currStart, currInter, currStop ) );
// Reset the state so the next DOC_OPEN starts a fresh block.
startedBlock = pastHeader = false;
}
else if ( startedBlock && !pastHeader && equals( buffer, l, DOCHDR_CLOSE ) ) {
// Only the first DOCHDR_CLOSE inside a block counts; the position is taken
// after this line has been read.
currInter = fbis.position();
pastHeader = true;
}
// Remember where the next line begins; used as start/stop when that line
// turns out to be a marker.
oldPos = fbis.position();
}
}
// NOTE(review): in the lines visible here close() is reached only on normal
// completion; if readLine throws, the stream is left to the caller - confirm.
fbis.close();
}