/**
 * Returns the document stored at the given index.
 *
 * <p>The document bit stream is positioned at the offset recorded in
 * {@code docOffsets} for {@code index}, and the URI and title are read from it
 * immediately. Field content is decoded lazily by the returned document's
 * {@code content()} method, which reads sequentially from the underlying streams:
 * fields must therefore be requested in strictly increasing order, and each field
 * can be read at most once (an {@link IllegalStateException} is thrown otherwise).
 *
 * <p>The returned document reads from this collection's {@code documentsInputBitStream},
 * {@code termsInputStream} and {@code nonTermsInputStream}.
 * NOTE(review): this presumably means only one returned document should be
 * consumed at a time per collection instance; confirm against callers.
 *
 * @param index the index of the requested document (checked by {@code ensureDocumentIndex()}).
 * @return a document whose content is decoded on demand.
 * @throws IOException if reading from the underlying streams fails.
 */
public Document document( int index ) throws IOException {
	ensureDocumentIndex( index );
	ensureFiles();
	documentsInputBitStream.position( docOffsets.getLong( index ) );
	// Non-text fields are read from a zip entry named after the document index.
	final DataInputStream nonTextDataInputStream = hasNonText ? new DataInputStream( new FastBufferedInputStream( zipFile.getInputStream( zipFile.getEntry( Integer.toString( index ) ) ) ) ) : null;
	final MutableString uri = readSelfDelimitedUtf8String( documentsInputBitStream, new MutableString() );
	final MutableString title = readSelfDelimitedUtf8String( documentsInputBitStream, new MutableString() );

	return new AbstractDocument() {
		/** Buffer in which the content of a text field is rebuilt. */
		final MutableString fieldContent = new MutableString();

		/** An empty document created by the factory, used only to obtain word readers. */
		@SuppressWarnings("unchecked")
		final Document fakeDocument = factory.getDocument( NullInputStream.getInstance(), Reference2ObjectMaps.EMPTY_MAP );

		/** The index of the next field that can be read from the underlying streams. */
		int nextField = 0;

		/**
		 * Returns the content of the given field, skipping any preceding field not yet read.
		 *
		 * @param field the index of the requested field.
		 * @return a reader for text fields, a list of fragments for virtual fields,
		 * or a deserialized object for any other field type.
		 * @throws IllegalStateException if {@code field} has already been read or skipped.
		 */
		public Object content( int field ) throws IOException {
			final FieldType fieldType = factory.fieldType( field );

			if ( nextField > field ) throw new IllegalStateException();
			// Skip fields
			final MutableString s = new MutableString();
			int len;
			while( nextField < field ) {
				// Each skipped field must be decoded according to its OWN type, not the
				// type of the requested field: different types live in different streams
				// and use different encodings.
				switch( factory.fieldType( nextField ) ) {
				case TEXT:
					len = documentsInputBitStream.readDelta();
					// In exact mode every word is followed by a non-word, so there are twice as many deltas.
					if ( exact ) len *= 2;
					documentsInputBitStream.skipDeltas( len );
					break;
				case VIRTUAL:
					final int nfrag = nonTextDataInputStream.readInt();
					// Each fragment is a pair of self-delimited UTF-8 strings.
					for ( int i = 0; i < 2 * nfrag; i++ ) MutableString.skipSelfDelimUTF8( nonTextDataInputStream );
					break;
				default:
					// Serialized objects can only be skipped by deserializing and discarding them.
					try { new ObjectInputStream( nonTextDataInputStream ).readObject(); } catch ( ClassNotFoundException e ) { throw new RuntimeException( e ); }
				}
				nextField++;
			}

			// Read field
			nextField++;

			switch( fieldType ) {
			case TEXT:
				len = documentsInputBitStream.readDelta();
				fieldContent.length( 0 );
				termsFrequencyKeeper.reset();
				if ( exact ) nonTermsFrequencyKeeper.reset();

				while( len-- != 0 ) {
					// The decoded delta indexes termOffsets, which gives the position of the word in the term stream.
					termsInputStream.position( termOffsets.getLong( termsFrequencyKeeper.decode( documentsInputBitStream.readDelta() ) ) );
					s.readSelfDelimUTF8( termsInputStream );
					fieldContent.append( s );
					if ( exact ) {
						// Exact mode: the original non-word following the word is restored.
						nonTermsInputStream.position( nonTermOffsets.getLong( nonTermsFrequencyKeeper.decode( documentsInputBitStream.readDelta() ) ) );
						s.readSelfDelimUTF8( nonTermsInputStream );
						fieldContent.append( s );
					}
					else fieldContent.append( ' ');
				}
				return new FastBufferedReader( fieldContent );
			case VIRTUAL:
				final int nfrag = nonTextDataInputStream.readInt();
				MutableString doc = new MutableString();
				MutableString text = new MutableString();
				VirtualDocumentFragment[] fragArray = new VirtualDocumentFragment[ nfrag ];
				for ( int i = 0; i < nfrag; i++ ) {
					doc.readSelfDelimUTF8( (InputStream)nonTextDataInputStream );
					text.readSelfDelimUTF8( (InputStream)nonTextDataInputStream );
					// Copies are needed because doc and text are reused at each iteration.
					fragArray[ i ] = new AnchorExtractor.Anchor( doc.copy(), text.copy() );
				}
				return new ObjectArrayList<VirtualDocumentFragment>( fragArray );
			default:
				try { return new ObjectInputStream( nonTextDataInputStream ).readObject(); } catch ( ClassNotFoundException e ) { throw new RuntimeException( e ); }
			}
		}

		/** Returns the title read from the document stream. */
		public CharSequence title() {
			return title;
		}

		/** Returns the URI read from the document stream, or {@code null} if it is empty. */
		public CharSequence uri() {
			return uri.length() == 0 ? null : uri;
		}

		/** Returns the factory-provided word reader for text and virtual fields, and {@code null} for any other field type. */
		public WordReader wordReader( int field ) {
			switch( factory.fieldType( field ) ) {
			case TEXT:
			case VIRTUAL: return fakeDocument.wordReader( field );
			default: return null;
			}
		}

		/** Closes this document and, if present, the stream over its non-text data. */
		public void close() throws IOException {
			super.close();
			if ( hasNonText ) nonTextDataInputStream.close();
		}
	};
}