// Copy the two bundled fixtures into the output streams prepared by the enclosing test.
// Class.getResourceAsStream() returns null for a missing resource and IOUtils.copy()
// closes neither of its arguments, so each source stream is checked and closed
// explicitly, and each destination is closed even when the copy fails.
try {
    final java.io.InputStream firstFixture = this.getClass().getResourceAsStream( "testContents.data" );
    assertNotNull( firstFixture );
    try {
        IOUtils.copy( firstFixture, outputStream );
    } finally {
        firstFixture.close();
    }
} finally {
    outputStream.close();
}
try {
    final java.io.InputStream secondFixture = this.getClass().getResourceAsStream( "testContentsAgain.data" );
    assertNotNull( secondFixture );
    try {
        IOUtils.copy( secondFixture, outputStreamAgain );
    } finally {
        secondFixture.close();
    }
} finally {
    outputStreamAgain.close();
}
// Build a single collection over the files named by temp and tempAgain, parsing each
// record with a TREC header factory composed with an ISO-8859-1 HTML factory.
// NOTE(review): the trailing 'false' is presumably the constructor's gzip flag; confirm.
TRECDocumentCollection collection = new TRECDocumentCollection(
    new String[] { temp.toString(), tempAgain.toString() },
    CompositeDocumentFactory.getFactory( new DocumentFactory[] {
        new TRECHeaderDocumentFactory(),
        new HtmlDocumentFactory( new String[] { "encoding=ISO-8859-1" } ) } ),
    4, // Very small, to induce fragmentation
    false );
// Sequential pass: walk the collection with its iterator and compare every document,
// in order, against the expected { uri, title, text } triple. The last document is
// expected to carry an empty text field.
final String[][] sequentialExpectations = {
    { "http://gx0001/", "GX001", "Line 1\n The line 2!\n Mamma\n" },
    { "http://gx0002/", "GX002", "Contents of this file reside on one line only\n" },
    { "http://gx0003/", "GX003", "Line 1\nLine 2\nLine 3\nLine 4\nLine 5\n" },
    { "http://gx0004/", "GX004", "New content 0\n" },
    { "http://gx0005/", "GX005", "New content 1\n" },
    { "http://gx0006/", "GX006", "New content 2\n" },
    { "http://gx0007/", "GX007", "" },
};
DocumentIterator iter = collection.iterator();
Document d = null;
// Index of the "text" field within the collection's factory; reused by every content check.
final int textIndex = collection.factory().fieldIndex( "text" );
for ( int seq = 0; seq < sequentialExpectations.length; seq++ ) {
    final String[] wanted = sequentialExpectations[ seq ];
    d = iter.nextDocument();
    assertNotNull( d );
    assertEquals( wanted[ 0 ], d.uri() );
    assertEquals( wanted[ 1 ], d.title() );
    assertEquals( wanted[ 2 ], IOUtils.toString( (Reader)d.content( textIndex ) ) );
}
// Once all seven documents have been consumed the iterator must report exhaustion with null.
d = iter.nextDocument();
assertNull( d );
iter.close();
d = collection.document( 0 );
assertNotNull(d);
assertEquals("http://gx0001/", d.uri());
assertEquals("GX001", d.title());
assertEquals( "Line 1\n The line 2!\n Mamma\n", IOUtils.toString( (Reader)d.content( textIndex ) ) );
d.close();
d = collection.document( 1 );
assertNotNull(d);
assertEquals("http://gx0002/", d.uri());
assertEquals("GX002", d.title());
assertEquals( "Contents of this file reside on one line only\n", IOUtils.toString( (Reader)d.content( textIndex ) ) );
d.close();
d = collection.document( 2 );
assertNotNull(d);
assertEquals("http://gx0003/", d.uri());
assertEquals("GX003", d.title());
assertEquals( "Line 1\nLine 2\nLine 3\nLine 4\nLine 5\n", IOUtils.toString( (Reader)d.content( textIndex ) ) );
d.close();
d = collection.document( 3 );
assertNotNull(d);
assertEquals("http://gx0004/", d.uri());
assertEquals("GX004", d.title());
assertEquals( "New content 0\n", IOUtils.toString( (Reader)d.content( textIndex ) ) );
d.close();
d = collection.document( 4 );
assertNotNull(d);
assertEquals("http://gx0005/", d.uri());
assertEquals("GX005", d.title());
assertEquals( "New content 1\n", IOUtils.toString( (Reader)d.content( textIndex ) ) );
d.close();
d = collection.document( 5 );
assertNotNull(d);
assertEquals("http://gx0006/", d.uri());
assertEquals("GX006", d.title());
assertEquals( "New content 2\n", IOUtils.toString( (Reader)d.content( textIndex ) ) );
d.close();
d = collection.document( 6 );
assertNotNull(d);
assertEquals("http://gx0007/", d.uri());
assertEquals("GX007", d.title());
assertEquals( "", IOUtils.toString( (Reader)d.content( textIndex ) ) );