new FlaggedOption( "bufferSize", JSAP.INTSIZE_PARSER, DEFAULT_BUFFER_SIZE, JSAP.NOT_REQUIRED, 'b', "buffer-size", "The size of an I/O buffer." ),
new UnflaggedOption( "collection", JSAP.STRING_PARSER, JSAP.REQUIRED, "The filename for the serialised collection." ),
new UnflaggedOption( "file", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.GREEDY, "A list of files that will be indexed. If missing, a list of files will be read from standard input." )
} );
JSAPResult jsapResult = jsap.parse( arg );
if ( jsap.messagePrinted() ) return;
final DocumentFactory userFactory = PropertyBasedDocumentFactory.getInstance( jsapResult.getClass( "factory" ), jsapResult.getStringArray( "property" ) );
String[] file = jsapResult.getStringArray( "file" );
if ( file.length == 0 ) {
final ObjectArrayList<String> files = new ObjectArrayList<String>();
BufferedReader bufferedReader = new BufferedReader( new InputStreamReader( System.in ) );
String s;
while ( ( s = bufferedReader.readLine() ) != null ) files.add( s );
file = files.toArray( new String[ 0 ] );
}
// Utilities such as find emit file names in no guaranteed order, so unless the
// "unsorted" option is set we sort the names to make the file list deterministic.
if ( !jsapResult.getBoolean( "unsorted" ) ) Arrays.sort( file );
final DocumentFactory composite = CompositeDocumentFactory.getFactory( new TRECHeaderDocumentFactory(), userFactory );
if ( file.length == 0 ) System.err.println( "WARNING: empty file set." );
BinIO.storeObject( new TRECDocumentCollection( file, composite, jsapResult.getInt( "bufferSize" ), jsapResult.getBoolean( "gzipped" ) ), jsapResult.getString( "collection" ) );
}