}
@SuppressWarnings("unchecked")
public static void main( final String[] arg ) throws JSAPException, InvocationTargetException, NoSuchMethodException, IllegalAccessException, ConfigurationException, ClassNotFoundException, IOException, InstantiationException, URISyntaxException {
SimpleJSAP jsap = new SimpleJSAP( IndexBuilder.class.getName(), "Builds an index (creates batches, combines them, and builds a term map).",
new Parameter[] {
new FlaggedOption( "sequence", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'S', "sequence", "A serialised document sequence that will be used instead of stdin." ),
new FlaggedOption( "objectSequence", new ObjectParser( DocumentSequence.class, MG4JClassParser.PACKAGE ), JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'o', "object-sequence", "An object specification describing a document sequence that will be used instead of stdin." ),
new FlaggedOption( "delimiter", JSAP.INTEGER_PARSER, Integer.toString( Scan.DEFAULT_DELIMITER ), JSAP.NOT_REQUIRED, 'd', "delimiter", "The document delimiter (when indexing stdin)." ),
new FlaggedOption( "factory", MG4JClassParser.getParser(), IdentityDocumentFactory.class.getName(), JSAP.NOT_REQUIRED, 'f', "factory", "A document factory with a standard constructor (when indexing stdin)." ),
new FlaggedOption( "property", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'p', "property", "A 'key=value' specification, or the name of a property file (when indexing stdin)." ).setAllowMultipleDeclarations( true ),
new FlaggedOption( "termProcessor", JSAP.STRING_PARSER, NullTermProcessor.class.getName(), JSAP.NOT_REQUIRED, 't', "term-processor", "Sets the term processor to the given class." ),
new FlaggedOption( "termMap", MG4JClassParser.getParser(), ImmutableExternalPrefixMap.class.getName(), JSAP.NOT_REQUIRED, 'm', "term-map", "Sets the term map class." ),
new Switch( "downcase", JSAP.NO_SHORTFLAG, "downcase", "A shortcut for setting the term processor to the downcasing processor." ),
new FlaggedOption( "indexedField", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'I', "indexed-field", "The field(s) of the document factory that will be indexed. (default: all fields)" ).setAllowMultipleDeclarations( true ),
new Switch( "allFields", 'a', "all-fields", "Index also all virtual fields; has no effect if indexedField has been used at least once." ),
new FlaggedOption( "batchSize", JSAP.INTSIZE_PARSER, Integer.toString( Scan.DEFAULT_BATCH_SIZE ), JSAP.NOT_REQUIRED, 's', "batch-size", "The maximum size of a batch, in documents. Batches will be smaller, however, if memory is exhausted." ),
new FlaggedOption( "maxTerms", JSAP.INTSIZE_PARSER, Integer.toString( Scan.DEFAULT_MAX_TERMS ), JSAP.NOT_REQUIRED, 'M', "max-terms", "The maximum number of terms in a batch, in documents." ),
new Switch( "keepBatches", JSAP.NO_SHORTFLAG, "keep-batches", "Do not delete intermediate batch files." ),
new FlaggedOption( "virtualDocumentResolver", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'v', "virtual-document-resolver", "The virtual document resolver. It can be specified several times in the form [<field>:]<filename>. If the field is omitted, it sets the document resolver for all virtual fields." ).setAllowMultipleDeclarations( true ),
new FlaggedOption( "virtualDocumentGap", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'g', "virtual-document-gap", "The virtual document gap. It can be specified several times in the form [<field>:]<gap>. If the field is omitted, it sets the document gap for all virtual fields; the default gap is " + Scan.DEFAULT_VIRTUAL_DOCUMENT_GAP ).setAllowMultipleDeclarations( true ),
new FlaggedOption( "scanBufferSize", JSAP.INTSIZE_PARSER, Util.formatBinarySize( Scan.DEFAULT_BUFFER_SIZE ), JSAP.NOT_REQUIRED, 'b', "scan-buffer-size", "The size of an I/O buffer for the scanning phase." ),
new FlaggedOption( "combineBufferSize", JSAP.INTSIZE_PARSER, Util.formatBinarySize( Combine.DEFAULT_BUFFER_SIZE ), JSAP.NOT_REQUIRED, JSAP.NO_SHORTFLAG, "combine-buffer-size", "The size of an I/O buffer for the combination phase." ),
new FlaggedOption( "pasteBufferSize", JSAP.INTSIZE_PARSER, Util.formatBinarySize( Paste.DEFAULT_BUFFER_SIZE ), JSAP.NOT_REQUIRED, JSAP.NO_SHORTFLAG, "paste-buffer-size", "The size of the internal temporary buffer used while pasting indices." ),
new FlaggedOption( "skipBufferSize", JSAP.INTSIZE_PARSER, Util.formatBinarySize( SkipBitStreamIndexWriter.DEFAULT_TEMP_BUFFER_SIZE ), JSAP.NOT_REQUIRED, JSAP.NO_SHORTFLAG, "skip-buffer-size", "The size of the internal temporary buffer used while creating an index with skips." ),
new FlaggedOption( "renumber", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'r', "renumber", "The filename of a document renumbering." ),
new FlaggedOption( "zipCollection", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'z', "zip", "Creates a support ZipDocumentCollection with given basename (obsolete)." ),
new FlaggedOption( "buildCollection", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'B', "build-collection", "During the indexing phase, build a collection using this basename." ),
new FlaggedOption( "builderClass", MG4JClassParser.getParser(), SimpleCompressedDocumentCollectionBuilder.class.getName(), JSAP.NOT_REQUIRED, JSAP.NO_SHORTFLAG, "builder-class", "Specifies a builder class for a document collection that will be created during the indexing phase." ),
new Switch( "exact", 'e', "exact", "The builder class should be instantiated in its exact form, which records both words and nonwords." ),
new FlaggedOption( "comp", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'c', "comp", "A compression flag for textual indices (may be specified several times)." ).setAllowMultipleDeclarations( true ),
new FlaggedOption( "payloadComp", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'C', "comp-payload", "A compression flag for payload indices (may be specified several times)." ).setAllowMultipleDeclarations( true ),
new Switch( "noSkips", JSAP.NO_SHORTFLAG, "no-skips", "Disables skips." ),
new Switch( "interleaved", JSAP.NO_SHORTFLAG, "interleaved", "Forces an interleaved index." ),
new FlaggedOption( "quantum", JSAP.INTEGER_PARSER, Integer.toString( BitStreamIndex.DEFAULT_QUANTUM ), JSAP.NOT_REQUIRED, 'Q', "quantum", "Enable skips with given quantum, if positive; fix space occupancy of variable-quantum skip towers in percentage if negative." ),
new FlaggedOption( "height", JSAP.INTSIZE_PARSER, Integer.toString( BitStreamIndex.DEFAULT_HEIGHT ), JSAP.NOT_REQUIRED, 'H', "height", "Enable skips with given height." ),
new FlaggedOption( "logInterval", JSAP.LONG_PARSER, Long.toString( ProgressLogger.DEFAULT_LOG_INTERVAL ), JSAP.NOT_REQUIRED, 'l', "log-interval", "The minimum time interval between activity logs in milliseconds." ),
new FlaggedOption( "tempDir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'T', "temp-dir", "A directory for all temporary batch files." ),
new UnflaggedOption( "basename", JSAP.STRING_PARSER, JSAP.REQUIRED, "The basename of the resulting index." )
});
JSAPResult jsapResult = jsap.parse( arg );
if ( jsap.messagePrinted() ) return;
if ( ( jsapResult.userSpecified( "builderClass" ) || jsapResult.userSpecified( "exact" ) ) && ! jsapResult.userSpecified( "buildCollection" ) ) throw new IllegalArgumentException( "To specify options about the collection building process, you must specify a basename first." );
if ( jsapResult.userSpecified( "sequence" ) && jsapResult.userSpecified( "objectSequence" ) ) throw new IllegalArgumentException( "You cannot specify both a serialised and an parseable-object sequence" );
final DocumentSequence documentSequence = jsapResult.userSpecified( "objectSequence" ) ? (DocumentSequence)jsapResult.getObject( "objectSequence" ) : Scan.getSequence( jsapResult.getString( "sequence" ), jsapResult.getClass( "factory" ), jsapResult.getStringArray( "property" ), jsapResult.getInt( "delimiter" ), LOGGER );