"--overwrite-output-dir is specified. This means that if the whole job fails you can retry simply by rerunning " +
"the program again using the same arguments."
);
ArgumentGroup hbaseIndexerGroup = parser.addArgumentGroup("HBase Indexer parameters")
.description("Parameters for specifying the HBase indexer definition and/or where it should be loaded from.");
Argument indexerZkHostArg = hbaseIndexerGroup.addArgument("--hbase-indexer-zk")
.metavar("STRING")
.help("The address of the ZooKeeper ensemble from which to fetch the indexer definition named --hbase-indexer-name. "
+ "Format is: a list of comma separated host:port pairs, each corresponding to a zk server. "
+ "Example: '127.0.0.1:2181,127.0.0.1:2182,127.0.0.1:2183'");
Argument indexNameArg = hbaseIndexerGroup.addArgument("--hbase-indexer-name")
.metavar("STRING")
.help("The name of the indexer configuration to fetch from the ZooKeeper ensemble specified "
+ "with --hbase-indexer-zk. Example: myIndexer");
Argument hbaseIndexerConfigArg = hbaseIndexerGroup.addArgument("--hbase-indexer-file")
.metavar("FILE")
.type(new FileArgumentType().verifyExists().verifyIsFile().verifyCanRead())
.help("Relative or absolute path to a local HBase indexer XML configuration file. If "
+ "supplied, this overrides --hbase-indexer-zk and --hbase-indexer-name. "
+ "Example: /path/to/morphline-hbase-mapper.xml");
Argument hbaseIndexerComponentFactoryArg = hbaseIndexerGroup.addArgument("--hbase-indexer-component-factory")
.metavar("STRING")
.help("Classname of the hbase indexer component factory.");
ArgumentGroup scanArgumentGroup = parser.addArgumentGroup("HBase scan parameters")
.description("Parameters for specifying what data is included while reading from HBase.");
Argument hbaseTableNameArg = scanArgumentGroup.addArgument("--hbase-table-name")
.metavar("STRING")
.help("Optional name of the HBase table containing the records to be indexed. If "
+ "supplied, this overrides the value from the --hbase-indexer-* options. "
+ "Example: myTable");
Argument startRowArg = scanArgumentGroup.addArgument("--hbase-start-row")
.metavar("BINARYSTRING")
.help("Binary string representation of start row from which to start indexing (inclusive). "
+ "The format of the supplied row key should use two-digit hex values prefixed by "
+ "\\x for non-ascii characters (e.g. 'row\\x00'). The semantics of this "
+ "argument are the same as those for the HBase Scan#setStartRow method. "
+ "The default is to include the first row of the table. Example: AAAA");
Argument endRowArg = scanArgumentGroup.addArgument("--hbase-end-row")
.metavar("BINARYSTRING")
.help("Binary string representation of end row prefix at which to stop indexing (exclusive). "
+ "See the description of --hbase-start-row for more information. "
+ "The default is to include the last row of the table. Example: CCCC");
Argument startTimeArg = scanArgumentGroup.addArgument("--hbase-start-time")
.metavar("STRING")
.help("Earliest timestamp (inclusive) in time range of HBase cells to be included for indexing. "
+ "The default is to include all cells. Example: 0");
Argument endTimeArg = scanArgumentGroup.addArgument("--hbase-end-time")
.metavar("STRING")
.help("Latest timestamp (exclusive) of HBase cells to be included for indexing. "
+ "The default is to include all cells. Example: 123456789");
Argument timestampFormatArg = scanArgumentGroup.addArgument("--hbase-timestamp-format")
.metavar("STRING")
.help("Timestamp format to be used to interpret --hbase-start-time and --hbase-end-time. " +
"This is a java.text.SimpleDateFormat compliant format (see " +
"http://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html). " +
"If this parameter is omitted then the timestamps are interpreted as number of " +
"milliseconds since the standard epoch (Unix time). " +
"Example: yyyy-MM-dd'T'HH:mm:ss.SSSZ");
ArgumentGroup solrClusterInfoGroup = parser.addArgumentGroup("Solr cluster arguments")
.description(
"Arguments that provide information about your Solr cluster. "
+ nonSolrCloud("If you are building shards for a SolrCloud cluster, pass the --zk-host argument. "
+ "If you are building shards for "
+ "a Non-SolrCloud cluster, pass the --shard-url argument one or more times. To build indexes for "
+ "a replicated Non-SolrCloud cluster with --shard-url, pass replica urls consecutively and also pass --shards. "
+ "Using --go-live requires either --zk-host or --shard-url."));
Argument zkHostArg = solrClusterInfoGroup.addArgument("--zk-host")
.metavar("STRING")
.type(String.class)
.help("The address of a ZooKeeper ensemble being used by a SolrCloud cluster. "
+ "This ZooKeeper ensemble will be examined to determine the number of output "
+ "shards to create as well as the Solr URLs to merge the output shards into when using the --go-live option. "
+ "Requires that you also pass the --collection to merge the shards into.\n"
+ "\n"
+ "The --zk-host option implements the same partitioning semantics as the standard SolrCloud "
+ "Near-Real-Time (NRT) API. This enables to mix batch updates from MapReduce ingestion with "
+ "updates from standard Solr NRT ingestion on the same SolrCloud cluster, "
+ "using identical unique document keys.\n"
+ "\n"
+ "Format is: a list of comma separated host:port pairs, each corresponding to a zk "
+ "server. Example: '127.0.0.1:2181,127.0.0.1:2182,127.0.0.1:2183' If "
+ "the optional chroot suffix is used the example would look "
+ "like: '127.0.0.1:2181/solr,127.0.0.1:2182/solr,127.0.0.1:2183/solr' "
+ "where the client would be rooted at '/solr' and all paths "
+ "would be relative to this root - i.e. getting/setting/etc... "
+ "'/foo/bar' would result in operations being run on "
+ "'/solr/foo/bar' (from the server perspective).\n"
+ nonSolrCloud("\n"
+ "If --solr-home-dir is not specified, the Solr home directory for the collection "
+ "will be downloaded from this ZooKeeper ensemble."));
Argument shardUrlsArg = nonSolrCloud(solrClusterInfoGroup.addArgument("--shard-url")
.metavar("URL")
.type(String.class)
.action(Arguments.append())
.help("Solr URL to merge resulting shard into if using --go-live. "
+ "Example: http://solr001.mycompany.com:8983/solr/collection1. "
+ "Multiple --shard-url arguments can be specified, one for each desired shard. "
+ "If you are merging shards into a SolrCloud cluster, use --zk-host instead."));
Argument shardsArg = nonSolrCloud(solrClusterInfoGroup.addArgument("--shards")
.metavar("INTEGER")
.type(Integer.class).choices(new RangeArgumentChoice(1, Integer.MAX_VALUE))
.help("Number of output shards to generate."));
ArgumentGroup goLiveGroup = parser.addArgumentGroup("Go live arguments")
.description("Arguments for merging the shards that are built into a live Solr cluster. "
+ "Also see the Cluster arguments.");
Argument goLiveArg = goLiveGroup.addArgument("--go-live")
.action(Arguments.storeTrue())
.help("Allows you to optionally merge the final index shards into a live Solr cluster after they are built. "
+ "You can pass the ZooKeeper address with --zk-host and the relevant cluster information will be auto detected. "
+ nonSolrCloud("If you are not using a SolrCloud cluster, --shard-url arguments can be used to specify each SolrCore to merge "
+ "each shard into."));
Argument collectionArg = goLiveGroup.addArgument("--collection")
.metavar("STRING")
.help("The SolrCloud collection to merge shards into when using --go-live and --zk-host. Example: collection1");
Argument goLiveThreadsArg = goLiveGroup.addArgument("--go-live-threads")
.metavar("INTEGER")
.type(Integer.class)
.choices(new RangeArgumentChoice(1, Integer.MAX_VALUE))
.setDefault(1000)
.help("Tuning knob that indicates the maximum number of live merges to run in parallel at one time.");
ArgumentGroup optionalGroup = parser.addArgumentGroup("Optional arguments");
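// Custom help action: print the standard argparse4j help plus generic tool usage and example
// invocations, then abort processing of any remaining arguments via FoundHelpArgument.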
optionalGroup.addArgument("--help", "-help", "-h").help("Show this help message and exit")
.action(new HelpArgumentAction() {
@Override
public void run(ArgumentParser parser, Argument arg, Map<String, Object> attrs, String flag, Object value) throws ArgumentParserException {
parser.printHelp(new PrintWriter(System.out, true));
System.out.println();
System.out.print(ForkedToolRunnerHelpFormatter.getGenericCommandUsage());
System.out.println("Examples: \n\n" +
"# (Re)index a table in GoLive mode based on a local indexer config file\n" +
"hadoop --config /etc/hadoop/conf \\\n" +
" jar hbase-indexer-mr-*-job.jar \\\n" +
" --conf /etc/hbase/conf/hbase-site.xml \\\n" +
" -D 'mapred.child.java.opts=-Xmx500m' \\\n" +
" --hbase-indexer-file indexer.xml \\\n" +
" --zk-host 127.0.0.1/solr \\\n" +
" --collection collection1 \\\n" +
" --go-live \\\n" +
" --log4j src/test/resources/log4j.properties\n\n" +
"# (Re)index a table in GoLive mode using a local morphline-based indexer config file\n" +
"# Also include extra library jar file containing JSON tweet Java parser:\n" +
"hadoop --config /etc/hadoop/conf \\\n" +
" jar hbase-indexer-mr-*-job.jar \\\n" +
" --conf /etc/hbase/conf/hbase-site.xml \\\n" +
" --libjars /path/to/kite-morphlines-twitter-0.10.0.jar \\\n" +
" -D 'mapred.child.java.opts=-Xmx500m' \\\n" +
" --hbase-indexer-file src/test/resources/morphline_indexer_without_zk.xml \\\n" +
" --zk-host 127.0.0.1/solr \\\n" +
" --collection collection1 \\\n" +
" --go-live \\\n" +
" --morphline-file src/test/resources/morphlines.conf \\\n" +
" --output-dir hdfs://c2202.mycompany.com/user/$USER/test \\\n" +
" --overwrite-output-dir \\\n" +
" --log4j src/test/resources/log4j.properties\n\n" +
"# (Re)index a table in GoLive mode\n" +
"hadoop --config /etc/hadoop/conf \\\n" +
" jar hbase-indexer-mr-*-job.jar \\\n" +
" --conf /etc/hbase/conf/hbase-site.xml \\\n" +
" -D 'mapred.child.java.opts=-Xmx500m' \\\n" +
" --hbase-indexer-file indexer.xml \\\n" +
" --zk-host 127.0.0.1/solr \\\n" +
" --collection collection1 \\\n" +
" --go-live \\\n" +
" --log4j src/test/resources/log4j.properties\n\n" +
"# (Re)index a table with direct writes to SolrCloud\n" +
"hadoop --config /etc/hadoop/conf \\\n" +
" jar hbase-indexer-mr-*-job.jar \\\n" +
" --conf /etc/hbase/conf/hbase-site.xml \\\n" +
" -D 'mapred.child.java.opts=-Xmx500m' \\\n" +
" --hbase-indexer-file indexer.xml \\\n" +
" --zk-host 127.0.0.1/solr \\\n" +
" --collection collection1 \\\n" +
" --reducers 0 \\\n" +
" --log4j src/test/resources/log4j.properties\n\n" +
"# (Re)index a table based on a indexer config stored in ZK\n" +
"hadoop --config /etc/hadoop/conf \\\n" +
" jar hbase-indexer-mr-*-job.jar \\\n" +
" --conf /etc/hbase/conf/hbase-site.xml \\\n" +
" -D 'mapred.child.java.opts=-Xmx500m' \\\n" +
" --hbase-indexer-zk zk01 \\\n" +
" --hbase-indexer-name docindexer \\\n" +
" --go-live \\\n" +
" --log4j src/test/resources/log4j.properties\n\n");
throw new FoundHelpArgument(); // Trick to prevent processing of any remaining arguments
}
});
Argument outputDirArg = optionalGroup.addArgument("--output-dir")
.metavar("HDFS_URI")
.type(new PathArgumentType(conf) {
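// Reject 'hdfs' paths that lack an authority (e.g. hdfs:///user/foo) at argument-parsing time
// rather than failing later; the TODO below notes fs.default.name could serve as a fallback instead.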
@Override
public Path convert(ArgumentParser parser, Argument arg, String value)
throws ArgumentParserException {
Path path = super.convert(parser, arg, value);
if ("hdfs".equals(path.toUri().getScheme())
&& path.toUri().getAuthority() == null) {
// TODO: consider defaulting to hadoop's
// fs.default.name here or in
// SolrRecordWriter.createEmbeddedSolrServer()
throw new ArgumentParserException("Missing authority in path URI: "
+ path, parser);
}
return path;
}
}.verifyHasScheme().verifyIsAbsolute().verifyCanWriteParent())
.help("HDFS directory to write Solr indexes to. Inside there one output directory per shard will be generated. "
+ "Example: hdfs://c2202.mycompany.com/user/$USER/test");
Argument overwriteOutputDirArg = optionalGroup.addArgument("--overwrite-output-dir")
.action(Arguments.storeTrue())
.help("Overwrite the directory specified by --output-dir if it already exists. Using this parameter will result in " +
"the output directory being recursively deleted at job startup.");
Argument morphlineFileArg = optionalGroup.addArgument("--morphline-file")
.metavar("FILE")
.type(new FileArgumentType().verifyExists().verifyIsFile().verifyCanRead())
.help("Relative or absolute path to a local config file that contains one or more morphlines. " +
"The file must be UTF-8 encoded. The file will be uploaded to each MR task. " +
"If supplied, this overrides the value from the --hbase-indexer-* options. " +
"Example: /path/to/morphlines.conf");
Argument morphlineIdArg = optionalGroup.addArgument("--morphline-id")
.metavar("STRING")
.type(String.class)
.help("The identifier of the morphline that shall be executed within the morphline config file, " +
"e.g. specified by --morphline-file. If the --morphline-id option is ommitted the first (i.e. " +
"top-most) morphline within the config file is used. If supplied, this overrides the value " +
"from the --hbase-indexer-* options. Example: morphline1 ");
Argument solrHomeDirArg = nonSolrCloud(optionalGroup.addArgument("--solr-home-dir")
.metavar("DIR")
.type(new FileArgumentType() {
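// Validate at parse time that the supplied Solr home dir contains a readable conf/solrconfig.xml,
// reusing FileArgumentType's existence/readability checks on that nested file.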
@Override
public File convert(ArgumentParser parser, Argument arg, String value)
throws ArgumentParserException {
File solrHomeDir = super.convert(parser, arg, value);
File solrConfigFile = new File(new File(solrHomeDir, "conf"),
"solrconfig.xml");
new FileArgumentType().verifyExists().verifyIsFile().verifyCanRead()
.convert(parser, arg, solrConfigFile.getPath());
return solrHomeDir;
}
}.verifyIsDirectory().verifyCanRead())
.required(false)
.help("Relative or absolute path to a local dir containing Solr conf/ dir and in particular "
+ "conf/solrconfig.xml and optionally also lib/ dir. This directory will be uploaded to each MR task. "
+ "Example: src/test/resources/solr/minimr"));
Argument updateConflictResolverArg = optionalGroup.addArgument("--update-conflict-resolver")
.metavar("FQCN")
.type(String.class)
.setDefault(RetainMostRecentUpdateConflictResolver.class.getName())
.help("Fully qualified class name of a Java class that implements the UpdateConflictResolver interface. "
+ "This enables deduplication and ordering of a series of document updates for the same unique document "
+ "key. For example, a MapReduce batch job might index multiple files in the same job where some of the "
+ "files contain old and new versions of the very same document, using the same unique document key.\n"
+ "Typically, implementations of this interface forbid collisions by throwing an exception, or ignore all but "
+ "the most recent document version, or, in the general case, order colliding updates ascending from least "
+ "recent to most recent (partial) update. The caller of this interface (i.e. the Hadoop Reducer) will then "
+ "apply the updates to Solr in the order returned by the orderUpdates() method.\n"
+ "The default RetainMostRecentUpdateConflictResolver implementation ignores all but the most recent document "
+ "version, based on a configurable numeric Solr field, which defaults to the file_last_modified timestamp");
Argument reducersArg = optionalGroup.addArgument("--reducers")
.metavar("INTEGER")
.type(Integer.class)
.choices(new RangeArgumentChoice(-2, Integer.MAX_VALUE))
// TODO: also support X% syntax where X is an integer
.setDefault(-1)
.help("Tuning knob that indicates the number of reducers to index into. "
+ "0 indicates that no reducers should be used, and documents should be sent directly from the mapper tasks to live Solr servers. "
+ "-1 indicates use all reduce slots available on the cluster. "
+ "-2 indicates use one reducer per output shard, which disables the mtree merge MR algorithm. "
+ "The mtree merge MR algorithm improves scalability by spreading load "
+ "(in particular CPU load) among a number of parallel reducers that can be much larger than the number "
+ "of solr shards expected by the user. It can be seen as an extension of concurrent lucene merges "
+ "and tiered lucene merges to the clustered case. The subsequent mapper-only phase "
+ "merges the output of said large number of reducers to the number of shards expected by the user, "
+ "again by utilizing more available parallelism on the cluster.");
Argument fanoutArg = optionalGroup.addArgument("--fanout")
.metavar("INTEGER")
.type(Integer.class)
.choices(new RangeArgumentChoice(2, Integer.MAX_VALUE))
.setDefault(Integer.MAX_VALUE)
.help(FeatureControl.SUPPRESS);
Argument maxSegmentsArg = optionalGroup.addArgument("--max-segments")
.metavar("INTEGER")
.type(Integer.class)
.choices(new RangeArgumentChoice(1, Integer.MAX_VALUE))
.setDefault(1)
.help("Tuning knob that indicates the maximum number of segments to be contained on output in the index of "
+ "each reducer shard. After a reducer has built its output index it applies a merge policy to merge segments "
+ "until there are <= maxSegments lucene segments left in this index. "
+ "Merging segments involves reading and rewriting all data in all these segment files, "
+ "potentially multiple times, which is very I/O intensive and time consuming. "
+ "However, an index with fewer segments can later be merged faster, "
+ "and it can later be queried faster once deployed to a live Solr serving shard. "
+ "Set maxSegments to 1 to optimize the index for low query latency. "
+ "In a nutshell, a small maxSegments value trades indexing latency for subsequently improved query latency. "
+ "This can be a reasonable trade-off for batch indexing systems.");
Argument fairSchedulerPoolArg = optionalGroup.addArgument("--fair-scheduler-pool")
.metavar("STRING")
.help("Optional tuning knob that indicates the name of the fair scheduler pool to submit jobs to. "
+ "The Fair Scheduler is a pluggable MapReduce scheduler that provides a way to share large clusters. "
+ "Fair scheduling is a method of assigning resources to jobs such that all jobs get, on average, an "
+ "equal share of resources over time. When there is a single job running, that job uses the entire "
+ "cluster. When other jobs are submitted, tasks slots that free up are assigned to the new jobs, so "
+ "that each job gets roughly the same amount of CPU time. Unlike the default Hadoop scheduler, which "
+ "forms a queue of jobs, this lets short jobs finish in reasonable time while not starving long jobs. "
+ "It is also an easy way to share a cluster between multiple of users. Fair sharing can also work with "
+ "job priorities - the priorities are used as weights to determine the fraction of total compute time "
+ "that each job gets.");
Argument dryRunArg = optionalGroup.addArgument("--dry-run")
.action(Arguments.storeTrue())
.help("Run in local mode and print documents to stdout instead of loading them into Solr. This executes "
+ "the morphline in the client process (without submitting a job to MR) for quicker turnaround during "
+ "early trial & debug sessions.");
Argument log4jConfigFileArg = optionalGroup.addArgument("--log4j")
.metavar("FILE")
.type(new FileArgumentType().verifyExists().verifyIsFile().verifyCanRead())
.help("Relative or absolute path to a log4j.properties config file on the local file system. This file "
+ "will be uploaded to each MR task. Example: /path/to/log4j.properties");
Argument verboseArg = optionalGroup.addArgument("--verbose", "-v")
.action(Arguments.storeTrue())
.help("Turn on verbose output.");
Argument clearIndexArg = optionalGroup.addArgument("--clear-index")
.action(Arguments.storeTrue())
.help("Will attempt to delete all entries in a solr index before starting batch build. This is not " +
"transactional so if the build fails the index will be empty.");
optionalGroup.addArgument(SHOW_NON_SOLR_CLOUD)
.action(Arguments.storeTrue())
.help("Also show options for Non-SolrCloud mode as part of --help.");
Namespace ns;
try {