Package net.sourceforge.argparse4j.inf

Examples of net.sourceforge.argparse4j.inf.ArgumentGroup


            + "Fault Tolerance: Task attempts are retried on failure per the standard MapReduce or Spark "
            + "semantics. If the whole job fails you can retry simply by rerunning the program again "
            + "using the same arguments."
        );
   
    ArgumentGroup indexerArgGroup = parser.addArgumentGroup("CrunchIndexerOptions");
   
    // trailing positional arguments
    Argument inputFilesArg = indexerArgGroup.addArgument("input-files")
        .metavar("HDFS_URI")
        .type(new PathArgumentType(conf).verifyExists().verifyCanRead())
        .nargs("*")
        .setDefault()
        .help("HDFS URI of file or directory tree to ingest.");

    Argument inputFileListArg = indexerArgGroup.addArgument("--input-file-list", "--input-list")
        .action(Arguments.append())
        .metavar("URI")
        .type(new PathArgumentType(conf).acceptSystemIn().verifyExists().verifyCanRead())
        .help("Local URI or HDFS URI of a UTF-8 encoded file containing a list of HDFS URIs to ingest, " +
            "one URI per line in the file. If '-' is specified, URIs are read from the standard input. " +
            "Multiple --input-file-list arguments can be specified.");

    Argument inputFormatArg = indexerArgGroup.addArgument("--input-file-format")
        .metavar("FQCN")
        .type(String.class)
        .help("The Hadoop FileInputFormat to use for extracting data from splittable HDFS files. Can be a "
            + "fully qualified Java class name or one of ['text', 'avro', 'avroParquet']. If this option "
            + "is present the extraction phase will emit a series of input data records rather than a series "
            + "of HDFS file input streams.");

    Argument inputFileProjectionSchemaArg = indexerArgGroup.addArgument("--input-file-projection-schema")
        .metavar("FILE")
        .type(new FileArgumentType().verifyExists().verifyIsFile().verifyCanRead())
        .help("Relative or absolute path to an Avro schema file on the local file system. This will be used "
            + "as the projection schema for Parquet input files.");

    Argument inputFileReaderSchemaArg = indexerArgGroup.addArgument("--input-file-reader-schema")
        .metavar("FILE")
        .type(new FileArgumentType().verifyExists().verifyIsFile().verifyCanRead())
        .help("Relative or absolute path to an Avro schema file on the local file system. This will be used "
            + "as the reader schema for Avro or Parquet input files. "
            + "Example: src/test/resources/test-documents/strings.avsc");

    Argument morphlineFileArg = indexerArgGroup.addArgument("--morphline-file")
        .metavar("FILE")
        .type(new FileArgumentType().verifyExists().verifyIsFile().verifyCanRead())
        .required(true)
        .help("Relative or absolute path to a local config file that contains one or more morphlines. "
            + "The file must be UTF-8 encoded. It will be uploaded to each remote task. "
            + "Example: /path/to/morphline.conf");

    Argument morphlineIdArg = indexerArgGroup.addArgument("--morphline-id")
        .metavar("STRING")
        .type(String.class)
        .help("The identifier of the morphline that shall be executed within the morphline config file "
            + "specified by --morphline-file. If the --morphline-id option is omitted the first (i.e. "
            + "top-most) morphline within the config file is used. Example: morphline1");

    Argument pipelineTypeArg = indexerArgGroup.addArgument("--pipeline-type")
        .metavar("STRING")
        .type(PipelineType.class)
        .setDefault(PipelineType.mapreduce)
        .help("The engine to use for executing the job. Can be 'mapreduce' or 'spark'.");

    ArgumentGroup miscArgGroup = indexerArgGroup; //parser.addArgumentGroup("Misc arguments");

    miscArgGroup.addArgument("--xhelp", "--help", "-help")
        .help("Show this help message and exit")
        .action(new HelpArgumentAction() {
          @Override
          public void run(ArgumentParser parser, Argument arg, Map<String, Object> attrs, String flag, Object value) throws ArgumentParserException {
            StringWriter strWriter = new StringWriter();
            parser.printHelp(new PrintWriter(strWriter, true));
            String help = strWriter.toString();
            int i = help.indexOf(descriptionHead);
            String description = help.substring(i).trim();
            String usage = help.substring("usage: ".length(), i).trim();
            System.out.println(
                      "MapReduceUsage: export HADOOP_CLASSPATH=$myDependencyJarPaths; hadoop jar $myDriverJar \n" + CrunchIndexerTool.class.getName()
                    + " --libjars $myDependencyJarFiles [MapReduceGenericOptions]...\n"
                    + "        " + usage + "\n"
                    + "\n"
                    + "SparkUsage: spark-submit [SparkGenericOptions]... "
                    + "--master local|yarn --deploy-mode client|cluster\n"
                    + "--jars $myDependencyJarFiles --class " + CrunchIndexerTool.class.getName() + " $myDriverJar\n"
                    + "        " + usage + "\n"
                    + "\n"
                    + description + "\n"
                    + "\n"
                    + "SparkGenericOptions:     To print all options run 'spark-submit --help'\n"
                    + "\n"
                    + "MapReduceGenericOptions: " + ToolRunnerHelpFormatter.getGenericCommandUsage()
                    );
            System.out.println(
                      "Examples: \n\n"
                    + "# Prepare - Copy input files into HDFS:\n"
                    + "hadoop fs -copyFromLocal src/test/resources/test-documents/hello1.txt hdfs:/user/systest/input/\n"
                    + "\n"
                    + "# Prepare variables for convenient reuse:\n"
                    + "export myDriverJarDir=target # for build from git\n"
                    + "export myDriverJarDir=/opt/cloudera/parcels/CDH/lib/solr/contrib/crunch # for CDH with parcels\n"
                    + "export myDriverJarDir=/usr/lib/solr/contrib/crunch # for CDH with packages\n"
                    + "export myDependencyJarDir=target/lib # for build from git\n"
                    + "export myDependencyJarDir=/opt/cloudera/parcels/CDH/lib/search/lib/search-crunch # for CDH with parcels\n"
                    + "export myDependencyJarDir=/usr/lib/search/lib/search-crunch # for CDH with packages\n"
                    + "export myDriverJar=$(find $myDriverJarDir -maxdepth 1 -name '*.jar' ! -name '*-job.jar' ! -name '*-sources.jar')\n"
                    + "export myDependencyJarFiles=$(find $myDependencyJarDir -name '*.jar' | sort | tr '\\n' ',' | head -c -1)\n"
                    + "export myDependencyJarPaths=$(find $myDependencyJarDir -name '*.jar' | sort | tr '\\n' ':' | head -c -1)\n"
                    + "\n"
                    + "# MapReduce on Yarn - Ingest text file line by line into Solr:\n"
                    + "export HADOOP_CLASSPATH=$myDependencyJarPaths; hadoop \\\n"
                    + "  --config /etc/hadoop/conf.cloudera.YARN-1 \\\n"
                    + "  jar $myDriverJar " + CrunchIndexerTool.class.getName() + " \\\n"
                    + "  --libjars $myDependencyJarFiles \\\n"
                    + "  -D 'mapred.child.java.opts=-Xmx500m' \\\n"
                    + "  -D morphlineVariable.ZK_HOST=$(hostname):2181/solr \\\n"
                    + "  --files src/test/resources/test-documents/string.avsc \\\n"
                    + "  --morphline-file src/test/resources/test-morphlines/loadSolrLine.conf \\\n"
                    + "  --pipeline-type mapreduce \\\n"
                    + "  --chatty \\\n"
                    + "  --log4j src/test/resources/log4j.properties \\\n"
                    + "  /user/systest/input/hello1.txt\n"
                    + "\n"
                    + "# Spark in Local Mode (for rapid prototyping) - Ingest into Solr:\n"
                    + "spark-submit \\\n"
                    + "  --master local \\\n"
                    + "  --deploy-mode client \\\n"
                    + "  --jars $myDependencyJarFiles \\\n"
                    + "  --executor-memory 500M \\\n"
                    + "  # --driver-library-path /opt/cloudera/parcels/CDH/lib/hadoop/lib/native # for Snappy on CDH with parcels\\\n"
                    + "  # --driver-library-path /usr/lib/hadoop/lib/native # for Snappy on CDH with packages \\\n"
                    + "  --class " + CrunchIndexerTool.class.getName() + " \\\n"
                    + "  $myDriverJar \\\n"
                    + "  -D morphlineVariable.ZK_HOST=$(hostname):2181/solr \\\n"
                    + "  --morphline-file src/test/resources/test-morphlines/loadSolrLine.conf \\\n"
                    + "  --pipeline-type spark \\\n"
                    + "  --chatty \\\n"
                    + "  --log4j src/test/resources/log4j.properties \\\n"
                    + "  /user/systest/input/hello1.txt\n"
                    + "\n"
                    + "# Spark on Yarn in Client Mode (for testing) - Ingest into Solr:\n"
                    + "Same as above, except replace '--master local' with '--master yarn'\n"
                    + "\n"
                    + "# View the yarn executor log files (there is no GUI yet):\n"
                    + "yarn logs --applicationId $application_XYZ\n"
                    + "\n"
                    + "# Spark on Yarn in Cluster Mode (for production) - Ingest into Solr:\n"
                    + "spark-submit \\\n"
                    + "  --master yarn \\\n"
                    + "  --deploy-mode cluster \\\n"
                    + "  --jars $myDependencyJarFiles \\\n"
                    + "  --executor-memory 500M \\\n"
                    + "  --class " + CrunchIndexerTool.class.getName() + " \\\n"
                    + "  --files src/test/resources/log4j.properties,src/test/resources/test-morphlines/loadSolrLine.conf \\\n"
                    + "  $myDriverJar \\\n"
                    + "  -D hadoop.tmp.dir=/tmp \\\n"
                    + "  -D morphlineVariable.ZK_HOST=$(hostname):2181/solr \\\n"
                    + "  --morphline-file loadSolrLine.conf \\\n"
                    + "  --pipeline-type spark \\\n"
                    + "  --chatty \\\n"
                    + "  --log4j log4j.properties \\\n"
                    + "  /user/systest/input/hello1.txt\n"
            );
            throw new FoundHelpArgument(); // Trick to prevent processing of any remaining arguments
          }
        });

    Argument mappersArg = miscArgGroup.addArgument("--mappers")
        .metavar("INTEGER")
        .type(Integer.class)
        .choices(new RangeArgumentChoice(-1, Integer.MAX_VALUE)) // TODO: also support X% syntax where X is an integer
        .setDefault(-1)
        .help("Tuning knob that indicates the maximum number of MR mapper tasks to use. -1 indicates use all map slots " +
            "available on the cluster. This parameter only applies to non-splittable input files");

    Argument dryRunArg = miscArgGroup.addArgument("--dry-run")
        .action(Arguments.storeTrue())
        .help("Run the pipeline but print documents to stdout instead of loading them into Solr. " +
              "This can be used for quicker turnaround during early trial & debug sessions.");

    Argument log4jConfigFileArg = miscArgGroup.addArgument("--log4j")
        .metavar("FILE")
        .type(new FileArgumentType().verifyExists().verifyIsFile().verifyCanRead())
        .help("Relative or absolute path to a log4j.properties config file on the local file system. This file " +
            "will be uploaded to each remote task. Example: /path/to/log4j.properties");

    Argument verboseArg = miscArgGroup.addArgument("--chatty")
        .action(Arguments.storeTrue())
        .help("Turn on verbose output.");

    Namespace ns;
    try {
View Full Code Here

TOP

Related Classes of net.sourceforge.argparse4j.inf.ArgumentGroup

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.