Package org.apache.avro.file

Examples of org.apache.avro.file.SnappyCodec$Option


 
  public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();
    Option helpOpt = DefaultOptionCreator.helpOption();
   
    Option parentOpt = obuilder.withLongName("parent").withRequired(true).withArgument(
      abuilder.withName("parent").withMinimum(1).withMaximum(1).create()).withDescription(
      "Parent dir containing the newsgroups").withShortName("p").create();
   
    Option outputDirOpt = obuilder.withLongName("outputDir").withRequired(true).withArgument(
      abuilder.withName("outputDir").withMinimum(1).withMaximum(1).create()).withDescription(
      "The output directory").withShortName("o").create();
   
    Option analyzerNameOpt = obuilder.withLongName("analyzerName").withRequired(true).withArgument(
      abuilder.withName("analyzerName").withMinimum(1).withMaximum(1).create()).withDescription(
      "The class name of the analyzer").withShortName("a").create();
   
    Option charsetOpt = obuilder.withLongName("charset").withRequired(true).withArgument(
      abuilder.withName("charset").withMinimum(1).withMaximum(1).create()).withDescription(
      "The name of the character encoding of the input files").withShortName("c").create();
   
    Group group = gbuilder.withName("Options").withOption(analyzerNameOpt).withOption(charsetOpt).withOption(
      outputDirOpt).withOption(parentOpt).withOption(helpOpt).create();
View Full Code Here


  }

  private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();

    Option help = builder.withLongName("help").withDescription("print this list").create();

    Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create();
    Option scores = builder.withLongName("scores").withDescription("output score diagnostics during training").create();

    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option inputFile = builder.withLongName("input")
            .withRequired(true)
            .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
            .withDescription("where to get training data")
            .create();

    Option outputFile = builder.withLongName("output")
            .withRequired(true)
            .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
            .withDescription("where to get training data")
            .create();

    Option predictors = builder.withLongName("predictors")
            .withRequired(true)
            .withArgument(argumentBuilder.withName("p").create())
            .withDescription("a list of predictor variables")
            .create();

    Option types = builder.withLongName("types")
            .withRequired(true)
            .withArgument(argumentBuilder.withName("t").create())
            .withDescription("a list of predictor variable types (numeric, word, or text)")
            .create();

    Option target = builder.withLongName("target")
            .withRequired(true)
            .withArgument(argumentBuilder.withName("target").withMaximum(1).create())
            .withDescription("the name of the target variable")
            .create();

    Option features = builder.withLongName("features")
            .withArgument(
                    argumentBuilder.withName("numFeatures")
                            .withDefault("1000")
                            .withMaximum(1).create())
            .withDescription("the number of internal hashed features to use")
            .create();

    Option passes = builder.withLongName("passes")
            .withArgument(
                    argumentBuilder.withName("passes")
                            .withDefault("2")
                            .withMaximum(1).create())
            .withDescription("the number of times to pass over the input data")
            .create();

    Option lambda = builder.withLongName("lambda")
            .withArgument(argumentBuilder.withName("lambda").withDefault("1e-4").withMaximum(1).create())
            .withDescription("the amount of coefficient decay to use")
            .create();

    Option rate = builder.withLongName("rate")
            .withArgument(argumentBuilder.withName("learningRate").withDefault("1e-3").withMaximum(1).create())
            .withDescription("the learning rate")
            .create();

    Option noBias = builder.withLongName("noBias")
            .withDescription("don't include a bias term")
            .create();

    Option targetCategories = builder.withLongName("categories")
            .withRequired(true)
            .withArgument(argumentBuilder.withName("number").withMaximum(1).create())
            .withDescription("the number of target categories to be considered")
            .create();
View Full Code Here

  }

  private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();

    Option help = builder.withLongName("help").withDescription("print this list").create();

    Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create();

    Option auc = builder.withLongName("auc").withDescription("print AUC").create();
    Option confusion = builder.withLongName("confusion").withDescription("print confusion matrix").create();

    Option scores = builder.withLongName("scores").withDescription("print scores").create();

    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option inputFileOption = builder.withLongName("input")
            .withRequired(true)
            .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
            .withDescription("where to get training data")
            .create();

    Option modelFileOption = builder.withLongName("model")
            .withRequired(true)
            .withArgument(argumentBuilder.withName("model").withMaximum(1).create())
            .withDescription("where to get a model")
            .create();
View Full Code Here

  public boolean parseArgs(String[] args) throws Exception {

    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();
    Option helpOpt = DefaultOptionCreator.helpOption();
   
    Option inputDirOpt = obuilder.withLongName("inputDir").withRequired(true).withArgument(
        abuilder.withName("inputDir").withMinimum(1).withMaximum(1).create()).withDescription(
        "The input directory").withShortName("i").create();
   
    Option trainingOutputDirOpt = obuilder.withLongName("trainingOutputDir").withRequired(true).withArgument(
        abuilder.withName("outputDir").withMinimum(1).withMaximum(1).create()).withDescription(
        "The training data output directory").withShortName("tr").create();
   
    Option testOutputDirOpt = obuilder.withLongName("testOutputDir").withRequired(true).withArgument(
        abuilder.withName("outputDir").withMinimum(1).withMaximum(1).create()).withDescription(
        "The test data output directory").withShortName("te").create();
   
    Option testSplitSizeOpt = obuilder.withLongName("testSplitSize").withRequired(false).withArgument(
        abuilder.withName("splitSize").withMinimum(1).withMaximum(1).create()).withDescription(
        "The number of documents held back as test data for each category").withShortName("ss").create();
   
    Option testSplitPctOpt = obuilder.withLongName("testSplitPct").withRequired(false).withArgument(
        abuilder.withName("splitPct").withMinimum(1).withMaximum(1).create()).withDescription(
        "The percentage of documents held back as test data for each category").withShortName("sp").create();
   
    Option splitLocationOpt = obuilder.withLongName("splitLocation").withRequired(false).withArgument(
        abuilder.withName("splitLoc").withMinimum(1).withMaximum(1).create()).withDescription(
        "Location for start of test data expressed as a percentage of the input file size (0=start, 50=middle, 100=end")
        .withShortName("sl").create();
   
    Option randomSelectionSizeOpt = obuilder.withLongName("randomSelectionSize").withRequired(false).withArgument(
        abuilder.withName("randomSize").withMinimum(1).withMaximum(1).create()).withDescription(
        "The number of itemr to be randomly selected as test data ").withShortName("rs").create();
   
    Option randomSelectionPctOpt = obuilder.withLongName("randomSelectionPct").withRequired(false).withArgument(
        abuilder.withName("randomPct").withMinimum(1).withMaximum(1).create()).withDescription(
        "Percentage of items to be randomly selected as test data ").withShortName("rp").create();
   
    Option charsetOpt = obuilder.withLongName("charset").withRequired(true).withArgument(
        abuilder.withName("charset").withMinimum(1).withMaximum(1).create()).withDescription(
        "The name of the character encoding of the input files").withShortName("c").create();
   
    Group group = gbuilder.withName("Options").withOption(inputDirOpt).withOption(trainingOutputDirOpt)
         .withOption(testOutputDirOpt).withOption(testSplitSizeOpt).withOption(testSplitPctOpt)
View Full Code Here

          }
          fileValidator.validate(values);
        }     
      };

    Option input   = createOption("input",
                                  "DFS input file(s) for the Map step",
                                  "path",
                                  Integer.MAX_VALUE,
                                  true)
   
    Option output  = createOption("output",
                                  "DFS output directory for the Reduce step",
                                  "path", 1, true);
    Option mapper  = createOption("mapper",
                                  "The streaming command to run", "cmd", 1, false);
    Option combiner = createOption("combiner",
                                   "The streaming command to run", "cmd", 1, false);
    // reducer could be NONE
    Option reducer = createOption("reducer",
                                  "The streaming command to run", "cmd", 1, false);
    Option file = createOption("file",
                               "File/dir to be shipped in the Job jar file",
                               "file", Integer.MAX_VALUE, false, execValidator);
    Option dfs = createOption("dfs",
                              "Optional. Override DFS configuration", "<h:p>|local", 1, false);
    Option jt = createOption("jt",
                             "Optional. Override JobTracker configuration", "<h:p>|local", 1, false);
    Option additionalconfspec = createOption("additionalconfspec",
                                             "Optional.", "spec", 1, false);
    Option inputformat = createOption("inputformat",
                                      "Optional.", "spec", 1, false);
    Option outputformat = createOption("outputformat",
                                       "Optional.", "spec", 1, false);
    Option partitioner = createOption("partitioner",
                                      "Optional.", "spec", 1, false);
    Option numReduceTasks = createOption("numReduceTasks",
        "Optional.", "spec",1, false );
    Option inputreader = createOption("inputreader",
                                      "Optional.", "spec", 1, false);
    Option mapDebug = createOption("mapdebug",
                                   "Optional.", "spec", 1, false);
    Option reduceDebug = createOption("reducedebug",
                                      "Optional", "spec",1, false);
    Option cacheFile = createOption("cacheFile",
                                    "File name URI", "fileNameURI", Integer.MAX_VALUE, false);
    Option cacheArchive = createOption("cacheArchive",
                                       "File name URI", "fileNameURI", Integer.MAX_VALUE, false);
   
    // boolean properties
   
    Option verbose = createBoolOption("verbose", "print verbose output");
    Option info = createBoolOption("info", "print verbose output");
    Option help = createBoolOption("help", "print this help message");
    Option debug = createBoolOption("debug", "print debug output");
    Option inputtagged = createBoolOption("inputtagged", "inputtagged");
   
    allOptions = new GroupBuilder().
      withOption(input).
      withOption(output).
      withOption(mapper).
View Full Code Here

  public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = DefaultOptionCreator.inputOption().create();

    Option outputDirOpt = DefaultOptionCreator.outputOption().create();

    Option chunkSizeOpt = obuilder.withLongName("chunkSize").withArgument(
            abuilder.withName("chunkSize").withMinimum(1).withMaximum(1).create()).withDescription(
            "The chunkSize in MegaBytes. Defaults to 64").withShortName("chunk").create();

    Option keyPrefixOpt = obuilder.withLongName("keyPrefix").withArgument(
            abuilder.withName("keyPrefix").withMinimum(1).withMaximum(1).create()).withDescription(
            "The prefix to be prepended to the key").withShortName("prefix").create();
    Option charsetOpt = obuilder.withLongName("charset").withRequired(true).withArgument(
            abuilder.withName("charset").withMinimum(1).withMaximum(1).create()).withDescription(
            "The name of the character encoding of the input files").withShortName("c").create();
    Option subjectOpt = obuilder.withLongName("subject").withRequired(false).
            withDescription("Include the Mail subject as part of the text.  Default is false").withShortName("s").create();
    Option toOpt = obuilder.withLongName("to").withRequired(false).
            withDescription("Include the to field in the text.  Default is false").withShortName("to").create();
    Option fromOpt = obuilder.withLongName("from").withRequired(false).
            withDescription("Include the from field in the text.  Default is false").withShortName("from").create();
    Option refsOpt = obuilder.withLongName("references").withRequired(false).
            withDescription("Include the references field in the text.  Default is false").withShortName("refs").create();
    Option bodyOpt = obuilder.withLongName("body").withRequired(false).
            withDescription("Include the body in the output.  Default is false").withShortName("b").create();
    Option quotedOpt = obuilder.withLongName("stripQuoted").withRequired(false).
            withDescription("Strip (remove) quoted email text in the body.  Default is false").withShortName("q").create();
    Option quotedRegexOpt = obuilder.withLongName("quotedRegex").withRequired(false).withArgument(abuilder.withName("regex").withMinimum(1).withMaximum(1).create())
            .withDescription("Specify the regex that identifies quoted text.  Default is to look for > or | at the beginning of the line.").withShortName("q").create();
    Option separatorOpt = obuilder.withLongName("separator").withRequired(false).withArgument(
            abuilder.withName("separator").withMinimum(1).withMaximum(1).create()).
            withDescription("The separator to use between metadata items (to, from, etc.).  Default is \\n").withShortName("sep").create();

    Option bodySeparatorOpt = obuilder.withLongName("bodySeparator").withRequired(false).withArgument(
            abuilder.withName("bodySeparator").withMinimum(1).withMaximum(1).create()).
            withDescription("The separator to use between lines in the body.  Default is \\n.  Useful to change if you wish to have the message be on one line").withShortName("bodySep").create();
    Option helpOpt = DefaultOptionCreator.helpOption();

    Group group = gbuilder.withName("Options").withOption(keyPrefixOpt).withOption(chunkSizeOpt).withOption(
            charsetOpt).withOption(outputDirOpt).withOption(helpOpt).withOption(inputOpt).withOption(subjectOpt).withOption(toOpt)
            .withOption(fromOpt).withOption(bodyOpt).withOption(quotedOpt).withOption(refsOpt).withOption(bodySeparatorOpt)
            .withOption(quotedRegexOpt)
View Full Code Here

  public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();
   
    Option inputOpt = DefaultOptionCreator.inputOption().withRequired(false).create();
    Option outputOpt = DefaultOptionCreator.outputOption().withRequired(false).create();
    Option vectorOpt = obuilder.withLongName("vector").withRequired(false).withArgument(
      abuilder.withName("v").withMinimum(1).withMaximum(1).create()).withDescription(
      "The vector implementation to use.").withShortName("v").create();
   
    Option helpOpt = DefaultOptionCreator.helpOption();
   
    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(
      vectorOpt).withOption(helpOpt).create();

    try {
View Full Code Here

  public int run(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();
   
    Option inputDirOpt = DefaultOptionCreator.inputOption().create();
   
    Option outputDirOpt = DefaultOptionCreator.outputOption().create();
   
    Option minSupportOpt = obuilder.withLongName("minSupport").withArgument(
      abuilder.withName("minSupport").withMinimum(1).withMaximum(1).create()).withDescription(
      "(Optional) Minimum Support. Default Value: 2").withShortName("s").create();
   
    Option analyzerNameOpt = obuilder.withLongName("analyzerName").withArgument(
      abuilder.withName("analyzerName").withMinimum(1).withMaximum(1).create()).withDescription(
      "The class name of the analyzer").withShortName("a").create();
   
    Option chunkSizeOpt = obuilder.withLongName("chunkSize").withArgument(
      abuilder.withName("chunkSize").withMinimum(1).withMaximum(1).create()).withDescription(
      "The chunkSize in MegaBytes. 100-10000 MB").withShortName("chunk").create();
   
    Option weightOpt = obuilder.withLongName("weight").withRequired(false).withArgument(
      abuilder.withName("weight").withMinimum(1).withMaximum(1).create()).withDescription(
      "The kind of weight to use. Currently TF or TFIDF").withShortName("wt").create();
   
    Option minDFOpt = obuilder.withLongName("minDF").withRequired(false).withArgument(
      abuilder.withName("minDF").withMinimum(1).withMaximum(1).create()).withDescription(
      "The minimum document frequency.  Default is 1").withShortName("md").create();

    Option maxDFPercentOpt = obuilder.withLongName("maxDFPercent").withRequired(false).withArgument(
      abuilder.withName("maxDFPercent").withMinimum(1).withMaximum(1).create()).withDescription(
      "The max percentage of docs for the DF.  Can be used to remove really high frequency terms."
          + " Expressed as an integer between 0 and 100. Default is 99.  If maxDFSigma is also set, it will override this value.").withShortName("x").create();

    Option maxDFSigmaOpt = obuilder.withLongName("maxDFSigma").withRequired(false).withArgument(
      abuilder.withName("maxDFSigma").withMinimum(1).withMaximum(1).create()).withDescription(
      "What portion of the tf (tf-idf) vectors to be used, expressed in times the standard deviation (sigma) of the document frequencies of these vectors." +
              "  Can be used to remove really high frequency terms."
          + " Expressed as a double value. Good value to be specified is 3.0. In case the value is less then 0 no vectors " +
              "will be filtered out. Default is -1.0.  Overrides maxDFPercent").withShortName("xs").create();
   
    Option minLLROpt = obuilder.withLongName("minLLR").withRequired(false).withArgument(
      abuilder.withName("minLLR").withMinimum(1).withMaximum(1).create()).withDescription(
      "(Optional)The minimum Log Likelihood Ratio(Float)  Default is " + LLRReducer.DEFAULT_MIN_LLR)
        .withShortName("ml").create();
   
    Option numReduceTasksOpt = obuilder.withLongName("numReducers").withArgument(
      abuilder.withName("numReducers").withMinimum(1).withMaximum(1).create()).withDescription(
      "(Optional) Number of reduce tasks. Default Value: 1").withShortName("nr").create();
   
    Option powerOpt = obuilder.withLongName("norm").withRequired(false).withArgument(
      abuilder.withName("norm").withMinimum(1).withMaximum(1).create()).withDescription(
      "The norm to use, expressed as either a float or \"INF\" if you want to use the Infinite norm.  "
          + "Must be greater or equal to 0.  The default is not to normalize").withShortName("n").create();
   
    Option logNormalizeOpt = obuilder.withLongName("logNormalize").withRequired(false)
    .withDescription(
      "(Optional) Whether output vectors should be logNormalize. If set true else false")
    .withShortName("lnorm").create();
   
    Option maxNGramSizeOpt = obuilder.withLongName("maxNGramSize").withRequired(false).withArgument(
      abuilder.withName("ngramSize").withMinimum(1).withMaximum(1).create())
        .withDescription(
          "(Optional) The maximum size of ngrams to create"
              + " (2 = bigrams, 3 = trigrams, etc) Default Value:1").withShortName("ng").create();
   
    Option sequentialAccessVectorOpt = obuilder.withLongName("sequentialAccessVector").withRequired(false)
        .withDescription(
          "(Optional) Whether output vectors should be SequentialAccessVectors. If set true else false")
        .withShortName("seq").create();
   
    Option namedVectorOpt = obuilder.withLongName("namedVector").withRequired(false)
    .withDescription(
      "(Optional) Whether output vectors should be NamedVectors. If set true else false")
    .withShortName("nv").create();
   
    Option overwriteOutput = obuilder.withLongName("overwrite").withRequired(false).withDescription(
      "If set, overwrite the output directory").withShortName("ow").create();
    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
        .create();
   
    Group group = gbuilder.withName("Options").withOption(minSupportOpt).withOption(analyzerNameOpt)
        .withOption(chunkSizeOpt).withOption(outputDirOpt).withOption(inputDirOpt).withOption(minDFOpt)
        .withOption(maxDFSigmaOpt).withOption(maxDFPercentOpt).withOption(weightOpt).withOption(powerOpt).withOption(minLLROpt)
View Full Code Here

   *
   *
   */
  public Map<String, String> parseArguments(String[] args) throws IOException {

    Option helpOpt = addOption(DefaultOptionCreator.helpOption());
    addOption("tempDir", null, "Intermediate output directory", "temp");
    addOption("startPhase", null, "First phase to run", "0");
    addOption("endPhase", null, "Last phase to run", String.valueOf(Integer.MAX_VALUE));

    GroupBuilder gBuilder = new GroupBuilder().withName("Job-Specific Options:");
View Full Code Here

   
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();
   
    Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
        .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
        .withDescription("Data path").create();
   
    Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
        .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
        .withDescription("Dataset path").create();
   
    Option selectionOpt = obuilder.withLongName("selection").withShortName("sl").withRequired(false)
        .withArgument(abuilder.withName("m").withMinimum(1).withMaximum(1).create())
        .withDescription("Optional, Number of variables to select randomly at each tree-node.\n" +
        "For classification problem, the default is square root of the number of explanatory variables.\n" +
        "For regression problem, the default is 1/3 of the number of explanatory variables.").create();

    Option noCompleteOpt = obuilder.withLongName("no-complete").withShortName("nc").withRequired(false)
        .withDescription("Optional, The tree is not complemented").create();

    Option minSplitOpt = obuilder.withLongName("minsplit").withShortName("ms").withRequired(false)
        .withArgument(abuilder.withName("minsplit").withMinimum(1).withMaximum(1).create())
        .withDescription("Optional, The tree-node is not divided, if the branching data size is " +
        "smaller than this value.\nThe default is 2.").create();

    Option minPropOpt = obuilder.withLongName("minprop").withShortName("mp").withRequired(false)
        .withArgument(abuilder.withName("minprop").withMinimum(1).withMaximum(1).create())
        .withDescription("Optional, The tree-node is not divided, if the proportion of the " +
        "variance of branching data is smaller than this value.\n" +
        "In the case of a regression problem, this value is used. " +
        "The default is 1/1000(0.001).").create();

    Option seedOpt = obuilder.withLongName("seed").withShortName("sd").withRequired(false)
        .withArgument(abuilder.withName("seed").withMinimum(1).withMaximum(1).create())
        .withDescription("Optional, seed value used to initialise the Random number generator").create();
   
    Option partialOpt = obuilder.withLongName("partial").withShortName("p").withRequired(false)
        .withDescription("Optional, use the Partial Data implementation").create();
   
    Option nbtreesOpt = obuilder.withLongName("nbtrees").withShortName("t").withRequired(true)
        .withArgument(abuilder.withName("nbtrees").withMinimum(1).withMaximum(1).create())
        .withDescription("Number of trees to grow").create();
   
    Option outputOpt = obuilder.withLongName("output").withShortName("o").withRequired(true)
        .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
        .withDescription("Output path, will contain the Decision Forest").create();

    Option helpOpt = obuilder.withLongName("help").withShortName("h")
        .withDescription("Print out help").create();
   
    Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt)
        .withOption(selectionOpt).withOption(noCompleteOpt).withOption(minSplitOpt)
        .withOption(minPropOpt).withOption(seedOpt).withOption(partialOpt).withOption(nbtreesOpt)
View Full Code Here

TOP

Related Classes of org.apache.avro.file.SnappyCodec$Option

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.