Package com.google.gxp.compiler.cli

Examples of com.google.gxp.compiler.cli.GxpcFlags$CommandLine


      encodingOpt).withOption(analyzerOpt).withOption(defaultCatOpt).withOption(gramSizeOpt).withOption(
      typeOpt).withOption(dataSourceOpt).create();
   
    Parser parser = new Parser();
    parser.setGroup(options);
    CommandLine cmdLine = parser.parse(args);
   
    int gramSize = 1;
    if (cmdLine.hasOption(gramSizeOpt)) {
      gramSize = Integer.parseInt((String) cmdLine.getValue(gramSizeOpt));
     
    }
   
    BayesParameters params = new BayesParameters(gramSize);
   
    String modelBasePath = (String) cmdLine.getValue(pathOpt);
   
    log.info("Loading model from: {}", params.print());
   
    Algorithm algorithm;
    Datastore datastore;
   
    String classifierType = (String) cmdLine.getValue(typeOpt);
   
    String dataSource = (String) cmdLine.getValue(dataSourceOpt);
    if (dataSource.equals("hdfs")) {
      if (classifierType.equalsIgnoreCase("bayes")) {
        log.info("Using Bayes Classifier");
        algorithm = new BayesAlgorithm();
        datastore = new InMemoryBayesDatastore(params);
      } else if (classifierType.equalsIgnoreCase("cbayes")) {
        log.info("Using Complementary Bayes Classifier");
        algorithm = new CBayesAlgorithm();
        datastore = new InMemoryBayesDatastore(params);
      } else {
        throw new IllegalArgumentException("Unrecognized classifier type: " + classifierType);
      }
     
    } else if (dataSource.equals("hbase")) {
      if (classifierType.equalsIgnoreCase("bayes")) {
        log.info("Using Bayes Classifier");
        algorithm = new BayesAlgorithm();
        datastore = new HBaseBayesDatastore(modelBasePath, params);
      } else if (classifierType.equalsIgnoreCase("cbayes")) {
        log.info("Using Complementary Bayes Classifier");
        algorithm = new CBayesAlgorithm();
        datastore = new HBaseBayesDatastore(modelBasePath, params);
      } else {
        throw new IllegalArgumentException("Unrecognized classifier type: " + classifierType);
      }
     
    } else {
      throw new IllegalArgumentException("Unrecognized dataSource type: " + dataSource);
    }
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    String defaultCat = "unknown";
    if (cmdLine.hasOption(defaultCatOpt)) {
      defaultCat = (String) cmdLine.getValue(defaultCatOpt);
    }
    File docPath = new File((String) cmdLine.getValue(classifyOpt));
    String encoding = "UTF-8";
    if (cmdLine.hasOption(encodingOpt)) {
      encoding = (String) cmdLine.getValue(encodingOpt);
    }
    Analyzer analyzer = null;
    if (cmdLine.hasOption(analyzerOpt)) {
      String className = (String) cmdLine.getValue(analyzerOpt);
      analyzer = Class.forName(className).asSubclass(Analyzer.class).newInstance();
    }
    if (analyzer == null) {
      analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    }
View Full Code Here


                                              .withOption(minEigenValOpt);
    Group group = gBuilder.create();

    Map<String,String> argMap = new HashMap<String,String>();

    CommandLine cmdLine;
    try {
      Parser parser = new Parser();
      parser.setGroup(group);
      cmdLine = parser.parse(args);
    } catch (OptionException e) {
      log.error(e.getMessage());
      CommandLineUtil.printHelp(group);
      return null;
    }
    if (cmdLine.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(group);
      return argMap;
    }
    maybePut(argMap, cmdLine, eigenInputOpt, corpusInputOpt, helpOpt, outOpt, inMemOpt, errorOpt, minEigenValOpt);
    return argMap;
View Full Code Here

        .withOption(numReduceTasksOpt).withOption(maxNGramSizeOpt).withOption(overwriteOutput).withOption(
          helpOpt).withOption(sequentialAccessVectorOpt).create();
    try {
      Parser parser = new Parser();
      parser.setGroup(group);
      CommandLine cmdLine = parser.parse(args);
     
      if (cmdLine.hasOption(helpOpt)) {
        CommandLineUtil.printHelp(group);
        return;
      }
     
      String inputDir = (String) cmdLine.getValue(inputDirOpt);
      String outputDir = (String) cmdLine.getValue(outputDirOpt);
     
      int chunkSize = 100;
      if (cmdLine.hasOption(chunkSizeOpt)) {
        chunkSize = Integer.parseInt((String) cmdLine.getValue(chunkSizeOpt));
      }
      int minSupport = 2;
      if (cmdLine.hasOption(minSupportOpt)) {
        String minSupportString = (String) cmdLine.getValue(minSupportOpt);
        minSupport = Integer.parseInt(minSupportString);
      }
     
      int maxNGramSize = 1;
     
      if (cmdLine.hasOption(maxNGramSizeOpt)) {
        try {
          maxNGramSize = Integer.parseInt(cmdLine.getValue(maxNGramSizeOpt).toString());
        } catch (NumberFormatException ex) {
          log.warn("Could not parse ngram size option");
        }
      }
      log.info("Maximum n-gram size is: {}", maxNGramSize);
     
      if (cmdLine.hasOption(overwriteOutput)) {
        HadoopUtil.overwriteOutput(outputDir);
      }
     
      float minLLRValue = LLRReducer.DEFAULT_MIN_LLR;
      if (cmdLine.hasOption(minLLROpt)) {
        minLLRValue = Float.parseFloat(cmdLine.getValue(minLLROpt).toString());
      }
      log.info("Minimum LLR value: {}", minLLRValue);
     
      int reduceTasks = 1;
      if (cmdLine.hasOption(numReduceTasksOpt)) {
        reduceTasks = Integer.parseInt(cmdLine.getValue(numReduceTasksOpt).toString());
      }
      log.info("Pass1 reduce tasks: {}", reduceTasks);
     
      Class<? extends Analyzer> analyzerClass = StandardAnalyzer.class;
      if (cmdLine.hasOption(analyzerNameOpt)) {
        String className = cmdLine.getValue(analyzerNameOpt).toString();
        analyzerClass = (Class<? extends Analyzer>) Class.forName(className);
        // try instantiating it, b/c there isn't any point in setting it if
        // you can't instantiate it
        analyzerClass.newInstance();
      }
     
      boolean processIdf;
     
      if (cmdLine.hasOption(weightOpt)) {
        String wString = cmdLine.getValue(weightOpt).toString();
        if (wString.equalsIgnoreCase("tf")) {
          processIdf = false;
        } else if (wString.equalsIgnoreCase("tfidf")) {
          processIdf = true;
        } else {
          throw new OptionException(weightOpt);
        }
      } else {
        processIdf = true;
      }
     
      int minDf = 1;
      if (cmdLine.hasOption(minDFOpt)) {
        minDf = Integer.parseInt(cmdLine.getValue(minDFOpt).toString());
      }
      int maxDFPercent = 99;
      if (cmdLine.hasOption(maxDFPercentOpt)) {
        maxDFPercent = Integer.parseInt(cmdLine.getValue(maxDFPercentOpt).toString());
      }
     
      float norm = PartialVectorMerger.NO_NORMALIZING;
      if (cmdLine.hasOption(powerOpt)) {
        String power = cmdLine.getValue(powerOpt).toString();
        if (power.equals("INF")) {
          norm = Float.POSITIVE_INFINITY;
        } else {
          norm = Float.parseFloat(power);
        }
      }
      HadoopUtil.overwriteOutput(outputDir);
      String tokenizedPath = outputDir + DocumentProcessor.TOKENIZED_DOCUMENT_OUTPUT_FOLDER;
      DocumentProcessor.tokenizeDocuments(inputDir, analyzerClass, tokenizedPath);
     
      boolean sequentialAccessOutput = false;
      if (cmdLine.hasOption(sequentialAccessVectorOpt)) {
        sequentialAccessOutput = true;
      }
     
      DictionaryVectorizer.createTermFrequencyVectors(tokenizedPath, outputDir, minSupport, maxNGramSize,
        minLLRValue, reduceTasks, chunkSize, sequentialAccessOutput);
View Full Code Here

      gBuilder = gBuilder.withOption(opt);
    }
   
    Group group = gBuilder.create();
   
    CommandLine cmdLine;
    try {
      Parser parser = new Parser();
      parser.setGroup(group);
      cmdLine = parser.parse(args);
    } catch (OptionException e) {
      log.error(e.getMessage());
      CommandLineUtil.printHelp(group);
      return null;
    }
   
    if (cmdLine.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(group);
      return null;
    }
   
    Map<String,String> result = new HashMap<String,String>();
View Full Code Here

        .create();
    try {
      Parser parser = new Parser();
     
      parser.setGroup(group);
      CommandLine cmdLine = parser.parse(args);
      if (cmdLine.hasOption(helpOpt)) {
        CommandLineUtil.printHelp(group);
        return;
      }
     
      String classifierType = (String) cmdLine.getValue(typeOpt);
      String dataSourceType = (String) cmdLine.getValue(dataSourceOpt);
     
      BayesParameters params = new BayesParameters(Integer.parseInt((String) cmdLine.getValue(gramSizeOpt)));
     
      String alphaI = "1.0";
      if (cmdLine.hasOption(alphaOpt)) {
        alphaI = (String) cmdLine.getValue(alphaOpt);
      }
     
      params.set("alpha_i", alphaI);
     
      if (dataSourceType.equals("hbase")) {
        params.set("dataSource", "hbase");
      } else {
        params.set("dataSource", "hdfs");
      }
     
      if (classifierType.equalsIgnoreCase("bayes")) {
        log.info("Training Bayes Classifier");
        trainNaiveBayes((String) cmdLine.getValue(inputDirOpt), (String) cmdLine
            .getValue(outputOpt), params);
       
      } else if (classifierType.equalsIgnoreCase("cbayes")) {
        log.info("Training Complementary Bayes Classifier");
        // setup the HDFS and copy the files there, then run the trainer
        trainCNaiveBayes((String) cmdLine.getValue(inputDirOpt), (String) cmdLine
            .getValue(outputOpt), params);
      }
    } catch (OptionException e) {
      log.error("Error while parsing options", e);
      CommandLineUtil.printHelp(group);
View Full Code Here

        .withOption(helpOpt).withOption(methodOpt).withOption(verboseOutputOpt).withOption(alphaOpt).create();
   
    try {
      Parser parser = new Parser();
      parser.setGroup(group);
      CommandLine cmdLine = parser.parse(args);
     
      if (cmdLine.hasOption(helpOpt)) {
        CommandLineUtil.printHelp(group);
        return;
      }
     
      int gramSize = 1;
      if (cmdLine.hasOption(gramSizeOpt)) {
        gramSize = Integer.parseInt((String) cmdLine.getValue(gramSizeOpt));
       
      }
      BayesParameters params = new BayesParameters(gramSize);
     
      String modelBasePath = (String) cmdLine.getValue(pathOpt);
     
      String classifierType = (String) cmdLine.getValue(typeOpt);
      String dataSource = (String) cmdLine.getValue(dataSourceOpt);
     
      String defaultCat = "unknown";
      if (cmdLine.hasOption(defaultCatOpt)) {
        defaultCat = (String) cmdLine.getValue(defaultCatOpt);
      }
     
      String encoding = "UTF-8";
      if (cmdLine.hasOption(encodingOpt)) {
        encoding = (String) cmdLine.getValue(encodingOpt);
      }
     
      String alphaI = "1.0";
      if (cmdLine.hasOption(alphaOpt)) {
        alphaI = (String) cmdLine.getValue(alphaOpt);
      }
     
      boolean verbose = cmdLine.hasOption(verboseOutputOpt);
     
      String testDirPath = (String) cmdLine.getValue(dirOpt);
     
      String classificationMethod = "sequential";
      if (cmdLine.hasOption(methodOpt)) {
        classificationMethod = (String) cmdLine.getValue(methodOpt);
      }
     
      params.set("verbose", Boolean.toString(verbose));
      params.set("basePath", modelBasePath);
      params.set("classifierType", classifierType);
View Full Code Here

        .create();
   
    try {
      Parser parser = new Parser();
      parser.setGroup(group);
      CommandLine cmdLine = parser.parse(args);
     
      if (cmdLine.hasOption(helpOpt)) {
        CommandLineUtil.printHelp(group);
        return;
      }
     
      String input = cmdLine.getValue(inputOpt).toString();
      String output = cmdLine.getValue(outputOpt).toString();
      String measureClass = SquaredEuclideanDistanceMeasure.class.getName();
      if (cmdLine.hasOption(measureClassOpt)) {
        measureClass = cmdLine.getValue(measureClassOpt).toString();
      }
     
      // Class<? extends Vector> vectorClass = cmdLine.hasOption(vectorClassOpt) == false ?
      // RandomAccessSparseVector.class
      // : (Class<? extends Vector>) Class.forName(cmdLine.getValue(vectorClassOpt).toString());
      double t1 = Double.parseDouble(cmdLine.getValue(t1Opt).toString());
      double t2 = Double.parseDouble(cmdLine.getValue(t2Opt).toString());
     
      runJob(input, output, measureClass, t1, t2);
    } catch (OptionException e) {
      log.error("Exception", e);
      CommandLineUtil.printHelp(group);
View Full Code Here

          mOpt).withOption(topicsOpt).withOption(helpOpt).withOption(numRedOpt).create();
   
    try {
      Parser parser = new Parser();
      parser.setGroup(group);
      CommandLine cmdLine = parser.parse(args);
      if (cmdLine.hasOption(helpOpt)) {
        CommandLineUtil.printHelp(group);
        return;
      }
     
      String input = cmdLine.getValue(inputOpt).toString();
      String output = cmdLine.getValue(outputOpt).toString();
      String modelFactory = "org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution";
      if (cmdLine.hasOption(modelOpt)) modelFactory = cmdLine.getValue(modelOpt).toString();
      String modelPrototype = "org.apache.mahout.math.RandomAccessSparseVector";
      if (cmdLine.hasOption(prototypeOpt)) modelPrototype = cmdLine.getValue(prototypeOpt).toString();
      int prototypeSize = Integer.parseInt(cmdLine.getValue(sizeOpt).toString());
      int numReducers = Integer.parseInt(cmdLine.getValue(numRedOpt).toString());
      int numModels = Integer.parseInt(cmdLine.getValue(topicsOpt).toString());
      int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
      double alpha_0 = Double.parseDouble(cmdLine.getValue(mOpt).toString());
      runJob(input, output, modelFactory, modelPrototype, prototypeSize, numModels, maxIterations, alpha_0,
        numReducers);
    } catch (OptionException e) {
      log.error("Exception parsing command line: ", e);
      CommandLineUtil.printHelp(group);
View Full Code Here

        .withOption(helpOpt).create();
   
    try {
      Parser parser = new Parser();
      parser.setGroup(group);
      CommandLine cmdLine = parser.parse(args);
      if (cmdLine.hasOption(helpOpt)) {
        CommandLineUtil.printHelp(group);
        return;
      }
     
      String measureClass = SquaredEuclideanDistanceMeasure.class.getName();
      if (cmdLine.hasOption(measureClassOpt)) {
        measureClass = cmdLine.getValue(measureClassOpt).toString();
      }
      String output = cmdLine.getValue(outputOpt).toString();
      String canopies = cmdLine.getValue(canopiesOpt).toString();
      String points = cmdLine.getValue(pointsOpt).toString();
      // Class<? extends Vector> vectorClass = cmdLine.hasOption(vectorClassOpt) == false ?
      // RandomAccessSparseVector.class
      // : (Class<? extends Vector>) Class.forName(cmdLine.getValue(vectorClassOpt).toString());
      double t1 = Double.parseDouble(cmdLine.getValue(t1Opt).toString());
      double t2 = Double.parseDouble(cmdLine.getValue(t2Opt).toString());
     
      runJob(points, canopies, output, measureClass, t1, t2);
     
    } catch (OptionException e) {
      log.error("Exception", e);
View Full Code Here

        .create();
   
    try {
      Parser parser = new Parser();
      parser.setGroup(group);
      CommandLine cmdLine = parser.parse(args);
     
      if (cmdLine.hasOption(helpOpt)) {
        CommandLineUtil.printHelp(group);
        return;
      }
     
      String input = cmdLine.getValue(inputOpt).toString();
      String output = cmdLine.getValue(outputOpt).toString();
      String measureClass = SquaredEuclideanDistanceMeasure.class.getName();
      if (cmdLine.hasOption(measureClassOpt)) {
        measureClass = cmdLine.getValue(measureClassOpt).toString();
      }
     
      // Class<? extends Vector> vectorClass = cmdLine.hasOption(vectorClassOpt) == false ?
      // RandomAccessSparseVector.class
      // : (Class<? extends Vector>) Class.forName(cmdLine.getValue(vectorClassOpt).toString());
      double t1 = Double.parseDouble(cmdLine.getValue(t1Opt).toString());
      double t2 = Double.parseDouble(cmdLine.getValue(t2Opt).toString());
     
      runJob(input, output, measureClass, t1, t2);
     
    } catch (OptionException e) {
      log.error("Exception", e);
View Full Code Here

TOP

Related Classes of com.google.gxp.compiler.cli.GxpcFlags$CommandLine

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.