Package org.nebulaframework.benchmark.scimark2

Examples of org.nebulaframework.benchmark.scimark2.commandline
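
Every excerpt below follows the same Apache Commons CLI2 pattern used by these drivers: define the individual options with builders, collect them into a Group, hand the Group to a Parser, print help when asked, and fall back to printing help when parsing fails. Because each excerpt starts partway through the builder chain, here is a minimal, self-contained sketch of that skeleton. It is an illustration only: the ExampleDriver class and its "input" option are hypothetical, and it assumes the CLI2 builder classes (DefaultOptionBuilder, ArgumentBuilder, GroupBuilder) plus the CommandLineUtil helper that the excerpts themselves call.

import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.mahout.common.CommandLineUtil; // package assumed, as in the Mahout drivers quoted below

public final class ExampleDriver { // hypothetical driver name

  private ExampleDriver() { }

  public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    // A required option that takes exactly one argument.
    Option inputOpt = obuilder.withLongName("input").withShortName("i").withRequired(true)
        .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
        .withDescription("Path to the input").create();

    Option helpOpt = obuilder.withLongName("help").withShortName("h")
        .withDescription("Print out help").create();

    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(helpOpt).create();

    try {
      Parser parser = new Parser();
      parser.setGroup(group);
      CommandLine cmdLine = parser.parse(args);

      if (cmdLine.hasOption(helpOpt)) {
        CommandLineUtil.printHelp(group);
        return;
      }

      String input = cmdLine.getValue(inputOpt).toString();
      System.out.println("input = " + input); // a real driver starts its job here
    } catch (OptionException e) {
      // Unknown flag or missing required argument: report it and show the usage message.
      System.err.println(e.getMessage());
      CommandLineUtil.printHelp(group);
    }
  }
}

The try/catch around parse(args) is the idiom every excerpt shares: OptionException covers unrecognized flags and missing required arguments, and printHelp(group) keeps the usage message in one place.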


        .withOption(dictOutOpt).withOption(powerOpt).withOption(outWriterOpt).withOption(maxDFPercentOpt)
        .withOption(weightOpt).withOption(minDFOpt).create();
    try {
      Parser parser = new Parser();
      parser.setGroup(group);
      CommandLine cmdLine = parser.parse(args);
     
      if (cmdLine.hasOption(helpOpt)) {
       
        CommandLineUtil.printHelp(group);
        return;
      }
      // Springify all this
      if (cmdLine.hasOption(inputOpt)) { // Lucene case
        File file = new File(cmdLine.getValue(inputOpt).toString());
        if (file.exists() && file.isDirectory()) {
          long maxDocs = Long.MAX_VALUE;
          if (cmdLine.hasOption(maxOpt)) {
            maxDocs = Long.parseLong(cmdLine.getValue(maxOpt).toString());
          }
          if (maxDocs < 0) {
            throw new IllegalArgumentException("maxDocs must be >= 0");
          }
          Directory dir = FSDirectory.open(file);
          IndexReader reader = IndexReader.open(dir, true);
          Weight weight;
          if (cmdLine.hasOption(weightOpt)) {
            String wString = cmdLine.getValue(weightOpt).toString();
            if (wString.equalsIgnoreCase("tf")) {
              weight = new TF();
            } else if (wString.equalsIgnoreCase("tfidf")) {
              weight = new TFIDF();
            } else {
              throw new OptionException(weightOpt);
            }
          } else {
            weight = new TFIDF();
          }
          String field = cmdLine.getValue(fieldOpt).toString();
          int minDf = 1;
          if (cmdLine.hasOption(minDFOpt)) {
            minDf = Integer.parseInt(cmdLine.getValue(minDFOpt).toString());
          }
          int maxDFPercent = 99;
          if (cmdLine.hasOption(maxDFPercentOpt)) {
            maxDFPercent = Integer.parseInt(cmdLine.getValue(maxDFPercentOpt).toString());
          }
          TermInfo termInfo = new CachedTermInfo(reader, field, minDf, maxDFPercent);
          VectorMapper mapper = new TFDFMapper(reader, weight, termInfo);
          double norm = LuceneIterable.NO_NORMALIZING;
          if (cmdLine.hasOption(powerOpt)) {
            String power = cmdLine.getValue(powerOpt).toString();
            if (power.equals("INF")) {
              norm = Double.POSITIVE_INFINITY;
            } else {
              norm = Double.parseDouble(power);
            }
          }
          String idField = null;
          if (cmdLine.hasOption(idFieldOpt)) {
            idField = cmdLine.getValue(idFieldOpt).toString();
          }
          // norm defaults to LuceneIterable.NO_NORMALIZING, so one constructor call covers both cases
          LuceneIterable iterable = new LuceneIterable(reader, idField, field, mapper, norm);
          String outFile = cmdLine.getValue(outputOpt).toString();
          log.info("Output File: {}", outFile);
         
          VectorWriter vectorWriter;
          if (cmdLine.hasOption(outWriterOpt)) {
            String outWriter = cmdLine.getValue(outWriterOpt).toString();
            if (outWriter.equals("file")) {
              BufferedWriter writer = new BufferedWriter(new FileWriter(outFile));
              vectorWriter = new JWriterVectorWriter(writer);
            } else {
              vectorWriter = getSeqFileWriter(outFile);
            }
          } else {
            vectorWriter = getSeqFileWriter(outFile);
          }
         
          long numDocs = vectorWriter.write(iterable, maxDocs);
          vectorWriter.close();
          log.info("Wrote: {} vectors", numDocs);
         
          String delimiter = cmdLine.hasOption(delimiterOpt) ? cmdLine.getValue(delimiterOpt).toString()
              : "\t";
          File dictOutFile = new File(cmdLine.getValue(dictOutOpt).toString());
          log.info("Dictionary Output file: {}", dictOutFile);
          BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
              new FileOutputStream(dictOutFile), Charset.forName("UTF8")));
          JWriterTermInfoWriter tiWriter = new JWriterTermInfoWriter(writer, delimiter, field);
          tiWriter.write(termInfo);
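
Several of these drivers repeat the same idiom for optional numeric flags: if hasOption, then Integer.parseInt(getValue(...).toString()), otherwise keep a hard-coded default (maxDocs, minDf and maxDFPercent above; chunkSize and the benchmark sizes further down). The norm option adds one wrinkle, the literal "INF" standing for the infinity norm. A small helper keeps both idioms in one place. This is a hedged sketch that uses only the CommandLine calls visible in the excerpts; the OptionValues class and its method names are hypothetical.

import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Option;

final class OptionValues { // hypothetical helper, not part of the library

  private OptionValues() { }

  /** Value of an optional int flag, or defaultValue when the flag was not given. */
  static int intValue(CommandLine cmdLine, Option opt, int defaultValue) {
    return cmdLine.hasOption(opt)
        ? Integer.parseInt(cmdLine.getValue(opt).toString())
        : defaultValue;
  }

  /** The p-norm flag used above: "INF" means the infinity norm, anything else is parsed as a double. */
  static double normValue(CommandLine cmdLine, Option opt, double defaultValue) {
    if (!cmdLine.hasOption(opt)) {
      return defaultValue;
    }
    String power = cmdLine.getValue(opt).toString();
    return "INF".equals(power) ? Double.POSITIVE_INFINITY : Double.parseDouble(power);
  }
}

With that helper, the body above reduces to lines such as int minDf = OptionValues.intValue(cmdLine, minDFOpt, 1); and double norm = OptionValues.normValue(cmdLine, powerOpt, LuceneIterable.NO_NORMALIZING);.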


      charsetOpt).withOption(outputDirOpt).withOption(helpOpt).withOption(parentOpt).create();
   
    try {
      Parser parser = new Parser();
      parser.setGroup(group);
      CommandLine cmdLine = parser.parse(args);
      if (cmdLine.hasOption(helpOpt)) {
        CommandLineUtil.printHelp(group);
        return;
      }
      File parentDir = new File((String) cmdLine.getValue(parentOpt));
      String outputDir = (String) cmdLine.getValue(outputDirOpt);
     
      int chunkSize = 64;
      if (cmdLine.hasOption(chunkSizeOpt)) {
        chunkSize = Integer.parseInt((String) cmdLine.getValue(chunkSizeOpt));
      }
     
      String prefix = "";
      if (cmdLine.hasOption(keyPrefixOpt)) {
        prefix = (String) cmdLine.getValue(keyPrefixOpt);
      }
      Charset charset = Charset.forName((String) cmdLine.getValue(charsetOpt));
      SequenceFilesFromDirectory dir = new SequenceFilesFromDirectory();
     
      dir.createSequenceFiles(parentDir, outputDir, prefix, chunkSize, charset);
    } catch (OptionException e) {
      log.error("Exception", e);

        .withOption(helpOpt).create();
   
    try {
      Parser parser = new Parser();
      parser.setGroup(group);
      CommandLine cmdLine = parser.parse(args);
     
      if (cmdLine.hasOption("help")) {
        CommandLineUtil.printHelp(group);
        return -1;
      }
     
      isPartial = cmdLine.hasOption(partialOpt);
      isOob = cmdLine.hasOption(oobOpt);
      String dataName = cmdLine.getValue(dataOpt).toString();
      String datasetName = cmdLine.getValue(datasetOpt).toString();
      m = Integer.parseInt(cmdLine.getValue(selectionOpt).toString());
      nbTrees = Integer.parseInt(cmdLine.getValue(nbtreesOpt).toString());
     
      if (cmdLine.hasOption(seedOpt)) {
        seed = Long.valueOf(cmdLine.getValue(seedOpt).toString());
      }
     
      log.debug("data : {}", dataName);
      log.debug("dataset : {}", datasetName);
      log.debug("m : {}", m);

          unigramOpt).withOption(helpOpt).create();
   
    try {
      Parser parser = new Parser();
      parser.setGroup(group);
      CommandLine cmdLine = parser.parse(args);
     
      if (cmdLine.hasOption(helpOpt)) {
        CommandLineUtil.printHelp(group);
        return 1;
      }
     
      String input = cmdLine.getValue(inputOpt).toString();
      String output = cmdLine.getValue(outputOpt).toString();
     
      int maxNGramSize = DEFAULT_MAX_NGRAM_SIZE;
     
      if (cmdLine.hasOption(maxNGramSizeOpt)) {
        try {
          maxNGramSize = Integer.parseInt(cmdLine.getValue(maxNGramSizeOpt).toString());
        } catch (NumberFormatException ex) {
          log.warn("Could not parse ngram size option");
        }
      }
      log.info("Maximum n-gram size is: {}", maxNGramSize);
     
      if (cmdLine.hasOption(overwriteOutput)) {
        HadoopUtil.overwriteOutput(output);
      }
     
      int minSupport = CollocReducer.DEFAULT_MIN_SUPPORT;
      if (cmdLine.hasOption(minSupportOpt)) {
        minSupport = Integer.parseInt(cmdLine.getValue(minSupportOpt).toString());
      }
      log.info("Minimum Support value: {}", minSupport);
     
      float minLLRValue = LLRReducer.DEFAULT_MIN_LLR;
      if (cmdLine.hasOption(minLLROpt)) {
        minLLRValue = Float.parseFloat(cmdLine.getValue(minLLROpt).toString());
      }
      log.info("Minimum LLR value: {}", minLLRValue);
     
      int reduceTasks = DEFAULT_PASS1_NUM_REDUCE_TASKS;
      if (cmdLine.hasOption(numReduceTasksOpt)) {
        reduceTasks = Integer.parseInt(cmdLine.getValue(numReduceTasksOpt).toString());
      }
      log.info("Number of pass1 reduce tasks: {}", reduceTasks);
     
      boolean emitUnigrams = cmdLine.hasOption(unigramOpt);
     
      if (cmdLine.hasOption(preprocessOpt)) {
        log.info("Input will be preprocessed");
       
        Class<? extends Analyzer> analyzerClass = StandardAnalyzer.class;
        if (cmdLine.hasOption(analyzerNameOpt)) {
          String className = cmdLine.getValue(analyzerNameOpt).toString();
          analyzerClass = Class.forName(className).asSubclass(Analyzer.class);
          // try instantiating it, b/c there isn't any point in setting it if
          // you can't instantiate it
          analyzerClass.newInstance();
        }
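
The collocation driver above resolves the analyzer class by name and instantiates it once purely so that a bad class name fails before the job is submitted. Below is a hedged sketch of that check as a standalone method; the AnalyzerLoader class is hypothetical, and it assumes only the Lucene Analyzer base class, StandardAnalyzer as the default, and standard reflection.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

final class AnalyzerLoader { // hypothetical helper

  private AnalyzerLoader() { }

  /**
   * Resolves an Analyzer class by name, keeping StandardAnalyzer as the default
   * when no name is supplied. A user-supplied class is instantiated once so that
   * a missing class or missing public no-arg constructor fails here, not later.
   */
  static Class<? extends Analyzer> resolve(String analyzerClassName)
      throws ClassNotFoundException, InstantiationException, IllegalAccessException {
    if (analyzerClassName == null || analyzerClassName.isEmpty()) {
      return StandardAnalyzer.class;
    }
    Class<? extends Analyzer> analyzerClass =
        Class.forName(analyzerClassName).asSubclass(Analyzer.class);
    analyzerClass.newInstance(); // fail fast on an unusable class
    return analyzerClass;
  }
}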

        .withOption(redOpt).withOption(helpOpt).create();
   
    try {
      Parser parser = new Parser();
      parser.setGroup(group);
      CommandLine cmdLine = parser.parse(args);
      if (cmdLine.hasOption(helpOpt)) {
        CommandLineUtil.printHelp(group);
        return;
      }
     
      String input = cmdLine.getValue(inputOpt, "testdata").toString();
      String output = cmdLine.getValue(outputOpt, "output").toString();
      String modelFactory = cmdLine.getValue(modelOpt,
        "org.apache.mahout.clustering.syntheticcontrol.dirichlet.NormalScModelDistribution").toString();
      int numModels = Integer.parseInt(cmdLine.getValue(topicsOpt, "10").toString());
      int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt, "5").toString());
      double alpha_0 = Double.parseDouble(cmdLine.getValue(mOpt, "1.0").toString());
      int numReducers = Integer.parseInt(cmdLine.getValue(redOpt, "1").toString());
      String vectorClassName = cmdLine.getValue(vectorOpt, "org.apache.mahout.math.RandomAccessSparseVector")
          .toString();
      runJob(input, output, modelFactory, numModels, maxIterations, alpha_0, numReducers,
            vectorClassName);
    } catch (OptionException e) {
      log.error("Exception parsing command line: ", e);

      outputDirOpt).withOption(parentOpt).withOption(helpOpt).create();
    try {
     
      Parser parser = new Parser();
      parser.setGroup(group);
      CommandLine cmdLine = parser.parse(args);
     
      if (cmdLine.hasOption(helpOpt)) {
        CommandLineUtil.printHelp(group);
        return;
      }
     
      File parentDir = new File((String) cmdLine.getValue(parentOpt));
      File outputDir = new File((String) cmdLine.getValue(outputDirOpt));
      String analyzerName = (String) cmdLine.getValue(analyzerNameOpt);
      Charset charset = Charset.forName((String) cmdLine.getValue(charsetOpt));
      Analyzer analyzer = (Analyzer) Class.forName(analyzerName).newInstance();
      // parent dir contains dir by category
      File[] categoryDirs = parentDir.listFiles();
      for (File dir : categoryDirs) {
        if (dir.isDirectory()) {

      helpOpt).create();
   
    Parser parser = new Parser();
    parser.setGroup(group);
    try {
      CommandLine cmdLine = parser.parse(args);
     
      if (cmdLine.hasOption(helpOpt)) {
        CommandLineUtil.printHelp(group);
        return;
      }
     
      String input = cmdLine.getValue(inputOpt).toString();
     
      Path inpath = new Path(input);
      FileSystem fs = FileSystem.get(inpath.toUri(), new Configuration());
     
      log.info("Loading Descriptors...");
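
The excerpt above turns the raw input value into a Hadoop Path and asks FileSystem.get(uri, conf) for the file system that owns it, which is what lets the same driver accept both local paths and hdfs:// URIs. Here is a minimal, hedged sketch of that resolution step, with an existence check added for illustration; the InputResolver class is hypothetical.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class InputResolver { // hypothetical helper

  private InputResolver() { }

  /** Resolves the input option's value to a qualified Path on whichever FileSystem owns it. */
  static Path resolve(String input) throws IOException {
    Path inPath = new Path(input);
    FileSystem fs = FileSystem.get(inPath.toUri(), new Configuration());
    if (!fs.exists(inPath)) {
      throw new IOException("Input path does not exist: " + inPath);
    }
    return fs.makeQualified(inPath);
  }
}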

    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(
      helpOpt).create();
   
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);
   
    if (cmdLine.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(group);
      return null;
    }
   
    String prefsFile = cmdLine.getValue(inputOpt).toString();
    return new File(prefsFile);
  }

        .withOption(numVectorsOpt).withOption(loopOpt).withOption(numOpsOpt).withOption(helpOpt).create();
   
    try {
      Parser parser = new Parser();
      parser.setGroup(group);
      CommandLine cmdLine = parser.parse(args);
     
      if (cmdLine.hasOption(helpOpt)) {
        CommandLineUtil.printHelp(group);
        return;
      }
     
      int cardinality = 1000;
      if (cmdLine.hasOption(vectorSizeOpt)) {
        cardinality = Integer.parseInt((String) cmdLine.getValue(vectorSizeOpt));
      }

      int sparsity = 1000;
      if (cmdLine.hasOption(vectorSparsityOpt)) {
        sparsity = Integer.parseInt((String) cmdLine.getValue(vectorSparsityOpt));
      }

      int numVectors = 100;
      if (cmdLine.hasOption(numVectorsOpt)) {
        numVectors = Integer.parseInt((String) cmdLine.getValue(numVectorsOpt));
      }

      int loop = 600;
      if (cmdLine.hasOption(loopOpt)) {
        loop = Integer.parseInt((String) cmdLine.getValue(loopOpt));
      }

      int numOps = 10;
      if (cmdLine.hasOption(numOpsOpt)) {
        numOps = Integer.parseInt((String) cmdLine.getValue(numOpsOpt));
      }
      VectorBenchmarks mark = new VectorBenchmarks(cardinality, sparsity, numVectors, loop, numOps);
      mark.createBenchmark();
      mark.incrementalCreateBenchmark();
