Package SevenZip

Examples of SevenZip.LzmaAlone$CommandLine


      outputDirOpt).withOption(parentOpt).withOption(helpOpt).create();
    try {
     
      Parser parser = new Parser();
      parser.setGroup(group);
      CommandLine cmdLine = parser.parse(args);
     
      if (cmdLine.hasOption(helpOpt)) {
        CommandLineUtil.printHelp(group);
        return;
      }
     
      File parentDir = new File((String) cmdLine.getValue(parentOpt));
      File outputDir = new File((String) cmdLine.getValue(outputDirOpt));
      String analyzerName = (String) cmdLine.getValue(analyzerNameOpt);
      Charset charset = Charset.forName((String) cmdLine.getValue(charsetOpt));
      Analyzer analyzer = ClassUtils.instantiateAs(analyzerName, Analyzer.class);
      // parent dir contains dir by category
      if (!parentDir.exists()) {
        throw new FileNotFoundException("Can't find input directory " + parentDir);
      }
View Full Code Here


    Group group = gbuilder.withName("Options").withOption(dumpFileOpt).withOption(outputDirOpt).withOption(
      chunkSizeOpt).withOption(numChunksOpt).withOption(s3IdOpt).withOption(s3SecretOpt).create();
   
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine;
    try {
      cmdLine = parser.parse(args);
    } catch (OptionException e) {
      log.error("Error while parsing options", e);
      CommandLineUtil.printHelp(group);
      return;
    }
   
    Configuration conf = new Configuration();
    String dumpFilePath = (String) cmdLine.getValue(dumpFileOpt);
    String outputDirPath = (String) cmdLine.getValue(outputDirOpt);
   
    if (cmdLine.hasOption(s3IdOpt)) {
      String id = (String) cmdLine.getValue(s3IdOpt);
      conf.set("fs.s3n.awsAccessKeyId", id);
      conf.set("fs.s3.awsAccessKeyId", id);
    }
    if (cmdLine.hasOption(s3SecretOpt)) {
      String secret = (String) cmdLine.getValue(s3SecretOpt);
      conf.set("fs.s3n.awsSecretAccessKey", secret);
      conf.set("fs.s3.awsSecretAccessKey", secret);
    }
    // do not compute crc file when using local FS
    conf.set("fs.file.impl", "org.apache.hadoop.fs.RawLocalFileSystem");
    FileSystem fs = FileSystem.get(URI.create(outputDirPath), conf);
   
    int chunkSize = 1024 * 1024 * Integer.parseInt((String) cmdLine.getValue(chunkSizeOpt));
   
    int numChunks = Integer.MAX_VALUE;
    if (cmdLine.hasOption(numChunksOpt)) {
      numChunks = Integer.parseInt((String) cmdLine.getValue(numChunksOpt));
    }
   
    String header = "<mediawiki xmlns=\"http://www.mediawiki.org/xml/export-0.3/\" "
                    + "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
                    + "xsi:schemaLocation=\"http://www.mediawiki.org/xml/export-0.3/ "
View Full Code Here

    Group group = gbuilder.withName("Options").withOption(dictOpt).withOption(outOpt).withOption(wordOpt)
        .withOption(inputOpt).withOption(dictTypeOpt).create();
    try {
      Parser parser = new Parser();
      parser.setGroup(group);
      CommandLine cmdLine = parser.parse(args);
     
      if (cmdLine.hasOption(helpOpt)) {
        CommandLineUtil.printHelp(group);
        return;
      }
     
      String input = cmdLine.getValue(inputOpt).toString();
      String dictFile = cmdLine.getValue(dictOpt).toString();
      int numWords = 20;
      if (cmdLine.hasOption(wordOpt)) {
        numWords = Integer.parseInt(cmdLine.getValue(wordOpt).toString());
      }
      Configuration config = new Configuration();
     
      String dictionaryType = "text";
      if (cmdLine.hasOption(dictTypeOpt)) {
        dictionaryType = cmdLine.getValue(dictTypeOpt).toString();
      }
     
      List<String> wordList;
      if ("text".equals(dictionaryType)) {
        wordList = Arrays.asList(VectorHelper.loadTermDictionary(new File(dictFile)));
      } else if ("sequencefile".equals(dictionaryType)) {
        wordList = Arrays.asList(VectorHelper.loadTermDictionary(config, dictFile));
      } else {
        throw new IllegalArgumentException("Invalid dictionary format");
      }
     
      List<Queue<Pair<String,Double>>> topWords = topWordsForTopics(input, config, wordList, numWords);

      File output = null;
      if (cmdLine.hasOption(outOpt)) {
        output = new File(cmdLine.getValue(outOpt).toString());
        if (!output.exists() && !output.mkdirs()) {
          throw new IOException("Could not create directory: " + output);
        }
      }
      printTopWords(topWords, output);
View Full Code Here

        .create();
   
    Parser parser = new Parser();
    parser.setGroup(group);
    try {
      CommandLine cmdLine = parser.parse(args);
      if (cmdLine.hasOption(helpOpt)) {
        CommandLineUtil.printHelp(group);
        return;
      }
     
      String inputPath = (String) cmdLine.getValue(dirInputPathOpt);
      String outputPath = (String) cmdLine.getValue(dirOutputPathOpt);
      String catFile = (String) cmdLine.getValue(categoriesOpt);
      Class<? extends Analyzer> analyzerClass = WikipediaAnalyzer.class;
      if (cmdLine.hasOption(analyzerOpt)) {
        String className = cmdLine.getValue(analyzerOpt).toString();
        analyzerClass = Class.forName(className).asSubclass(Analyzer.class);
        // try instantiating it, b/c there isn't any point in setting it if
        // you can't instantiate it
        ClassUtils.instantiateAs(analyzerClass, Analyzer.class);
      }
      runJob(inputPath, outputPath, catFile, cmdLine.hasOption(exactMatchOpt),
        analyzerClass);
    } catch (OptionException e) {
      log.error("Exception", e);
      CommandLineUtil.printHelp(group);
    } catch (ClassNotFoundException e) {
View Full Code Here

    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    CommandLine cmdLine = parser.parseAndHelp(args);

    if (cmdLine == null) {
      return false;
    }
View Full Code Here

   
    Parser parser = new Parser();
    parser.setGroup(group);
    parser.setHelpOption(helpOpt);
    try {
      CommandLine cmdLine = parser.parse(args);
      if (cmdLine.hasOption(helpOpt)) {
        CommandLineUtil.printHelp(group);
        return;
      }
     
      String inputPath = (String) cmdLine.getValue(dirInputPathOpt);
      String outputPath = (String) cmdLine.getValue(dirOutputPathOpt);
     
      String catFile = "";
      if (cmdLine.hasOption(categoriesOpt)) {
        catFile = (String) cmdLine.getValue(categoriesOpt);
      }
     
      boolean all = false;
      if (cmdLine.hasOption(allOpt)) {
        all = true;
      }
      runJob(inputPath, outputPath, catFile, cmdLine.hasOption(exactMatchOpt), all);
    } catch (OptionException e) {
      log.error("Exception", e);
      CommandLineUtil.printHelp(group);
    } catch (InterruptedException e) {
      log.error("Exception", e);
View Full Code Here

        .withOption(weightOpt).withOption(minDFOpt).create();

    try {
      Parser parser = new Parser();
      parser.setGroup(group);
      CommandLine cmdLine = parser.parse(args);

      if (cmdLine.hasOption(helpOpt)) {
       
        CommandLineUtil.printHelp(group);
        return;
      }
      // Springify all this
      if (cmdLine.hasOption(inputOpt)) { // Lucene case
        File file = new File(cmdLine.getValue(inputOpt).toString());
        if (!file.isDirectory()) {
          throw new IllegalArgumentException("Lucene directory: " + file.getAbsolutePath()
              + " does not exist or is not a directory");
        }

        long maxDocs = Long.MAX_VALUE;
        if (cmdLine.hasOption(maxOpt)) {
          maxDocs = Long.parseLong(cmdLine.getValue(maxOpt).toString());
        }
        if (maxDocs < 0) {
          throw new IllegalArgumentException("maxDocs must be >= 0");
        }

        Directory dir = FSDirectory.open(file);
        IndexReader reader = IndexReader.open(dir, true);

        Weight weight;
        if (cmdLine.hasOption(weightOpt)) {
          String wString = cmdLine.getValue(weightOpt).toString();
          if ("tf".equalsIgnoreCase(wString)) {
            weight = new TF();
          } else if ("tfidf".equalsIgnoreCase(wString)) {
            weight = new TFIDF();
          } else {
            throw new OptionException(weightOpt);
          }
        } else {
          weight = new TFIDF();
        }

        String field = cmdLine.getValue(fieldOpt).toString();

        int minDf = 1;
        if (cmdLine.hasOption(minDFOpt)) {
          minDf = Integer.parseInt(cmdLine.getValue(minDFOpt).toString());
        }

        int maxDFPercent = 99;
        if (cmdLine.hasOption(maxDFPercentOpt)) {
          maxDFPercent = Integer.parseInt(cmdLine.getValue(maxDFPercentOpt).toString());
        }

        TermInfo termInfo = new CachedTermInfo(reader, field, minDf, maxDFPercent);
        VectorMapper mapper = new TFDFMapper(reader, weight, termInfo);

        double norm = LuceneIterable.NO_NORMALIZING;
        if (cmdLine.hasOption(powerOpt)) {
          String power = cmdLine.getValue(powerOpt).toString();
          if ("INF".equals(power)) {
            norm = Double.POSITIVE_INFINITY;
          } else {
            norm = Double.parseDouble(power);
          }
        }

        String idField = null;
        if (cmdLine.hasOption(idFieldOpt)) {
          idField = cmdLine.getValue(idFieldOpt).toString();
        }

        LuceneIterable iterable;
        if (norm == LuceneIterable.NO_NORMALIZING) {
          iterable = new LuceneIterable(reader, idField, field, mapper, LuceneIterable.NO_NORMALIZING);
        } else {
          iterable = new LuceneIterable(reader, idField, field, mapper, norm);
        }

        String outFile = cmdLine.getValue(outputOpt).toString();
        log.info("Output File: {}", outFile);

        VectorWriter vectorWriter;
        if (cmdLine.hasOption(outWriterOpt)) {
          String outWriter = cmdLine.getValue(outWriterOpt).toString();
          if ("file".equals(outWriter)) {
            BufferedWriter writer = new BufferedWriter(new FileWriter(outFile));
            vectorWriter = new JWriterVectorWriter(writer);
          } else {
            vectorWriter = getSeqFileWriter(outFile);
          }
        } else {
          vectorWriter = getSeqFileWriter(outFile);
        }

        long numDocs = vectorWriter.write(iterable, maxDocs);
        vectorWriter.close();
        log.info("Wrote: {} vectors", numDocs);

        String delimiter = cmdLine.hasOption(delimiterOpt) ? cmdLine.getValue(delimiterOpt).toString() : "\t";
       
        File dictOutFile = new File(cmdLine.getValue(dictOutOpt).toString());
        log.info("Dictionary Output file: {}", dictOutFile);
        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
            new FileOutputStream(dictOutFile), Charset.forName("UTF8")));
        JWriterTermInfoWriter tiWriter = new JWriterTermInfoWriter(writer, delimiter, field);
        tiWriter.write(termInfo);
View Full Code Here

            printKeyOpt).withOption(sizeOpt).create();

    try {
      Parser parser = new Parser();
      parser.setGroup(group);
      CommandLine cmdLine = parser.parse(args);

      if (cmdLine.hasOption(helpOpt)) {

        printHelp(group);
        return;
      }

      if (cmdLine.hasOption(seqOpt)) {
        Path path = new Path(cmdLine.getValue(seqOpt).toString());
        //System.out.println("Input Path: " + path); interferes with output?
        Configuration conf = new Configuration();

        FileSystem fs = FileSystem.get(path.toUri(), conf);

        String dictionaryType = "text";
        if (cmdLine.hasOption(dictTypeOpt)) {
          dictionaryType = cmdLine.getValue(dictTypeOpt).toString();
        }

        String[] dictionary = null;
        if (cmdLine.hasOption(dictOpt)) {
          if ("text".equals(dictionaryType)) {
            dictionary = VectorHelper.loadTermDictionary(new File(cmdLine.getValue(dictOpt).toString()));
          } else if ("sequencefile".equals(dictionaryType)) {
            dictionary = VectorHelper.loadTermDictionary(conf, fs, cmdLine.getValue(dictOpt).toString());
          } else {
            throw new OptionException(dictTypeOpt);
          }
        }
        boolean useJSON = cmdLine.hasOption(centroidJSonOpt);
        boolean sizeOnly = cmdLine.hasOption(sizeOpt);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);

        Writable keyWritable = reader.getKeyClass().asSubclass(Writable.class).newInstance();
        Writable valueWritable = reader.getValueClass().asSubclass(Writable.class).newInstance();
        boolean transposeKeyValue = cmdLine.hasOption(vectorAsKeyOpt);
        try {
          Writer writer = cmdLine.hasOption(outputOpt)
                  ? new FileWriter(cmdLine.getValue(outputOpt).toString())
                  : new OutputStreamWriter(System.out);
          try {
            boolean printKey = cmdLine.hasOption(printKeyOpt);
            long i = 0;
            while (reader.next(keyWritable, valueWritable)) {
              if (printKey) {
                Writable notTheVectorWritable = transposeKeyValue ? valueWritable : keyWritable;
                writer.write(notTheVectorWritable.toString());
View Full Code Here

      .withOption(substringOpt).withOption(countOpt).withOption(helpOpt).create();
   
    try {
      Parser parser = new Parser();
      parser.setGroup(group);
      CommandLine cmdLine = parser.parse(args);
     
      if (cmdLine.hasOption(helpOpt)) {
       
        printHelp(group);
        return;
      }
     
      if (cmdLine.hasOption(seqOpt)) {
        Path path = new Path(cmdLine.getValue(seqOpt).toString());
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(path.toUri(), conf);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
       
        Writer writer;
        if (cmdLine.hasOption(outputOpt)) {
          writer = new FileWriter(cmdLine.getValue(outputOpt).toString());
        } else {
          writer = new OutputStreamWriter(System.out);
        }
        try {
          writer.append("Input Path: ").append(String.valueOf(path)).append('\n');

          int sub = Integer.MAX_VALUE;
          if (cmdLine.hasOption(substringOpt)) {
            sub = Integer.parseInt(cmdLine.getValue(substringOpt).toString());
          }
          boolean countOnly = cmdLine.hasOption(countOpt);
          Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
          Writable value = reader.getValueClass().asSubclass(Writable.class).newInstance();
          writer.append("Key class: ").append(String.valueOf(reader.getKeyClass()));
          writer.append(" Value Class: ").append(String.valueOf(value.getClass())).append('\n');
          writer.flush();
View Full Code Here

        .withOption(outputOpt).withOption(analyzeOpt).withOption(mrOpt).withOption(helpOpt).create();

    try {
      Parser parser = new Parser();
      parser.setGroup(group);
      CommandLine cmdLine = parser.parse(args);

      if (cmdLine.hasOption("help")) {
        CommandLineUtil.printHelp(group);
        return -1;
      }

      String dataName = cmdLine.getValue(inputOpt).toString();
      String datasetName = cmdLine.getValue(datasetOpt).toString();
      String modelName = cmdLine.getValue(modelOpt).toString();
      String outputName = (cmdLine.hasOption(outputOpt)) ? cmdLine.getValue(outputOpt).toString() : null;
      analyze = cmdLine.hasOption(analyzeOpt);
      useMapreduce = cmdLine.hasOption(mrOpt);

      log.debug("inout     : {}", dataName);
      log.debug("dataset   : {}", datasetName);
      log.debug("model     : {}", modelName);
      log.debug("output    : {}", outputName);
View Full Code Here

TOP

Related Classes of SevenZip.LzmaAlone$CommandLine

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.