Package org.apache.mahout.utils.email

Examples of org.apache.mahout.utils.email.MailOptions


        prefix = (String) cmdLine.getValue(keyPrefixOpt);
      }

      Charset charset = Charset.forName((String) cmdLine.getValue(charsetOpt));
      SequenceFilesFromMailArchives dir = new SequenceFilesFromMailArchives();
      MailOptions options = new MailOptions();
      options.setInput(input);
      options.setOutputDir(outputDir);
      options.setPrefix(prefix);
      options.setChunkSize(chunkSize);
      options.setCharset(charset);


      List<Pattern> patterns = new ArrayList<Pattern>(5);
      // patternOrder is used downstream so that we can know what order the text is in instead
      // of encoding it in the string, which
      // would require more processing later to remove it pre feature selection.
      Map<String, Integer> patternOrder = new HashMap<String, Integer>();
      int order = 0;
      if (cmdLine.hasOption(fromOpt)) {
        patterns.add(MailProcessor.FROM_PREFIX);
        patternOrder.put(MailOptions.FROM, order++);
      }
      if (cmdLine.hasOption(toOpt)) {
        patterns.add(MailProcessor.TO_PREFIX);
        patternOrder.put(MailOptions.TO, order++);
      }
      if (cmdLine.hasOption(refsOpt)) {
        patterns.add(MailProcessor.REFS_PREFIX);
        patternOrder.put(MailOptions.REFS, order++);
      }
      if (cmdLine.hasOption(subjectOpt)) {
        patterns.add(MailProcessor.SUBJECT_PREFIX);
        patternOrder.put(MailOptions.SUBJECT, order++);
      }
      options.setStripQuotedText(cmdLine.hasOption(quotedOpt));

      options.setPatternsToMatch(patterns.toArray(new Pattern[patterns.size()]));
      options.setPatternOrder(patternOrder);
      options.setIncludeBody(cmdLine.hasOption(bodyOpt));
      options.setSeparator("\n");
      if (cmdLine.hasOption(separatorOpt)) {
        options.setSeparator(cmdLine.getValue(separatorOpt).toString());
      }
      if (cmdLine.hasOption(bodySeparatorOpt)) {
        options.setBodySeparator(cmdLine.getValue(bodySeparatorOpt).toString());
      }
      if (cmdLine.hasOption(quotedRegexOpt)){
        options.setQuotedTextPattern(Pattern.compile(cmdLine.getValue(quotedRegexOpt).toString()));
      }
      long start = System.currentTimeMillis();
      dir.createSequenceFiles(options);
      long finish = System.currentTimeMillis();
      log.info("Conversion took {}ms", finish - start);
View Full Code Here


    if (hasOption(KEY_PREFIX_OPTION[0])) {
      prefix = getOption(KEY_PREFIX_OPTION[0]);
    }

    Charset charset = Charset.forName(getOption(CHARSET_OPTION[0]));
    MailOptions options = new MailOptions();
    options.setInput(input);
    options.setOutputDir(outputDir);
    options.setPrefix(prefix);
    options.setChunkSize(chunkSize);
    options.setCharset(charset);

    List<Pattern> patterns = Lists.newArrayListWithCapacity(5);
    // patternOrder is used downstream so that we can know what order the text
    // is in instead of encoding it in the string, which
    // would require more processing later to remove it pre feature selection.
    Map<String, Integer> patternOrder = Maps.newHashMap();
    int order = 0;
    if (hasOption(FROM_OPTION[0])) {
      patterns.add(MailProcessor.FROM_PREFIX);
      patternOrder.put(MailOptions.FROM, order++);
    }
    if (hasOption(TO_OPTION[0])) {
      patterns.add(MailProcessor.TO_PREFIX);
      patternOrder.put(MailOptions.TO, order++);
    }
    if (hasOption(REFERENCES_OPTION[0])) {
      patterns.add(MailProcessor.REFS_PREFIX);
      patternOrder.put(MailOptions.REFS, order++);
    }
    if (hasOption(SUBJECT_OPTION[0])) {
      patterns.add(MailProcessor.SUBJECT_PREFIX);
      patternOrder.put(MailOptions.SUBJECT, order += 1);
    }
    options.setStripQuotedText(hasOption(STRIP_QUOTED_OPTION[0]));

    options.setPatternsToMatch(patterns.toArray(new Pattern[patterns.size()]));
    options.setPatternOrder(patternOrder);
    options.setIncludeBody(hasOption(BODY_OPTION[0]));

    if (hasOption(SEPARATOR_OPTION[0])) {
      options.setSeparator(getOption(SEPARATOR_OPTION[0]));
    } else {
      options.setSeparator("\n");
    }

    if (hasOption(BODY_SEPARATOR_OPTION[0])) {
      options.setBodySeparator(getOption(BODY_SEPARATOR_OPTION[0]));
    }

    if (hasOption(QUOTED_REGEX_OPTION[0])) {
      options.setQuotedTextPattern(Pattern.compile(getOption(QUOTED_REGEX_OPTION[0])));
    }

    if (getOption(DefaultOptionCreator.METHOD_OPTION,
      DefaultOptionCreator.MAPREDUCE_METHOD).equals(DefaultOptionCreator.SEQUENTIAL_METHOD)) {
      runSequential(options);
View Full Code Here

  public void setup(Context context) throws IOException, InterruptedException {

    Configuration configuration = context.getConfiguration();

    // absorb all of the options into the MailOptions object
    this.options = new MailOptions();

    options.setPrefix(configuration.get(KEY_PREFIX_OPTION[1], ""));

    if (!configuration.get(CHUNK_SIZE_OPTION[0], "").equals("")) {
      options.setChunkSize(configuration.getInt(CHUNK_SIZE_OPTION[0], 64));
View Full Code Here

    if (hasOption(KEY_PREFIX_OPTION[0])) {
      prefix = getOption(KEY_PREFIX_OPTION[0]);
    }

    Charset charset = Charset.forName(getOption(CHARSET_OPTION[0]));
    MailOptions options = new MailOptions();
    options.setInput(input);
    options.setOutputDir(outputDir);
    options.setPrefix(prefix);
    options.setChunkSize(chunkSize);
    options.setCharset(charset);

    List<Pattern> patterns = Lists.newArrayListWithCapacity(5);
    // patternOrder is used downstream so that we can know what order the text
    // is in instead of encoding it in the string, which
    // would require more processing later to remove it pre feature selection.
    Map<String, Integer> patternOrder = Maps.newHashMap();
    int order = 0;
    if (hasOption(FROM_OPTION[0])) {
      patterns.add(MailProcessor.FROM_PREFIX);
      patternOrder.put(MailOptions.FROM, order++);
    }
    if (hasOption(TO_OPTION[0])) {
      patterns.add(MailProcessor.TO_PREFIX);
      patternOrder.put(MailOptions.TO, order++);
    }
    if (hasOption(REFERENCES_OPTION[0])) {
      patterns.add(MailProcessor.REFS_PREFIX);
      patternOrder.put(MailOptions.REFS, order++);
    }
    if (hasOption(SUBJECT_OPTION[0])) {
      patterns.add(MailProcessor.SUBJECT_PREFIX);
      patternOrder.put(MailOptions.SUBJECT, order += 1);
    }
    options.setStripQuotedText(hasOption(STRIP_QUOTED_OPTION[0]));

    options.setPatternsToMatch(patterns.toArray(new Pattern[patterns.size()]));
    options.setPatternOrder(patternOrder);
    options.setIncludeBody(hasOption(BODY_OPTION[0]));

    if (hasOption(SEPARATOR_OPTION[0])) {
      options.setSeparator(getOption(SEPARATOR_OPTION[0]));
    } else {
      options.setSeparator("\n");
    }

    if (hasOption(BODY_SEPARATOR_OPTION[0])) {
      options.setBodySeparator(getOption(BODY_SEPARATOR_OPTION[0]));
    }

    if (hasOption(QUOTED_REGEX_OPTION[0])) {
      options.setQuotedTextPattern(Pattern.compile(getOption(QUOTED_REGEX_OPTION[0])));
    }

    if (getOption(DefaultOptionCreator.METHOD_OPTION,
      DefaultOptionCreator.MAPREDUCE_METHOD).equals(DefaultOptionCreator.SEQUENTIAL_METHOD)) {
      runSequential(options);
View Full Code Here

    if (hasOption("keyPrefix")) {
      prefix = getOption("keyPrefix");
    }

    Charset charset = Charset.forName(getOption("charset"));
    MailOptions options = new MailOptions();
    options.setInput(input);
    options.setOutputDir(outputDir);
    options.setPrefix(prefix);
    options.setChunkSize(chunkSize);
    options.setCharset(charset);

    List<Pattern> patterns = new ArrayList<Pattern>(5);
    // patternOrder is used downstream so that we can know what order the text
    // is in instead
    // of encoding it in the string, which
    // would require more processing later to remove it pre feature selection.
    Map<String,Integer> patternOrder = new HashMap<String,Integer>();
    int order = 0;
    if (hasOption("from")) {
      patterns.add(MailProcessor.FROM_PREFIX);
      patternOrder.put(MailOptions.FROM, order++);
    }
    if (hasOption("to")) {
      patterns.add(MailProcessor.TO_PREFIX);
      patternOrder.put(MailOptions.TO, order++);
    }
    if (hasOption("references")) {
      patterns.add(MailProcessor.REFS_PREFIX);
      patternOrder.put(MailOptions.REFS, order++);
    }
    if (hasOption("subject")) {
      patterns.add(MailProcessor.SUBJECT_PREFIX);
      patternOrder.put(MailOptions.SUBJECT, order++);
    }
    options.setStripQuotedText(hasOption("stripQuoted"));

    options.setPatternsToMatch(patterns.toArray(new Pattern[patterns.size()]));
    options.setPatternOrder(patternOrder);
    options.setIncludeBody(hasOption("body"));
    options.setSeparator("\n");
    if (hasOption("separator")) {
      options.setSeparator(getOption("separator"));
    }
    if (hasOption("bodySeparator")) {
      options.setBodySeparator(getOption("bodySeparator"));
    }
    if (hasOption("quotedRegex")) {
      options.setQuotedTextPattern(Pattern.compile(getOption("quotedRegex")));
    }
    long start = System.currentTimeMillis();
    createSequenceFiles(options);
    long finish = System.currentTimeMillis();
    log.info("Conversion took {}ms", finish - start);
View Full Code Here

TOP

Related Classes of org.apache.mahout.utils.email.MailOptions

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.