prefix = (String) cmdLine.getValue(keyPrefixOpt);
}
// Resolve the charset named on the command line. Charset.forName throws an unchecked
// exception if the name is illegal or the charset is not supported by this JVM.
Charset charset = Charset.forName((String) cmdLine.getValue(charsetOpt));
SequenceFilesFromMailArchives dir = new SequenceFilesFromMailArchives();
// Collect the parsed command-line values into a single MailOptions object.
MailOptions options = new MailOptions();
options.setInput(input);
options.setOutputDir(outputDir);
options.setPrefix(prefix);
options.setChunkSize(chunkSize);
options.setCharset(charset);
// Header patterns to match, one per header option present on the command line.
List<Pattern> patterns = new ArrayList<Pattern>(5);
// patternOrder records the position of each selected header in the output text so that
// downstream consumers can tell which piece is which. The alternative, encoding the order
// in the string itself, would require extra processing later to strip it out before
// feature selection.
Map<String, Integer> patternOrder = new HashMap<String, Integer>();
// Positions are assigned in the fixed sequence from, to, refs, subject; a header that was
// not requested is skipped, so the assigned positions stay contiguous starting at 0.
int order = 0;
if (cmdLine.hasOption(fromOpt)) {
patterns.add(MailProcessor.FROM_PREFIX);
patternOrder.put(MailOptions.FROM, order++);
}
if (cmdLine.hasOption(toOpt)) {
patterns.add(MailProcessor.TO_PREFIX);
patternOrder.put(MailOptions.TO, order++);
}
if (cmdLine.hasOption(refsOpt)) {
patterns.add(MailProcessor.REFS_PREFIX);
patternOrder.put(MailOptions.REFS, order++);
}
if (cmdLine.hasOption(subjectOpt)) {
patterns.add(MailProcessor.SUBJECT_PREFIX);
patternOrder.put(MailOptions.SUBJECT, order++);
}
// The quoted-text and body settings are plain flags: they are on exactly when the
// corresponding option is present.
options.setStripQuotedText(cmdLine.hasOption(quotedOpt));
options.setPatternsToMatch(patterns.toArray(new Pattern[patterns.size()]));
options.setPatternOrder(patternOrder);
options.setIncludeBody(cmdLine.hasOption(bodyOpt));
// Default separator is a newline; an explicit separator option overrides it.
options.setSeparator("\n");
if (cmdLine.hasOption(separatorOpt)) {
options.setSeparator(cmdLine.getValue(separatorOpt).toString());
}
// The body separator is only set when supplied; otherwise MailOptions keeps whatever
// value it already holds (its default is not visible here).
if (cmdLine.hasOption(bodySeparatorOpt)) {
options.setBodySeparator(cmdLine.getValue(bodySeparatorOpt).toString());
}
// A user-supplied regex replaces the quoted-text pattern. Pattern.compile throws
// PatternSyntaxException if the expression is invalid.
if (cmdLine.hasOption(quotedRegexOpt)){
options.setQuotedTextPattern(Pattern.compile(cmdLine.getValue(quotedRegexOpt).toString()));
}
// Run the conversion and log the elapsed wall-clock time in milliseconds.
long start = System.currentTimeMillis();
dir.createSequenceFiles(options);
long finish = System.currentTimeMillis();
log.info("Conversion took {}ms", finish - start);