encodingOpt).withOption(analyzerOpt).withOption(defaultCatOpt).withOption(gramSizeOpt).withOption(
typeOpt).withOption(dataSourceOpt).create();
// Parse the raw command-line arguments against the `options` group.
// Every option lookup below goes through the resulting `cmdLine`.
Parser parser = new Parser();
parser.setGroup(options);
CommandLine cmdLine = parser.parse(args);
// Gram size defaults to 1 and is overridden only when gramSizeOpt was supplied.
int gramSize = 1;
if (cmdLine.hasOption(gramSizeOpt)) {
// NOTE(review): Integer.parseInt throws NumberFormatException for a non-numeric
// value; the visible code does not catch it.
gramSize = Integer.parseInt((String) cmdLine.getValue(gramSizeOpt));
}
// Gather the settings handed to the datastore below: gram size and model base path.
BayesParameters params = new BayesParameters();
params.setGramSize(gramSize);
// NOTE(review): pathOpt is read without a hasOption() guard, so modelBasePath may be
// null if the option is optional -- confirm against the option's declaration.
String modelBasePath = (String) cmdLine.getValue(pathOpt);
params.setBasePath(modelBasePath);
// Logs the output of params.print(), not just the base path.
log.info("Loading model from: {}", params.print());
// Choose the algorithm/datastore pair from the data-source and classifier-type options.
// Both locals are assigned on every non-throwing path of the if/else tree below.
Algorithm algorithm;
Datastore datastore;
String classifierType = (String) cmdLine.getValue(typeOpt);
String dataSource = (String) cmdLine.getValue(dataSourceOpt);
// The data source must be exactly "hdfs" (case-sensitive); a null or any other value
// falls through to the outer else and is rejected.
if ("hdfs".equals(dataSource)) {
// The classifier type is matched case-insensitively, unlike the data source above.
if ("bayes".equalsIgnoreCase(classifierType)) {
log.info("Using Bayes Classifier");
algorithm = new BayesAlgorithm();
datastore = new InMemoryBayesDatastore(params);
} else if ("cbayes".equalsIgnoreCase(classifierType)) {
log.info("Using Complementary Bayes Classifier");
algorithm = new CBayesAlgorithm();
// Same datastore class as the "bayes" branch; only the algorithm differs.
datastore = new InMemoryBayesDatastore(params);
} else {
throw new IllegalArgumentException("Unrecognized classifier type: " + classifierType);
}
} else {
throw new IllegalArgumentException("Unrecognized dataSource type: " + dataSource);
}
// Bind the chosen algorithm and datastore together and initialize the context.
ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
classifier.initialize();
// Default category name is "unknown" unless defaultCatOpt was supplied.
String defaultCat = "unknown";
if (cmdLine.hasOption(defaultCatOpt)) {
defaultCat = (String) cmdLine.getValue(defaultCatOpt);
}
// Wrap the classifyOpt value as a File.
// NOTE(review): there is no hasOption() guard here; new File(null) throws
// NullPointerException, so classifyOpt is presumably required -- confirm.
File docPath = new File((String) cmdLine.getValue(classifyOpt));
// Encoding name defaults to "UTF-8" unless encodingOpt was supplied.
String encoding = "UTF-8";
if (cmdLine.hasOption(encodingOpt)) {
encoding = (String) cmdLine.getValue(encodingOpt);
}
// Resolve the Analyzer: when analyzerOpt is supplied, its value is passed to
// ClassUtils.instantiateAs together with Analyzer.class.
Analyzer analyzer = null;
if (cmdLine.hasOption(analyzerOpt)) {
analyzer = ClassUtils.instantiateAs((String) cmdLine.getValue(analyzerOpt), Analyzer.class);
}
// Fall back to a StandardAnalyzer (Version.LUCENE_31) when the option was absent
// or instantiateAs returned null.
if (analyzer == null) {
analyzer = new StandardAnalyzer(Version.LUCENE_31);
}