// Pick the execution mode: useMapRed is true when the value of the method option
// equals the MapReduce method name, ignoring case.
// NOTE(review): the result of getOption(...) is dereferenced directly; confirm the
// method option always has a value (e.g. a default), otherwise this line throws NPE.
useMapRed = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(DefaultOptionCreator.MAPREDUCE_METHOD);
if (useMapRed) {
// MapReduce mode: randomSelectionPct and mapRedOutputDir are both mandatory.
if (!hasOption("randomSelectionPct")) {
throw new OptionException(getCLIOption("randomSelectionPct"),
"must set randomSelectionPct when mapRed option is used");
}
if (!hasOption("mapRedOutputDir")) {
throw new OptionException(getCLIOption("mapRedOutputDir"),
"mapRedOutputDir must be set when mapRed option is used");
}
mapRedOutputDirectory = new Path(getOption("mapRedOutputDir"));
// keepPct is optional and is only read in this branch; when absent the field keeps
// whatever value it already had.
// NOTE(review): Integer.parseInt throws NumberFormatException on non-numeric input;
// whether that is handled is not visible from here.
if (hasOption("keepPct")) {
keepPct = Integer.parseInt(getOption("keepPct"));
}
// With the overwrite option set, remove any existing MapReduce output directory.
// Unlike the non-MapReduce branch below, the directory is not re-created here.
if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
HadoopUtil.delete(getConf(), mapRedOutputDirectory);
}
} else {
// Non-MapReduce mode: both trainingOutput and testOutput are mandatory.
// NOTE(review): the exception is always attached to the trainingOutput option, even
// when testOutput is the one that is missing.
if (!hasOption("trainingOutput")
|| !hasOption("testOutput")) {
throw new OptionException(getCLIOption("trainingOutput"),
"trainingOutput and testOutput must be set if mapRed option is not used");
}
// At least one of the four sizing options must be present; this check does not
// reject combinations of them.
if (!hasOption("testSplitSize")
&& !hasOption("testSplitPct")
&& !hasOption("randomSelectionPct")
&& !hasOption("randomSelectionSize")) {
throw new OptionException(getCLIOption("testSplitSize"),
"must set one of test split size/percentage or randomSelectionSize/percentage");
}
trainingOutputDirectory = new Path(getOption("trainingOutput"));
testOutputDirectory = new Path(getOption("testOutput"));
// NOTE(review): fs is resolved from the training path only, yet it is also used to
// create the test directory below; confirm both paths always share a filesystem.
FileSystem fs = trainingOutputDirectory.getFileSystem(getConf());
// With the overwrite option set, delete both output directories before they are
// (re-)created by the mkdirs calls that follow.
if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
HadoopUtil.delete(fs.getConf(), trainingOutputDirectory);
HadoopUtil.delete(fs.getConf(), testOutputDirectory);
}
// Both output directories are created in every non-MapReduce run, whether or not
// overwrite was requested.
fs.mkdirs(trainingOutputDirectory);
fs.mkdirs(testOutputDirectory);
}
// Optional charset override, applied in both execution modes; when the option is
// absent the charset field is left unchanged.
// Charset.forName throws an unchecked exception (IllegalCharsetNameException or
// UnsupportedCharsetException) for an invalid or unavailable charset name.
if (hasOption("charset")) {
charset = Charset.forName(getOption("charset"));
}
// testSplitSize and testSplitPct are mutually exclusive: reject the arguments when
// both are supplied. The exception is attached to the testSplitPct option.
if (hasOption("testSplitSize") && hasOption("testSplitPct")) {
throw new OptionException(getCLIOption("testSplitPct"), "must have either split size or split percentage "
+ "option, not BOTH");
}
if (hasOption("testSplitSize")) {
setTestSplitSize(Integer.parseInt(getOption("testSplitSize")));