// Iteration count. NOTE(review): presumably the number of training passes; it is not read or assigned in the visible code — confirm against usage.
private int iterations;
// NOTE(review): presumably selects between keeping training data in memory and spilling it to disk — confirm against usage.
private boolean inMemory;
// NOTE(review): presumably the on-disk scratch file used when inMemory is false; it is not assigned in the visible code — confirm.
private File tmpFile;
public LogisticRegression(String modelParams) throws IOException {
Splitter onComma = Splitter.on(",").trimResults().omitEmptyStrings();
Splitter onEquals = Splitter.on("=").trimResults();
Splitter onSpaces = Splitter.on(" ");
Joiner withSpaces = Joiner.on(" ");
Map<String, String> options = Maps.newHashMap();
for (String option : onComma.split(modelParams)) {
List<String> values = Lists.newArrayList(onEquals.split(option));
options.put(values.get(0), values.get(1));
}
if (options.containsKey("model")) {
if (options.containsKey("categories")) {
categories = Lists.newArrayList(onSpaces.split(options.get("categories")));
Configuration conf = UDFContext.getUDFContext().getJobConf();
model = PolymorphicWritable.read(FileSystem.get(conf).open(new Path(options.get("model"))), OnlineLogisticRegression.class);
options.remove("model");
options.remove(("categories"));
} else {
throw new BadClassifierSpecException("Must specify \"categories\" if pre-existing model is used");
}
} else {
if (options.containsKey("categories") && options.containsKey("features")) {
categories = Lists.newArrayList(onSpaces.split(options.get("categories")));
if (categories.size() < 2) {
throw new BadClassifierSpecException("Must have more than one target category. Remember that categories is a space separated list");
}
model = new OnlineLogisticRegression(categories.size(), Integer.parseInt(options.get("features")), new L1());
options.remove("categories");