public LogisticRegression(String modelParams) throws IOException {
Splitter onComma = Splitter.on(",").trimResults().omitEmptyStrings();
Splitter onEquals = Splitter.on("=").trimResults();
Splitter onSpaces = Splitter.on(" ");
Joiner withSpaces = Joiner.on(" ");
Map<String, String> options = Maps.newHashMap();
for (String option : onComma.split(modelParams)) {
List<String> values = Lists.newArrayList(onEquals.split(option));
options.put(values.get(0), values.get(1));
}
if (options.containsKey("model")) {
if (options.containsKey("categories")) {
categories = Lists.newArrayList(onSpaces.split(options.get("categories")));
Configuration conf = UDFContext.getUDFContext().getJobConf();
model = PolymorphicWritable.read(FileSystem.get(conf).open(new Path(options.get("model"))), OnlineLogisticRegression.class);
options.remove("model");
options.remove(("categories"));
} else {
throw new BadClassifierSpecException("Must specify \"categories\" if pre-existing model is used");
}
} else {
if (options.containsKey("categories") && options.containsKey("features")) {
categories = Lists.newArrayList(onSpaces.split(options.get("categories")));
if (categories.size() < 2) {
throw new BadClassifierSpecException("Must have more than one target category. Remember that categories is a space separated list");
}
model = new OnlineLogisticRegression(categories.size(), Integer.parseInt(options.get("features")), new L1());
options.remove("categories");
options.remove("features");
} else {
throw new BadClassifierSpecException("Must specify previous model location using \"file\" or supply \"categories\" and \"features\"");
}
if (options.containsKey("decayExponent")) {
model.decayExponent(Double.parseDouble(options.get("decayExponent")));
options.remove("decayExponent");
}
if (options.containsKey("lambda")) {
model.lambda(Double.parseDouble(options.get("lambda")));
options.remove("lambda");
}
if (options.containsKey("stepOffset")) {
model.stepOffset(Integer.parseInt(options.get("stepOffset")));
options.remove("stepOffset");
}
if (options.containsKey("learningRate")) {
model.learningRate(Double.parseDouble(options.get("learningRate")));
options.remove("learningRate");
}
}
iterations = options.containsKey("iterations") ? Integer.parseInt(options.get("iterations")) : 1;
options.remove("iterations");
inMemory = options.containsKey("inMemory") ? Boolean.parseBoolean(options.get("inMemory")) : true;
options.remove("inMemory");
if (options.size() > 0) {
throw new BadClassifierSpecException("Extra options supplied: " + withSpaces.join(options.keySet()));
}
if (!inMemory) {
tmpFile = File.createTempFile("trainingData", "tmp");
tmpFile.deleteOnExit();