leakType = Integer.parseInt(args[1]);
}
// Setup for the training pass: build the label dictionary, configure the
// feature encoder and the learner, then gather and shuffle the training files.

// Interns newsgroup names; the int returned by intern() is used as the
// target label when each file is trained on further down.
Dictionary newsGroups = new Dictionary();
NewsgroupHelper helper = new NewsgroupHelper();
// NOTE(review): presumably the number of hashed locations ("probes") each
// feature is written to by the encoder -- confirm against the encoder's docs.
helper.getEncoder().setProbes(2);
// NOTE(review): 20 is presumably the number of target categories and
// NewsgroupHelper.FEATURES the feature-vector size, with L1 as the prior --
// confirm against the AdaptiveLogisticRegression constructor documentation.
AdaptiveLogisticRegression learningAlgorithm =
new AdaptiveLogisticRegression(20, NewsgroupHelper.FEATURES, new L1());
// NOTE(review): tuning constants; their exact meaning (and units) is not
// visible here -- see the AdaptiveLogisticRegression setters before changing.
learningAlgorithm.setInterval(800);
learningAlgorithm.setAveragingWindow(500);
// Collect every entry found directly inside each sub-directory of `base`.
// Each sub-directory name is interned up front as a newsgroup label.
List<File> files = Lists.newArrayList();
// NOTE(review): File.listFiles() returns null when the path is not a
// directory or an I/O error occurs; no null check is visible in this span, so
// verify that `base` is validated earlier in the method.
for (File newsgroup : base.listFiles()) {
if (newsgroup.isDirectory()) {
newsGroups.intern(newsgroup.getName());
// NOTE(review): listFiles() can still return null here on an I/O error even
// though isDirectory() was true; Arrays.asList(null) would then throw.
files.addAll(Arrays.asList(newsgroup.listFiles()));
}
}
// Randomize the order of the collected files before training. No Random is
// passed, so the order differs from run to run.
Collections.shuffle(files);
System.out.println(files.size() + " training files");
// `info` is not used within the visible code; `k` is incremented once per
// file in the training loop below.
SGDInfo info = new SGDInfo();
int k = 0;
for (File file : files) {
String ng = file.getParentFile().getName();
int actual = newsGroups.intern(ng);
Vector v = helper.encodeFeatureVector(file, actual, leakType, overallCounts);
learningAlgorithm.train(actual, v);
k++;
State<AdaptiveLogisticRegression.Wrapper, CrossFoldLearner> best = learningAlgorithm.getBest();