// Serialize the preference matrix as text: one "sliceIndex,elementIndex,value" line per entry
// yielded by iterateNonZero(), skipping entries whose value is NaN.
StringBuilder prefsAsText = new StringBuilder();
// Empty before the first written entry, "\n" afterwards, so the text has no leading or trailing newline.
String separator = "";
Iterator<MatrixSlice> sliceIterator = preferences.iterateAll();
while (sliceIterator.hasNext()) {
  MatrixSlice row = sliceIterator.next();
  for (Iterator<Vector.Element> cells = row.vector().iterateNonZero(); cells.hasNext();) {
    Vector.Element cell = cells.next();
    if (Double.isNaN(cell.get())) {
      // NaN entries are left out of the input file.
      continue;
    }
    prefsAsText.append(separator);
    prefsAsText.append(row.index()).append(',');
    prefsAsText.append(cell.index()).append(',');
    prefsAsText.append(cell.get());
    separator = "\n";
  }
}

// Log the serialized matrix, then write the same text to the job's input file.
logger.info("Input matrix:\n" + prefsAsText);
writeLines(inputFile, prefsAsText.toString());
// Settings passed to the factorization job on its command line below.
int numFeatures = 3;
int numIterations = 5;
double lambda = 0.065;

// Configure the job: input file, output directory, and uncompressed output.
ParallelALSFactorizationJob alsFactorization = new ParallelALSFactorizationJob();
Configuration conf = new Configuration();
conf.set("mapred.input.dir", inputFile.getAbsolutePath());
conf.set("mapred.output.dir", outputDir.getAbsolutePath());
conf.setBoolean("mapred.output.compress", false);
alsFactorization.setConf(conf);

// NOTE(review): whatever run(...) returns is discarded here - confirm that a failed run
// surfaces as an exception rather than only through a status value.
alsFactorization.run(new String[] {
    "--tempDir", tmpDir.getAbsolutePath(),
    "--lambda", Double.toString(lambda),
    "--numFeatures", Integer.toString(numFeatures),
    "--numIterations", Integer.toString(numIterations) });

// Read the two factor matrices back from the output directory. The trailing arguments are
// presumably the expected row and column counts - confirm against MathHelper.readEntries.
Matrix u = MathHelper.readEntries(
    conf, new Path(outputDir.getAbsolutePath(), "U/part-r-00000"), preferences.numRows(), numFeatures);
Matrix m = MathHelper.readEntries(
    conf, new Path(outputDir.getAbsolutePath(), "M/part-r-00000"), preferences.numCols(), numFeatures);
// Compare every non-NaN preference with its estimate, the dot product of the matching row of u
// and the matching row of m, and collect the squared differences.
RunningAverage avg = new FullRunningAverage();
sliceIterator = preferences.iterateAll();
while (sliceIterator.hasNext()) {
  MatrixSlice userRow = sliceIterator.next();
  for (Iterator<Vector.Element> cells = userRow.vector().iterateNonZero(); cells.hasNext();) {
    Vector.Element cell = cells.next();
    if (Double.isNaN(cell.get())) {
      // NaN entries are ignored in the error computation.
      continue;
    }
    double pref = cell.get();
    double estimate = u.getRow(userRow.index()).dot(m.getRow(cell.index()));
    double err = pref - estimate;
    double squaredError = err * err;
    avg.addDatum(squaredError);
    logger.info("Comparing preference of user [" + userRow.index() + "] towards item [" + cell.index()
        + "], " + "was [" + pref + "] estimate is [" + estimate + ']');
  }
}

// Square root of the average of the collected squared errors.
double rmse = Math.sqrt(avg.getAverage());