// Run the factorizer and keep the resulting Factorization; every estimate below is computed from it.
Factorization factorization = sgdFactorizer.factorize();
// Validation pass: estimate each preference in the validation file, accumulate the squared
// error against the recorded value, and log the resulting RMSE.
log.info("Estimating validation preferences...");
// Number of validation preferences estimated so far; used only for the progress and summary logs here.
int prefsProcessed = 0;
// Collects one squared error per validation preference.
// NOTE(review): getAverage() is taken to be the arithmetic mean of the added values - confirm.
RunningAverage average = new FullRunningAverage();
DataFileIterable validations = new DataFileIterable(KDDCupDataModel.getValidationFile(dataFileDirectory));
// Each element pairs a PreferenceArray with a long[]; only the PreferenceArray is used in this loop.
for (Pair<PreferenceArray,long[]> validationPair : validations) {
for (Preference validationPref : validationPair.getFirst()) {
// The min/max preference bounds are passed along with the user and item IDs.
// NOTE(review): presumably estimatePreference keeps the estimate within [min, max] - confirm.
double estimate = estimatePreference(factorization, validationPref.getUserID(), validationPref.getItemID(),
factorizablePreferences.getMinPreference(), factorizablePreferences.getMaxPreference());
// Signed difference between the recorded value and the estimate; squared before being averaged.
double error = validationPref.getValue() - estimate;
average.addDatum(error * error);
prefsProcessed++;
// Emit a progress message every 100000 estimations.
if (prefsProcessed % 100000 == 0) {
log.info("Computed {} estimations", prefsProcessed);
}
}
}
log.info("Computed {} estimations, done.", prefsProcessed);
// Square root of the averaged squared errors, logged as the RMSE.
double rmse = Math.sqrt(average.getAverage());
log.info("RMSE {}", rmse);
log.info("Estimating test preferences...");
OutputStream out = null;
try {
out = new BufferedOutputStream(new FileOutputStream(resultFile));
DataFileIterable tests = new DataFileIterable(KDDCupDataModel.getTestFile(dataFileDirectory));
for (Pair<PreferenceArray,long[]> testPair : tests) {
for (Preference testPref : testPair.getFirst()) {
double estimate = estimatePreference(factorization, testPref.getUserID(), testPref.getItemID(),
factorizablePreferences.getMinPreference(), factorizablePreferences.getMaxPreference());
byte result = EstimateConverter.convert(estimate, testPref.getUserID(), testPref.getItemID());