* @param test The instances for which to cache predictions.
* @throws Exception if something goes wrong
*/
private void cachePredictions(Instances test) throws Exception {
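// Maps the string form of each test instance (class value set to
// missing) to an array of per-model predictions for that instance.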
m_cachedPredictions = new HashMap();
Evaluation evalModel = null;
Instances originalInstances = null;
// If the verbose flag is set, we'll also print out the performances of
// all the individual models w.r.t. this test set while we're at it.
boolean printModelPerformances = getVerboseOutput();
if (printModelPerformances) {
// To get performances, we need to keep the class attribute.
originalInstances = new Instances(test);
}
// For each model, we'll go through the dataset and get predictions.
// The idea is that we only want one model in memory at a time, so we
// load one model into memory, get all its predictions, and add them
// to the hash map. Then we can release it from memory and move on to
// the next.
for (int i = 0; i < m_chosen_models.length; ++i) {
if (printModelPerformances) {
// If we're going to print performances, we need a fresh
// Evaluation object for this model.
evalModel = new Evaluation(originalInstances);
}
Date startTime = new Date();
// Load the model into memory by rehydrating it from the working directory.
m_chosen_models[i].rehydrateModel(m_workingDirectory.getAbsolutePath());
// Now loop through all the instances and get this model's predictions.
for (int j = 0; j < test.numInstances(); ++j) {
Instance currentInstance = test.instance(j);
// When we look up a cached prediction later, we'll only have the
// non-class attributes, so we set the class to missing here to make
// the string representations match up properly.
currentInstance.setClassMissing();
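// Note that this mutates the instance inside the test set itself;
// originalInstances was copied above so that evaluation still sees
// the class values.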
String stringInstance = currentInstance.toString();
// When we get here with the first model, the instance will not yet
// be part of the map.
if (!m_cachedPredictions.containsKey(stringInstance)) {
// The instance isn't in the map yet, so add it. For each instance,
// we store a two-dimensional array: the first index is over all the
// models in the ensemble, and the second index is over the class
// values (i.e., a typical prediction array).
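// For example, with 10 chosen models on a 3-class problem, each
// map entry holds a double[10][3]; for a numeric class the second
// dimension has length 1.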
int predSize = test.classAttribute().isNumeric() ? 1 : test.classAttribute().numValues();
double[][] predictionArray = new double[m_chosen_models.length][predSize];
m_cachedPredictions.put(stringInstance, predictionArray);
}
// Get the array from the map that is associated with this instance.
double[][] predictions = (double[][]) m_cachedPredictions.get(stringInstance);
// And add our model's prediction for it.
predictions[i] = m_chosen_models[i].getAveragePrediction(currentInstance);
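// (getAveragePrediction is expected to return an array of length
// predSize: one value per class, or a single value for a numeric
// class.)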
if (printModelPerformances) {
evalModel.evaluateModelOnceAndRecordPrediction(
predictions[i], originalInstances.instance(j));
}
}
// Now we're done with model #i, so we can release it.
m_chosen_models[i].releaseModel();
Date endTime = new Date();
long diff = endTime.getTime() - startTime.getTime();
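// diff is the elapsed wall-clock test time in milliseconds.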
if (m_Debug) {
System.out.println("Test time for "
+ m_chosen_models[i].getStringRepresentation() + " was: " + diff);
}
if (printModelPerformances) {
String output = m_chosen_models[i].getStringRepresentation() + ": ";
output += "\tRMSE:" + evalModel.rootMeanSquaredError();
output += "\tACC:" + evalModel.pctCorrect();
if (test.numClasses() == 2) {
// For multiclass problems, we could print these too, but it's not
// clear which class we should use in that case, so instead we only
// print these metrics for binary classification problems.
output += "\tROC:" + evalModel.areaUnderROC(1);
output += "\tPREC:" + evalModel.precision(1);
output += "\tFSCR:" + evalModel.fMeasure(1);
}
System.out.println(output);
}
}
}