} else if (m_algorithm == ALGORITHM_BEST) {
// ALGORITHM_BEST: pick the single best-performing model. Build a bag
// that includes the entire library (ratio 1.0), then sort-initialize
// to rank the models on the hillclimb metric and take the top one.
ModelBag model_bag = new ModelBag(predictions, 1.0, m_Debug);
int[] modelPicked = model_bag.sortInitialize(1, false, data,
m_hillclimbMetric);
// Give the winning model a weight of 1; every other entry in
// modelWeights stays 0.
modelWeights[modelPicked[0]] = 1;
} else {
// Otherwise run one of the hillclimbing algorithms (forward
// selection, backward elimination, or combined forward/backward)
// over a number of independently-drawn model bags.
if (m_Debug)
System.out.println("Starting hillclimbing algorithm: "
+ m_algorithm);
for (int i = 0; i < getNumModelBags(); ++i) {
// Build and optimize one ensemble bag per iteration.
if (m_Debug)
System.out.println("Starting on ensemble bag: " + i);
// Each bag holds a random subset of the library whose size is
// controlled by the model ratio.
ModelBag modelBag = new ModelBag(predictions, getModelRatio(),
m_Debug);
// Shuffle so every bag sees a different random subset of models.
modelBag.shuffle(rand);
if (getSortInitializationRatio() > 0.0) {
// Sort initialization: seed the bag with its top-performing
// models (per the hillclimb metric) before hillclimbing.
modelBag.sortInitialize((int) (getSortInitializationRatio()
* getModelRatio() * numModels),
getGreedySortInitialization(), data,
m_hillclimbMetric);
}
if (m_algorithm == ALGORITHM_BACKWARD) {
// Backward elimination starts from the full bag, so give all
// models an initial weight of 1. If the number of hillclimb
// iterations is too high, we'll end up with just one model in
// the end (we never delete all models from a bag).
// TODO - it might be smarter to base this weight off of how
// many models we have; for now we just assume 1.
modelBag.weightAll(1);
}
// The bag is initialized; run the hillclimb steps.
for (int j = 0; j < getHillclimbIterations(); ++j) {
if (m_algorithm == ALGORITHM_FORWARD) {
modelBag.forwardSelect(getReplacement(), data,
m_hillclimbMetric);
} else if (m_algorithm == ALGORITHM_BACKWARD) {
modelBag.backwardEliminate(data, m_hillclimbMetric);
} else if (m_algorithm == ALGORITHM_FORWARD_BACKWARD) {
modelBag.forwardSelectOrBackwardEliminate(
getReplacement(), data, m_hillclimbMetric);
}
}
// Hillclimbing done for this bag: fold the weights it settled on
// into the running per-model totals.
int[] bagWeights = modelBag.getModelWeights();
for (int j = 0; j < bagWeights.length; ++j) {
modelWeights[j] += bagWeights[j];
}
}
}
// The ensemble has now been learned. Set up the data structures that
// Ensemble Selection needs to make predictions for future test examples.
Set modelNames = m_library.getModelNames();
String[] modelNamesArray = new String[m_library.size()];
Iterator iter = modelNames.iterator();
// libraryIndex runs over ALL models in the library (not just those
// chosen for the ensemble).
int libraryIndex = 0;
// chosenModels counts the models that Ensemble Selection actually
// selected, i.e. those that ended up with a non-zero weight.
int chosenModels = 0;
while (iter.hasNext()) {
// Order matters here: modelWeights is indexed in the same order as
// the list of models in m_library, which this iterator is assumed
// to follow.
modelNamesArray[libraryIndex] = (String) iter.next();
// Accumulate this model's weight into the ensemble's total weight.
int weightOfModel = modelWeights[libraryIndex++];
m_total_weight += weightOfModel;
if (weightOfModel > 0) {
// Chosen at least once by the hillclimb.
++chosenModels;
}
}
if (m_verboseOutput) {
// Verbose mode: print every library model together with its
// performance on the hillclimb (validation) data, ranked by the
// hillclimb metric via sortInitialize.
ModelBag bag = new ModelBag(predictions, 1.0, m_Debug);
int modelIndexes[] = bag.sortInitialize(modelNamesArray.length,
false, data, m_hillclimbMetric);
double modelPerformance[] = bag.getIndividualPerformance(data,
m_hillclimbMetric);
for (int i = 0; i < modelIndexes.length; ++i) {
// TODO - Could do this in a more readable way.
// NOTE(review): this pairs modelPerformance[i] with
// modelNamesArray[modelIndexes[i]], which assumes
// getIndividualPerformance returns values in the same sorted
// order that sortInitialize produced — verify against ModelBag.
System.out.println("" + modelPerformance[i] + " "
+ modelNamesArray[modelIndexes[i]]);