// Initialize ensemble with "best" method
// Log the distance function in use, the current union_outliers estimate and
// the label of the entry (bestid) that seeds the ensemble.
logger.verbose("Distance function: " + wdist);
logger.verbose("Initial estimation of outliers: " + union_outliers);
logger.verbose("Initializing ensemble with: " + labels.get(bestid));
// Ensemble membership, created from bestid alone.
ModifiableDBIDs ensemble = DBIDUtil.newArray(bestid);
// Candidate pool: all IDs of the relation, minus bestid (already a member)
// and firstid.
// NOTE(review): why firstid is excluded is not visible in this section;
// presumably it is a reference entry rather than a real candidate - confirm.
ModifiableDBIDs enscands = DBIDUtil.newHashSet(relation.getDBIDs());
enscands.remove(bestid);
enscands.remove(firstid);
// Current ensemble vector as a plain array of length dim; it starts as a copy
// of the vector stored for bestid.
final double[] greedyensemble = new double[dim];
{
// Braces limit the scope of this "vec" to the copy loop.
final NumberVector<?, ?> vec = relation.get(bestid);
for(int i = 0; i < dim; i++) {
// The array is 0-based while doubleValue is called with i + 1
// (presumably a 1-based dimension index - confirm against NumberVector).
greedyensemble[i] = vec.doubleValue(i + 1);
}
}
// Greedily grow the ensemble
// Scratch buffer of length dim; the inner loop writes the tentative combined
// vector (current ensemble plus one candidate) into it.
final double[] testensemble = new double[dim];
// Outer loop: keep going while candidates remain in enscands.
while(enscands.size() > 0) {
// Wrap the current ensemble values in a vector object so that wdist can
// compare candidate vectors against it.
NumberVector<?, ?> greedyvec = refvec.newNumberVector(greedyensemble);
// Weighting factors for combining:
// with n = ensemble.size(), s1 = n / (n + 1) weights the current ensemble
// vector and s2 = 1 / (n + 1) weights the candidate, so s1 + s2 == 1.
double s1 = ensemble.size() / (ensemble.size() + 1.);
double s2 = 1. / (ensemble.size() + 1.);
// The heap capacity equals the number of candidates, and exactly that many
// entries are added in the loop below.
final int heapsize = enscands.size();
// NOTE(review): the heap is built with Collections.reverseOrder(); the
// resulting poll() order depends on TopBoundedHeap semantics that are not
// visible here - confirm which candidates come out first.
TopBoundedHeap<DoubleObjPair<DBID>> heap = new TopBoundedHeap<DoubleObjPair<DBID>>(heapsize, Collections.reverseOrder());
for(DBID id : enscands) {
final NumberVector<?, ?> vec = relation.get(id);
// "diversity" is the wdist distance between the candidate's vector and the
// current ensemble vector.
double diversity = wdist.doubleDistance(vec, greedyvec);
heap.add(new DoubleObjPair<DBID>(diversity, id));
}
// Process the candidates in the order the heap returns them.
while(heap.size() > 0) {
DBID bestadd = heap.poll().second;
// The candidate leaves the pool as soon as it is polled; this is what
// shrinks enscands towards the outer loop's exit condition.
enscands.remove(bestadd);
// Update ensemble:
// Compute the tentative combination into testensemble; greedyensemble itself
// is not modified by the lines visible here.
final NumberVector<?, ?> vec = relation.get(bestadd);
for(int i = 0; i < dim; i++) {
testensemble[i] = greedyensemble[i] * s1 + vec.doubleValue(i + 1) * s2;
}