Centroid c_1 = new Centroid((WeightedVector)datapoints.get(seedSelector.sample()).clone());
c_1.setIndex(0);
// Construct a set of weighted things which can be used for random selection. Initial weights are
// set to the squared distance from c_1, scaled by the weight of each data point.
for (int i = 0; i < datapoints.size(); ++i) {
WeightedVector row = datapoints.get(i);
final double w = l2.distance(c_1, row) * row.getWeight();
seedSelector.set(i, w);
}
// From here, seeds are selected with probability proportional to:
//
// r_i = min_{c_j} || x_i - c_j ||^2
//
// When we only have c_1, these distances have already been set; as we select each new
// seed, we update the minimum distances.
centroids.add(c_1);
int clusterIndex = 1;
while (centroids.size() < numClusters) {
// Select according to weights.
int seedIndex = seedSelector.sample();
Centroid nextSeed = new Centroid((WeightedVector)datapoints.get(seedIndex).clone());
nextSeed.setIndex(clusterIndex++);
centroids.add(nextSeed);
// Don't select this one again.
seedSelector.set(seedIndex, 0);
// Re-weight everything according to the minimum distance to a seed.
for (int currSeedIndex : seedSelector) {
WeightedVector curr = datapoints.get(currSeedIndex);
double newWeight = nextSeed.getWeight() * l2.distance(nextSeed, curr);
if (newWeight < seedSelector.getWeight(currSeedIndex)) {
seedSelector.set(currSeedIndex, newWeight);
}
}