// Copies the current cluster centroids into newCentroids and sets their weights to 0, so that
// the new centroids are calculated as we go through the datapoints.
List<Centroid> newCentroids = Lists.newArrayList();
for (Vector centroid : centroids) {
// need a deep copy because we will mutate these values
Centroid newCentroid = (Centroid)centroid.clone();
newCentroid.setWeight(0);
newCentroids.add(newCentroid);
}
// Pass over the datapoints computing new centroids.
for (int j = 0; j < datapoints.size(); ++j) {
WeightedVector datapoint = datapoints.get(j);
// Get the closest cluster this point belongs to.
WeightedThing<Vector> closestPair = centroids.searchFirst(datapoint, false);
int closestIndex = ((WeightedVector) closestPair.getValue()).getIndex();
double closestDistance = closestPair.getWeight();
// Update its cluster assignment if necessary.
if (closestIndex != clusterAssignments.get(j)) {
changed = true;
clusterAssignments.set(j, closestIndex);
}
// Only update if the datapoint is near enough. What this means is that the weight
// of outliers is NOT taken into account and the final weights of the centroids will
// reflect this (they will be less than or equal to the initial sum of the weights).
if (closestDistance < trimFraction * closestClusterDistances.get(closestIndex)) {
newCentroids.get(closestIndex).update(datapoint);
}
}
// Add the new centers back into searcher.
centroids.clear();
centroids.addAll(newCentroids);
}
if (correctWeights) {
for (Vector v : centroids) {
((Centroid)v).setWeight(0);
}
for (WeightedVector datapoint : datapoints) {
Centroid closestCentroid = (Centroid) centroids.searchFirst(datapoint, false).getValue();
closestCentroid.setWeight(closestCentroid.getWeight() + datapoint.getWeight());
}
}
}