}
}
@Test
public void testClustering() {
UpdatableSearcher searcher = new BruteSearch(new SquaredEuclideanDistanceMeasure());
BallKMeans clusterer = new BallKMeans(searcher, 1 << NUM_DIMENSIONS, NUM_ITERATIONS);
long startTime = System.currentTimeMillis();
clusterer.cluster(syntheticData.getFirst());
long endTime = System.currentTimeMillis();
assertEquals("Total weight not preserved", totalWeight(syntheticData.getFirst()), totalWeight(clusterer), 1.0e-9);
// Verify that each corner of the cube has a centroid close to it.
// NOTE: this property probably does NOT hold in high-dimensional spaces!
OnlineSummarizer summarizer = new OnlineSummarizer();
for (Vector mean : syntheticData.getSecond()) {
WeightedThing<Vector> v = searcher.search(mean, 1).get(0);
summarizer.add(v.getWeight());
}
assertTrue(String.format("Median weight [%f] too large [>%f]", summarizer.getMedian(),
DISTRIBUTION_RADIUS), summarizer.getMedian() < DISTRIBUTION_RADIUS);
double clusterTime = (endTime - startTime) / 1000.0;
System.out.printf("%s\n%.2f for clustering\n%.1f us per row\n\n",
searcher.getClass().getName(), clusterTime,
clusterTime / syntheticData.getFirst().size() * 1.0e6);
// Verify that the total weight of the centroids near each corner is correct.
double[] cornerWeights = new double[1 << NUM_DIMENSIONS];
Searcher trueFinder = new BruteSearch(new EuclideanDistanceMeasure());
for (Vector trueCluster : syntheticData.getSecond()) {
trueFinder.add(trueCluster);
}
for (Centroid centroid : clusterer) {
WeightedThing<Vector> closest = trueFinder.search(centroid, 1).get(0);
cornerWeights[((Centroid)closest.getValue()).getIndex()] += centroid.getWeight();
}
int expectedNumPoints = NUM_DATA_POINTS / (1 << NUM_DIMENSIONS);
for (double v : cornerWeights) {
System.out.printf("%f ", v);