}
}
@Test
public void testClustering() {
UpdatableSearcher searcher = new BruteSearch(new SquaredEuclideanDistanceMeasure());
BallKMeans clusterer = new BallKMeans(searcher, 1 << NUM_DIMENSIONS, NUM_ITERATIONS);
long startTime = System.currentTimeMillis();
Pair<List<Centroid>, List<Centroid>> data = syntheticData;
clusterer.cluster(data.getFirst());
long endTime = System.currentTimeMillis();
long hash = 0;
for (Centroid centroid : data.getFirst()) {
for (Vector.Element element : centroid.all()) {
hash = 31 * hash + 17 * element.index() + Double.toHexString(element.get()).hashCode();
}
}
System.out.printf("Hash = %08x\n", hash);
assertEquals("Total weight not preserved", totalWeight(syntheticData.getFirst()), totalWeight(clusterer), 1.0e-9);
// Verify that each corner of the cube has a centroid very nearby.
// This is probably FALSE for high-dimensional spaces!
OnlineSummarizer summarizer = new OnlineSummarizer();
for (Vector mean : syntheticData.getSecond()) {
WeightedThing<Vector> v = searcher.search(mean, 1).get(0);
summarizer.add(v.getWeight());
}
assertTrue(String.format("Median weight [%f] too large [>%f]", summarizer.getMedian(),
DISTRIBUTION_RADIUS), summarizer.getMedian() < DISTRIBUTION_RADIUS);
double clusterTime = (endTime - startTime) / 1000.0;
System.out.printf("%s\n%.2f for clustering\n%.1f us per row\n\n",
searcher.getClass().getName(), clusterTime,
clusterTime / syntheticData.getFirst().size() * 1.0e6);
// Verify that the total weight of the centroids near each corner is correct.
double[] cornerWeights = new double[1 << NUM_DIMENSIONS];
Searcher trueFinder = new BruteSearch(new EuclideanDistanceMeasure());