*
* @throws Exception if the code under test raises an unexpected error
*/
public void testKMeansMapper() throws Exception {
  // Runs the mapper once for every cluster count from 1 to points.size() and
  // checks that each emitted point sits under the cluster whose center is at
  // least as close as every other configured center.
  KMeansMapper mapper = new KMeansMapper();
  EuclideanDistanceMeasure euclideanDistanceMeasure = new EuclideanDistanceMeasure();
  Cluster.config(euclideanDistanceMeasure, 0.001);
  List<Vector> points = getPoints(reference);
  for (int k = 0; k < points.size(); k++) {
    // seed this run with the first k + 1 reference points as cluster centers
    DummyOutputCollector<Text, Text> collector = new DummyOutputCollector<Text, Text>();
    List<Cluster> clusters = new ArrayList<Cluster>();
    for (int i = 0; i < k + 1; i++) {
      Cluster cluster = new Cluster(points.get(i));
      // add the center so the centroid will be correct upon output
      cluster.addPoint(cluster.getCenter());
      clusters.add(cluster);
    }
    Map<String, Cluster> clusterMap = loadClusterMap(clusters);
    mapper.config(clusters);
    // map the data
    for (Vector point : points) {
      mapper.map(new Text(), new Text(point.asFormatString()), collector, null);
    }
    assertEquals("Number of map results", k + 1, collector.getData().size());
    // now verify that all points are correctly allocated
    for (String key : collector.getKeys()) {
      Cluster cluster = clusterMap.get(key);
      // report the offending key instead of failing later with a bare NullPointerException
      assertTrue("unknown cluster key: " + key, cluster != null);
      List<Text> values = collector.getValue(key);
      for (Writable value : values) {
        // the encoded vector is read from the second tab-separated field
        String[] pointInfo = value.toString().split("\t");
        assertTrue("malformed map value: " + value, pointInfo.length > 1);
        Vector point = AbstractVector.decodeVector(pointInfo[1]);
        double distance = euclideanDistanceMeasure.distance(cluster.getCenter(), point);
        // no configured cluster center may be strictly closer than the assigned one
        for (Cluster c : clusters) {
          assertTrue("distance error",
              distance <= euclideanDistanceMeasure.distance(point, c.getCenter()));
        }
      }
    }
  }
}