// Trace which value of k this pass of the test is exercising.
System.out.println("testKFuzzyKMeansMRJob k= " + k);
// Seed the initial clusters. The loop bound is k + 1, so clusters 0..k are
// created, one from each of the first k + 1 entries of points (the selection
// itself is positional, not random). Each source vector goes through
// tweakValue before it becomes a center.
// NOTE(review): tweakValue is defined elsewhere; presumably it perturbs the
// vector so a center is not identical to its source point - confirm.
List<SoftCluster> clusterList = new ArrayList<SoftCluster>();
for (int i = 0; i < k + 1; i++) {
Vector vec = tweakValue(points.get(i).get());
// the loop index is the second constructor argument (presumably the cluster id - confirm)
SoftCluster cluster = new SoftCluster(vec, i);
// feed the cluster's own center back in as a point; the literal 1 is the
// second argument (presumably a weight/membership value - confirm)
cluster.addPoint(cluster.getCenter(), 1);
clusterList.add(cluster);
}
// Run the mapper by hand: hand it the seed clusters, configure it from a
// JobConf, then push every point through map() into an in-memory collector.
FuzzyKMeansMapper mapper = new FuzzyKMeansMapper();
mapper.config(clusterList);
// Job settings; the same conf object is reused to configure the combiner,
// reducer and cluster mapper further down.
JobConf conf = new JobConf();
conf.set(FuzzyKMeansConfigKeys.DISTANCE_MEASURE_KEY,
"org.apache.mahout.common.distance.EuclideanDistanceMeasure");
conf.set(FuzzyKMeansConfigKeys.CLUSTER_CONVERGENCE_KEY, "0.001");
// NOTE(review): M_KEY is presumably the fuzziness exponent m - confirm
conf.set(FuzzyKMeansConfigKeys.M_KEY, "2");
mapper.configure(conf);
DummyOutputCollector<Text,FuzzyKMeansInfo> mapCollector = new DummyOutputCollector<Text,FuzzyKMeansInfo>();
for (VectorWritable point : points) {
// each call gets a fresh empty Text as the input key and null as the last argument
mapper.map(new Text(), point, mapCollector, null);
}
// Ask every seed cluster to recompute its center once all points are mapped.
// NOTE(review): whether map() modifies these cluster objects is not visible
// here, so the effect of this step should be confirmed.
for (SoftCluster softCluster : clusterList) {
softCluster.recomputeCenter();
}
// Run the combiner by hand: for every key the mapper emitted, pass that key's
// collected values through combiner.reduce() into a second in-memory collector.
DummyOutputCollector<Text,FuzzyKMeansInfo> combinerCollector = new DummyOutputCollector<Text,FuzzyKMeansInfo>();
FuzzyKMeansCombiner combiner = new FuzzyKMeansCombiner();
// same JobConf that configured the mapper
combiner.configure(conf);
for (String key : mapCollector.getKeys()) {
List<FuzzyKMeansInfo> values = mapCollector.getValue(key);
// the String key is wrapped back into a Text; the last argument is null
combiner.reduce(new Text(key), values.iterator(), combinerCollector, null);
}
// Run the reducer by hand over the combiner output: one reduce() call per key,
// collecting the resulting SoftCluster objects in reducerCollector.
DummyOutputCollector<Text,SoftCluster> reducerCollector = new DummyOutputCollector<Text,SoftCluster>();
FuzzyKMeansReducer reducer = new FuzzyKMeansReducer();
// the reducer is given the same seed cluster list and JobConf as the mapper
reducer.config(clusterList);
reducer.configure(conf);
for (String key : combinerCollector.getKeys()) {
List<FuzzyKMeansInfo> values = combinerCollector.getValue(key);
// the String key is wrapped back into a Text; the last argument is null
reducer.reduce(new Text(key), values.iterator(), reducerCollector, null);
}
// Prepare the input for the cluster mapper: take the first value stored under
// each reducer output key (any further values for that key are ignored).
List<SoftCluster> reducerCluster = new ArrayList<SoftCluster>();
for (String key : reducerCollector.getKeys()) {
List<SoftCluster> values = reducerCollector.getValue(key);
reducerCluster.add(values.get(0));
}
// recompute the center of every cluster that came out of the reducer
for (SoftCluster softCluster : reducerCluster) {
softCluster.recomputeCenter();
}
// run clusterMapper: configure it with the reducer's clusters and the shared
// JobConf, then map every point into clusterMapperCollector
DummyOutputCollector<Text,FuzzyKMeansOutput> clusterMapperCollector = new DummyOutputCollector<Text,FuzzyKMeansOutput>();
FuzzyKMeansClusterMapper clusterMapper = new FuzzyKMeansClusterMapper();
clusterMapper.config(reducerCluster);
clusterMapper.configure(conf);
for (VectorWritable point : points) {
// fresh empty Text as the input key, null as the last argument
clusterMapper.map(new Text(), point, clusterMapperCollector, null);
}
// Set up the reference (non-MapReduce) fuzzy k-means run whose result after
// one iteration is to be compared with the MapReduce output produced above.
// The reference clusters are seeded the same way as clusterList: clusters
// 0..k built from tweakValue of the first k + 1 points. Unlike the seed loop
// for clusterList, no addPoint call is made on these clusters.
List<SoftCluster> reference = new ArrayList<SoftCluster>();
for (int i = 0; i < k + 1; i++) {
Vector vec = tweakValue(points.get(i).get());
reference.add(new SoftCluster(vec, i));
}
// Empty containers for the reference run.
// NOTE(review): presumably pointsVectors is filled by the loop over points
// that follows and pointClusterInfo receives the reference assignments -
// confirm against the code below.
Map<String,String> pointClusterInfo = new HashMap<String,String>();
List<Vector> pointsVectors = new ArrayList<Vector>();
for(VectorWritable point : points)