if(!parseParameters(args)) {
throw new RuntimeException("Unable to parse the arguments");
}
// set up execution environment
ExecutionEnvironment env = new RemoteEnvironment("localhost", 1, null);
// get input data
DataSet<Point> points = getPointDataSet(env);
DataSet<Centroid> centroids = getCentroidDataSet(env);
// set number of bulk iterations for KMeans algorithm
IterativeDataSet<Centroid> loop = centroids.iterate(numIterations);
DataSet<Centroid> newCentroids = points
// compute closest centroid for each point
.map(new SelectNearestCenter()).withBroadcastSet(loop, "centroids")
// count and sum point coordinates for each centroid
.map(new CountAppender())
.groupBy(0).reduce(new CentroidAccumulator())
// compute new centroids from point counts and coordinate sums
.map(new CentroidAverager());
// feed new centroids back into next iteration
DataSet<Centroid> finalCentroids = loop.closeWith(newCentroids);
DataSet<Tuple2<Integer, Point>> clusteredPoints = points
// assign points to final clusters
.map(new SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids");
// emit result
if(fileOutput) {
clusteredPoints.writeAsCsv(outputPath, "\n", " ");
} else {
clusteredPoints.print();
}
return env.createProgramPlan();
}