generateSamples(100, 0, 2, 0.3);
generateSamples(100, 2, 2, 1);
ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("input/data.txt"), fs, conf);
// Run the driver through its argument-array run() entry point; other callers can still use runJob() as before.
Integer maxIterations = 5;
AbstractVectorModelDistribution modelDistribution = new SampledNormalDistribution(new VectorWritable(new DenseVector(2)));
String[] args = { optKey(DefaultOptionCreator.INPUT_OPTION), getTestTempDirPath("input").toString(),
optKey(DefaultOptionCreator.OUTPUT_OPTION), getTestTempDirPath("output").toString(),
optKey(DirichletDriver.MODEL_DISTRIBUTION_CLASS_OPTION), modelDistribution.getClass().getName(),
optKey(DirichletDriver.MODEL_PROTOTYPE_CLASS_OPTION), modelDistribution.getModelPrototype().get().getClass().getName(),
optKey(DefaultOptionCreator.NUM_CLUSTERS_OPTION), "20", optKey(DefaultOptionCreator.MAX_ITERATIONS_OPTION),
maxIterations.toString(), optKey(DirichletDriver.ALPHA_OPTION), "1.0", optKey(DefaultOptionCreator.OVERWRITE_OPTION),
optKey(DefaultOptionCreator.CLUSTERING_OPTION), optKey(DefaultOptionCreator.METHOD_OPTION),
DefaultOptionCreator.SEQUENTIAL_METHOD };
new DirichletDriver().run(args);
// Then load the cluster state stored under each "clusters-<i>" output directory so the results can be inspected.
Collection<List<DirichletCluster>> clusters = new ArrayList<List<DirichletCluster>>();
Configuration conf = new Configuration();
conf.set(DirichletDriver.MODEL_DISTRIBUTION_KEY, modelDistribution.asJsonString());
conf.set(DirichletDriver.NUM_CLUSTERS_KEY, "20");
conf.set(DirichletDriver.ALPHA_0_KEY, "1.0");
for (int i = 0; i <= maxIterations; i++) {
conf.set(DirichletDriver.STATE_IN_KEY, new Path(getTestTempDirPath("output"), "clusters-" + i).toString());
clusters.add(DirichletMapper.getDirichletState(conf).getClusters());