generateAsymmetricSamples(100, 0, 0, 0.5, 3.0);
generateAsymmetricSamples(100, 0, 3, 0.3, 4.0);
ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("input/data.txt"), fs, conf);
// Run the driver through its run() method, passing command-line style arguments; other callers can still use runJob() as before.
MahalanobisDistanceMeasure measure = new MahalanobisDistanceMeasure();
DistributionDescription description =
new DistributionDescription(DistanceMeasureClusterDistribution.class.getName(),
DenseVector.class.getName(),
MahalanobisDistanceMeasure.class.getName(),
2);
Vector meanVector = new DenseVector(new double[] { 0.0, 0.0 });
measure.setMeanVector(meanVector);
Matrix m= new DenseMatrix(new double [][] {{0.5, 0.0}, {0.0, 4.0}});
measure.setCovarianceMatrix(m);
Path inverseCovarianceFile =
new Path(getTestTempDirPath("mahalanobis"), "MahalanobisDistanceMeasureInverseCovarianceFile");
conf.set("MahalanobisDistanceMeasure.inverseCovarianceFile", inverseCovarianceFile.toString());
FileSystem fs = FileSystem.get(inverseCovarianceFile.toUri(), conf);
MatrixWritable inverseCovarianceMatrix = new MatrixWritable(measure.getInverseCovarianceMatrix());
DataOutputStream out = fs.create(inverseCovarianceFile);
try {
inverseCovarianceMatrix.write(out);
} finally {
Closeables.closeQuietly(out);
}
Path meanVectorFile = new Path(getTestTempDirPath("mahalanobis"), "MahalanobisDistanceMeasureMeanVectorFile");
conf.set("MahalanobisDistanceMeasure.meanVectorFile", meanVectorFile.toString());
fs = FileSystem.get(meanVectorFile.toUri(), conf);
VectorWritable meanVectorWritable = new VectorWritable(meanVector);
out = fs.create(meanVectorFile);
try {
meanVectorWritable.write(out);
} finally {
Closeables.closeQuietly(out);
}
conf.set("MahalanobisDistanceMeasure.maxtrixClass", MatrixWritable.class.getName());
conf.set("MahalanobisDistanceMeasure.vectorClass", VectorWritable.class.getName());
Integer maxIterations = 5;
String[] args = { optKey(DefaultOptionCreator.INPUT_OPTION), getTestTempDirPath("input").toString(),
optKey(DefaultOptionCreator.OUTPUT_OPTION), getTestTempDirPath("output").toString(),
optKey(DirichletDriver.MODEL_DISTRIBUTION_CLASS_OPTION), description.getModelFactory(),
optKey(DefaultOptionCreator.DISTANCE_MEASURE_OPTION), description.getDistanceMeasure(),
optKey(DirichletDriver.MODEL_PROTOTYPE_CLASS_OPTION), description.getModelPrototype(),
optKey(DefaultOptionCreator.NUM_CLUSTERS_OPTION), "20", optKey(DefaultOptionCreator.MAX_ITERATIONS_OPTION),
maxIterations.toString(), optKey(DirichletDriver.ALPHA_OPTION), "1.0",
optKey(DefaultOptionCreator.OVERWRITE_OPTION),
optKey(DefaultOptionCreator.CLUSTERING_OPTION), optKey(DefaultOptionCreator.METHOD_OPTION),
DefaultOptionCreator.SEQUENTIAL_METHOD };
DirichletDriver dirichletDriver = new DirichletDriver();
dirichletDriver.setConf(conf);
dirichletDriver.run(args);
// Read back the cluster state written for each iteration and inspect the results.
Collection<List<DirichletCluster>> clusters = Lists.newArrayList();
Configuration conf = new Configuration();
conf.set(DirichletDriver.MODEL_DISTRIBUTION_KEY, description.toString());
conf.set(DirichletDriver.NUM_CLUSTERS_KEY, "20");
conf.set(DirichletDriver.ALPHA_0_KEY, "1.0");
for (int i = 0; i <= maxIterations; i++) {
conf.set(DirichletDriver.STATE_IN_KEY, new Path(getTestTempDirPath("output"), "clusters-" + i).toString());
clusters.add(DirichletMapper.getDirichletState(conf).getClusters());