// Build the test input: generate samples through the test helper and write the
// accumulated sampleData to input/data.txt under the test temp directory.
// NOTE(review): the helper's argument meanings are not visible from here.
generateAsymmetricSamples(100, 0, 3, 0.3, 4.0);
// NOTE(review): `fs` is used here before the local `FileSystem fs` declared further
// down, so this call must resolve to an outer `fs` (presumably a field) - confirm.
ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("input/data.txt"), fs, conf);
// Now run the driver using the run() method. Others can use runJob() as
// before
// Distance measure instance used below only to derive the inverse covariance matrix;
// the driver itself receives class names through `description`.
MahalanobisDistanceMeasure measure = new MahalanobisDistanceMeasure();
// Describes the model factory, model prototype and distance measure by class name.
// NOTE(review): the trailing 2 presumably is the prototype size (2-D vectors) - confirm.
DistributionDescription description = new DistributionDescription(
DistanceMeasureClusterDistribution.class.getName(), DenseVector.class.getName(),
MahalanobisDistanceMeasure.class.getName(), 2);
// Configure the measure with a zero mean and a diagonal 2x2 covariance matrix.
Vector meanVector = new DenseVector(new double[] {0.0, 0.0});
measure.setMeanVector(meanVector);
Matrix m = new DenseMatrix(new double[][] { {0.5, 0.0}, {0.0, 4.0}});
measure.setCovarianceMatrix(m);
// Persist the inverse covariance matrix to a file in the "mahalanobis" temp directory
// and record that file's path in the configuration.
Path inverseCovarianceFile = new Path(getTestTempDirPath("mahalanobis"),
"MahalanobisDistanceMeasureInverseCovarianceFile");
conf.set("MahalanobisDistanceMeasure.inverseCovarianceFile", inverseCovarianceFile.toString());
// From this point on, `fs` refers to this local variable.
FileSystem fs = FileSystem.get(inverseCovarianceFile.toUri(), conf);
MatrixWritable inverseCovarianceMatrix = new MatrixWritable(measure.getInverseCovarianceMatrix());
DataOutputStream out = fs.create(inverseCovarianceFile);
try {
inverseCovarianceMatrix.write(out);
} finally {
// NOTE(review): if this is Guava's Closeables, passing `true` swallows an IOException
// thrown by close(), which on an output stream can hide a failed write - confirm.
Closeables.close(out, true);
}
// Persist the mean vector the same way and record its path in the configuration.
Path meanVectorFile = new Path(getTestTempDirPath("mahalanobis"), "MahalanobisDistanceMeasureMeanVectorFile");
conf.set("MahalanobisDistanceMeasure.meanVectorFile", meanVectorFile.toString());
// `fs` and `out` are reassigned (not redeclared) for the second file.
fs = FileSystem.get(meanVectorFile.toUri(), conf);
VectorWritable meanVectorWritable = new VectorWritable(meanVector);
out = fs.create(meanVectorFile);
try {
meanVectorWritable.write(out);
} finally {
// Same close-with-swallow behaviour as for the matrix file above.
Closeables.close(out, true);
}
// Record the Writable classes used to serialize the matrix and the vector.
// NOTE(review): the key is spelled "maxtrixClass"; it is a runtime string that must match
// whatever reads it, so verify against the consumer before "correcting" the spelling.
conf.set("MahalanobisDistanceMeasure.maxtrixClass", MatrixWritable.class.getName());
conf.set("MahalanobisDistanceMeasure.vectorClass", VectorWritable.class.getName());
// Boxed Integer; within the visible code it is only used via toString() in the args below.
Integer maxIterations = 5;
Path outputPath = getTestTempDirPath("output");
// Command-line style arguments for the driver: input/output directories, the class names
// taken from `description`, 20 clusters, the iteration cap, alpha 1.0, the overwrite and
// clustering flags (no values), and the sequential execution method.
String[] args = {optKey(DefaultOptionCreator.INPUT_OPTION), getTestTempDirPath("input").toString(),
optKey(DefaultOptionCreator.OUTPUT_OPTION), outputPath.toString(),
optKey(DirichletDriver.MODEL_DISTRIBUTION_CLASS_OPTION), description.getModelFactory(),
optKey(DefaultOptionCreator.DISTANCE_MEASURE_OPTION), description.getDistanceMeasure(),
optKey(DirichletDriver.MODEL_PROTOTYPE_CLASS_OPTION), description.getModelPrototype(),
optKey(DefaultOptionCreator.NUM_CLUSTERS_OPTION), "20", optKey(DefaultOptionCreator.MAX_ITERATIONS_OPTION),
maxIterations.toString(), optKey(DirichletDriver.ALPHA_OPTION), "1.0",
optKey(DefaultOptionCreator.OVERWRITE_OPTION), optKey(DefaultOptionCreator.CLUSTERING_OPTION),
optKey(DefaultOptionCreator.METHOD_OPTION), DefaultOptionCreator.SEQUENTIAL_METHOD};
// Driver instance; how it is invoked with `args` lies beyond the visible lines.
DirichletDriver dirichletDriver = new DirichletDriver();