Package org.apache.mahout.clustering.dirichlet.models

Examples of org.apache.mahout.clustering.dirichlet.models.DistributionDescription


 
  @Test
  public void testDirichlet3() throws Exception {
    Path output = getTestTempDirPath("output");
    NamedVector prototype = (NamedVector) sampleData.get(0).get();
    DistributionDescription description = new DistributionDescription(
        DistanceMeasureClusterDistribution.class.getName(),
        RandomAccessSparseVector.class.getName(),
        ManhattanDistanceMeasure.class.getName(), prototype.getDelegate()
            .size());
    Configuration conf = new Configuration();
View Full Code Here


  }
 
  @Test
  public void testDirichlet() throws Exception {
    ClusteringTestUtils.writePointsToFile(sampleData, new Path(testdata, "file1"), fs, conf);
    DistributionDescription description = new DistributionDescription(GaussianClusterDistribution.class.getName(),
        DenseVector.class.getName(), null, 2);
    DirichletDriver.run(new Configuration(), testdata, output, description, 15, 5, 1.0, true, true, 0.0, true);
    int numIterations = 10;
    Configuration conf = new Configuration();
    Path clustersIn = new Path(output, "clusters-5-final");
View Full Code Here

  }
 
  @Test
  public void testDocs() throws Exception {
    getSampleData(DOCS);
    DistributionDescription description = new DistributionDescription(GaussianClusterDistribution.class.getName(),
        RandomAccessSparseVector.class.getName(), ManhattanDistanceMeasure.class.getName(), sampleData.get(0).size());
   
    List<Cluster> models = Lists.newArrayList();
    ModelDistribution<VectorWritable> modelDist = description.createModelDistribution(new Configuration());
    for (Model<VectorWritable> cluster : modelDist.sampleFromPrior(15)) {
      models.add((Cluster) cluster);
    }
   
    ClusterClassifier classifier = new ClusterClassifier(models, new DirichletClusteringPolicy(15, 1.0));
View Full Code Here

 
  @Test
  public void testDMDocs() throws Exception {
   
    getSampleData(DOCS);
    DistributionDescription description = new DistributionDescription(
        DistanceMeasureClusterDistribution.class.getName(), RandomAccessSparseVector.class.getName(),
        CosineDistanceMeasure.class.getName(), sampleData.get(0).size());
   
    List<Cluster> models = Lists.newArrayList();
    ModelDistribution<VectorWritable> modelDist = description.createModelDistribution(new Configuration());
    for (Model<VectorWritable> cluster : modelDist.sampleFromPrior(15)) {
      models.add((Cluster) cluster);
    }
   
    ClusterClassifier classifier = new ClusterClassifier(models, new DirichletClusteringPolicy(15, 1.0));
View Full Code Here

  }
 
  @Test
  public void testDocs2() throws Exception {
    getSampleData(DOCS2);
    DistributionDescription description = new DistributionDescription(GaussianClusterDistribution.class.getName(),
        RandomAccessSparseVector.class.getName(), ManhattanDistanceMeasure.class.getName(), sampleData.get(0).size());
   
    List<Cluster> models = Lists.newArrayList();
    ModelDistribution<VectorWritable> modelDist = description.createModelDistribution(new Configuration());
    for (Model<VectorWritable> cluster : modelDist.sampleFromPrior(15)) {
      models.add((Cluster) cluster);
    }
   
    ClusterClassifier classifier = new ClusterClassifier(models, new DirichletClusteringPolicy(15, 1.0));
View Full Code Here

 
  @Test
  public void testDMDocs2() throws Exception {
   
    getSampleData(DOCS);
    DistributionDescription description = new DistributionDescription(
        DistanceMeasureClusterDistribution.class.getName(), RandomAccessSparseVector.class.getName(),
        CosineDistanceMeasure.class.getName(), sampleData.get(0).size());
   
    List<Cluster> models = Lists.newArrayList();
    ModelDistribution<VectorWritable> modelDist = description.createModelDistribution(new Configuration());
    for (Model<VectorWritable> cluster : modelDist.sampleFromPrior(15)) {
      models.add((Cluster) cluster);
    }
   
    ClusterClassifier classifier = new ClusterClassifier(models, new DirichletClusteringPolicy(15, 1.0));
View Full Code Here

    generateSamples(100, 2, 2, 1);
    ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("input/data.txt"), fs, conf);
    // Now run the driver using the run() method. Others can use runJob() as
    // before
    Integer maxIterations = 5;
    DistributionDescription description = new DistributionDescription(GaussianClusterDistribution.class.getName(),
        DenseVector.class.getName(), null, 2);
    Path outputPath = getTestTempDirPath("output");
    String[] args = {optKey(DefaultOptionCreator.INPUT_OPTION), getTestTempDirPath("input").toString(),
        optKey(DefaultOptionCreator.OUTPUT_OPTION), outputPath.toString(),
        optKey(DirichletDriver.MODEL_DISTRIBUTION_CLASS_OPTION), description.getModelFactory(),
        optKey(DirichletDriver.MODEL_PROTOTYPE_CLASS_OPTION), description.getModelPrototype(),
        optKey(DefaultOptionCreator.NUM_CLUSTERS_OPTION), "20", optKey(DefaultOptionCreator.MAX_ITERATIONS_OPTION),
        maxIterations.toString(), optKey(DirichletDriver.ALPHA_OPTION), "1.0",
        optKey(DefaultOptionCreator.OVERWRITE_OPTION), optKey(DefaultOptionCreator.CLUSTERING_OPTION),
        optKey(DefaultOptionCreator.METHOD_OPTION), DefaultOptionCreator.SEQUENTIAL_METHOD};
    ToolRunner.run(conf, new DirichletDriver(), args);
View Full Code Here

    generateSamples(100, 2, 2, 1);
    ClusteringTestUtils.writePointsToFile(sampleData, true, getTestTempFilePath("input/data.txt"), fs, conf);
    // Now run the driver using the run() method. Others can use runJob() as
    // before
    Integer maxIterations = 5;
    DistributionDescription description = new DistributionDescription(GaussianClusterDistribution.class.getName(),
        DenseVector.class.getName(), null, 2);
    Path outputPath = getTestTempDirPath("output");
    String[] args = {optKey(DefaultOptionCreator.INPUT_OPTION), getTestTempDirPath("input").toString(),
        optKey(DefaultOptionCreator.OUTPUT_OPTION), outputPath.toString(),
        optKey(DirichletDriver.MODEL_DISTRIBUTION_CLASS_OPTION), description.getModelFactory(),
        optKey(DirichletDriver.MODEL_PROTOTYPE_CLASS_OPTION), description.getModelPrototype(),
        optKey(DefaultOptionCreator.NUM_CLUSTERS_OPTION), "20", optKey(DefaultOptionCreator.MAX_ITERATIONS_OPTION),
        maxIterations.toString(), optKey(DirichletDriver.ALPHA_OPTION), "1.0",
        optKey(DefaultOptionCreator.OVERWRITE_OPTION), optKey(DefaultOptionCreator.CLUSTERING_OPTION)};
    ToolRunner.run(conf, new DirichletDriver(), args);
    // and inspect results
View Full Code Here

    generateAsymmetricSamples(100, 0, 3, 0.3, 4.0);
    ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("input/data.txt"), fs, conf);
    // Now run the driver using the run() method. Others can use runJob() as
    // before
    MahalanobisDistanceMeasure measure = new MahalanobisDistanceMeasure();
    DistributionDescription description = new DistributionDescription(
        DistanceMeasureClusterDistribution.class.getName(), DenseVector.class.getName(),
        MahalanobisDistanceMeasure.class.getName(), 2);
   
    Vector meanVector = new DenseVector(new double[] {0.0, 0.0});
    measure.setMeanVector(meanVector);
    Matrix m = new DenseMatrix(new double[][] { {0.5, 0.0}, {0.0, 4.0}});
    measure.setCovarianceMatrix(m);
   
    Path inverseCovarianceFile = new Path(getTestTempDirPath("mahalanobis"),
        "MahalanobisDistanceMeasureInverseCovarianceFile");
    conf.set("MahalanobisDistanceMeasure.inverseCovarianceFile", inverseCovarianceFile.toString());
    FileSystem fs = FileSystem.get(inverseCovarianceFile.toUri(), conf);
    MatrixWritable inverseCovarianceMatrix = new MatrixWritable(measure.getInverseCovarianceMatrix());
    DataOutputStream out = fs.create(inverseCovarianceFile);
    try {
      inverseCovarianceMatrix.write(out);
    } finally {
      Closeables.close(out, true);
    }
   
    Path meanVectorFile = new Path(getTestTempDirPath("mahalanobis"), "MahalanobisDistanceMeasureMeanVectorFile");
    conf.set("MahalanobisDistanceMeasure.meanVectorFile", meanVectorFile.toString());
    fs = FileSystem.get(meanVectorFile.toUri(), conf);
    VectorWritable meanVectorWritable = new VectorWritable(meanVector);
    out = fs.create(meanVectorFile);
    try {
      meanVectorWritable.write(out);
    } finally {
      Closeables.close(out, true);
    }
   
    conf.set("MahalanobisDistanceMeasure.maxtrixClass", MatrixWritable.class.getName());
    conf.set("MahalanobisDistanceMeasure.vectorClass", VectorWritable.class.getName());
   
    Integer maxIterations = 5;
    Path outputPath = getTestTempDirPath("output");
    String[] args = {optKey(DefaultOptionCreator.INPUT_OPTION), getTestTempDirPath("input").toString(),
        optKey(DefaultOptionCreator.OUTPUT_OPTION), outputPath.toString(),
        optKey(DirichletDriver.MODEL_DISTRIBUTION_CLASS_OPTION), description.getModelFactory(),
        optKey(DefaultOptionCreator.DISTANCE_MEASURE_OPTION), description.getDistanceMeasure(),
        optKey(DirichletDriver.MODEL_PROTOTYPE_CLASS_OPTION), description.getModelPrototype(),
        optKey(DefaultOptionCreator.NUM_CLUSTERS_OPTION), "20", optKey(DefaultOptionCreator.MAX_ITERATIONS_OPTION),
        maxIterations.toString(), optKey(DirichletDriver.ALPHA_OPTION), "1.0",
        optKey(DefaultOptionCreator.OVERWRITE_OPTION), optKey(DefaultOptionCreator.CLUSTERING_OPTION),
        optKey(DefaultOptionCreator.METHOD_OPTION), DefaultOptionCreator.SEQUENTIAL_METHOD};
    DirichletDriver dirichletDriver = new DirichletDriver();
View Full Code Here

    ClusteringTestUtils.writePointsToFile(sampleData, true, getTestTempFilePath("input/data.txt"), fs, conf);
    // Now run the driver using the run() method. Others can use runJob() as
    // before
   
    MahalanobisDistanceMeasure measure = new MahalanobisDistanceMeasure();
    DistributionDescription description = new DistributionDescription(
        DistanceMeasureClusterDistribution.class.getName(), DenseVector.class.getName(),
        MahalanobisDistanceMeasure.class.getName(), 2);
   
    Vector meanVector = new DenseVector(new double[] {0.0, 0.0});
    measure.setMeanVector(meanVector);
    Matrix m = new DenseMatrix(new double[][] { {0.5, 0.0}, {0.0, 4.0}});
    measure.setCovarianceMatrix(m);
   
    Path inverseCovarianceFile = new Path(getTestTempDirPath("mahalanobis"),
        "MahalanobisDistanceMeasureInverseCovarianceFile");
    conf.set("MahalanobisDistanceMeasure.inverseCovarianceFile", inverseCovarianceFile.toString());
    FileSystem fs = FileSystem.get(inverseCovarianceFile.toUri(), conf);
    MatrixWritable inverseCovarianceMatrix = new MatrixWritable(measure.getInverseCovarianceMatrix());
    DataOutputStream out = fs.create(inverseCovarianceFile);
    try {
      inverseCovarianceMatrix.write(out);
    } finally {
      Closeables.close(out, false);
    }
   
    Path meanVectorFile = new Path(getTestTempDirPath("mahalanobis"), "MahalanobisDistanceMeasureMeanVectorFile");
    conf.set("MahalanobisDistanceMeasure.meanVectorFile", meanVectorFile.toString());
    fs = FileSystem.get(meanVectorFile.toUri(), conf);
    VectorWritable meanVectorWritable = new VectorWritable(meanVector);
    out = fs.create(meanVectorFile);
    try {
      meanVectorWritable.write(out);
    } finally {
      Closeables.close(out, false);
    }
   
    conf.set("MahalanobisDistanceMeasure.maxtrixClass", MatrixWritable.class.getName());
    conf.set("MahalanobisDistanceMeasure.vectorClass", VectorWritable.class.getName());
   
    Integer maxIterations = 5;
    Path outputPath = getTestTempDirPath("output");
    String[] args = {optKey(DefaultOptionCreator.INPUT_OPTION), getTestTempDirPath("input").toString(),
        optKey(DefaultOptionCreator.OUTPUT_OPTION), outputPath.toString(),
        optKey(DirichletDriver.MODEL_DISTRIBUTION_CLASS_OPTION), description.getModelFactory(),
        optKey(DefaultOptionCreator.DISTANCE_MEASURE_OPTION), description.getDistanceMeasure(),
        optKey(DirichletDriver.MODEL_PROTOTYPE_CLASS_OPTION), description.getModelPrototype(),
        optKey(DefaultOptionCreator.NUM_CLUSTERS_OPTION), "20", optKey(DefaultOptionCreator.MAX_ITERATIONS_OPTION),
        maxIterations.toString(), optKey(DirichletDriver.ALPHA_OPTION), "1.0",
        optKey(DefaultOptionCreator.OVERWRITE_OPTION), optKey(DefaultOptionCreator.CLUSTERING_OPTION)};
    Tool dirichletDriver = new DirichletDriver();
    dirichletDriver.setConf(conf);
View Full Code Here

TOP

Related Classes of org.apache.mahout.clustering.dirichlet.models.DistributionDescription

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.