Package org.apache.mahout.clustering.dirichlet.models

Examples of org.apache.mahout.clustering.dirichlet.models.DistributionDescription


 
  @Test
  public void testDirichlet() throws Exception {
    ClusteringTestUtils.writePointsToFile(sampleData, new Path(testdata,
        "file1"), fs, conf);
    DistributionDescription description = new DistributionDescription(
        GaussianClusterDistribution.class.getName(),
        DenseVector.class.getName(), null, 2);
    DirichletDriver.run(testdata, output, description, 15, 5, 1.0, true, true,
        0, true);
    int numIterations = 10;
View Full Code Here


 
  @Test
  public void testDirichlet() throws Exception {
    ClusteringTestUtils.writePointsToFile(sampleData,
        getTestTempFilePath("testdata/file1"), fs, conf);
    DistributionDescription description = new DistributionDescription(
        GaussianClusterDistribution.class.getName(),
        DenseVector.class.getName(), null, 2);
    DirichletDriver.run(testdata, output, description, 15, 5, 1.0, true, true,
        0, true);
    int numIterations = 10;
View Full Code Here

      ToolRunner.run(new Configuration(), new Job(), args);
    } else {
      log.info("Running with default arguments");
      Path output = new Path("output");
      HadoopUtil.delete(new Configuration(), output);
      DistributionDescription description = new DistributionDescription(GaussianClusterDistribution.class.getName(),
          RandomAccessSparseVector.class.getName(), null, 60);
      run(new Path("testdata"), output, description, 10, 5, 1.0, true, 0);
    }
  }
View Full Code Here

    int numModels = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION));
    int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
    boolean emitMostLikely = Boolean.parseBoolean(getOption(DefaultOptionCreator.EMIT_MOST_LIKELY_OPTION));
    double threshold = Double.parseDouble(getOption(DefaultOptionCreator.THRESHOLD_OPTION));
    double alpha0 = Double.parseDouble(getOption(DirichletDriver.ALPHA_OPTION));
    DistributionDescription description = new DistributionDescription(modelFactory, modelPrototype, distanceMeasure,
        60);
   
    run(input, output, description, numModels, maxIterations, alpha0, emitMostLikely, threshold);
    return 0;
  }
View Full Code Here

    boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
    boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(
        DefaultOptionCreator.SEQUENTIAL_METHOD);
    int prototypeSize = readPrototypeSize(input);
   
    DistributionDescription description = new DistributionDescription(modelFactory, modelPrototype, distanceMeasure,
        prototypeSize);
   
    run(getConf(), input, output, description, numModels, maxIterations, alpha0, runClustering, emitMostLikely,
        threshold, runSequential);
    return 0;
View Full Code Here

    ClusteringTestUtils.writePointsToFile(sampleData,
            getTestTempFilePath("testdata/file1"), fs, conf);

    DenseVector prototype = (DenseVector) sampleData.get(0).get();
   
    DistributionDescription description = new DistributionDescription(
        DistanceMeasureClusterDistribution.class.getName(),
        RandomAccessSparseVector.class.getName(),
        ManhattanDistanceMeasure.class.getName(), prototype.size());
   
    DirichletDriver.run(conf, getTestTempDirPath("testdata"), output,
View Full Code Here

    ClusteringTestUtils.writePointsToFile(sampleData, true,
            getTestTempFilePath("testdata/file1"), fs, conf);

    DenseVector prototype = (DenseVector) sampleData.get(0).get();
   
    DistributionDescription description = new DistributionDescription(
        DistanceMeasureClusterDistribution.class.getName(),
        RandomAccessSparseVector.class.getName(),
        ManhattanDistanceMeasure.class.getName(), prototype.size());
   
    DirichletDriver.run(conf, getTestTempDirPath("testdata"), output,
View Full Code Here

  }
 
  @Test
  public void testDirichlet() throws Exception {
    ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("testdata/file1"), fs, conf);
    DistributionDescription description = new DistributionDescription(GaussianClusterDistribution.class.getName(),
        DenseVector.class.getName(), null, 2);
    DirichletDriver.run(new Configuration(), testdata, output, description, 15, 5, 1.0, true, true, 0.0, true);
    int numIterations = 10;
    Path clustersIn = new Path(output, "clusters-0");
    RepresentativePointsDriver.run(conf, clustersIn, new Path(output, "clusteredPoints"), output,
View Full Code Here

  }
 
  private static void runSequentialDirichletClusterer(Path input, Path output,
      ModelDistribution<VectorWritable> modelDist, int numClusters, int numIterations, double alpha0)
    throws IOException, ClassNotFoundException, InterruptedException {
    DistributionDescription description = new DistributionDescription(modelDist.getClass().getName(),
        RandomAccessSparseVector.class.getName(), ManhattanDistanceMeasure.class.getName(), 2);
   
    DirichletDriver.run(new Configuration(), input, output, description, numClusters, numIterations, alpha0, true,
        true, 0, true);
  }
View Full Code Here

 
  @Test
  public void testDirichlet2() throws Exception {
    Path output = getTestTempDirPath("output");
    NamedVector prototype = (NamedVector) sampleData.get(0).get();
    DistributionDescription description = new DistributionDescription(
        GaussianClusterDistribution.class.getName(),
        RandomAccessSparseVector.class.getName(), null, prototype.getDelegate()
            .size());
    Configuration conf = new Configuration();
    DirichletDriver.run(conf, getTestTempDirPath("testdata"), output,
View Full Code Here

TOP

Related Classes of org.apache.mahout.clustering.dirichlet.models.DistributionDescription

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.