Package org.apache.mahout.clustering.dirichlet.models

Examples of org.apache.mahout.clustering.dirichlet.models.DistributionDescription


    boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
    boolean runSequential =
        getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
    int prototypeSize = readPrototypeSize(input);

    DistributionDescription description =
        new DistributionDescription(modelFactory, modelPrototype, distanceMeasure, prototypeSize);

    run(getConf(),
        input,
        output,
        description,
View Full Code Here


    generateSamples(100, 0, 2, 0.3);
    generateSamples(100, 2, 2, 1);
    ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("input/data.txt"), fs, conf);
    // Now run the driver using the run() method. Others can use runJob() as before
    Integer maxIterations = 5;
    DistributionDescription description =
        new DistributionDescription(GaussianClusterDistribution.class.getName(),
                                    DenseVector.class.getName(),
                                    null,
                                    2);
    String[] args = { optKey(DefaultOptionCreator.INPUT_OPTION), getTestTempDirPath("input").toString(),
        optKey(DefaultOptionCreator.OUTPUT_OPTION), getTestTempDirPath("output").toString(),
        optKey(DirichletDriver.MODEL_DISTRIBUTION_CLASS_OPTION), description.getModelFactory(),
        optKey(DirichletDriver.MODEL_PROTOTYPE_CLASS_OPTION), description.getModelPrototype(),
        optKey(DefaultOptionCreator.NUM_CLUSTERS_OPTION), "20", optKey(DefaultOptionCreator.MAX_ITERATIONS_OPTION),
        maxIterations.toString(), optKey(DirichletDriver.ALPHA_OPTION), "1.0",
        optKey(DefaultOptionCreator.OVERWRITE_OPTION),
        optKey(DefaultOptionCreator.CLUSTERING_OPTION), optKey(DefaultOptionCreator.METHOD_OPTION),
        DefaultOptionCreator.SEQUENTIAL_METHOD };
    DirichletDriver dirichletDriver = new DirichletDriver();
    dirichletDriver.setConf(conf);
    dirichletDriver.run(args);
    // and inspect results
    Collection<List<DirichletCluster>> clusters = Lists.newArrayList();
    Configuration conf = new Configuration();
    conf.set(DirichletDriver.MODEL_DISTRIBUTION_KEY, description.toString());
    conf.set(DirichletDriver.NUM_CLUSTERS_KEY, "20");
    conf.set(DirichletDriver.ALPHA_0_KEY, "1.0");
    for (int i = 0; i <= maxIterations; i++) {
      conf.set(DirichletDriver.STATE_IN_KEY, new Path(getTestTempDirPath("output"), "clusters-" + i).toString());
      clusters.add(DirichletMapper.getDirichletState(conf).getClusters());
View Full Code Here

    generateSamples(100, 0, 2, 0.3);
    generateSamples(100, 2, 2, 1);
    ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("input/data.txt"), fs, conf);
    // Now run the driver using the run() method. Others can use runJob() as before
    Integer maxIterations = 5;
    DistributionDescription description =
        new DistributionDescription(GaussianClusterDistribution.class.getName(),
                                    DenseVector.class.getName(),
                                    null,
                                    2);
    String[] args = { optKey(DefaultOptionCreator.INPUT_OPTION), getTestTempDirPath("input").toString(),
        optKey(DefaultOptionCreator.OUTPUT_OPTION), getTestTempDirPath("output").toString(),
        optKey(DirichletDriver.MODEL_DISTRIBUTION_CLASS_OPTION), description.getModelFactory(),
        optKey(DirichletDriver.MODEL_PROTOTYPE_CLASS_OPTION), description.getModelPrototype(),
        optKey(DefaultOptionCreator.NUM_CLUSTERS_OPTION), "20", optKey(DefaultOptionCreator.MAX_ITERATIONS_OPTION),
        maxIterations.toString(), optKey(DirichletDriver.ALPHA_OPTION), "1.0",
        optKey(DefaultOptionCreator.OVERWRITE_OPTION),
        optKey(DefaultOptionCreator.CLUSTERING_OPTION)};
    ToolRunner.run(new Configuration(), new DirichletDriver(), args);
    // and inspect results
    Collection<List<DirichletCluster>> clusters = Lists.newArrayList();
    Configuration conf = new Configuration();
    conf.set(DirichletDriver.MODEL_DISTRIBUTION_KEY, description.toString());
    conf.set(DirichletDriver.NUM_CLUSTERS_KEY, "20");
    conf.set(DirichletDriver.ALPHA_0_KEY, "1.0");
    for (int i = 0; i <= maxIterations; i++) {
      conf.set(DirichletDriver.STATE_IN_KEY, new Path(getTestTempDirPath("output"), "clusters-" + i).toString());
      clusters.add(DirichletMapper.getDirichletState(conf).getClusters());
View Full Code Here

  @Test
  public void testDriverMnRIterations() throws Exception {
    generate4Datasets();
    // Now run the driver
    int maxIterations = 3;
    DistributionDescription description =
        new DistributionDescription(GaussianClusterDistribution.class.getName(),
                                    DenseVector.class.getName(),
                                    null,
                                    2);
    Configuration conf = new Configuration();
    DirichletDriver.run(conf,
                        getTestTempDirPath("input"),
                        getTestTempDirPath("output"),
                        description,
                        20,
                        maxIterations,
                        1.0,
                        false,
                        true,
                        0,
                        false);
    // and inspect results
    Collection<List<DirichletCluster>> clusters = Lists.newArrayList();
    conf.set(DirichletDriver.MODEL_DISTRIBUTION_KEY, description.toString());
    conf.set(DirichletDriver.NUM_CLUSTERS_KEY, "20");
    conf.set(DirichletDriver.ALPHA_0_KEY, "1.0");
    for (int i = 0; i <= maxIterations; i++) {
      conf.set(DirichletDriver.STATE_IN_KEY, new Path(getTestTempDirPath("output"), "clusters-" + i).toString());
      clusters.add(DirichletMapper.getDirichletState(conf).getClusters());
View Full Code Here

    generateAsymmetricSamples(100, 0, 0, 0.5, 3.0);
    generateAsymmetricSamples(100, 0, 3, 0.3, 4.0);
    ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("input/data.txt"), fs, conf);
    // Now run the driver using the run() method. Others can use runJob() as before
    MahalanobisDistanceMeasure measure = new MahalanobisDistanceMeasure();
    DistributionDescription description =
        new DistributionDescription(DistanceMeasureClusterDistribution.class.getName(),
                                    DenseVector.class.getName(),
                                    MahalanobisDistanceMeasure.class.getName(),
                                    2);

    Vector meanVector = new DenseVector(new double[] { 0.0, 0.0 });
    measure.setMeanVector(meanVector);
    Matrix m= new DenseMatrix(new double [][] {{0.5, 0.0}, {0.0, 4.0}});
    measure.setCovarianceMatrix(m);

    Path inverseCovarianceFile =
        new Path(getTestTempDirPath("mahalanobis"), "MahalanobisDistanceMeasureInverseCovarianceFile");
    conf.set("MahalanobisDistanceMeasure.inverseCovarianceFile", inverseCovarianceFile.toString());
    FileSystem fs = FileSystem.get(inverseCovarianceFile.toUri(), conf);
    MatrixWritable inverseCovarianceMatrix = new MatrixWritable(measure.getInverseCovarianceMatrix());
    DataOutputStream out = fs.create(inverseCovarianceFile);
    try {
      inverseCovarianceMatrix.write(out);
    } finally {
      Closeables.closeQuietly(out);
    }

    Path meanVectorFile = new Path(getTestTempDirPath("mahalanobis"), "MahalanobisDistanceMeasureMeanVectorFile");
    conf.set("MahalanobisDistanceMeasure.meanVectorFile", meanVectorFile.toString());
    fs = FileSystem.get(meanVectorFile.toUri(), conf);
    VectorWritable meanVectorWritable = new VectorWritable(meanVector);
    out = fs.create(meanVectorFile);
    try {
      meanVectorWritable.write(out);
    } finally {
      Closeables.closeQuietly(out);
    }

    conf.set("MahalanobisDistanceMeasure.maxtrixClass", MatrixWritable.class.getName());
    conf.set("MahalanobisDistanceMeasure.vectorClass", VectorWritable.class.getName());

    Integer maxIterations = 5;
    String[] args = { optKey(DefaultOptionCreator.INPUT_OPTION), getTestTempDirPath("input").toString(),
        optKey(DefaultOptionCreator.OUTPUT_OPTION), getTestTempDirPath("output").toString(),
        optKey(DirichletDriver.MODEL_DISTRIBUTION_CLASS_OPTION), description.getModelFactory(),
        optKey(DefaultOptionCreator.DISTANCE_MEASURE_OPTION), description.getDistanceMeasure(),
        optKey(DirichletDriver.MODEL_PROTOTYPE_CLASS_OPTION), description.getModelPrototype(),
        optKey(DefaultOptionCreator.NUM_CLUSTERS_OPTION), "20", optKey(DefaultOptionCreator.MAX_ITERATIONS_OPTION),
        maxIterations.toString(), optKey(DirichletDriver.ALPHA_OPTION), "1.0",
        optKey(DefaultOptionCreator.OVERWRITE_OPTION),
        optKey(DefaultOptionCreator.CLUSTERING_OPTION), optKey(DefaultOptionCreator.METHOD_OPTION),
        DefaultOptionCreator.SEQUENTIAL_METHOD };
    DirichletDriver dirichletDriver = new DirichletDriver();
    dirichletDriver.setConf(conf);
    dirichletDriver.run(args);
    // and inspect results
    Collection<List<DirichletCluster>> clusters = Lists.newArrayList();
    Configuration conf = new Configuration();
    conf.set(DirichletDriver.MODEL_DISTRIBUTION_KEY, description.toString());
    conf.set(DirichletDriver.NUM_CLUSTERS_KEY, "20");
    conf.set(DirichletDriver.ALPHA_0_KEY, "1.0");
    for (int i = 0; i <= maxIterations; i++) {
      conf.set(DirichletDriver.STATE_IN_KEY, new Path(getTestTempDirPath("output"), "clusters-" + i).toString());
      clusters.add(DirichletMapper.getDirichletState(conf).getClusters());
View Full Code Here

    generateAsymmetricSamples(100, 0, 3, 0.3, 4.0);
    ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("input/data.txt"), fs, conf);
    // Now run the driver using the run() method. Others can use runJob() as before

    MahalanobisDistanceMeasure measure = new MahalanobisDistanceMeasure();
    DistributionDescription description =
        new DistributionDescription(DistanceMeasureClusterDistribution.class.getName(),
                                    DenseVector.class.getName(),
                                    MahalanobisDistanceMeasure.class.getName(),
                                    2);

    Vector meanVector = new DenseVector(new double[]{0.0, 0.0});
    measure.setMeanVector(meanVector);
    Matrix m = new DenseMatrix(new double [][] {{0.5, 0.0}, {0.0, 4.0}});
    measure.setCovarianceMatrix(m);

    Path inverseCovarianceFile =
        new Path(getTestTempDirPath("mahalanobis"), "MahalanobisDistanceMeasureInverseCovarianceFile");
    conf.set("MahalanobisDistanceMeasure.inverseCovarianceFile", inverseCovarianceFile.toString());
    FileSystem fs = FileSystem.get(inverseCovarianceFile.toUri(), conf);
    MatrixWritable inverseCovarianceMatrix = new MatrixWritable(measure.getInverseCovarianceMatrix());
    DataOutputStream out = fs.create(inverseCovarianceFile);
    try {
      inverseCovarianceMatrix.write(out);
    } finally {
      Closeables.closeQuietly(out);
    }

    Path meanVectorFile = new Path(getTestTempDirPath("mahalanobis"), "MahalanobisDistanceMeasureMeanVectorFile");
    conf.set("MahalanobisDistanceMeasure.meanVectorFile", meanVectorFile.toString());
    fs = FileSystem.get(meanVectorFile.toUri(), conf);
    VectorWritable meanVectorWritable = new VectorWritable(meanVector);
    out = fs.create(meanVectorFile);
    try {
      meanVectorWritable.write(out);
    } finally {
      Closeables.closeQuietly(out);
    }

    conf.set("MahalanobisDistanceMeasure.maxtrixClass", MatrixWritable.class.getName());
    conf.set("MahalanobisDistanceMeasure.vectorClass", VectorWritable.class.getName());

    Integer maxIterations = 5;
    String[] args = { optKey(DefaultOptionCreator.INPUT_OPTION), getTestTempDirPath("input").toString(),
        optKey(DefaultOptionCreator.OUTPUT_OPTION), getTestTempDirPath("output").toString(),
        optKey(DirichletDriver.MODEL_DISTRIBUTION_CLASS_OPTION), description.getModelFactory(),
        optKey(DefaultOptionCreator.DISTANCE_MEASURE_OPTION), description.getDistanceMeasure(),
        optKey(DirichletDriver.MODEL_PROTOTYPE_CLASS_OPTION), description.getModelPrototype(),
        optKey(DefaultOptionCreator.NUM_CLUSTERS_OPTION), "20", optKey(DefaultOptionCreator.MAX_ITERATIONS_OPTION),
        maxIterations.toString(), optKey(DirichletDriver.ALPHA_OPTION), "1.0",
        optKey(DefaultOptionCreator.OVERWRITE_OPTION),
        optKey(DefaultOptionCreator.CLUSTERING_OPTION)};
    Tool dirichletDriver = new DirichletDriver();
    dirichletDriver.setConf(conf);
    ToolRunner.run(conf, dirichletDriver, args);
    // and inspect results
    Collection<List<DirichletCluster>> clusters = Lists.newArrayList();
    Configuration conf = new Configuration();
    conf.set(DirichletDriver.MODEL_DISTRIBUTION_KEY, description.toString());
    conf.set(DirichletDriver.NUM_CLUSTERS_KEY, "20");
    conf.set(DirichletDriver.ALPHA_0_KEY, "1.0");
    for (int i = 0; i <= maxIterations; i++) {
      conf.set(DirichletDriver.STATE_IN_KEY, new Path(getTestTempDirPath("output"), "clusters-" + i).toString());
      clusters.add(DirichletMapper.getDirichletState(conf).getClusters());
View Full Code Here

 
  @Test
  public void testDirichlet2() throws Exception {
    Path output = getTestTempDirPath("output");
    NamedVector prototype = (NamedVector) sampleData.get(0).get();
    DistributionDescription description = new DistributionDescription(
        GaussianClusterDistribution.class.getName(),
        RandomAccessSparseVector.class.getName(), null, prototype.getDelegate()
            .size());
    Configuration conf = new Configuration();
    DirichletDriver.run(conf, getTestTempDirPath("testdata"), output,
View Full Code Here

 
  @Test
  public void testDirichlet3() throws Exception {
    Path output = getTestTempDirPath("output");
    NamedVector prototype = (NamedVector) sampleData.get(0).get();
    DistributionDescription description = new DistributionDescription(
        DistanceMeasureClusterDistribution.class.getName(),
        RandomAccessSparseVector.class.getName(),
        ManhattanDistanceMeasure.class.getName(), prototype.getDelegate()
            .size());
    Configuration conf = new Configuration();
View Full Code Here

      ToolRunner.run(new Configuration(), new Job(), args);
    } else {
      log.info("Running with default arguments");
      Path output = new Path("output");
      HadoopUtil.delete(new Configuration(), output);
      DistributionDescription description =
          new DistributionDescription(GaussianClusterDistribution.class.getName(),
                                      RandomAccessSparseVector.class.getName(),
                                      null,
                                      60);
      run(new Path("testdata"), output, description, 10, 5, 1.0, true, 0);
    }
View Full Code Here

    int numModels = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION));
    int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
    boolean emitMostLikely = Boolean.parseBoolean(getOption(DefaultOptionCreator.EMIT_MOST_LIKELY_OPTION));
    double threshold = Double.parseDouble(getOption(DefaultOptionCreator.THRESHOLD_OPTION));
    double alpha0 = Double.parseDouble(getOption(DirichletDriver.ALPHA_OPTION));
    DistributionDescription description =
        new DistributionDescription(modelFactory, modelPrototype, distanceMeasure, 60);

    run(input, output, description, numModels, maxIterations, alpha0, emitMostLikely, threshold);
    return 0;
  }
View Full Code Here

TOP

Related Classes of org.apache.mahout.clustering.dirichlet.models.DistributionDescription

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.