Package org.apache.mahout.clustering.dirichlet.models

Examples of org.apache.mahout.clustering.dirichlet.models.AbstractVectorModelDistribution


    boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
    boolean runSequential =
        getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
    int prototypeSize = readPrototypeSize(input);

    AbstractVectorModelDistribution modelDistribution = createModelDistribution(modelFactory,
                                                                                modelPrototype,
                                                                                distanceMeasure,
                                                                                prototypeSize);

    run(getConf(),
View Full Code Here


    throws ClassNotFoundException, InstantiationException, IllegalAccessException,
    NoSuchMethodException, InvocationTargetException {
    ClassLoader ccl = Thread.currentThread().getContextClassLoader();
    Class<? extends AbstractVectorModelDistribution> cl = ccl.loadClass(modelFactory)
        .asSubclass(AbstractVectorModelDistribution.class);
    AbstractVectorModelDistribution modelDistribution = cl.newInstance();

    Class<? extends Vector> vcl = ccl.loadClass(modelPrototype).asSubclass(Vector.class);
    Constructor<? extends Vector> v = vcl.getConstructor(int.class);
    modelDistribution.setModelPrototype(new VectorWritable(v.newInstance(prototypeSize)));

    if (modelDistribution instanceof DistanceMeasureClusterDistribution) {
      Class<? extends DistanceMeasure> measureCl = ccl.loadClass(distanceMeasure).asSubclass(DistanceMeasure.class);
      DistanceMeasure measure = measureCl.newInstance();
      ((DistanceMeasureClusterDistribution) modelDistribution).setMeasure(measure);
View Full Code Here

  @Test
  public void testDirichlet() throws Exception {
    Path output = getTestTempDirPath("output");
    NamedVector prototype = (NamedVector) sampleData.get(0).get();
    AbstractVectorModelDistribution modelDistribution = new SampledNormalDistribution(new VectorWritable(prototype));
    Configuration conf = new Configuration();
    DirichletDriver.run(conf, getTestTempDirPath("testdata"), output, modelDistribution, 15, 10, 1.0, true, true, 0, false);
    // run ClusterDumper
    ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(conf, output, 10), new Path(output, "clusteredPoints"));
    clusterDumper.printClusters(termDictionary);
View Full Code Here

  @Test
  public void testDirichlet2() throws Exception {
    Path output = getTestTempDirPath("output");
    NamedVector prototype = (NamedVector) sampleData.get(0).get();
    AbstractVectorModelDistribution modelDistribution = new GaussianClusterDistribution(new VectorWritable(prototype));
    Configuration conf = new Configuration();
    DirichletDriver.run(conf, getTestTempDirPath("testdata"), output, modelDistribution, 15, 10, 1.0, true, true, 0, true);
    // run ClusterDumper
    ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(conf, output, 10), new Path(output, "clusteredPoints"));
    clusterDumper.printClusters(termDictionary);
View Full Code Here

  @Test
  public void testDirichlet3() throws Exception {
    Path output = getTestTempDirPath("output");
    NamedVector prototype = (NamedVector) sampleData.get(0).get();
    AbstractVectorModelDistribution modelDistribution = new DistanceMeasureClusterDistribution(new VectorWritable(prototype));
    Configuration conf = new Configuration();
    DirichletDriver.run(conf, getTestTempDirPath("testdata"), output, modelDistribution, 15, 10, 1.0, true, true, 0, true);
    // run ClusterDumper
    ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(conf, output, 10), new Path(output, "clusteredPoints"));
    clusterDumper.printClusters(termDictionary);
View Full Code Here

    generateSamples(100, 0, 2, 0.3);
    generateSamples(100, 2, 2, 1);
    ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("input/data.txt"), fs, conf);
    // Now run the driver using the run() method. Others can use runJob() as before
    Integer maxIterations = 5;
    AbstractVectorModelDistribution modelDistribution = new SampledNormalDistribution(new VectorWritable(new DenseVector(2)));
    String[] args = { optKey(DefaultOptionCreator.INPUT_OPTION), getTestTempDirPath("input").toString(),
        optKey(DefaultOptionCreator.OUTPUT_OPTION), getTestTempDirPath("output").toString(),
        optKey(DirichletDriver.MODEL_DISTRIBUTION_CLASS_OPTION), modelDistribution.getClass().getName(),
        optKey(DirichletDriver.MODEL_PROTOTYPE_CLASS_OPTION), modelDistribution.getModelPrototype().get().getClass().getName(),
        optKey(DefaultOptionCreator.NUM_CLUSTERS_OPTION), "20", optKey(DefaultOptionCreator.MAX_ITERATIONS_OPTION),
        maxIterations.toString(), optKey(DirichletDriver.ALPHA_OPTION), "1.0", optKey(DefaultOptionCreator.OVERWRITE_OPTION),
        optKey(DefaultOptionCreator.CLUSTERING_OPTION), optKey(DefaultOptionCreator.METHOD_OPTION),
        DefaultOptionCreator.SEQUENTIAL_METHOD };
    new DirichletDriver().run(args);
    // and inspect results
    Collection<List<DirichletCluster>> clusters = new ArrayList<List<DirichletCluster>>();
    Configuration conf = new Configuration();
    conf.set(DirichletDriver.MODEL_DISTRIBUTION_KEY, modelDistribution.asJsonString());
    conf.set(DirichletDriver.NUM_CLUSTERS_KEY, "20");
    conf.set(DirichletDriver.ALPHA_0_KEY, "1.0");
    for (int i = 0; i <= maxIterations; i++) {
      conf.set(DirichletDriver.STATE_IN_KEY, new Path(getTestTempDirPath("output"), "clusters-" + i).toString());
      clusters.add(DirichletMapper.getDirichletState(conf).getClusters());
View Full Code Here

    generateSamples(100, 0, 2, 0.3);
    generateSamples(100, 2, 2, 1);
    ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("input/data.txt"), fs, conf);
    // Now run the driver using the run() method. Others can use runJob() as before
    Integer maxIterations = 5;
    AbstractVectorModelDistribution modelDistribution = new SampledNormalDistribution(new VectorWritable(new DenseVector(2)));
    String[] args = { optKey(DefaultOptionCreator.INPUT_OPTION), getTestTempDirPath("input").toString(),
        optKey(DefaultOptionCreator.OUTPUT_OPTION), getTestTempDirPath("output").toString(),
        optKey(DirichletDriver.MODEL_DISTRIBUTION_CLASS_OPTION), modelDistribution.getClass().getName(),
        optKey(DirichletDriver.MODEL_PROTOTYPE_CLASS_OPTION), modelDistribution.getModelPrototype().get().getClass().getName(),
        optKey(DefaultOptionCreator.NUM_CLUSTERS_OPTION), "20", optKey(DefaultOptionCreator.MAX_ITERATIONS_OPTION),
        maxIterations.toString(), optKey(DirichletDriver.ALPHA_OPTION), "1.0", optKey(DefaultOptionCreator.OVERWRITE_OPTION),
        optKey(DefaultOptionCreator.CLUSTERING_OPTION) };
    ToolRunner.run(new Configuration(), new DirichletDriver(), args);
    // and inspect results
    Collection<List<DirichletCluster>> clusters = new ArrayList<List<DirichletCluster>>();
    Configuration conf = new Configuration();
    conf.set(DirichletDriver.MODEL_DISTRIBUTION_KEY, modelDistribution.asJsonString());
    conf.set(DirichletDriver.NUM_CLUSTERS_KEY, "20");
    conf.set(DirichletDriver.ALPHA_0_KEY, "1.0");
    for (int i = 0; i <= maxIterations; i++) {
      conf.set(DirichletDriver.STATE_IN_KEY, new Path(getTestTempDirPath("output"), "clusters-" + i).toString());
      clusters.add(DirichletMapper.getDirichletState(conf).getClusters());
View Full Code Here

  @Test
  public void testDriverMnRIterations() throws Exception {
    generate4Datasets();
    // Now run the driver
    int maxIterations = 3;
    AbstractVectorModelDistribution modelDistribution = new SampledNormalDistribution(new VectorWritable(new DenseVector(2)));
    Configuration conf = new Configuration();
    DirichletDriver.run(conf,
                        getTestTempDirPath("input"),
                        getTestTempDirPath("output"),
                        modelDistribution,
                        20,
                        maxIterations,
                        1.0,
                        false,
                        true,
                        0,
                        false);
    // and inspect results
    List<List<DirichletCluster>> clusters = new ArrayList<List<DirichletCluster>>();
    conf.set(DirichletDriver.MODEL_DISTRIBUTION_KEY, modelDistribution.asJsonString());
    conf.set(DirichletDriver.NUM_CLUSTERS_KEY, "20");
    conf.set(DirichletDriver.ALPHA_0_KEY, "1.0");
    for (int i = 0; i <= maxIterations; i++) {
      conf.set(DirichletDriver.STATE_IN_KEY, new Path(getTestTempDirPath("output"), "clusters-" + i).toString());
      clusters.add(DirichletMapper.getDirichletState(conf).getClusters());
View Full Code Here

    int numModels = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION));
    int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
    boolean emitMostLikely = Boolean.parseBoolean(getOption(DefaultOptionCreator.EMIT_MOST_LIKELY_OPTION));
    double threshold = Double.parseDouble(getOption(DefaultOptionCreator.THRESHOLD_OPTION));
    double alpha0 = Double.parseDouble(getOption(DirichletDriver.ALPHA_OPTION));
    AbstractVectorModelDistribution modelDistribution = DirichletDriver.createModelDistribution(modelFactory,
                                                                                                modelPrototype,
                                                                                                distanceMeasure,
                                                                                                60);

    run(input, output, modelDistribution, numModels, maxIterations, alpha0, emitMostLikely, threshold);
View Full Code Here

TOP

Related Classes of org.apache.mahout.clustering.dirichlet.models.AbstractVectorModelDistribution

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Inc. Contact coftware#gmail.com.