Package org.apache.mahout.clustering.classify

Examples of org.apache.mahout.clustering.classify.ClusterClassifier


    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(pointsPath.toUri(), conf);
    List<VectorWritable> points = TestKmeansClustering.getPointsWritable(TestKmeansClustering.REFERENCE);
    ClusteringTestUtils.writePointsToFile(points, new Path(pointsPath, "file1"), fs, conf);
    Path path = new Path(priorPath, "priorClassifier");
    ClusterClassifier prior = newKlusterClassifier();
    prior.writeToSeqFiles(path);
    ClusteringPolicy policy = new KMeansClusteringPolicy();
    ClusterClassifier.writePolicy(policy, path);
    assertEquals(3, prior.getModels().size());
    System.out.println("Prior");
    for (Cluster cluster : prior.getModels()) {
      System.out.println(cluster.asFormatString(null));
    }
    new ClusterIterator().iterateMR(conf, pointsPath, path, outPath, 5);
   
    for (int i = 1; i <= 4; i++) {
      System.out.println("Classifier-" + i);
      ClusterClassifier posterior = new ClusterClassifier();
      String name = i == 4 ? "clusters-4-final" : "clusters-" + i;
      posterior.readFromSeqFiles(conf, new Path(outPath, name));
      assertEquals(3, posterior.getModels().size());
      for (Cluster cluster : posterior.getModels()) {
        System.out.println(cluster.asFormatString(null));
      }    
    }
  }
View Full Code Here


    }
  }
 
  @Test
  public void testCosineKlusterClassification() {
    ClusterClassifier classifier = newCosineKlusterClassifier();
    Vector pdf = classifier.classify(new DenseVector(2));
    assertEquals("[0,0]", "[0.333, 0.333, 0.333]", AbstractCluster.formatVector(pdf, null));
    pdf = classifier.classify(new DenseVector(2).assign(2));
    assertEquals("[2,2]", "[0.545, 0.273, 0.182]", AbstractCluster.formatVector(pdf, null));
  }
View Full Code Here

      runSequentialDirichletClusterer(input, output, modelDist, numClusters, numIterations, alpha0);
    } else {
      runSequentialDirichletClassifier(input, output, modelDist, numClusters, numIterations, alpha0);
    }
    for (int i = 1; i <= numIterations; i++) {
      ClusterClassifier posterior = new ClusterClassifier();
      String name = i == numIterations ? "clusters-" + i + "-final" : "clusters-" + i;
      posterior.readFromSeqFiles(new Configuration(), new Path(output, name));
      List<Cluster> clusters = Lists.newArrayList();
      for (Cluster cluster : posterior.getModels()) {
        if (isSignificant(cluster)) {
          clusters.add(cluster);
        }
      }
      CLUSTERS.add(clusters);
View Full Code Here

      throws IOException {
    List<Cluster> models = Lists.newArrayList();
    for (Model<VectorWritable> cluster : modelDist.sampleFromPrior(numClusters)) {
      models.add((Cluster) cluster);
    }
    ClusterClassifier prior = new ClusterClassifier(models, new DirichletClusteringPolicy(numClusters, alpha0));
    Path priorPath = new Path(output, Cluster.INITIAL_CLUSTERS_DIR);
    prior.writeToSeqFiles(priorPath);
    Configuration conf = new Configuration();
    new ClusterIterator().iterateSeq(conf, input, priorPath, output, numIterations);
  }
View Full Code Here

  }
 
  private List<List<Cluster>> getClusters(Path output, int numIterations) throws IOException {
    List<List<Cluster>> result = new ArrayList<List<Cluster>>();
    for (int i = 1; i <= numIterations; i++) {
      ClusterClassifier posterior = new ClusterClassifier();
      String name = i == numIterations ? "clusters-" + i + "-final" : "clusters-" + i;
      posterior.readFromSeqFiles(conf, new Path(output, name));
      List<Cluster> clusters = Lists.newArrayList();
      for (Cluster cluster : posterior.getModels()) {
        clusters.add(cluster);
      }
      result.add(clusters);
    }
    return result;
View Full Code Here

    List<Cluster> initialClusters = Lists.newArrayList();
    int id = 0;
    for (Vector point : points) {
      initialClusters.add(new org.apache.mahout.clustering.kmeans.Kluster(point, id++, measure));
    }
    ClusterClassifier prior = new ClusterClassifier(initialClusters, new KMeansClusteringPolicy(convergenceDelta));
    Path priorPath = new Path(output, Cluster.INITIAL_CLUSTERS_DIR);
    prior.writeToSeqFiles(priorPath);
   
    int maxIter = 10;
    new ClusterIterator().iterateSeq(conf, samples, priorPath, output, maxIter);
    loadClustersWritable(output);
  }
View Full Code Here

    List<Cluster> models = Lists.newArrayList();
    DistanceMeasure measure = new ManhattanDistanceMeasure();
    models.add(new DistanceMeasureCluster(new DenseVector(2).assign(1), 0, measure));
    models.add(new DistanceMeasureCluster(new DenseVector(2), 1, measure));
    models.add(new DistanceMeasureCluster(new DenseVector(2).assign(-1), 2, measure));
    return new ClusterClassifier(models, new KMeansClusteringPolicy());
  }
View Full Code Here

    List<Cluster> models = Lists.newArrayList();
    DistanceMeasure measure = new ManhattanDistanceMeasure();
    models.add(new org.apache.mahout.clustering.kmeans.Kluster(new DenseVector(2).assign(1), 0, measure));
    models.add(new org.apache.mahout.clustering.kmeans.Kluster(new DenseVector(2), 1, measure));
    models.add(new org.apache.mahout.clustering.kmeans.Kluster(new DenseVector(2).assign(-1), 2, measure));
    return new ClusterClassifier(models, new KMeansClusteringPolicy());
  }
View Full Code Here

    List<Cluster> models = Lists.newArrayList();
    DistanceMeasure measure = new CosineDistanceMeasure();
    models.add(new org.apache.mahout.clustering.kmeans.Kluster(new DenseVector(2).assign(1), 0, measure));
    models.add(new org.apache.mahout.clustering.kmeans.Kluster(new DenseVector(2), 1, measure));
    models.add(new org.apache.mahout.clustering.kmeans.Kluster(new DenseVector(2).assign(-1), 2, measure));
    return new ClusterClassifier(models, new KMeansClusteringPolicy());
  }
View Full Code Here

    List<Cluster> models = Lists.newArrayList();
    DistanceMeasure measure = new ManhattanDistanceMeasure();
    models.add(new SoftCluster(new DenseVector(2).assign(1), 0, measure));
    models.add(new SoftCluster(new DenseVector(2), 1, measure));
    models.add(new SoftCluster(new DenseVector(2).assign(-1), 2, measure));
    return new ClusterClassifier(models, new FuzzyKMeansClusteringPolicy());
  }
View Full Code Here

TOP

Related Classes of org.apache.mahout.clustering.classify.ClusterClassifier

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.