Package org.apache.mahout.clustering.canopy

Examples of org.apache.mahout.clustering.canopy.Canopy


  @Test
  public void testCanopyAsFormatStringSparseWithBindings() {
    double[] d = { 1.1, 0.0, 3.3 };
    Vector m = new SequentialAccessSparseVector(3);
    m.assign(d);
    Cluster cluster = new Canopy(m, 123, measure);
    String formatString = cluster.asFormatString(null);
    assertEquals("C-123{n=0 c=[0:1.100, 2:3.300] r=[]}", formatString);
  }
View Full Code Here


 
  @Test
  public void testCanopyClassification() {
    List<Cluster> models = Lists.newArrayList();
    DistanceMeasure measure = new ManhattanDistanceMeasure();
    models.add(new Canopy(new DenseVector(2).assign(1), 0, measure));
    models.add(new Canopy(new DenseVector(2), 1, measure));
    models.add(new Canopy(new DenseVector(2).assign(-1), 2, measure));
    ClusterClassifier classifier = new ClusterClassifier(models, new CanopyClusteringPolicy());
    Vector pdf = classifier.classify(new DenseVector(2));
    assertEquals("[0,0]", "[0.200, 0.600, 0.200]", AbstractCluster.formatVector(pdf, null));
    pdf = classifier.classify(new DenseVector(2).assign(2));
    assertEquals("[2,2]", "[0.493, 0.296, 0.211]", AbstractCluster.formatVector(pdf, null));
View Full Code Here

   * @param measure
   *          the DistanceMeasure
   */
  private void initData(double dC, double dP, DistanceMeasure measure) {
    clusters = Lists.newArrayList();
    clusters.add(new Canopy(new DenseVector(new double[] {-dC, -dC}), 1, measure));
    clusters.add(new Canopy(new DenseVector(new double[] {-dC, dC}), 3, measure));
    clusters.add(new Canopy(new DenseVector(new double[] {dC, dC}), 5, measure));
    clusters.add(new Canopy(new DenseVector(new double[] {dC, -dC}), 7, measure));
    representativePoints = Maps.newHashMap();
    for (Cluster cluster : clusters) {
      List<VectorWritable> points = Lists.newArrayList();
      representativePoints.put(cluster.getId(), points);
      points.add(new VectorWritable(cluster.getCenter().clone()));
View Full Code Here

  @Test
  public void testEmptyCluster() throws IOException {
    ClusteringTestUtils.writePointsToFile(referenceData, new Path(testdata, "file1"), fs, conf);
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    initData(1, 0.25, measure);
    Canopy cluster = new Canopy(new DenseVector(new double[] {10, 10}), 19, measure);
    clusters.add(cluster);
    List<VectorWritable> points = Lists.newArrayList();
    representativePoints.put(cluster.getId(), points);
    ClusterEvaluator evaluator = new ClusterEvaluator(representativePoints, clusters, measure);
    assertEquals("inter cluster density", 0.371534146934532, evaluator.interClusterDensity(), EPSILON);
    assertEquals("intra cluster density", 0.3656854249492381, evaluator.intraClusterDensity(), EPSILON);
  }
View Full Code Here

  @Test
  public void testSingleValueCluster() throws IOException {
    ClusteringTestUtils.writePointsToFile(referenceData, new Path(testdata, "file1"), fs, conf);
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    initData(1, 0.25, measure);
    Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19, measure);
    clusters.add(cluster);
    List<VectorWritable> points = Lists.newArrayList();
    points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] {1, 1}))));
    representativePoints.put(cluster.getId(), points);
    ClusterEvaluator evaluator = new ClusterEvaluator(representativePoints, clusters, measure);
    assertEquals("inter cluster density", 0.3656854249492381, evaluator.interClusterDensity(), EPSILON);
    assertEquals("intra cluster density", 0.3656854249492381, evaluator.intraClusterDensity(), EPSILON);
  }
View Full Code Here

  @Test
  public void testAllSameValueCluster() throws IOException {
    ClusteringTestUtils.writePointsToFile(referenceData, new Path(testdata, "file1"), fs, conf);
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    initData(1, 0.25, measure);
    Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19, measure);
    clusters.add(cluster);
    List<VectorWritable> points = Lists.newArrayList();
    points.add(new VectorWritable(cluster.getCenter()));
    points.add(new VectorWritable(cluster.getCenter()));
    points.add(new VectorWritable(cluster.getCenter()));
    representativePoints.put(cluster.getId(), points);
    ClusterEvaluator evaluator = new ClusterEvaluator(representativePoints, clusters, measure);
    assertEquals("inter cluster density", 0.3656854249492381, evaluator.interClusterDensity(), EPSILON);
    assertEquals("intra cluster density", 0.3656854249492381, evaluator.intraClusterDensity(), EPSILON);
  }
View Full Code Here

     */
    while (!points.isEmpty()) {
      Iterator<Vector> ptIter = points.iterator();
      Vector p1 = ptIter.next();
      ptIter.remove();
      Canopy canopy = new Canopy(p1);
      canopies.add(canopy);
      while (ptIter.hasNext()) {
        Vector p2 = ptIter.next();
        double dist = measure.distance(p1, p2);
        // Put all points that are within distance threshold T1 into the canopy
        if (dist < t1)
          canopy.addPoint(p2);
        // Remove from the list all points that are within distance threshold T2
        if (dist < t2)
          ptIter.remove();
      }
    }
View Full Code Here

     */
    while (!points.isEmpty()) {
      Iterator<Vector> ptIter = points.iterator();
      Vector p1 = ptIter.next();
      ptIter.remove();
      Canopy canopy = new Canopy(p1);
      canopies.add(canopy);
      while (ptIter.hasNext()) {
        Vector p2 = ptIter.next();
        double dist = measure.distance(p1, p2);
        // Put all points that are within distance threshold T1 into the canopy
        if (dist < t1)
          canopy.addPoint(p2);
        // Remove from the list all points that are within distance threshold T2
        if (dist < t2)
          ptIter.remove();
      }
    }
View Full Code Here

              // get the cluster info
              clusters.add(value);
              value = new Cluster();
            }
          } else if (valueClass.equals(Canopy.class)) {
            Canopy value = new Canopy();
            while (reader.next(key, value)) {
              // get the cluster info
              Cluster cluster = new Cluster(value.getCenter(), value.getId());
              clusters.add(cluster);
              value = new Canopy();
            }
          }
        } finally {
          IOUtils.quietClose(reader);
        }
View Full Code Here

   * @param dP a double representative point offset
   * @param measure the DistanceMeasure
   */
  private void initData(double dC, double dP, DistanceMeasure measure) {
    clusters = new ArrayList<Cluster>();
    clusters.add(new Canopy(new DenseVector(new double[] { -dC, -dC }), 1, measure));
    clusters.add(new Canopy(new DenseVector(new double[] { -dC, dC }), 3, measure));
    clusters.add(new Canopy(new DenseVector(new double[] { dC, dC }), 5, measure));
    clusters.add(new Canopy(new DenseVector(new double[] { dC, -dC }), 7, measure));
    representativePoints = new HashMap<Integer, List<VectorWritable>>();
    for (Cluster cluster : clusters) {
      List<VectorWritable> points = new ArrayList<VectorWritable>();
      representativePoints.put(cluster.getId(), points);
      points.add(new VectorWritable(cluster.getCenter().clone()));
View Full Code Here

TOP

Related Classes of org.apache.mahout.clustering.canopy.Canopy

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.