Package org.apache.mahout.clustering.canopy

Examples of org.apache.mahout.clustering.canopy.Canopy


   * @param measure
   *          the DistanceMeasure
   */
  private void initData(double dC, double dP, DistanceMeasure measure) {
    clusters = Lists.newArrayList();
    clusters.add(new Canopy(new DenseVector(new double[] {-dC, -dC}), 1,
        measure));
    clusters
        .add(new Canopy(new DenseVector(new double[] {-dC, dC}), 3, measure));
    clusters
        .add(new Canopy(new DenseVector(new double[] {dC, dC}), 5, measure));
    clusters
        .add(new Canopy(new DenseVector(new double[] {dC, -dC}), 7, measure));
    representativePoints = Maps.newHashMap();
    for (Cluster cluster : clusters) {
      List<VectorWritable> points = Lists.newArrayList();
      representativePoints.put(cluster.getId(), points);
      points.add(new VectorWritable(cluster.getCenter().clone()));
View Full Code Here


  public void testAllSameValueCluster() throws IOException {
    ClusteringTestUtils.writePointsToFile(referenceData, new Path(testdata,
        "file1"), fs, conf);
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    initData(1, 0.25, measure);
    Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19,
        measure);
    clusters.add(cluster);
    List<VectorWritable> points = Lists.newArrayList();
    points.add(new VectorWritable(cluster.getCenter()));
    points.add(new VectorWritable(cluster.getCenter()));
    points.add(new VectorWritable(cluster.getCenter()));
    representativePoints.put(cluster.getId(), points);
    ClusterEvaluator evaluator = new ClusterEvaluator(representativePoints,
        clusters, measure);
    assertEquals("inter cluster density", 0.33333333333333315,
        evaluator.interClusterDensity(), EPSILON);
    assertEquals("intra cluster density", 0.3656854249492381,
View Full Code Here

  public void testEmptyCluster() throws IOException {
    ClusteringTestUtils.writePointsToFile(referenceData,
        getTestTempFilePath("testdata/file1"), fs, conf);
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    initData(1, 0.25, measure);
    Canopy cluster = new Canopy(new DenseVector(new double[] {10, 10}), 19,
        measure);
    clusters.add(cluster);
    List<VectorWritable> points = Lists.newArrayList();
    representativePoints.put(cluster.getId(), points);
    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters,
        measure);
    assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(),
        EPSILON);
    assertEquals("separation", 20.485281374238568, evaluator.separation(),
View Full Code Here

  public void testSingleValueCluster() throws IOException {
    ClusteringTestUtils.writePointsToFile(referenceData,
        getTestTempFilePath("testdata/file1"), fs, conf);
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    initData(1, 0.25, measure);
    Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19,
        measure);
    clusters.add(cluster);
    List<VectorWritable> points = Lists.newArrayList();
    points.add(new VectorWritable(cluster.getCenter().plus(
        new DenseVector(new double[] {1, 1}))));
    representativePoints.put(cluster.getId(), points);
    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters,
        measure);
    assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(),
        EPSILON);
    assertEquals("separation", 20.485281374238568, evaluator.separation(),
View Full Code Here

  public void testAllSameValueCluster() throws IOException {
    ClusteringTestUtils.writePointsToFile(referenceData,
        getTestTempFilePath("testdata/file1"), fs, conf);
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    initData(1, 0.25, measure);
    Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19,
        measure);
    clusters.add(cluster);
    List<VectorWritable> points = Lists.newArrayList();
    points.add(new VectorWritable(cluster.getCenter()));
    points.add(new VectorWritable(cluster.getCenter()));
    points.add(new VectorWritable(cluster.getCenter()));
    representativePoints.put(cluster.getId(), points);
    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters,
        measure);
    assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(),
        EPSILON);
    assertEquals("separation", 20.485281374238568, evaluator.separation(),
View Full Code Here

  public void testAlmostSameValueCluster() throws IOException {
    ClusteringTestUtils.writePointsToFile(referenceData,
        getTestTempFilePath("testdata/file1"), fs, conf);
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    initData(1, 0.25, measure);
    Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19,
        measure);
    clusters.add(cluster);
    List<VectorWritable> points = Lists.newArrayList();
    Vector delta = new DenseVector(new double[] { 0, Double.MIN_NORMAL });
    points.add(new VectorWritable(delta.clone()));
    points.add(new VectorWritable(delta.clone()));
    points.add(new VectorWritable(delta.clone()));
    points.add(new VectorWritable(delta.clone()));
    points.add(new VectorWritable(delta.clone()));
    representativePoints.put(cluster.getId(), points);
    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters,
        measure);
    assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(),
        EPSILON);
    assertEquals("separation", 28.970562748477143, evaluator.separation(),
View Full Code Here

        } else {
          seedVectors.add(new NamedVector(vector, cluster.getIdentifier()));
        }
      } else if (valueClass.equals(Canopy.class)) {
        // get the cluster info
        Canopy canopy = (Canopy) value;
        Vector vector = canopy.getCenter();
        if (vector instanceof NamedVector) {
          seedVectors.add((NamedVector) vector);
        } else {
          seedVectors.add(new NamedVector(vector, canopy.getIdentifier()));
        }
      } else if (valueClass.equals(Vector.class)) {
        Vector vector = (Vector) value;
        if (vector instanceof NamedVector) {
          seedVectors.add((NamedVector) vector);
View Full Code Here

              // get the cluster info
              clusters.add(value);
              value = new Cluster();
            }
          } else if (valueClass.equals(Canopy.class)) {
            Canopy value = new Canopy();
            while (reader.next(key, value)) {
              // get the cluster info
              Cluster cluster = new Cluster(value.getCenter(), value.getId());
              clusters.add(cluster);
              value = new Canopy();
            }
          }
        } finally {
          IOUtils.quietClose(reader);
        }
View Full Code Here

  @Override
  public void map(LongWritable key, Text values,
      OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
    String foo = values.toString();
    int ix = foo.indexOf(']');
    Canopy canopy = Canopy.decodeCanopy(foo.substring(0, ix + 1));
    Vector point = AbstractVector.decodeVector(foo.substring(ix + 3));
    output.collect(new Text(canopy.getIdentifier()), new Text(point
        .asFormatString()));
  }
View Full Code Here

        } else {
          seedVectors.add(new NamedVector(vector, cluster.getIdentifier()));
        }
      } else if (valueClass.equals(Canopy.class)) {
        // get the cluster info
        Canopy canopy = (Canopy) value;
        Vector vector = canopy.getCenter();
        if (vector instanceof NamedVector) {
          seedVectors.add((NamedVector) vector);
        } else {
          seedVectors.add(new NamedVector(vector, canopy.getIdentifier()));
        }
      } else if (valueClass.equals(Vector.class)) {
        Vector vector = (Vector) value;
        if (vector instanceof NamedVector) {
          seedVectors.add((NamedVector) vector);
View Full Code Here

TOP

Related Classes of org.apache.mahout.clustering.canopy.Canopy

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and owned by ORACLE Inc. Contact coftware#gmail.com.