Package org.apache.mahout.clustering

Examples of org.apache.mahout.clustering.Cluster


   */
  private static List<Cluster> loadClusters(Configuration conf, Path clustersIn) {
    List<Cluster> clusters = Lists.newArrayList();
    for (ClusterWritable clusterWritable :
         new SequenceFileDirValueIterable<ClusterWritable>(clustersIn, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
      Cluster cluster = clusterWritable.getValue();     
        clusters.add(cluster);
    }
    return clusters;
  }
View Full Code Here


  private void pruneInvalidClusters() {
    if (pruned) {
      return;
    }
    for (Iterator<Cluster> it = clusters.iterator(); it.hasNext();) {
      Cluster cluster = it.next();
      if (invalidCluster(cluster)) {
        log.info("Pruning cluster Id={}", cluster.getId());
        it.remove();
        representativePoints.remove(cluster.getId());
      }
    }
    pruned = true;
  }
View Full Code Here

      // The destination file reuses the input part's file name, placed under the output path.
      Path inPart = part.getPath();
      Path path = new Path(output, inPart.getName());
      // Records are written as (IntWritable key, VectorWritable value) pairs.
      SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class);
      try {
        for (ClusterWritable clusterWritable : new SequenceFileValueIterable<ClusterWritable>(inPart, true, conf)) {
          Cluster cluster = clusterWritable.getValue();
          // Guarded so the center is only formatted when debug logging is enabled.
          if (log.isDebugEnabled()) {
            log.debug("C-{}: {}", cluster.getId(), AbstractCluster.formatVector(cluster.getCenter(), null));
          }
          // Key is the cluster id, value is the cluster center.
          writer.append(new IntWritable(cluster.getId()), new VectorWritable(cluster.getCenter()));
        }
      } finally {
        // The writer is closed whether or not the loop above completed normally.
        // NOTE(review): closeQuietly presumably suppresses exceptions raised by close();
        // confirm that silently losing a failed close is acceptable for a writer.
        Closeables.closeQuietly(writer);
      }
    }
View Full Code Here

   */
  private static List<Cluster> loadClusters(Configuration conf, Path clustersIn) {
    List<Cluster> clusters = Lists.newArrayList();
    for (ClusterWritable clusterWritable :
         new SequenceFileDirValueIterable<ClusterWritable>(clustersIn, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
      Cluster cluster = clusterWritable.getValue();
    clusters.add(cluster);
    }
    return clusters;
  }
View Full Code Here

  private void pruneInvalidClusters() {
    if (pruned) {
      return;
    }
    for (Iterator<Cluster> it = clusters.iterator(); it.hasNext();) {
      Cluster cluster = it.next();
      if (invalidCluster(cluster)) {
        log.info("Pruning cluster Id={}", cluster.getId());
        it.remove();
        representativePoints.remove(cluster.getId());
      }
    }
    pruned = true;
  }
View Full Code Here

    // Running statistics over the distances between cluster centers.
    double max = 0;
    // Starts at the largest double so the first measured distance replaces it.
    double min = Double.MAX_VALUE;
    double sum = 0;
    int count = 0;
    for (int i = 0; i < clusters.size(); i++) {
      Cluster clusterI = clusters.get(i);
      // j starts at i + 1, so each unordered pair of clusters is measured exactly once.
      for (int j = i + 1; j < clusters.size(); j++) {
        Cluster clusterJ = clusters.get(j);
        double d = measure.distance(clusterI.getCenter(), clusterJ.getCenter());
        min = Math.min(d, min);
        max = Math.max(d, max);
        sum += d;
        count++;
      }
View Full Code Here

    // Ids seen so far; add() returns false for a repeated id, which fails the assertion below.
    Collection<Integer> set = new HashSet<Integer>();
    // Centers are stored by cluster id, so every id must be a valid index into this 3-slot array.
    Vector v[] = new Vector[3];
    for (ClusterWritable clusterWritable :
         new SequenceFileValueIterable<ClusterWritable>(
             new Path(output, "part-eigenSeed"), true, conf)) {
      Cluster cluster = clusterWritable.getValue();
      int id = cluster.getId();
      assertTrue(set.add(id)); // validate unique ids
      v[id] = cluster.getCenter();
      clusterCount++;
    }
    assertEquals(3, clusterCount); // validate sample count
    // validate pair-wise orthogonality: the dot product must be 0 within a 1E-10 tolerance
    assertEquals(0, v[0].dot(v[1]), 1E-10);
View Full Code Here

    // Counts the clusters read back from the "part-randomSeed" file.
    int clusterCount = 0;
    // Ids seen so far; add() returns false for a repeated id, which fails the assertion below.
    Collection<Integer> set = Sets.newHashSet();
    for (ClusterWritable clusterWritable :
         new SequenceFileValueIterable<ClusterWritable>(new Path(output, "part-randomSeed"), true, conf)) {
      clusterCount++;
      Cluster cluster = clusterWritable.getValue();
      int id = cluster.getId();
      assertTrue(set.add(id)); // Validate unique ids
     
      // The cluster id doubles as the index of the expected vector in RAW.
      Vector v = cluster.getCenter();
      assertVectorEquals(RAW[id], v); // Validate values match
    }

    assertEquals(4, clusterCount); // Validate sample count
  }
View Full Code Here

    // Counts the clusters read back from the "part-randomSeed" file.
    int clusterCount = 0;
    // Ids seen so far; add() returns false for a repeated id, which fails the assertion below.
    Collection<Integer> set = Sets.newHashSet();
    for (ClusterWritable clusterWritable :
         new SequenceFileValueIterable<ClusterWritable>(new Path(output, "part-randomSeed"), true, conf)) {
      clusterCount++;
      Cluster cluster = clusterWritable.getValue();
      int id = cluster.getId();
      assertTrue(set.add(id)); // validate unique ids
     
      // The cluster id doubles as the index of the expected vector in RAW.
      Vector v = cluster.getCenter();
      assertVectorEquals(RAW[id], v); // validate values match
    }

    assertEquals(4, clusterCount); // validate sample count
  }
View Full Code Here

   
    // First pass: run through all clusters once and record the sequence of ids in clusterIDSeq.
    int clusterCount = 0;
    for (ClusterWritable clusterWritable :
         new SequenceFileValueIterable<ClusterWritable>(new Path(output, "part-randomSeed"), true, conf)) {     
      Cluster cluster = clusterWritable.getValue();
      clusterIDSeq[clusterCount] = cluster.getId();
      clusterCount++;
    }
   
    /* Second pass: rebuild the clusters with the same randSeed and run through them again,
     * making sure all ids come back in the same random sequence.
     * Needs a better test: in this case it passes when seeded with 1 and 2, fails with 1, 3,
     * and passes when set to two. */
    RandomSeedGenerator.buildRandom(conf, input, output, 4, new ManhattanDistanceMeasure(), randSeed);     clusterCount = 0;   
    for (ClusterWritable clusterWritable :
         new SequenceFileValueIterable<ClusterWritable>(new Path(output, "part-randomSeed"), true, conf)) {      
      Cluster cluster = clusterWritable.getValue();
      // Make sure cluster ids are in the same random sequence as the first pass
      assertEquals(clusterIDSeq[clusterCount], cluster.getId());
      clusterCount++;
    }
}
View Full Code Here

TOP

Related Classes of org.apache.mahout.clustering.Cluster

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.