Package org.apache.mahout.clustering

Examples of org.apache.mahout.clustering.Cluster


  private void pruneInvalidClusters() {
    if (pruned) {
      return;
    }
    for (Iterator<Cluster> it = clusters.iterator(); it.hasNext();) {
      Cluster cluster = it.next();
      if (invalidCluster(cluster)) {
        log.info("Pruning cluster Id=" + cluster.getId());
        it.remove();
        representativePoints.remove(cluster.getId());
      }
    }
    pruned = true;
  }
View Full Code Here


    double max = 0;
    double min = Double.MAX_VALUE;
    double sum = 0;
    int count = 0;
    for (int i = 0; i < clusters.size(); i++) {
      Cluster clusterI = clusters.get(i);
      for (int j = i + 1; j < clusters.size(); j++) {
        Cluster clusterJ = clusters.get(j);
        double d = measure.distance(clusterI.getCenter(), clusterJ.getCenter());
        min = Math.min(d, min);
        max = Math.max(d, max);
        sum += d;
        count++;
      }
View Full Code Here

  private void pruneInvalidClusters() {
    if (pruned) {
      return;
    }
    for (Iterator<Cluster> it = clusters.iterator(); it.hasNext();) {
      Cluster cluster = it.next();
      if (invalidCluster(cluster)) {
        log.info("Pruning cluster Id={}", cluster.getId());
        it.remove();
        representativePoints.remove(cluster.getId());
      }
    }
    pruned = true;
  }
View Full Code Here

    assertEquals("count", 3, count);
    outPart = new Path(output, "clusters-0/part-m-00000");
    Iterator<?> iterator = new SequenceFileValueIterator<Writable>(outPart, true, conf);
    // now test the initial clusters to ensure the type of their centers has been retained
    while (iterator.hasNext()) {
      Cluster canopy = (Cluster) iterator.next();
      assertTrue(canopy.getCenter() instanceof DenseVector);
    }
  }
View Full Code Here

  @Override
  protected void reduce(Text key, Iterable<VectorWritable> values, Context context)
    throws IOException, InterruptedException {
    int k = Integer.parseInt(key.toString());
    Cluster model = newModels[k];
    for (VectorWritable value : values) {
      // only observe real points, not the empty placeholders emitted by each mapper
      if (value.get().size() > 0) {
        model.observe(value);
      }
    }
    DirichletCluster cluster = clusterer.updateCluster(model, k);
    context.write(new Text(String.valueOf(k)), cluster);
  }
View Full Code Here

  }

  /** Reads a typed Model instance from the input stream */
  public static Cluster readModel(DataInput in) throws IOException {
    String modelClassName = in.readUTF();
    Cluster model;
    try {
      model = Class.forName(modelClassName).asSubclass(Cluster.class).newInstance();
    } catch (ClassNotFoundException e) {
      throw new IllegalStateException(e);
    } catch (IllegalAccessException e) {
      throw new IllegalStateException(e);
    } catch (InstantiationException e) {
      throw new IllegalStateException(e);
    }
    model.readFields(in);
    return model;
  }
View Full Code Here

    int clusterCount = 0;
    Collection<Integer> set = Sets.newHashSet();
    for (ClusterWritable clusterWritable :
         new SequenceFileValueIterable<ClusterWritable>(new Path(output, "part-randomSeed"), true, conf)) {
      clusterCount++;
      Cluster cluster = clusterWritable.getValue();
      int id = cluster.getId();
      assertTrue(set.add(id)); // validate unique id's
     
      Vector v = cluster.getCenter();
      assertVectorEquals(RAW[id], v); // validate values match
    }

    assertEquals(4, clusterCount); // validate sample count
  }
View Full Code Here

    Collection<Integer> set = new HashSet<Integer>();
    Vector v[] = new Vector[3];
    for (ClusterWritable clusterWritable :
         new SequenceFileValueIterable<ClusterWritable>(
             new Path(output, "part-eigenSeed"), true, conf)) {
      Cluster cluster = clusterWritable.getValue();
      int id = cluster.getId();
      assertTrue(set.add(id)); // validate unique id's
      v[id] = cluster.getCenter();
      clusterCount++;
    }
    assertEquals(3, clusterCount); // validate sample count
    // validate pair-wise orthogonality
    assertEquals(0, v[0].dot(v[1]), 1E-10);
View Full Code Here

    List<Object> topTerms = getTopFeaturesList(clusterWritable.getValue()
        .getCenter(), dictionary, numTopFeatures);
    res.put("top_terms", topTerms);

    // get human-readable cluster representation
    Cluster cluster = clusterWritable.getValue();
    String fmtStr = cluster.asFormatString(dictionary);
    res.put("cluster_id", cluster.getId());
    res.put("cluster", fmtStr);

    // get points
    List<Object> points = getPoints(cluster, dictionary);
    res.put("points", points);
View Full Code Here

  }

  @Override
  public void write(ClusterWritable clusterWritable) throws IOException {
    StringBuilder line = new StringBuilder();
    Cluster cluster = clusterWritable.getValue();
    line.append(cluster.getId());
    List<WeightedPropertyVectorWritable> points = getClusterIdToPoints().get(cluster.getId());
    if (points != null) {
      for (WeightedPropertyVectorWritable point : points) {
        Vector theVec = point.getVector();
        line.append(',');
        if (theVec instanceof NamedVector) {
View Full Code Here

TOP

Related Classes of org.apache.mahout.clustering.Cluster

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.