Package org.apache.mahout.clustering

Examples of org.apache.mahout.clustering.Cluster


    int clusterCount = 0;
    Collection<Integer> set = Sets.newHashSet();
    for (ClusterWritable clusterWritable :
         new SequenceFileValueIterable<ClusterWritable>(new Path(output, "part-randomSeed"), true, conf)) {
      clusterCount++;
      Cluster cluster = clusterWritable.getValue();
      int id = cluster.getId();
      assertTrue(set.add(id)); // validate unique id's
     
      Vector v = cluster.getCenter();
      assertVectorEquals(RAW[id], v); // validate values match
    }

    assertEquals(4, clusterCount); // validate sample count
  }
View Full Code Here


   */
  private static List<Cluster> loadClusters(Configuration conf, Path clustersIn) {
    List<Cluster> clusters = Lists.newArrayList();
    for (ClusterWritable clusterWritable : new SequenceFileDirValueIterable<ClusterWritable>(clustersIn, PathType.LIST,
        PathFilters.logsCRCFilter(), conf)) {
      Cluster cluster = clusterWritable.getValue();
      clusters.add(cluster);
    }
    return clusters;
  }
View Full Code Here

   * @return a Map<Integer, Vector>
   */
  public Map<Integer,Vector> interClusterDistances() {
    Map<Integer,Vector> distances = new TreeMap<Integer,Vector>();
    for (int i = 0; i < clusters.size(); i++) {
      Cluster clusterI = clusters.get(i);
      RandomAccessSparseVector row = new RandomAccessSparseVector(Integer.MAX_VALUE);
      distances.put(clusterI.getId(), row);
      for (int j = i + 1; j < clusters.size(); j++) {
        Cluster clusterJ = clusters.get(j);
        double d = measure.distance(clusterI.getCenter(), clusterJ.getCenter());
        row.set(clusterJ.getId(), d);
      }
    }
    return distances;
  }
View Full Code Here

    return buf.toString();
  }
 
  private void printClusters(List<Cluster> models, String[] docs) {
    for (int m = 0; m < models.size(); m++) {
      Cluster model = models.get(m);
      long total = model.getTotalObservations();
      if (total == 0) {
        continue;
      }
      System.out.println();
      System.out.println("Model[" + m + "] had " + total + " observations");
      System.out.println("pdf           document");
      MapElement[] map = new MapElement[sampleData.size()];
      // sort the samples by pdf
      double maxPdf = Double.MIN_NORMAL;
      for (int i = 0; i < sampleData.size(); i++) {
        VectorWritable sample = new VectorWritable(sampleData.get(i));
        double pdf = Math.abs(model.pdf(sample));
        maxPdf = Math.max(maxPdf, pdf);
        map[i] = new MapElement(pdf, docs[i]);
      }
      Arrays.sort(map);
      for (MapElement aMap : map) {
View Full Code Here

    Collection<Integer> set = new HashSet<Integer>();
    Vector v[] = new Vector[3];
    for (ClusterWritable clusterWritable :
         new SequenceFileValueIterable<ClusterWritable>(
             new Path(output, "part-eigenSeed"), true, conf)) {
      Cluster cluster = clusterWritable.getValue();
      int id = cluster.getId();
      assertTrue(set.add(id)); // validate unique id's
      v[id] = cluster.getCenter();
      clusterCount++;
    }
    assertEquals(3, clusterCount); // validate sample count
    // validate pair-wise orthogonality
    assertEquals(0, v[0].dot(v[1]), 1E-10);
View Full Code Here

   */
  private static List<Cluster> loadClusters(Configuration conf, Path clustersIn) {
    List<Cluster> clusters = Lists.newArrayList();
    for (ClusterWritable clusterWritable : new SequenceFileDirValueIterable<ClusterWritable>(clustersIn, PathType.LIST,
        PathFilters.logsCRCFilter(), conf)) {
      Cluster cluster = clusterWritable.getValue();
      clusters.add(cluster);
    }
    return clusters;
  }
View Full Code Here

    int row = 0;
    StringBuilder models = new StringBuilder(100);
    for (List<Cluster> r : result) {
      models.append("sample[").append(row++).append("]= ");
      for (int k = 0; k < r.size(); k++) {
        Cluster model = r.get(k);
        models.append('m').append(k).append(model.asFormatString(null)).append(", ");
      }
      models.append('\n');
    }
    models.append('\n');
    System.out.println(models.toString());
View Full Code Here

        Path inPart = part.getPath();
        Path path = new Path(output, inPart.getName());
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class);
        try {
          for (ClusterWritable clusterWritable : new SequenceFileValueIterable<ClusterWritable>(inPart, true, conf)) {
            Cluster cluster = clusterWritable.getValue();
            if (log.isDebugEnabled()) {
              log.debug("C-{}: {}", cluster.getId(), AbstractCluster.formatVector(cluster.getCenter(), null));
            }
            writer.append(new IntWritable(cluster.getId()), new VectorWritable(cluster.getCenter()));
          }
        } finally {
          Closeables.close(writer, false);
        }
      }
View Full Code Here

        element.put("valueWeight", vecW.getWeight());
        BasicDBList dbl2 = listFromMahoutVector(vecW.getVector(), "value", element);
        element.put("value", dbl2);         
      }
      else if (value instanceof org.apache.mahout.clustering.iterator.ClusterWritable) {
        Cluster cluster = ((org.apache.mahout.clustering.iterator.ClusterWritable)value).getValue();
        BasicDBObject clusterVal = new BasicDBObject();
        clusterVal.put("center", listFromMahoutVector(cluster.getCenter(), "center", clusterVal));
        clusterVal.put("radius", listFromMahoutVector(cluster.getRadius(), "radius", clusterVal));
        element.put("value", clusterVal);         
      }
      else {
        element.put("unknownValue", value.getClass().toString());
      }
View Full Code Here

    int row = 0;
    StringBuilder models = new StringBuilder(100);
    for (Cluster[] r : result) {
      models.append("sample[").append(row++).append("]= ");
      for (int k = 0; k < r.length; k++) {
        Cluster model = r[k];
        if (model.count() > significant) {
          models.append('m').append(k).append(model.asFormatString(null)).append(", ");
        }
      }
      models.append('\n');
    }
    models.append('\n');
View Full Code Here

TOP

Related Classes of org.apache.mahout.clustering.Cluster

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.