Package org.apache.mahout.clustering

Examples of org.apache.mahout.clustering.Cluster


   */

  @Override
  public void write(ClusterWritable clusterWritable) throws IOException {
    StringBuilder line = new StringBuilder();
    Cluster cluster = clusterWritable.getValue();
    Color rgb = getColor(cluster.getId());

    String topTerms = "";
    if (dictionary != null) {
      topTerms = getTopTerms(cluster.getCenter(), dictionary, numTopFeatures);
    }
    String clusterLabel = String.valueOf(cluster.getId()) + '_' + topTerms;
    //do some positioning so that items are visible and grouped together
    //TODO: put in a real layout algorithm
    float x = lastX + 1000;
    float y = lastY;
    if (x > (1000 + posStep)) {
      y = lastY + 1000;
      x = 0;
    }

    line.append(createNode(clusterLabel, rgb, x, y));
    List<WeightedPropertyVectorWritable> points = clusterIdToPoints.get(cluster.getId());
    if (points != null) {
      for (WeightedVectorWritable point : points) {
        Vector theVec = point.getVector();
        double distance = 1;
        if (measure != null) {
          //scale the distance
          distance = measure.distance(cluster.getCenter().getLengthSquared(), cluster.getCenter(), theVec) * 500;
        }
        String vecStr;
        int angle = random.nextInt(360); //pick an angle at random and then scale along that angle
        double angleRads = Math.toRadians(angle);

View Full Code Here


    Path finalClustersPath = finalClustersPath(conf, clusterOutputPath);
    Iterator<?> it = new SequenceFileDirValueIterator<Writable>(finalClustersPath, PathType.LIST,
        PathFilters.partFilter(), null, false, conf);
    while (it.hasNext()) {
      ClusterWritable next = (ClusterWritable) it.next();
      Cluster cluster = next.getValue();
      cluster.configure(conf);
      clusterModels.add(cluster);
    }
    return clusterModels;
  }
View Full Code Here

  }

  private static void write(List<Cluster> clusterModels, SequenceFile.Writer writer,
      WeightedPropertyVectorWritable weightedPropertyVectorWritable,
      int maxValueIndex) throws IOException {
    Cluster cluster = clusterModels.get(maxValueIndex);

    DistanceMeasureCluster distanceMeasureCluster = (DistanceMeasureCluster) cluster;
    DistanceMeasure distanceMeasure = distanceMeasureCluster.getMeasure();
    double distance = distanceMeasure.distance(cluster.getCenter(), weightedPropertyVectorWritable.getVector());

    weightedPropertyVectorWritable.getProperties().put(new Text("distance"), new Text(Double.toString(distance)));
    writer.append(new IntWritable(cluster.getId()), weightedPropertyVectorWritable);
  }
View Full Code Here

    }
  }
 
  private void write(VectorWritable vw, Context context, int clusterIndex, double weight)
    throws IOException, InterruptedException {
    Cluster cluster = clusterModels.get(clusterIndex);
    clusterId.set(cluster.getId());

    DistanceMeasureCluster distanceMeasureCluster = (DistanceMeasureCluster) cluster;
    DistanceMeasure distanceMeasure = distanceMeasureCluster.getMeasure();
    double distance = distanceMeasure.distance(cluster.getCenter(), vw.get());

    Map<Text, Text> props = Maps.newHashMap();
    props.put(new Text("distance"), new Text(Double.toString(distance)));
    context.write(clusterId, new WeightedPropertyVectorWritable(weight, vw.get(), props));
  }
View Full Code Here

    Iterator<?> it = new SequenceFileDirValueIterator<Writable>(
        clusterFiles[0].getPath(), PathType.LIST, PathFilters.partFilter(),
        null, false, conf);
    while (it.hasNext()) {
      ClusterWritable next = (ClusterWritable) it.next();
      Cluster cluster = next.getValue();
      cluster.configure(conf);
      clusters.add(cluster);
    }
    return clusters;
  }
View Full Code Here

  protected static List<Cluster> readClustersWritable(Path clustersIn) {
    List<Cluster> clusters = Lists.newArrayList();
    Configuration conf = new Configuration();
    for (ClusterWritable value : new SequenceFileDirValueIterable<ClusterWritable>(clustersIn, PathType.LIST,
        PathFilters.logsCRCFilter(), conf)) {
      Cluster cluster = value.getValue();
      log.info(
          "Reading Cluster:{} center:{} numPoints:{} radius:{}",
          cluster.getId(), AbstractCluster.formatVector(cluster.getCenter(), null),
          cluster.getNumObservations(), AbstractCluster.formatVector(cluster.getRadius(), null));
      clusters.add(cluster);
    }
    return clusters;
  }
View Full Code Here

    this.subString = subString;
  }
 
  @Override
  public void write(ClusterWritable clusterWritable) throws IOException {
    Cluster cluster = clusterWritable.getValue();
    String fmtStr = cluster.asFormatString(dictionary);
    Writer writer = getWriter();
    if (subString > 0 && fmtStr.length() > subString) {
      writer.write(':');
      writer.write(fmtStr, 0, Math.min(subString, fmtStr.length()));
    } else {
View Full Code Here

    return buf.toString();
  }
 
  private void printClusters(List<Cluster> models, String[] docs) {
    for (int m = 0; m < models.size(); m++) {
      Cluster model = models.get(m);
      long total = model.getTotalObservations();
      if (total == 0) {
        continue;
      }
      System.out.println();
      System.out.println("Model[" + m + "] had " + total + " observations");
      System.out.println("pdf           document");
      MapElement[] map = new MapElement[sampleData.size()];
      // sort the samples by pdf
      double maxPdf = Double.MIN_NORMAL;
      for (int i = 0; i < sampleData.size(); i++) {
        VectorWritable sample = new VectorWritable(sampleData.get(i));
        double pdf = Math.abs(model.pdf(sample));
        maxPdf = Math.max(maxPdf, pdf);
        map[i] = new MapElement(pdf, docs[i]);
      }
      Arrays.sort(map);
      for (int i = 0; i < map.length; i++) {
View Full Code Here

    int clusterCount = 0;
    Collection<Integer> set = new HashSet<Integer>();
    for (ClusterWritable clusterWritable :
         new SequenceFileValueIterable<ClusterWritable>(new Path(output, "part-randomSeed"), true, conf)) {
      clusterCount++;
      Cluster cluster = clusterWritable.getValue();
      int id = cluster.getId();
      assertTrue(set.add(id)); // validate unique id's
     
      Vector v = cluster.getCenter();
      assertVectorEquals(RAW[id], v); // validate values match
    }

    assertEquals(4, clusterCount); // validate sample count
  }
View Full Code Here

   * @return The list of clusters found by the clustering.
   * @throws IOException
   */
  private static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException {
    List<Cluster> clusterModels = new ArrayList<Cluster>();
    Cluster cluster = null;
    Path finalClustersPath = finalClustersPath(conf, clusterOutputPath);
    Iterator<?> it = new SequenceFileDirValueIterator<Writable>(finalClustersPath, PathType.LIST,
        PathFilters.partFilter(), null, false, conf);
    while (it.hasNext()) {
      ClusterWritable next = (ClusterWritable) it.next();
      cluster = (Cluster) next.getValue();
      cluster.configure(conf);
      clusterModels.add(cluster);
    }
    return clusterModels;
  }
View Full Code Here

TOP

Related Classes of org.apache.mahout.clustering.Cluster

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.