Package org.apache.mahout.clustering

Examples of org.apache.mahout.clustering.Cluster


        Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
        Writable value = reader.getValueClass().asSubclass(Writable.class).newInstance();
        Path path = new Path(output, inPart.getName());
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class);
        while (reader.next(key, value)) {
          Cluster cluster = (Cluster) value;
          log.debug("C-" + cluster.getId() + ": " + AbstractCluster.formatVector(cluster.getCenter(), null));
          writer.append(new IntWritable(cluster.getId()), new VectorWritable(cluster.getCenter()));
        }
        writer.close();
      }
    }
  }
View Full Code Here


        Path inPart = part.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inPart, conf);
        Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
        Writable value = reader.getValueClass().asSubclass(Writable.class).newInstance();
        while (reader.next(key, value)) {
          Cluster cluster = (Cluster) value;
          clusters.add(cluster);
          value = reader.getValueClass().asSubclass(Writable.class).newInstance();
        }
        reader.close();
      }
View Full Code Here

  private void pruneInvalidClusters() {
    if (pruned) {
      return;
    }
    for (Iterator<Cluster> it = clusters.iterator(); it.hasNext();) {
      Cluster cluster = it.next();
      if (invalidCluster(cluster)) {
        log.info("Pruning cluster Id=" + cluster.getId());
        it.remove();
        representativePoints.remove(cluster.getId());
      }
    }
    pruned = true;
  }
View Full Code Here

    double max = 0;
    double min = Double.MAX_VALUE;
    double sum = 0;
    int count = 0;
    for (int i = 0; i < clusters.size(); i++) {
      Cluster clusterI = clusters.get(i);
      for (int j = i + 1; j < clusters.size(); j++) {
        Cluster clusterJ = clusters.get(j);
        double d = measure.distance(clusterI.getCenter(), clusterJ.getCenter());
        min = Math.min(d, min);
        max = Math.max(d, max);
        sum += d;
        count++;
      }
View Full Code Here

    try {
      cl = ccl.loadClass(klass);
    } catch (ClassNotFoundException e) {
      log.warn("Error while loading class", e);
    }
    Cluster model = (Cluster) gson.fromJson(modelJson, cl);
    return new DirichletCluster(model, total);
  }
View Full Code Here

  }

  /** Reads a typed Model instance from the input stream */
  public static Cluster readModel(DataInput in) throws IOException {
    String modelClassName = in.readUTF();
    Cluster model;
    try {
      model = Class.forName(modelClassName).asSubclass(Cluster.class).newInstance();
    } catch (ClassNotFoundException e) {
      throw new IllegalStateException(e);
    } catch (IllegalAccessException e) {
      throw new IllegalStateException(e);
    } catch (InstantiationException e) {
      throw new IllegalStateException(e);
    }
    model.readFields(in);
    return model;
  }
View Full Code Here

      SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
      try {
        Writable key = new Text();
        Writable value = reader.getValueClass().asSubclass(Writable.class).newInstance();
        while (reader.next(key, value)) {
          Cluster cluster = (Cluster) value;
          log.info("Reading Cluster:{} center:{} numPoints:{} radius:{}", new Object[] {
              cluster.getId(),
              AbstractCluster.formatVector(cluster.getCenter(), null),
              cluster.getNumPoints(),
              AbstractCluster.formatVector(cluster.getRadius(), null)
          });
          clusters.add(cluster);
          value = reader.getValueClass().asSubclass(Writable.class).newInstance();
        }
      } finally {
View Full Code Here

  }

  @Override
  protected void reduce(Text key, Iterable<VectorWritable> values, Context context) throws IOException, InterruptedException {
    int k = Integer.parseInt(key.toString());
    Cluster model = newModels[k];
    for (VectorWritable value : values) {
      // only observe real points, not the empty placeholders emitted by each mapper
      if (value.get().size() > 0) {
        model.observe(value);
      }
    }
    DirichletCluster cluster = clusterer.updateCluster(model, k);
    context.write(new Text(String.valueOf(k)), cluster);
  }
View Full Code Here

  }

  @Override
  public void write(ClusterWritable clusterWritable) throws IOException {
    StringBuilder line = new StringBuilder();
    Cluster cluster = clusterWritable.getValue();
    line.append(cluster.getId());
    List<WeightedVectorWritable> points = getClusterIdToPoints().get(cluster.getId());
    if (points != null) {
      for (WeightedVectorWritable point : points) {
        Vector theVec = point.getVector();
        line.append(',');
        if (theVec instanceof NamedVector) {
View Full Code Here

   */

  @Override
  public void write(ClusterWritable clusterWritable) throws IOException {
    StringBuilder line = new StringBuilder();
    Cluster cluster = clusterWritable.getValue();
    Color rgb = getColor(cluster.getId());

    String topTerms = "";
    if (dictionary != null) {
      topTerms = getTopTerms(cluster.getCenter(), dictionary, numTopFeatures);
    }
    String clusterLabel = String.valueOf(cluster.getId()) + '_' + topTerms;
    //do some positioning so that items are visible and grouped together
    //TODO: put in a real layout algorithm
    float x = lastX + 1000;
    float y = lastY;
    if (x > (1000 + posStep)) {
      y = lastY + 1000;
      x = 0;
    }

    line.append(createNode(clusterLabel, rgb, x, y));
    List<WeightedVectorWritable> points = clusterIdToPoints.get(cluster.getId());
    if (points != null) {
      for (WeightedVectorWritable point : points) {
        Vector theVec = point.getVector();
        double distance = 1;
        if (measure != null) {
          //scale the distance
          distance = measure.distance(cluster.getCenter().getLengthSquared(), cluster.getCenter(), theVec) * 500;
        }
        String vecStr;
        int angle = random.nextInt(360); //pick an angle at random and then scale along that angle
        double angleRads = Math.toRadians(angle);

View Full Code Here

TOP

Related Classes of org.apache.mahout.clustering.Cluster

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.