Package org.apache.mahout.clustering.iterator

Examples of org.apache.mahout.clustering.iterator.ClusterWritable


      boolean converged = clusterer.shiftToMean(canopy);
      if (converged) {
        context.getCounter("Clustering", "Converged Clusters").increment(1);
      }
      allConverged = converged && allConverged;
      ClusterWritable clusterWritable = new ClusterWritable();
      clusterWritable.setValue(canopy);
      context.write(new Text(canopy.getIdentifier()), clusterWritable);
    }
   
  }
View Full Code Here


      try {
        for (VectorWritable value : new SequenceFileValueIterable<VectorWritable>(
            s.getPath(), conf)) {
          MeanShiftCanopy initialCanopy = MeanShiftCanopy.initialCanopy(value.get(),
              id++, measure);
          ClusterWritable clusterWritable = new ClusterWritable();
          clusterWritable.setValue(initialCanopy);
      writer.append(new Text(), clusterWritable);
        }
      } finally {
        Closeables.closeQuietly(writer);
      }
View Full Code Here

                    AbstractCluster.formatVector(cluster.getCenter(), null),
                    cluster.getNumObservations(),
                    AbstractCluster.formatVector(cluster.getRadius(), null),
                    clustersOut.getName() });
          }
          ClusterWritable clusterWritable = new ClusterWritable();
          clusterWritable.setValue(cluster);
          writer.append(new Text(cluster.getIdentifier()), clusterWritable);
        }
      } finally {
        Closeables.closeQuietly(writer);
      }
View Full Code Here

          output, "part-m-" + part++), IntWritable.class,
          WeightedVectorWritable.class);
      try {
        for (Pair<Writable, ClusterWritable> record : new SequenceFileIterable<Writable, ClusterWritable>(
            s.getPath(), conf)) {
          ClusterWritable clusterWritable = record.getSecond();
      MeanShiftCanopy canopy = (MeanShiftCanopy) clusterWritable.getValue();
          MeanShiftCanopy closest = MeanShiftCanopyClusterer
              .findCoveringCanopy(canopy, clusters);
          writer.append(new IntWritable(closest.getId()),
              new WeightedVectorWritable(1, canopy.getCenter()));
        }
View Full Code Here

  @Override
  protected void cleanup(Context context) throws IOException, InterruptedException {
  int reducer = 0;
    for (MeanShiftCanopy canopy : canopies) {
      clusterer.shiftToMean(canopy);
      ClusterWritable clusterWritable = new ClusterWritable();
      clusterWritable.setValue(canopy);
      context.write(new Text(String.valueOf(reducer)), clusterWritable);
      reducer++;
      if (reducer >= numReducers) {
        reducer=0;
      }
View Full Code Here

          newCluster.observe(value.get(), 1);
          Text newText = new Text(key.toString());
          int currentSize = chosenTexts.size();
          if (currentSize < k) {
            chosenTexts.add(newText);
            ClusterWritable clusterWritable = new ClusterWritable();
            clusterWritable.setValue(newCluster);
            chosenClusters.add(clusterWritable);
          } else if (random.nextInt(currentSize + 1) != 0) { // true with chance currentSize/(currentSize+1): swap the new element in
            int indexToRemove = random.nextInt(currentSize); // evict one chosen randomly
            chosenTexts.remove(indexToRemove);
            chosenClusters.remove(indexToRemove);
            chosenTexts.add(newText);
            ClusterWritable clusterWritable = new ClusterWritable();
            clusterWritable.setValue(newCluster);
            chosenClusters.add(clusterWritable);
          }
        }
      }
View Full Code Here

    Set<Text> keys = writer.getKeys();
    assertEquals("Number of centroids", 3, keys.size());
    int i = 0;
    for (Text key : keys) {
      List<ClusterWritable> data = writer.getValue(key);
      ClusterWritable clusterWritable = data.get(0);
    Canopy canopy = (Canopy)clusterWritable.getValue();
    assertEquals(manhattanCentroids.get(i).asFormatString()
          + " is not equal to "
          + canopy.computeCentroid().asFormatString(), manhattanCentroids
          .get(i), canopy.computeCentroid());
      i++;
View Full Code Here

    Set<Text> keys = writer.getKeys();
    assertEquals("Number of centroids", 3, keys.size());
    int i = 0;
    for (Text key : keys) {
      List<ClusterWritable> data = writer.getValue(key);
      ClusterWritable clusterWritable = data.get(0);
      Canopy canopy = (Canopy)clusterWritable.getValue();
      assertEquals(euclideanCentroids.get(i).asFormatString()
          + " is not equal to "
          + canopy.computeCentroid().asFormatString(), euclideanCentroids
          .get(i), canopy.computeCentroid());
      i++;
View Full Code Here

    Path path = new Path(output, "clusters-0-final/part-r-00000");
    FileSystem fs = FileSystem.get(path.toUri(), config);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, config);
    try {
      Writable key = new Text();
      ClusterWritable clusterWritable = new ClusterWritable();
    assertTrue("more to come", reader.next(key, clusterWritable));
      assertEquals("1st key", "C-0", key.toString());

      List<Pair<Double,Double>> refCenters = Lists.newArrayList();
      refCenters.add(new Pair<Double,Double>(1.5,1.5));
      refCenters.add(new Pair<Double,Double>(4.333333333333334,4.333333333333334));
    Pair<Double,Double> c = new Pair<Double,Double>(clusterWritable.getValue() .getCenter().get(0),
      clusterWritable.getValue().getCenter().get(1));
      assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
      assertTrue("more to come", reader.next(key, clusterWritable));
      assertEquals("2nd key", "C-1", key.toString());
      c = new Pair<Double,Double>(clusterWritable.getValue().getCenter().get(0),
          clusterWritable.getValue().getCenter().get(1));
      assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
      assertFalse("more to come", reader.next(key, clusterWritable));
    } finally {
      Closeables.closeQuietly(reader);
    }
View Full Code Here

    Path path = new Path(output, "clusters-0-final/part-r-00000");
    FileSystem fs = FileSystem.get(path.toUri(), config);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, config);
    try {
      Writable key = new Text();
      ClusterWritable clusterWritable = new ClusterWritable();
      assertTrue("more to come", reader.next(key, clusterWritable));
      assertEquals("1st key", "C-0", key.toString());

      List<Pair<Double,Double>> refCenters = Lists.newArrayList();
      refCenters.add(new Pair<Double,Double>(1.8,1.8));
      refCenters.add(new Pair<Double,Double>(4.433333333333334, 4.433333333333334));
      Pair<Double,Double> c = new Pair<Double,Double>(clusterWritable.getValue().getCenter().get(0),
                                                      clusterWritable.getValue().getCenter().get(1));
      assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
      assertTrue("more to come", reader.next(key, clusterWritable));
      assertEquals("2nd key", "C-1", key.toString());
      c = new Pair<Double,Double>(clusterWritable.getValue().getCenter().get(0),
                                  clusterWritable.getValue().getCenter().get(1));
      assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
      assertFalse("more to come", reader.next(key, clusterWritable));
    } finally {
      Closeables.closeQuietly(reader);
    }
View Full Code Here

TOP

Related Classes of org.apache.mahout.clustering.iterator.ClusterWritable

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.