Examples of org.apache.mahout.clustering.iterator.ClusterWritable

Package org.apache.mahout.clustering.iterator

Examples of org.apache.mahout.clustering.iterator.ClusterWritable

org.apache.mahout.clustering.iterator.ClusterWritable

    DummyRecordWriter<Text, ClusterWritable> mapWriter = new DummyRecordWriter<Text, ClusterWritable>();
    Mapper<WritableComparable<?>, ClusterWritable, Text, ClusterWritable>.Context mapContext = DummyRecordWriter
        .build(mapper, conf, mapWriter);
    mapper.setup(mapContext);
    for (MeanShiftCanopy canopy : canopies) {
      ClusterWritable clusterWritable = new ClusterWritable();
      clusterWritable.setValue(canopy);
        mapper.map(new Text(), clusterWritable, mapContext);
    }
    mapper.cleanup(mapContext);


    // now verify the output

View Full Code Here

        .build(mapper, conf, mapWriter);
    mapper.setup(mapContext);


    // map the data
    for (MeanShiftCanopy canopy : canopies) {
      ClusterWritable clusterWritable = new ClusterWritable();
      clusterWritable.setValue(canopy);
        mapper.map(new Text(), clusterWritable, mapContext);
    }
    mapper.cleanup(mapContext);


    assertEquals("Number of map results", 1, mapWriter.getData().size());
    // now reduce the mapper output
    MeanShiftCanopyReducer reducer = new MeanShiftCanopyReducer();
    DummyRecordWriter<Text, ClusterWritable> reduceWriter = new DummyRecordWriter<Text, ClusterWritable>();
    Reducer<Text, ClusterWritable, Text, ClusterWritable>.Context reduceContext = DummyRecordWriter
        .build(reducer, conf, reduceWriter, Text.class, ClusterWritable.class);
    reducer.setup(reduceContext);
    reducer.reduce(new Text("0"), mapWriter.getValue(new Text("0")),
        reduceContext);
    reducer.cleanup(reduceContext);


    // now verify the output
    assertEquals("Number of canopies", reducerReference.size(), reduceWriter
        .getKeys().size());


    // add all points to the reference canopy maps
    Map<String, MeanShiftCanopy> reducerReferenceMap = Maps.newHashMap();
    for (MeanShiftCanopy canopy : reducerReference) {
      reducerReferenceMap.put(canopy.getIdentifier(), canopy);
    }
    // compare the maps
    for (Map.Entry<String, MeanShiftCanopy> mapEntry : reducerReferenceMap
        .entrySet()) {
      MeanShiftCanopy refCanopy = mapEntry.getValue();


      List<ClusterWritable> values = reduceWriter.getValue(new Text((refCanopy
          .isConverged() ? "MSV-" : "MSC-")
          + refCanopy.getId()));
      assertEquals("values", 1, values.size());
      ClusterWritable clusterWritable = values.get(0);
    MeanShiftCanopy reducerCanopy = (MeanShiftCanopy) clusterWritable.getValue();
      assertEquals("ids", refCanopy.getId(), reducerCanopy.getId());
      long refNumPoints = refCanopy.getNumObservations();
      long reducerNumPoints = reducerCanopy.getNumObservations();
      assertEquals("numPoints", refNumPoints, reducerNumPoints);
      String refCenter = refCanopy.getCenter().asFormatString();

View Full Code Here

    Iterator<?> iterator = new SequenceFileValueIterator<Writable>(outPart,
        true, conf);
    // now test the initial clusters to ensure the type of their centers has
    // been retained
    while (iterator.hasNext()) {
      ClusterWritable clusterWritable = (ClusterWritable)iterator.next();
    MeanShiftCanopy canopy = (MeanShiftCanopy) clusterWritable.getValue();
      assertTrue(canopy.getCenter() instanceof DenseVector);
      assertFalse(canopy.getBoundPoints().isEmpty());
    }
  }

View Full Code Here

    long count = HadoopUtil.countRecords(outPart, conf);
    assertEquals("count", 3, count);
    Iterator<?> iterator = new SequenceFileValueIterator<Writable>(outPart,
        true, conf);
    while (iterator.hasNext()) {
      ClusterWritable next = (ClusterWritable)iterator.next();
    MeanShiftCanopy canopy = (MeanShiftCanopy) next.getValue();
      assertTrue(canopy.getCenter() instanceof DenseVector);
      assertEquals(1, canopy.getBoundPoints().size());
    }
  }

View Full Code Here

    long count = HadoopUtil.countRecords(outPart, conf);
    assertEquals("count", 3, count);
    Iterator<?> iterator = new SequenceFileValueIterator<Writable>(outPart,
        true, conf);
    while (iterator.hasNext()) {
      ClusterWritable next = (ClusterWritable)iterator.next();
    MeanShiftCanopy canopy = (MeanShiftCanopy) next.getValue();
      assertEquals(1, canopy.getBoundPoints().size());
    }
  }

View Full Code Here

    int count = 0;
    for (Text k : collector1.getKeys()) {
      count++;
      List<ClusterWritable> vl = collector1.getValue(k);
      assertEquals("non-singleton centroid!", 1, vl.size());
      ClusterWritable clusterWritable = vl.get(0);
      Vector v = clusterWritable.getValue().getCenter();
      assertEquals("cetriod vector is wrong length", 2, v.size());
      if ( (Math.abs(v.get(0) - 1.5) < EPSILON) 
                  && (Math.abs(v.get(1) - 1.5) < EPSILON)
                  && !got15) {
        got15 = true;

View Full Code Here

    Iterable<Text> keys = writer.getKeysInInsertionOrder();
    assertEquals("Number of centroids", 3, Iterables.size(keys));
    int i = 0;
    for (Text key : keys) {
      List<ClusterWritable> data = writer.getValue(key);
      ClusterWritable clusterWritable = data.get(0);
      Canopy canopy = (Canopy) clusterWritable.getValue();
      assertEquals(manhattanCentroids.get(i).asFormatString() + " is not equal to "
          + canopy.computeCentroid().asFormatString(),
          manhattanCentroids.get(i), canopy.computeCentroid());
      i++;
    }

View Full Code Here

    Iterable<Text> keys = writer.getKeysInInsertionOrder();
    assertEquals("Number of centroids", 3, Iterables.size(keys));
    int i = 0;
    for (Text key : keys) {
      List<ClusterWritable> data = writer.getValue(key);
      ClusterWritable clusterWritable = data.get(0);
      Canopy canopy = (Canopy) clusterWritable.getValue();
      assertEquals(euclideanCentroids.get(i).asFormatString() + " is not equal to "
          + canopy.computeCentroid().asFormatString(),
          euclideanCentroids.get(i), canopy.computeCentroid());
      i++;
    }

View Full Code Here

    Path path = new Path(output, "clusters-0-final/part-r-00000");
    FileSystem fs = FileSystem.get(path.toUri(), config);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, config);
    try {
      Writable key = new Text();
      ClusterWritable clusterWritable = new ClusterWritable();
      assertTrue("more to come", reader.next(key, clusterWritable));
      assertEquals("1st key", "C-0", key.toString());


      List<Pair<Double,Double>> refCenters = Lists.newArrayList();
      refCenters.add(new Pair<Double,Double>(1.5,1.5));
      refCenters.add(new Pair<Double,Double>(4.333333333333334,4.333333333333334));
      Pair<Double,Double> c = new Pair<Double,Double>(clusterWritable.getValue() .getCenter().get(0),
      clusterWritable.getValue().getCenter().get(1));
      assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
      assertTrue("more to come", reader.next(key, clusterWritable));
      assertEquals("2nd key", "C-1", key.toString());
      c = new Pair<Double,Double>(clusterWritable.getValue().getCenter().get(0),
          clusterWritable.getValue().getCenter().get(1));
      assertTrue("center " + c + " not found", findAndRemove(c, refCenters, EPSILON));
      assertFalse("more to come", reader.next(key, clusterWritable));
    } finally {
      Closeables.close(reader, true);
    }

View Full Code Here

    Path path = new Path(output, "clusters-0-final/part-r-00000");
    FileSystem fs = FileSystem.get(path.toUri(), config);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, config);
    try {
      Writable key = new Text();
      ClusterWritable clusterWritable = new ClusterWritable();
      assertTrue("more to come", reader.next(key, clusterWritable));
      assertEquals("1st key", "C-0", key.toString());


      List<Pair<Double,Double>> refCenters = Lists.newArrayList();
      refCenters.add(new Pair<Double,Double>(1.8,1.8));
      refCenters.add(new Pair<Double,Double>(4.433333333333334, 4.433333333333334));
      Pair<Double,Double> c = new Pair<Double,Double>(clusterWritable.getValue().getCenter().get(0),
                                                      clusterWritable.getValue().getCenter().get(1));
      assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
      assertTrue("more to come", reader.next(key, clusterWritable));
      assertEquals("2nd key", "C-1", key.toString());
      c = new Pair<Double,Double>(clusterWritable.getValue().getCenter().get(0),
                                  clusterWritable.getValue().getCenter().get(1));
      assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
      assertFalse("more to come", reader.next(key, clusterWritable));
    } finally {
      Closeables.close(reader, true);
    }

View Full Code Here

0 1 2 3 4 5 6

TOP

Related Classes of org.apache.mahout.clustering.iterator.ClusterWritable

org.apache.mahout.clustering.canopy.CanopyDriver

org.apache.mahout.clustering.canopy.CanopyReducer

org.apache.mahout.clustering.canopy.TestCanopyCreation

org.apache.mahout.clustering.classify.ClusterClassificationDriver

org.apache.mahout.clustering.classify.ClusterClassificationMapper

org.apache.mahout.clustering.classify.ClusterClassifier

org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansUtil

org.apache.mahout.clustering.kmeans.EigenSeedGenerator

org.apache.mahout.clustering.kmeans.KMeansUtil

org.apache.mahout.clustering.kmeans.RandomSeedGenerator

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.