Package org.apache.mahout.clustering.iterator

Examples of org.apache.mahout.clustering.iterator.ClusterWritable
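ClusterWritable is the Hadoop Writable wrapper that Mahout uses to move Cluster instances (canopies, mean-shift canopies, and similar models) through MapReduce jobs and SequenceFiles. The examples below all follow one pattern: wrap a concrete cluster with setValue(), ship it through a mapper, reducer, or file, then recover it with getValue() and cast it back to the concrete type. As a minimal sketch of the underlying contract, assuming only the standard Writable write/readFields methods (this is an illustration, not one of the examples below):

    // Minimal round-trip sketch (illustration only; assumes the standard
    // org.apache.hadoop.io.Writable contract and java.io streams).
    Cluster roundTrip(Cluster cluster) throws IOException {
      ClusterWritable out = new ClusterWritable();
      out.setValue(cluster);                    // wrap any concrete Cluster

      ByteArrayOutputStream buffer = new ByteArrayOutputStream();
      out.write(new DataOutputStream(buffer));  // serialize

      ClusterWritable in = new ClusterWritable();
      in.readFields(new DataInputStream(
          new ByteArrayInputStream(buffer.toByteArray())));
      return in.getValue();                     // the cluster, deserialized
    }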


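From a mean-shift mapper test: each MeanShiftCanopy is wrapped in a ClusterWritable before being passed to the mapper through a DummyRecordWriter-backed context.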
    DummyRecordWriter<Text, ClusterWritable> mapWriter = new DummyRecordWriter<Text, ClusterWritable>();
    Mapper<WritableComparable<?>, ClusterWritable, Text, ClusterWritable>.Context mapContext =
        DummyRecordWriter.build(mapper, conf, mapWriter);
    mapper.setup(mapContext);
    // wrap each canopy in a ClusterWritable and feed it to the mapper
    for (MeanShiftCanopy canopy : canopies) {
      ClusterWritable clusterWritable = new ClusterWritable();
      clusterWritable.setValue(canopy);
      mapper.map(new Text(), clusterWritable, mapContext);
    }
    mapper.cleanup(mapContext);

    // now verify the output


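A fuller excerpt from the same test: after the map phase, the output is fed to a MeanShiftCanopyReducer, and each reduced ClusterWritable is unwrapped and compared field by field against a reference canopy.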
    Mapper<WritableComparable<?>, ClusterWritable, Text, ClusterWritable>.Context mapContext = DummyRecordWriter
        .build(mapper, conf, mapWriter);
    mapper.setup(mapContext);

    // map the data
    for (MeanShiftCanopy canopy : canopies) {
      ClusterWritable clusterWritable = new ClusterWritable();
      clusterWritable.setValue(canopy);
      mapper.map(new Text(), clusterWritable, mapContext);
    }
    mapper.cleanup(mapContext);

    assertEquals("Number of map results", 1, mapWriter.getData().size());
    // now reduce the mapper output
    MeanShiftCanopyReducer reducer = new MeanShiftCanopyReducer();
    DummyRecordWriter<Text, ClusterWritable> reduceWriter = new DummyRecordWriter<Text, ClusterWritable>();
    Reducer<Text, ClusterWritable, Text, ClusterWritable>.Context reduceContext = DummyRecordWriter
        .build(reducer, conf, reduceWriter, Text.class, ClusterWritable.class);
    reducer.setup(reduceContext);
    reducer.reduce(new Text("0"), mapWriter.getValue(new Text("0")),
        reduceContext);
    reducer.cleanup(reduceContext);

    // now verify the output
    assertEquals("Number of canopies", reducerReference.size(), reduceWriter
        .getKeys().size());

    // add all points to the reference canopy maps
    Map<String, MeanShiftCanopy> reducerReferenceMap = Maps.newHashMap();
    for (MeanShiftCanopy canopy : reducerReference) {
      reducerReferenceMap.put(canopy.getIdentifier(), canopy);
    }
    // compare the maps
    for (Map.Entry<String, MeanShiftCanopy> mapEntry : reducerReferenceMap
        .entrySet()) {
      MeanShiftCanopy refCanopy = mapEntry.getValue();

      List<ClusterWritable> values = reduceWriter.getValue(new Text(
          (refCanopy.isConverged() ? "MSV-" : "MSC-") + refCanopy.getId()));
      assertEquals("values", 1, values.size());
      ClusterWritable clusterWritable = values.get(0);
      MeanShiftCanopy reducerCanopy = (MeanShiftCanopy) clusterWritable.getValue();
      assertEquals("ids", refCanopy.getId(), reducerCanopy.getId());
      long refNumPoints = refCanopy.getNumObservations();
      long reducerNumPoints = reducerCanopy.getNumObservations();
      assertEquals("numPoints", refNumPoints, reducerNumPoints);
      String refCenter = refCanopy.getCenter().asFormatString();

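Reading clusters back from a SequenceFile: getValue() recovers the MeanShiftCanopy, and the test checks that the center's vector type (DenseVector) was retained.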
    Iterator<?> iterator = new SequenceFileValueIterator<Writable>(outPart,
        true, conf);
    // now test the initial clusters to ensure the type of their centers has
    // been retained
    while (iterator.hasNext()) {
      ClusterWritable clusterWritable = (ClusterWritable) iterator.next();
      MeanShiftCanopy canopy = (MeanShiftCanopy) clusterWritable.getValue();
      assertTrue(canopy.getCenter() instanceof DenseVector);
      assertFalse(canopy.getBoundPoints().isEmpty());
    }
  }
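For context, here is a hedged sketch of the writer side that could have produced such a part file. SequenceFile.createWriter and append are standard Hadoop APIs, but the key choice and the canopies collection are placeholders rather than code from these tests:

    // Hypothetical producer of a <Text, ClusterWritable> part file.
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf,
        outPart, Text.class, ClusterWritable.class);
    try {
      for (MeanShiftCanopy canopy : canopies) {
        ClusterWritable value = new ClusterWritable();
        value.setValue(canopy);
        seqWriter.append(new Text(canopy.getIdentifier()), value);
      }
    } finally {
      seqWriter.close();
    }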

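Counting records in an output part file and checking that every ClusterWritable wraps a canopy with a DenseVector center and exactly one bound point.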
    long count = HadoopUtil.countRecords(outPart, conf);
    assertEquals("count", 3, count);
    Iterator<?> iterator = new SequenceFileValueIterator<Writable>(outPart,
        true, conf);
    while (iterator.hasNext()) {
      ClusterWritable next = (ClusterWritable) iterator.next();
      MeanShiftCanopy canopy = (MeanShiftCanopy) next.getValue();
      assertTrue(canopy.getCenter() instanceof DenseVector);
      assertEquals(1, canopy.getBoundPoints().size());
    }
  }

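A variant of the previous check that only inspects the bound points.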
    long count = HadoopUtil.countRecords(outPart, conf);
    assertEquals("count", 3, count);
    Iterator<?> iterator = new SequenceFileValueIterator<Writable>(outPart,
        true, conf);
    while (iterator.hasNext()) {
      ClusterWritable next = (ClusterWritable) iterator.next();
      MeanShiftCanopy canopy = (MeanShiftCanopy) next.getValue();
      assertEquals(1, canopy.getBoundPoints().size());
    }
  }

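Verifying collected centroids: each key should map to exactly one ClusterWritable whose cluster center is a two-dimensional vector.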
    int count = 0;
    for (Text k : collector1.getKeys()) {
      count++;
      List<ClusterWritable> vl = collector1.getValue(k);
      assertEquals("non-singleton centroid!", 1, vl.size());
      ClusterWritable clusterWritable = vl.get(0);
      Vector v = clusterWritable.getValue().getCenter();
      assertEquals("cetriod vector is wrong length", 2, v.size());
      if ( (Math.abs(v.get(0) - 1.5) < EPSILON)
                  && (Math.abs(v.get(1) - 1.5) < EPSILON)
                  && !got15) {
        got15 = true;

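A helper that loads the final cluster models from a clustering job's output: each ClusterWritable read from the directory is unwrapped, configured, and collected.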
    List<Cluster> clusterModels = Lists.newArrayList();
    Path finalClustersPath = finalClustersPath(conf, clusterOutputPath);
    Iterator<?> it = new SequenceFileDirValueIterator<Writable>(finalClustersPath, PathType.LIST,
        PathFilters.partFilter(), null, false, conf);
    while (it.hasNext()) {
      ClusterWritable next = (ClusterWritable) it.next();
      Cluster cluster = next.getValue();
      cluster.configure(conf);
      clusterModels.add(cluster);
    }
    return clusterModels;
  }
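A possible follow-up once the models are loaded; getId(), getCenter(), getNumObservations(), and asFormatString() all appear in the examples on this page, but this loop itself is illustrative, not part of the original code:

    // Hypothetical inspection of the loaded models.
    for (Cluster model : clusterModels) {
      System.out.println("cluster " + model.getId()
          + " center=" + model.getCenter().asFormatString()
          + " observations=" + model.getNumObservations());
    }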

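A variant that first locates the final-clusters directory through a file-system listing before iterating over its part files.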
    FileStatus[] clusterFiles = fileSystem.listStatus(clusterOutputPath, PathFilters.finalPartFilter());
    Iterator<?> it = new SequenceFileDirValueIterator<Writable>(
        clusterFiles[0].getPath(), PathType.LIST, PathFilters.partFilter(),
        null, false, conf);
    while (it.hasNext()) {
      ClusterWritable next = (ClusterWritable) it.next();
      Cluster cluster = next.getValue();
      cluster.configure(conf);
      clusters.add(cluster);
    }
    return clusters;
  }

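From a canopy clustering test using the Manhattan distance measure: each emitted ClusterWritable wraps a Canopy whose computed centroid must equal the expected one.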
    Iterable<Text> keys = writer.getKeysInInsertionOrder();
    assertEquals("Number of centroids", 3, Iterables.size(keys));
    int i = 0;
    for (Text key : keys) {
      List<ClusterWritable> data = writer.getValue(key);
      ClusterWritable clusterWritable = data.get(0);
      Canopy canopy = (Canopy) clusterWritable.getValue();
      assertEquals(manhattanCentroids.get(i).asFormatString() + " is not equal to "
          + canopy.computeCentroid().asFormatString(),
          manhattanCentroids.get(i), canopy.computeCentroid());
      i++;
    }

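The same assertion pattern with the Euclidean distance measure.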
    Iterable<Text> keys = writer.getKeysInInsertionOrder();
    assertEquals("Number of centroids", 3, Iterables.size(keys));
    int i = 0;
    for (Text key : keys) {
      List<ClusterWritable> data = writer.getValue(key);
      ClusterWritable clusterWritable = data.get(0);
      Canopy canopy = (Canopy) clusterWritable.getValue();
      assertEquals(euclideanCentroids.get(i).asFormatString() + " is not equal to "
          + canopy.computeCentroid().asFormatString(),
          euclideanCentroids.get(i), canopy.computeCentroid());
      i++;
    }
