// Collects the centroids emitted by the mapper run under MRUnit's MapDriver.
for (org.apache.hadoop.mrunit.types.Pair<IntWritable, CentroidWritable> pair : mapDriver.run()) {
  mapperCentroids.add(pair.getSecond().getCentroid());
}
// Clusters the data using local batch StreamingKMeans.
StreamingKMeans batchClusterer =
    new StreamingKMeans(StreamingKMeansUtilsMR.searcherFromConfiguration(configuration),
        mapDriver.getConfiguration().getInt("estimatedNumMapClusters", -1), DISTANCE_CUTOFF);
batchClusterer.cluster(syntheticData.getFirst());
List<Centroid> batchCentroids = Lists.newArrayList();
for (Vector v : batchClusterer) {
  batchCentroids.add((Centroid) v);
}
// Clusters the data using point-by-point StreamingKMeans.
// The cluster estimate is (1 << NUM_DIMENSIONS) * log(NUM_DATA_POINTS), roughly the
// k * log(n) sketch size used by streaming k-means.
StreamingKMeans perPointClusterer =
    new StreamingKMeans(StreamingKMeansUtilsMR.searcherFromConfiguration(configuration),
        (1 << NUM_DIMENSIONS) * (int) Math.log(NUM_DATA_POINTS), DISTANCE_CUTOFF);
for (Centroid datapoint : syntheticData.getFirst()) {
  perPointClusterer.cluster(datapoint);
}
List<Centroid> perPointCentroids = Lists.newArrayList();
for (Vector v : perPointClusterer) {
  perPointCentroids.add((Centroid) v);
}