Examples of BruteSearch

org.apache.mahout.knn.search.BruteSearch
Search for nearest neighbors using a complete search (i.e. looping through the references and comparing each vector to the query).
org.apache.mahout.math.neighborhood.BruteSearch
Search for nearest neighbors using a complete search (i.e. looping through the references and comparing each vector to the query).

Examples of org.apache.mahout.knn.search.BruteSearch

    return data;
  }


  @Override
  public UpdatableSearcher getSearch(int n) {
    return new BruteSearch(new EuclideanDistanceMeasure());
  }

View Full Code Here

Examples of org.apache.mahout.knn.search.BruteSearch

  }


  @Test
  public void testMatrixSearch() {
    List<WeightedVector> referenceVectors = Lists.newArrayListWithExpectedSize(8);
    BruteSearch searcher = new BruteSearch(new EuclideanDistanceMeasure());
    for (int i = 0; i < 8; i++) {
      referenceVectors.add(new WeightedVector(
          new DenseVector(new double[]{0.125 * (i & 4), i & 2, i & 1}), 1, i));
      searcher.add(referenceVectors.get(referenceVectors.size() - 1));
    }


    final List<List<WeightedThing<Vector>>> searchResults =
        searcher.search(referenceVectors, 3);
    for (List<WeightedThing<Vector>> r : searchResults) {
      assertEquals(0, r.get(0).getWeight(), 1e-8);
      assertEquals(0.5, r.get(1).getWeight(), 1e-8);
      assertEquals(1, r.get(2).getWeight(), 1e-8);
    }

View Full Code Here

Examples of org.apache.mahout.knn.search.BruteSearch


  @Test
  public void testBasicClustering() {
    List<? extends WeightedVector> data = cubishTestData(1);


    BallKMeans r = new BallKMeans(new BruteSearch(new EuclideanDistanceMeasure()), 6, 20);
    r.cluster(data);
    for (Centroid centroid : r) {
      for (int i = 0; i < 10; i++) {
        System.out.printf("%10.4f", centroid.get(i));
      }

View Full Code Here

Examples of org.apache.mahout.knn.search.BruteSearch

  public void testInitialization() {
    // start with super clusterable data
    List<? extends WeightedVector> data = cubishTestData(0.01);


    // just do initialization of ball k-means.  This should drop a point into each of the clusters
    BallKMeans r = new BallKMeans(new BruteSearch(new EuclideanDistanceMeasure()), 6, 20);
    r.cluster(data);


    // put the centroids into a matrix
    Matrix x = new DenseMatrix(6, 5);
    int row = 0;

View Full Code Here

Examples of org.apache.mahout.knn.search.BruteSearch

    }
    System.out.printf("Generated query matrix.\n");


    for (int threads : new int[]{1, 2, 3, 4, 5, 6, 10, 20, 50}) {
      for (int block : new int[]{1, 10, 50}) {
        BruteSearch search = new BruteSearch(new EuclideanDistanceMeasure());
        search.addAll(referenceVectors);
        long t0 = System.nanoTime();
        search.search(queryVectors, block, threads);
        long t1 = System.nanoTime();
        System.out.printf("%d\t%d\t%.2f\n", threads, block, (t1 - t0) / 1e9);
      }
    }
  }

View Full Code Here

Examples of org.apache.mahout.math.neighborhood.BruteSearch

   * @param distanceMeasure the distance measure used to compute the distance between two points.
   * @return the minimum distance between the first sampleLimit points
   * @see org.apache.mahout.clustering.streaming.cluster.StreamingKMeans#clusterInternal(Iterable, boolean)
   */
  public static double estimateDistanceCutoff(List<? extends Vector> data, DistanceMeasure distanceMeasure) {
    BruteSearch searcher = new BruteSearch(distanceMeasure);
    searcher.addAll(data);
    double minDistance = Double.POSITIVE_INFINITY;
    for (Vector vector : data) {
      double closest = searcher.searchFirst(vector, true).getWeight();
      if (minDistance > 0 && closest < minDistance) {
        minDistance = closest;
      }
      searcher.add(vector);
    }
    return minDistance;
  }

View Full Code Here

Examples of org.apache.mahout.math.neighborhood.BruteSearch

   * @param distanceMeasure distance measure to use
   * @return the confusion matrix
   */
  public static Matrix getConfusionMatrix(List<? extends Vector> rowCentroids, List<? extends  Vector> columnCentroids,
                                          Iterable<? extends Vector> datapoints, DistanceMeasure distanceMeasure) {
    Searcher rowSearcher = new BruteSearch(distanceMeasure);
    rowSearcher.addAll(rowCentroids);
    Searcher columnSearcher = new BruteSearch(distanceMeasure);
    columnSearcher.addAll(columnCentroids);


    int numRows = rowCentroids.size();
    int numCols = columnCentroids.size();
    Matrix confusionMatrix = new DenseMatrix(numRows, numCols);


    for (Vector vector : datapoints) {
      WeightedThing<Vector> closestRowCentroid = rowSearcher.search(vector, 1).get(0);
      WeightedThing<Vector> closestColumnCentroid = columnSearcher.search(vector, 1).get(0);
      int row = ((Centroid) closestRowCentroid.getValue()).getIndex();
      int column = ((Centroid) closestColumnCentroid.getValue()).getIndex();
      double vectorWeight;
      if (vector instanceof WeightedVector) {
        vectorWeight = ((WeightedVector) vector).getWeight();

View Full Code Here

Examples of org.apache.mahout.math.neighborhood.BruteSearch

        mapDriver.getConfiguration().get(StreamingKMeansDriver.SEARCHER_CLASS_OPTION));
    for (Centroid datapoint : syntheticData.getFirst()) {
      mapDriver.addInput(new IntWritable(0), new VectorWritable(datapoint));
    }
    List<org.apache.hadoop.mrunit.types.Pair<IntWritable,CentroidWritable>> results = mapDriver.run();
    BruteSearch resultSearcher = new BruteSearch(new SquaredEuclideanDistanceMeasure());
    for (org.apache.hadoop.mrunit.types.Pair<IntWritable, CentroidWritable> result : results) {
      resultSearcher.add(result.getSecond().getCentroid());
    }
    System.out.printf("Clustered the data into %d clusters\n", results.size());
    for (Vector mean : syntheticData.getSecond()) {
      WeightedThing<Vector> closest = resultSearcher.search(mean, 1).get(0);
      assertTrue("Weight " + closest.getWeight() + " not less than 0.5", closest.getWeight() < 0.5);
    }
  }

View Full Code Here

Examples of org.apache.mahout.math.neighborhood.BruteSearch

  private static final int K1 = 100;


  @Test
  public void testClusteringMultipleRuns() {
    for (int i = 1; i <= 10; ++i) {
      BallKMeans clusterer = new BallKMeans(new BruteSearch(new SquaredEuclideanDistanceMeasure()),
          1 << NUM_DIMENSIONS, NUM_ITERATIONS, true, i);
      clusterer.cluster(syntheticData.getFirst());
      double costKMeansPlusPlus = ClusteringUtils.totalClusterCost(syntheticData.getFirst(), clusterer);


      clusterer = new BallKMeans(new BruteSearch(new SquaredEuclideanDistanceMeasure()),
          1 << NUM_DIMENSIONS, NUM_ITERATIONS, false, i);
      clusterer.cluster(syntheticData.getFirst());
      double costKMeansRandom = ClusteringUtils.totalClusterCost(syntheticData.getFirst(), clusterer);


      System.out.printf("%d runs; kmeans++: %f; random: %f\n", i, costKMeansPlusPlus, costKMeansRandom);

View Full Code Here

Examples of org.apache.mahout.math.neighborhood.BruteSearch

    }
  }


  @Test
  public void testClustering() {
    UpdatableSearcher searcher = new BruteSearch(new SquaredEuclideanDistanceMeasure());
    BallKMeans clusterer = new BallKMeans(searcher, 1 << NUM_DIMENSIONS, NUM_ITERATIONS);


    long startTime = System.currentTimeMillis();
    clusterer.cluster(syntheticData.getFirst());
    long endTime = System.currentTimeMillis();


    assertEquals("Total weight not preserved", totalWeight(syntheticData.getFirst()), totalWeight(clusterer), 1.0e-9);


    // Verify that each corner of the cube has a centroid very nearby.
    // This is probably FALSE for large-dimensional spaces!
    OnlineSummarizer summarizer = new OnlineSummarizer();
    for (Vector mean : syntheticData.getSecond()) {
      WeightedThing<Vector> v = searcher.search(mean, 1).get(0);
      summarizer.add(v.getWeight());
    }
    assertTrue(String.format("Median weight [%f] too large [>%f]", summarizer.getMedian(),
        DISTRIBUTION_RADIUS), summarizer.getMedian() < DISTRIBUTION_RADIUS);


    double clusterTime = (endTime - startTime) / 1000.0;
    System.out.printf("%s\n%.2f for clustering\n%.1f us per row\n\n",
        searcher.getClass().getName(), clusterTime,
        clusterTime / syntheticData.getFirst().size() * 1.0e6);


    // Verify that the total weight of the centroids near each corner is correct.
    double[] cornerWeights = new double[1 << NUM_DIMENSIONS];
    Searcher trueFinder = new BruteSearch(new EuclideanDistanceMeasure());
    for (Vector trueCluster : syntheticData.getSecond()) {
      trueFinder.add(trueCluster);
    }
    for (Centroid centroid : clusterer) {
      WeightedThing<Vector> closest = trueFinder.search(centroid, 1).get(0);
      cornerWeights[((Centroid)closest.getValue()).getIndex()] += centroid.getWeight();
    }
    int expectedNumPoints = NUM_DATA_POINTS / (1 << NUM_DIMENSIONS);
    for (double v : cornerWeights) {
      System.out.printf("%f ", v);

View Full Code Here

0 1 2

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Corporation. Contact coftware#gmail.com.