Package org.apache.mahout.math

Examples of org.apache.mahout.math.WeightedVector


  public static void main(String[] args) {
    Sampler<Vector> rand = new MultiNormal(new ConstantVector(1, VECTOR_DIMENSION));
    List<WeightedVector> referenceVectors = Lists.newArrayListWithExpectedSize(REFERENCE_SIZE);
    for (int i = 0; i < REFERENCE_SIZE; ++i) {
      referenceVectors.add(new WeightedVector(rand.sample(), 1, i));
    }
    System.out.printf("Generated reference matrix.\n");

    List<WeightedVector> queryVectors = Lists.newArrayListWithExpectedSize(QUERY_SIZE);
    for (int i = 0; i < QUERY_SIZE; ++i) {
      queryVectors.add(new WeightedVector(rand.sample(), 1, i));
    }
    System.out.printf("Generated query matrix.\n");

    for (int threads : new int[]{1, 2, 3, 4, 5, 6, 10, 20, 50}) {
      for (int block : new int[]{1, 10, 50}) {
View Full Code Here


    }
  }

  public void addAllMatrixSlicesAsWeightedVectors(Iterable<MatrixSlice> data) {
    for (MatrixSlice slice : data) {
      add(new WeightedVector(slice.vector(), 1, slice.index()));
    }
  }
View Full Code Here

    Centroid c_1 = new Centroid((WeightedVector)datapoints.get(seedSelector.sample()).clone());
    c_1.setIndex(0);
    // Construct a set of weighted things which can be used for random selection.  Initial weights are
    // set to the squared distance from c_1
    for (int i = 0; i < datapoints.size(); ++i) {
      WeightedVector row = datapoints.get(i);
      final double w = l2.distance(c_1, row) * row.getWeight();
      seedSelector.set(i, w);
    }

    // From here, seeds are selected with probablity proportional to:
    //
    // r_i = min_{c_j} || x_i - c_j ||^2
    //
    // when we only have c_1, we have already set these distances and as we select each new
    // seed, we update the minimum distances.
    centroids.add(c_1);
    int clusterIndex = 1;
    while (centroids.size() < numClusters) {
      // Select according to weights.
      int seedIndex = seedSelector.sample();
      Centroid nextSeed = new Centroid((WeightedVector)datapoints.get(seedIndex).clone());
      nextSeed.setIndex(clusterIndex++);
      centroids.add(nextSeed);
      // Don't select this one again.
      seedSelector.set(seedIndex, 0);
      // Re-weight everything according to the minimum distance to a seed.
      for (int currSeedIndex : seedSelector) {
        WeightedVector curr = datapoints.get(currSeedIndex);
        double newWeight = nextSeed.getWeight() * l2.distance(nextSeed, curr);
        if (newWeight < seedSelector.getWeight(currSeedIndex)) {
          seedSelector.set(currSeedIndex, newWeight);
        }
      }
View Full Code Here

        newCentroids.add(newCentroid);
      }

      // Pass over the datapoints computing new centroids.
      for (int j = 0; j < datapoints.size(); ++j) {
        WeightedVector datapoint = datapoints.get(j);
        // Get the closest cluster this point belongs to.
        WeightedThing<Vector> closestPair = centroids.search(datapoint, 1).get(0);
        int closestIndex = ((WeightedVector)closestPair.getValue()).getIndex();
        double closestDistance = closestPair.getWeight();
        // Update its cluster assignment if necessary.
View Full Code Here

      centroids.clear();
      centroids.addAll(bestCentroids);
    }
    if (correctWeights) {
      for (WeightedVector testDatapoint : trainTestSplit.getSecond()) {
        WeightedVector closest = (WeightedVector) centroids.searchFirst(testDatapoint, false).getValue();
        closest.setWeight(closest.getWeight() + testDatapoint.getWeight());
      }
    }
    return centroids;
  }
View Full Code Here

    Centroid c_1 = new Centroid(datapoints.get(selected).clone());
    c_1.setIndex(0);
    // Construct a set of weighted things which can be used for random selection.  Initial weights are
    // set to the squared distance from c_1
    for (int i = 0; i < datapoints.size(); ++i) {
      WeightedVector row = datapoints.get(i);
      double w = distanceMeasure.distance(c_1, row) * 2 * Math.log(1 + row.getWeight());
      seedSelector.set(i, w);
    }

    // From here, seeds are selected with probability proportional to:
    //
    // r_i = min_{c_j} || x_i - c_j ||^2
    //
    // when we only have c_1, we have already set these distances and as we select each new
    // seed, we update the minimum distances.
    centroids.add(c_1);
    int clusterIndex = 1;
    while (centroids.size() < numClusters) {
      // Select according to weights.
      int seedIndex = seedSelector.sample();
      Centroid nextSeed = new Centroid(datapoints.get(seedIndex));
      nextSeed.setIndex(clusterIndex++);
      centroids.add(nextSeed);
      // Don't select this one again.
      seedSelector.delete(seedIndex);
      // Re-weight everything according to the minimum distance to a seed.
      for (int currSeedIndex : seedSelector) {
        WeightedVector curr = datapoints.get(currSeedIndex);
        double newWeight = nextSeed.getWeight() * distanceMeasure.distance(nextSeed, curr);
        if (newWeight < seedSelector.getWeight(currSeedIndex)) {
          seedSelector.set(currSeedIndex, newWeight);
        }
      }
View Full Code Here

        newCentroids.add(newCentroid);
      }

      // Pass over the datapoints computing new centroids.
      for (int j = 0; j < datapoints.size(); ++j) {
        WeightedVector datapoint = datapoints.get(j);
        // Get the closest cluster this point belongs to.
        WeightedThing<Vector> closestPair = centroids.searchFirst(datapoint, false);
        int closestIndex = ((WeightedVector) closestPair.getValue()).getIndex();
        double closestDistance = closestPair.getWeight();
        // Update its cluster assignment if necessary.
        if (closestIndex != clusterAssignments.get(j)) {
          changed = true;
          clusterAssignments.set(j, closestIndex);
        }
        // Only update if the datapoints point is near enough. What this means is that the weight
        // of outliers is NOT taken into account and the final weights of the centroids will
        // reflect this (it will be less or equal to the initial sum of the weights).
        if (closestDistance < trimFraction * closestClusterDistances.get(closestIndex)) {
          newCentroids.get(closestIndex).update(datapoint);
        }
      }
      // Add the new centers back into searcher.
      centroids.clear();
      centroids.addAll(newCentroids);
    }

    if (correctWeights) {
      for (Vector v : centroids) {
        ((Centroid)v).setWeight(0);
      }
      for (WeightedVector datapoint : datapoints) {
        Centroid closestCentroid = (Centroid) centroids.searchFirst(datapoint, false).getValue();
        closestCentroid.setWeight(closestCentroid.getWeight() + datapoint.getWeight());
      }
    }
  }
View Full Code Here

    }
  }

  public void addAllMatrixSlicesAsWeightedVectors(Iterable<MatrixSlice> data) {
    for (MatrixSlice slice : data) {
      add(new WeightedVector(slice.vector(), 1, slice.index()));
    }
  }
View Full Code Here

    List<WeightedVector> data = Lists.newArrayListWithCapacity(K1 + 5000);
    int row = 0;

    MultiNormal g = new MultiNormal(radius, new ConstantVector(0, 10));
    for (int i = 0; i < K1; i++) {
      data.add(new WeightedVector(g.sample(), 1, row++));
    }

    for (int i = 0; i < 5; i++) {
      Vector m = new DenseVector(10);
      m.set(i, 6); // This was originally i == 0 ? 6 : 6 which can't be right
      MultiNormal gx = new MultiNormal(radius, m);
      for (int j = 0; j < 1000; j++) {
        data.add(new WeightedVector(gx.sample(), 1, row++));
      }
    }
    return data;
  }
View Full Code Here

    List<WeightedVector> data = Lists.newArrayListWithCapacity(K1 + 5000);
    int row = 0;

    MultiNormal g = new MultiNormal(radius, new ConstantVector(0, 10));
    for (int i = 0; i < K1; i++) {
      data.add(new WeightedVector(g.sample(), 1, row++));
    }

    for (int i = 0; i < 5; i++) {
      Vector m = new DenseVector(10);
      m.set(i, 6); // This was originally i == 0 ? 6 : 6 which can't be right
      MultiNormal gx = new MultiNormal(radius, m);
      for (int j = 0; j < 1000; j++) {
        data.add(new WeightedVector(gx.sample(), 1, row++));
      }
    }
    return data;
  }
View Full Code Here

TOP

Related Classes of org.apache.mahout.math.WeightedVector

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.