Package org.apache.mahout.matrix

Examples of org.apache.mahout.matrix.Vector


    for (int k = 0; k < points.size(); k++) {
      System.out.println("Test k=" + (k + 1) + ':');
      // pick k initial cluster centers at random
      List<Cluster> clusters = new ArrayList<Cluster>();
      for (int i = 0; i < k + 1; i++) {
        Vector vec = points.get(i);
        clusters.add(new VisibleCluster(vec));
      }
      // iterate clusters until they converge
      int maxIter = 10;
      referenceKmeans(points, clusters, measure, maxIter);
View Full Code Here


    for (int canopyIx = 0; canopyIx < canopies.size(); canopyIx++) {
      Canopy refCanopy = reference.get(canopyIx);
      Canopy testCanopy = canopies.get(canopyIx);
      assertEquals("canopy points " + canopyIx, refCanopy.getNumPoints(),
          testCanopy.getNumPoints());
      Vector refCentroid = refCanopy.computeCentroid();
      Vector testCentroid = testCanopy.computeCentroid();
      for (int pointIx = 0; pointIx < refCentroid.cardinality(); pointIx++) {
        assertEquals("canopy centroid " + canopyIx + '[' + pointIx + ']',
            refCentroid.get(pointIx), testCentroid.get(pointIx));
      }
    }
  }
View Full Code Here

        Cluster cluster = clusterMap.get(key);
        List<Text> values = collector.getValue(key);
        for (Writable value : values) {
          String[] pointInfo = value.toString().split("\t");

          Vector point = AbstractVector.decodeVector(pointInfo[1]);
          double distance = euclideanDistanceMeasure.distance(cluster
              .getCenter(), point);
          for (Cluster c : clusters)
            assertTrue("distance error", distance <= euclideanDistanceMeasure
                .distance(point, c.getCenter()));
View Full Code Here

     * the list all points that are within distance threshold T2. Repeat until
     * the list is empty.
     */
    while (!points.isEmpty()) {
      Iterator<Vector> ptIter = points.iterator();
      Vector p1 = ptIter.next();
      ptIter.remove();
      Canopy canopy = new VisibleCanopy(p1);
      canopies.add(canopy);
      while (ptIter.hasNext()) {
        Vector p2 = ptIter.next();
        double dist = measure.distance(p1, p2);
        // Put all points that are within distance threshold T1 into the canopy
        if (dist < t1)
          canopy.addPoint(p2);
        // Remove from the list all points that are within distance threshold T2
View Full Code Here

    for (int k = 0; k < points.size(); k++) {
      // pick k initial cluster centers at random
      DummyOutputCollector<Text, Text> collector = new DummyOutputCollector<Text, Text>();
      List<Cluster> clusters = new ArrayList<Cluster>();
      for (int i = 0; i < k + 1; i++) {
        Vector vec = points.get(i);

        Cluster cluster = new Cluster(vec);
        // add the center so the centroid will be correct upon output
        cluster.addPoint(cluster.getCenter());
        clusters.add(cluster);
      }
      mapper.config(clusters);
      // map the data
      for (Vector point : points) {
        mapper.map(new Text(), new Text(point.asFormatString()), collector,
            null);
      }
      // now combine the data
      KMeansCombiner combiner = new KMeansCombiner();
      DummyOutputCollector<Text, Text> collector2 = new DummyOutputCollector<Text, Text>();
      for (String key : collector.getKeys())
        combiner.reduce(new Text(key), collector.getValue(key).iterator(),
            collector2, null);

      assertEquals("Number of map results", k + 1, collector2.getData().size());
      // now verify that all points are accounted for
      int count = 0;
      Vector total = new DenseVector(2);
      for (String key : collector2.getKeys()) {
        List<Text> values = collector2.getValue(key);
        assertEquals("too many values", 1, values.size());
        String value = values.get(0).toString();

        String[] pointInfo = value.split("\t");
        count += Integer.parseInt(pointInfo[0]);
        total = total.plus(AbstractVector.decodeVector(pointInfo[1]));
      }
      assertEquals("total points", 9, count);
      assertEquals("point total[0]", 27, (int) total.get(0));
      assertEquals("point total[1]", 27, (int) total.get(1));
    }
  }
View Full Code Here

      double[][] expectedCentroids = { { 1.5, 1.5 }, { 4.0, 4.0 },
          { 4.666666666666667, 4.6666666666666667 } };
      assertEquals("canopy points " + canopyIx, expectedNumPoints[canopyIx],
          testCanopy.getNumPoints());
      double[] refCentroid = expectedCentroids[canopyIx];
      Vector testCentroid = testCanopy.computeCentroid();
      for (int pointIx = 0; pointIx < refCentroid.length; pointIx++) {
        assertEquals("canopy centroid " + canopyIx + '[' + pointIx + ']',
            refCentroid[pointIx], testCentroid.get(pointIx));
      }
    }
  }
View Full Code Here

      double[][] expectedCentroids = { { 1.8, 1.8 }, { 4.2, 4.2 },
          { 4.666666666666667, 4.666666666666667 } };
      assertEquals("canopy points " + canopyIx, expectedNumPoints[canopyIx],
          testCanopy.getNumPoints());
      double[] refCentroid = expectedCentroids[canopyIx];
      Vector testCentroid = testCanopy.computeCentroid();
      for (int pointIx = 0; pointIx < refCentroid.length; pointIx++) {
        assertEquals("canopy centroid " + canopyIx + '[' + pointIx + ']',
            refCentroid[pointIx], testCentroid.get(pointIx));
      }
    }
  }
View Full Code Here

      System.out.println("K = " + k);
      // pick k initial cluster centers at random
      DummyOutputCollector<Text, Text> collector = new DummyOutputCollector<Text, Text>();
      List<Cluster> clusters = new ArrayList<Cluster>();
      for (int i = 0; i < k + 1; i++) {
        Vector vec = points.get(i);
        Cluster cluster = new Cluster(vec, i);
        // add the center so the centroid will be correct upon output
        // cluster.addPoint(cluster.getCenter());
        clusters.add(cluster);
      }
      mapper.config(clusters);
      // map the data
      for (Vector point : points) {
        mapper.map(new Text(), new Text(point.asFormatString()), collector,
            null);
      }
      // now combine the data
      KMeansCombiner combiner = new KMeansCombiner();
      DummyOutputCollector<Text, Text> collector2 = new DummyOutputCollector<Text, Text>();
      for (String key : collector.getKeys())
        combiner.reduce(new Text(key), collector.getValue(key).iterator(),
            collector2, null);

      // now reduce the data
      KMeansReducer reducer = new KMeansReducer();
      reducer.config(clusters);
      DummyOutputCollector<Text, Text> collector3 = new DummyOutputCollector<Text, Text>();
      for (String key : collector2.getKeys())
        reducer.reduce(new Text(key), collector2.getValue(key).iterator(),
            collector3, null);

      assertEquals("Number of map results", k + 1, collector3.getData().size());

      // compute the reference result after one iteration and compare
      List<Cluster> reference = new ArrayList<Cluster>();
      for (int i = 0; i < k + 1; i++) {
        Vector vec = points.get(i);
        reference.add(new Cluster(vec, i));
      }
      boolean converged = iterateReference(points, reference,
          euclideanDistanceMeasure);
      if (k == 8)
View Full Code Here

      Path path = new Path("testdata/clusters/part-00000");
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path,
          Text.class, Text.class);

      for (int i = 0; i < k + 1; i++) {
        Vector vec = points.get(i);

        Cluster cluster = new Cluster(vec, i);
        // add the center so the centroid will be correct upon output
        cluster.addPoint(cluster.getCenter());
        writer.append(new Text(cluster.getIdentifier()), new Text(Cluster
View Full Code Here

  public void computeParameters() {
    if (s0 == 0)
      return;
    mean = s1.divide(s0);
    // the average of the two component stds
    Vector ss = s2.times(s0).minus(s1.times(s1));
    if (s0 > 1) {
      sd.set(0, Math.sqrt(ss.get(0)) / s0);
      sd.set(1, Math.sqrt(ss.get(1)) / s0);
    } else {
      sd.set(0, Double.MIN_NORMAL);
      sd.set(1, Double.MIN_NORMAL);
    }
  }
View Full Code Here

TOP

Related Classes of org.apache.mahout.matrix.Vector

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.