Package org.apache.mahout.clustering

Examples of org.apache.mahout.clustering.WeightedVectorWritable


    FileStatus[] status = fs.listStatus(clusteredPointsIn, new OutputLogFilter());
    for (FileStatus s : status) {
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
      try {
        IntWritable key = (IntWritable) reader.getKeyClass().asSubclass(Writable.class).newInstance();
        WeightedVectorWritable vw = reader.getValueClass().asSubclass(WeightedVectorWritable.class).newInstance();
        while (reader.next(key, vw)) {
          RepresentativePointsMapper.mapPoint(key, vw, measure, repPoints, mostDistantPoints);
        }
      } finally {
        reader.close();
View Full Code Here


  @Override
  protected void reduce(IntWritable key, Iterable<WeightedVectorWritable> values, Context context)
    throws IOException, InterruptedException {
    // find the most distant point
    WeightedVectorWritable mdp = null;
    for (WeightedVectorWritable dpw : values) {
      if (mdp == null || mdp.getWeight() < dpw.getWeight()) {
        mdp = new WeightedVectorWritable(dpw.getWeight(), dpw.getVector());
      }
    }
    context.write(new IntWritable(key.get()), new VectorWritable(mdp.getVector()));
  }
View Full Code Here

    reader.close();
    path = new Path(output, "clusteredPoints/part-m-0");
    reader = new SequenceFile.Reader(fs, path, config);
    int count = 0;
    Writable clusterId = new IntWritable(0);
    WeightedVectorWritable vector = new WeightedVectorWritable();
    while (reader.next(clusterId, vector)) {
      count++;
      System.out.println("Txt: " + clusterId + " Vec: " + AbstractCluster.formatVector(vector.getVector(), null));
    }
    assertEquals("number of points", points.size(), count);
    reader.close();
  }
View Full Code Here

    reader.close();
    path = new Path(output, "clusteredPoints/part-m-0");
    reader = new SequenceFile.Reader(fs, path, config);
    int count = 0;
    Writable clusterId = new IntWritable(0);
    WeightedVectorWritable vector = new WeightedVectorWritable();
    while (reader.next(clusterId, vector)) {
      count++;
      System.out.println("Txt: " + clusterId + " Vec: " + AbstractCluster.formatVector(vector.getVector(), null));
    }
    assertEquals("number of points", points.size(), count);
    reader.close();
  }
View Full Code Here

    CanopyDriver.run(conf, getTestTempDirPath("testdata"), output, manhattanDistanceMeasure, 3.1, 2.1, true, false);
    Path path = new Path(output, "clusteredPoints/part-m-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    int count = 0;
    Writable clusterId = new IntWritable(0);
    WeightedVectorWritable vector = new WeightedVectorWritable();
    while (reader.next(clusterId, vector)) {
      count++;
      System.out.println("Txt: " + clusterId + " Vec: " + AbstractCluster.formatVector(vector.getVector(), null));
    }
    assertEquals("number of points", points.size(), count);
    reader.close();
  }
View Full Code Here

    ToolRunner.run(new Configuration(), new CanopyDriver(), args);
    Path path = new Path(output, "clusteredPoints/part-m-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    int count = 0;
    Writable canopyId = new IntWritable(0);
    WeightedVectorWritable vw = new WeightedVectorWritable();
    while (reader.next(canopyId, vw)) {
      count++;
      System.out.println("Txt: " + canopyId.toString() + " Vec: " + AbstractCluster.formatVector(vw.getVector(), null));
    }
    assertEquals("number of points", points.size(), count);
    reader.close();
  }
View Full Code Here

  public void emitPointToClosestCanopy(Vector point,
                                       Iterable<Canopy> canopies,
                                       Mapper<?,?,IntWritable,WeightedVectorWritable>.Context context)
    throws IOException, InterruptedException {
    Canopy closest = findClosestCanopy(point, canopies);
    context.write(new IntWritable(closest.getId()), new WeightedVectorWritable(1, point));
    context.setStatus("Emit Closest Canopy ID:" + closest.getIdentifier());
  }
View Full Code Here

        clusterId = i;
        clusterPdf = pdf;
      }
    }
    //System.out.println(clusterId + ": " + ClusterBase.formatVector(vector.get(), null));
    context.write(new IntWritable(clusterId), new WeightedVectorWritable(clusterPdf, point.get()));
  }
View Full Code Here

    throws IOException, InterruptedException {
    for (int i = 0; i < clusters.size(); i++) {
      double pdf = pi.get(i);
      if (pdf > threshold && clusters.get(i).getTotalCount() > 0) {
        //System.out.println(i + ": " + ClusterBase.formatVector(vector.get(), null));
        context.write(new IntWritable(i), new WeightedVectorWritable(pdf, point.get()));
      }
    }
  }
View Full Code Here

    throws IOException {
    for (int i = 0; i < clusters.size(); i++) {
      double pdf = pi.get(i);
      if (pdf > threshold && clusters.get(i).getTotalCount() > 0) {
        //System.out.println(i + ": " + ClusterBase.formatVector(vector.get(), null));
        writer.append(new IntWritable(i), new WeightedVectorWritable(pdf, vector.get()));
      }
    }
  }
View Full Code Here

TOP

Related Classes of org.apache.mahout.clustering.WeightedVectorWritable

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact coftware#gmail.com.