Package org.apache.mahout.common.distance

Examples of org.apache.mahout.common.distance.ManhattanDistanceMeasure


    initialize();
    this.setTitle("Spectral k-Means Clusters (>" + (int) (significance * 100) + "% of population)");
  }

  public static void main(String[] args) throws Exception {
    DistanceMeasure measure = new ManhattanDistanceMeasure();
    Path samples = new Path("samples");
    Path output = new Path("output");
    HadoopUtil.overwriteOutput(samples);
    HadoopUtil.overwriteOutput(output);

    RandomUtils.useTestSeed();
    DisplayClustering.generateSamples();
    writeSampleData(samples);
    int maxIter = 10;
    double convergenceDelta = 0.001;
    Path affinities = new Path(output, "affinities");
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(output.toUri(), conf);
    if (!fs.exists(output)) {
      fs.mkdirs(output);
    }
    FileWriter writer = new FileWriter(affinities.toString());
    PrintWriter out = new PrintWriter(writer);
    try {
      for (int i = 0; i < SAMPLE_DATA.size(); i++) {
        for (int j = 0; j < SAMPLE_DATA.size(); j++) {
          out.println(i + "," + j + "," + measure.distance(SAMPLE_DATA.get(i).get(), SAMPLE_DATA.get(j).get()));
        }
      }
    } finally {
      out.close();
    }
View Full Code Here


    job.setMapOutputValueClass(VectorWritable.class);
    Path input = getTestTempFilePath("random-input");
    Path output = getTestTempDirPath("random-output");
    ClusteringTestUtils.writePointsToFile(points, input, fs, conf);
   
    RandomSeedGenerator.buildRandom(input, output, 4, new ManhattanDistanceMeasure());
   
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(output, "part-randomSeed"), conf);
    Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
    AbstractCluster value = (AbstractCluster) reader.getValueClass().newInstance();
   
View Full Code Here

    ClusteringTestUtils.writePointsToFile(points, new Path(pointsPath, "file1"), fs, conf);
    ClusteringTestUtils.writePointsToFile(points, new Path(pointsPath, "file2"), fs, conf);

    Path outputPath = getTestTempDirPath("output");
    // now run the Canopy job
    CanopyDriver.run(conf, pointsPath, outputPath, new ManhattanDistanceMeasure(), 3.1, 2.1, false, false);

    // now run the KMeans job
    KMeansDriver.run(pointsPath,
                     new Path(outputPath, "clusters-0"),
                     outputPath,
View Full Code Here

    initialize();
    setTitle("Spectral k-Means Clusters (>" + (int) (significance * 100) + "% of population)");
  }

  public static void main(String[] args) throws Exception {
    DistanceMeasure measure = new ManhattanDistanceMeasure();
    Path samples = new Path(SAMPLES);
    Path output = new Path(OUTPUT);
    Path tempDir = new Path(TEMP);
    Configuration conf = new Configuration();
    HadoopUtil.delete(conf, samples);
    HadoopUtil.delete(conf, output);

    RandomUtils.useTestSeed();
    DisplayClustering.generateSamples();
    writeSampleData(samples);
    Path affinities = new Path(output, AFFINITIES);
    FileSystem fs = FileSystem.get(output.toUri(), conf);
    if (!fs.exists(output)) {
      fs.mkdirs(output);
    }
    Writer writer = null;
    try {
      writer = Files.newWriter(new File(affinities.toString()), Charsets.UTF_8);
      for (int i = 0; i < SAMPLE_DATA.size(); i++) {
        for (int j = 0; j < SAMPLE_DATA.size(); j++) {
          writer.write(i + "," + j + ',' + measure.distance(SAMPLE_DATA.get(i).get(), SAMPLE_DATA.get(j).get()) + '\n');
        }
      }
    } finally {
      Closeables.close(writer, false);
    }
View Full Code Here

    HadoopUtil.delete(conf, samples);
    HadoopUtil.delete(conf, output);
    RandomUtils.useTestSeed();
    generateSamples();
    writeSampleData(samples);
    CanopyDriver.buildClusters(conf, samples, output, new ManhattanDistanceMeasure(), T1, T2, 0, true);
    loadClustersWritable(output);

    new DisplayCanopy();
  }
View Full Code Here

  private void runClustering(Path pointsPath, Configuration conf,
      Boolean runSequential) throws IOException, InterruptedException,
      ClassNotFoundException {
    CanopyDriver.run(conf, pointsPath, clusteringOutputPath,
        new ManhattanDistanceMeasure(), 3.1, 2.1, false, 0.0, runSequential);
    Path finalClustersPath = new Path(clusteringOutputPath, "clusters-0-final");
    ClusterClassifier.writePolicy(new CanopyClusteringPolicy(),
        finalClustersPath);
  }
View Full Code Here

    for (Entry<String,Path> topLevelCluster : postProcessedClusterDirectories.entrySet()) {
      String clusterId = topLevelCluster.getKey();
      Path topLevelclusterPath = topLevelCluster.getValue();

      Path bottomLevelCluster = PathDirectory.getBottomLevelClusterPath(outputPath, clusterId);
      CanopyDriver.run(conf, topLevelclusterPath, bottomLevelCluster, new ManhattanDistanceMeasure(), 2.1,
        2.0, true, 0.0, true);
      assertBottomLevelCluster(bottomLevelCluster);
    }
  }
View Full Code Here

  }

  private void topLevelClustering(Path pointsPath, Configuration conf) throws IOException,
                                                                      InterruptedException,
                                                                      ClassNotFoundException {
    CanopyDriver.run(conf, pointsPath, outputPath, new ManhattanDistanceMeasure(), 3.1, 2.1, true, 0.0, true);
  }
View Full Code Here

  public DistanceMeasureClusterDistribution() {
  }

  public DistanceMeasureClusterDistribution(VectorWritable modelPrototype) {
    super(modelPrototype);
    this.measure = new ManhattanDistanceMeasure();
  }
View Full Code Here

    ClusteringTestUtils.writePointsToFile(points, true, new Path(pointsPath, "file1"), fs, conf);
    ClusteringTestUtils.writePointsToFile(points, true, new Path(pointsPath, "file2"), fs, conf);
   
    Path outputPath = getTestTempDirPath("output");
    // now run the Canopy job
    CanopyDriver.run(conf, pointsPath, outputPath, new ManhattanDistanceMeasure(), 3.1, 2.1, false, 0.0, false);
   
    DummyOutputCollector<Text, ClusterWritable> collector1 =
        new DummyOutputCollector<Text, ClusterWritable>();

    FileStatus[] outParts = FileSystem.get(conf).globStatus(
View Full Code Here

TOP

Related Classes of org.apache.mahout.common.distance.ManhattanDistanceMeasure

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.