Examples of org.apache.mahout.clustering.classify.ClusterClassifier

Package org.apache.mahout.clustering.classify

Examples of org.apache.mahout.clustering.classify.ClusterClassifier

org.apache.mahout.clustering.classify.ClusterClassifier
This classifier works with any ClusteringPolicy and its associated Clusters. It is initialized with a policy and a list of compatible clusters and thereafter it can classify any new Vector into one or more of the clusters based upon the pdf() function which each cluster supports. In addition, it is an OnlineLearner and can be trained. Training amounts to asking the actual model to observe the vector and closing the classifier causes all the models to computeParameters. Because a ClusterClassifier implements Writable, it can be written-to and read-from a sequence file as a single entity. For sequential and mapreduce clustering in conjunction with a ClusterIterator; however, it utilizes an exploded file format. In this format, the iterator writes the policy to a single POLICY_FILE_NAME file in the clustersOut directory and the models are written to one or more part-n files so that multiple reducers may employed to produce them.

      throw new IllegalStateException("No input clusters found in " + clustersIn + ". Check your -c argument.");
    }
    
    Path priorClustersPath = new Path(output, Cluster.INITIAL_CLUSTERS_DIR);
    ClusteringPolicy policy = new KMeansClusteringPolicy(convergenceDelta);
    ClusterClassifier prior = new ClusterClassifier(clusters, policy);
    prior.writeToSeqFiles(priorClustersPath);
    
    if (runSequential) {
      ClusterIterator.iterateSeq(conf, input, priorClustersPath, output, maxIterations);
    } else {
      ClusterIterator.iterateMR(conf, input, priorClustersPath, output, maxIterations);

View Full Code Here

      throw new IllegalStateException("No input clusters found in " + clustersIn + ". Check your -c argument.");
    }
    
    Path priorClustersPath = new Path(output, Cluster.INITIAL_CLUSTERS_DIR);   
    ClusteringPolicy policy = new FuzzyKMeansClusteringPolicy(m, convergenceDelta);
    ClusterClassifier prior = new ClusterClassifier(clusters, policy);
    prior.writeToSeqFiles(priorClustersPath);
    
    if (runSequential) {
      ClusterIterator.iterateSeq(conf, input, priorClustersPath, output, maxIterations);
    } else {
      ClusterIterator.iterateMR(conf, input, priorClustersPath, output, maxIterations);

View Full Code Here


  @Override
  protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String priorClustersPath = conf.get(ClusterIterator.PRIOR_PATH_KEY);
    classifier = new ClusterClassifier();
    classifier.readFromSeqFiles(conf, new Path(priorClustersPath));
    policy = classifier.getPolicy();
    policy.update(classifier);
    super.setup(context);
  }

View Full Code Here

      Cluster cluster = iter.next().getValue();
      first.observe(cluster);
    }
    List<Cluster> models = Lists.newArrayList();
    models.add(first);
    classifier = new ClusterClassifier(models, policy);
    classifier.close();
    context.write(key, new ClusterWritable(first));
  }

View Full Code Here


  @Override
  protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String priorClustersPath = conf.get(ClusterIterator.PRIOR_PATH_KEY);
    classifier = new ClusterClassifier();
    classifier.readFromSeqFiles(conf, new Path(priorClustersPath));
    policy = classifier.getPolicy();
    policy.update(classifier);
    super.setup(context);
  }

View Full Code Here

    List<Cluster> models = Lists.newArrayList();
    for (Model<VectorWritable> cluster : modelDist.sampleFromPrior(numClusters)) {
      models.add((Cluster) cluster);
    }
    
    ClusterClassifier prior = new ClusterClassifier(models, new DirichletClusteringPolicy(numClusters, alpha0));
    prior.writeToSeqFiles(clustersIn);
    
    if (runSequential) {
      ClusterIterator.iterateSeq(conf, input, clustersIn, output, maxIterations);
    } else {
      ClusterIterator.iterateMR(conf, input, clustersIn, output, maxIterations);

View Full Code Here

   * @param numIterations
   *          the int number of iterations to perform
   */
  public static void iterateSeq(Configuration conf, Path inPath, Path priorPath, Path outPath, int numIterations)
    throws IOException {
    ClusterClassifier classifier = new ClusterClassifier();
    classifier.readFromSeqFiles(conf, priorPath);
    Path clustersOut = null;
    int iteration = 1;
    while (iteration <= numIterations) {
      for (VectorWritable vw : new SequenceFileDirValueIterable<VectorWritable>(inPath, PathType.LIST,
          PathFilters.logsCRCFilter(), conf)) {
        Vector vector = vw.get();
        // classification yields probabilities
        Vector probabilities = classifier.classify(vector);
        // policy selects weights for models given those probabilities
        Vector weights = classifier.getPolicy().select(probabilities);
        // training causes all models to observe data
        for (Vector.Element e : weights.nonZeroes()) {
          int index = e.index();
          classifier.train(index, vector, weights.get(index));
        }
      }
      // compute the posterior models
      classifier.close();
      // update the policy
      classifier.getPolicy().update(classifier);
      // output the classifier
      clustersOut = new Path(outPath, Cluster.CLUSTERS_DIR + iteration);
      classifier.writeToSeqFiles(clustersOut);
      FileSystem fs = FileSystem.get(outPath.toUri(), conf);
      iteration++;
      if (isConverged(clustersOut, conf, fs)) {
        break;
      }

View Full Code Here

    List<Cluster> models = Lists.newArrayList();
    DistanceMeasure measure = new ManhattanDistanceMeasure();
    models.add(new Canopy(new DenseVector(2).assign(1), 0, measure));
    models.add(new Canopy(new DenseVector(2), 1, measure));
    models.add(new Canopy(new DenseVector(2).assign(-1), 2, measure));
    ClusterClassifier classifier = new ClusterClassifier(models, new CanopyClusteringPolicy());
    Vector pdf = classifier.classify(new DenseVector(2));
    assertEquals("[0,0]", "[0.200, 0.600, 0.200]", AbstractCluster.formatVector(pdf, null));
    pdf = classifier.classify(new DenseVector(2).assign(2));
    assertEquals("[2,2]", "[0.493, 0.296, 0.211]", AbstractCluster.formatVector(pdf, null));
  }

View Full Code Here

    assertEquals("[2,2]", "[0.493, 0.296, 0.211]", AbstractCluster.formatVector(pdf, null));
  }
  
  @Test
  public void testClusterClassification() {
    ClusterClassifier classifier = newKlusterClassifier();
    Vector pdf = classifier.classify(new DenseVector(2));
    assertEquals("[0,0]", "[0.200, 0.600, 0.200]", AbstractCluster.formatVector(pdf, null));
    pdf = classifier.classify(new DenseVector(2).assign(2));
    assertEquals("[2,2]", "[0.493, 0.296, 0.211]", AbstractCluster.formatVector(pdf, null));
  }

View Full Code Here

    List<Cluster> models = Lists.newArrayList();
    DistanceMeasure measure = new ManhattanDistanceMeasure();
    models.add(new MeanShiftCanopy(new DenseVector(2).assign(1), 0, measure));
    models.add(new MeanShiftCanopy(new DenseVector(2), 1, measure));
    models.add(new MeanShiftCanopy(new DenseVector(2).assign(-1), 2, measure));
    ClusterClassifier classifier = new ClusterClassifier(models, new MeanShiftClusteringPolicy());
    classifier.classify(new DenseVector(2));
  }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.mahout.clustering.classify.ClusterClassifier

org.apache.hadoop.conf.Configuration

org.apache.hadoop.fs.FileSystem

org.apache.hadoop.fs.Path

org.apache.hadoop.io.IntWritable

org.apache.hadoop.io.Text

org.apache.hadoop.io.Writable

org.apache.mahout.clustering.Cluster

org.apache.mahout.clustering.dirichlet.DirichletDriver

org.apache.mahout.clustering.dirichlet.TestL1ModelClustering

org.apache.mahout.clustering.dirichlet.TestMapReduce

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.