Package org.apache.mahout.clustering.dirichlet

Source Code of org.apache.mahout.clustering.dirichlet.DisplayOutputState

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.mahout.clustering.dirichlet;

import java.awt.BasicStroke;
import java.awt.Graphics;
import java.awt.Graphics2D;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.mapred.JobConf;
import org.apache.mahout.clustering.dirichlet.models.Model;
import org.apache.mahout.clustering.dirichlet.models.NormalModel;
import org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution;
import org.apache.mahout.clustering.kmeans.KMeansDriver;
import org.apache.mahout.matrix.AbstractVector;
import org.apache.mahout.matrix.DenseVector;
import org.apache.mahout.matrix.Vector;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.FileLineIterable;

class DisplayOutputState extends DisplayDirichlet {
  DisplayOutputState() {
    initialize();
    this.setTitle("Dirichlet Process Clusters - Map/Reduce Results (>"
        + (int) (significance * 100) + "% of population)");
  }

  @Override
  public void paint(Graphics g) {
    super.plotSampleData(g);
    Graphics2D g2 = (Graphics2D) g;

    Vector dv = new DenseVector(2);
    int i = result.size() - 1;
    for (Model<Vector>[] models : result) {
      g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
      g2.setColor(colors[Math.min(colors.length - 1, i--)]);
      for (Model<Vector> m : models) {
        NormalModel mm = (NormalModel) m;
        dv.assign(mm.getStdDev() * 3);
        if (isSignificant(mm))
          plotEllipse(g2, mm.getMean(), dv);
      }
    }
  }

  /**
   * Return the contents of the given file as a String
   *
   * @param fileName
   *            the String name of the file
   * @return the String contents of the file
   * @throws IOException
   *             if there is an error
   */
  public static List<Vector> readFile(String fileName) throws IOException {
    List<Vector> results = new ArrayList<Vector>();
    for (String line : new FileLineIterable(new File(fileName))) {
      results.add(AbstractVector.decodeVector(line));
    }
    return results;
  }

  private static void getSamples() throws IOException {
    File f = new File("input");
    for (File g : f.listFiles())
      sampleData.addAll(readFile(g.getCanonicalPath()));
  }

  private static void getResults() throws IOException {
    result = new ArrayList<Model<Vector>[]>();
    JobConf conf = new JobConf(KMeansDriver.class);
    conf.set(DirichletDriver.MODEL_FACTORY_KEY,
        "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution");
    conf.set(DirichletDriver.NUM_CLUSTERS_KEY, Integer.toString(20));
    conf.set(DirichletDriver.ALPHA_0_KEY, Double.toString(1.0));
    File f = new File("output");
    for (File g : f.listFiles()) {
      conf.set(DirichletDriver.STATE_IN_KEY, g.getCanonicalPath());
      DirichletState<Vector> dirichletState = DirichletMapper
          .getDirichletState(conf);
      result.add(dirichletState.getModels());
    }
  }

  public static void main(String[] args) throws IOException {
    RandomUtils.useTestSeed();
    getSamples();
    getResults();
    new DisplayOutputState();
  }

  static void generateResults() {
    DisplayDirichlet.generateResults(new NormalModelDistribution());
  }
}
TOP

Related Classes of org.apache.mahout.clustering.dirichlet.DisplayOutputState

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.