Source Code of de.lmu.ifi.dbs.elki.evaluation.clustering.EvaluateClustering$Parameterizer

package de.lmu.ifi.dbs.elki.evaluation.clustering;


/*
 This file is part of ELKI:
 Environment for Developing KDD-Applications Supported by Index-Structures


 Copyright (C) 2012
 Ludwig-Maximilians-Universität München
 Lehr- und Forschungseinheit für Datenbanksysteme
 ELKI Development Team


 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.


 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU Affero General Public License for more details.


 You should have received a copy of the GNU Affero General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */


import java.util.Iterator;
import java.util.List;


import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.result.BasicResult;
import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.result.ResultUtil;
import de.lmu.ifi.dbs.elki.result.textwriter.TextWriteable;
import de.lmu.ifi.dbs.elki.result.textwriter.TextWriterStream;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;


/**
 * Evaluate a clustering result by comparing it to an existing cluster label.
 * 
 * @author Erich Schubert
 * 
 * @apiviz.landmark
 * @apiviz.uses ClusterContingencyTable
 * @apiviz.has EvaluateClustering.ScoreResult oneway - - «create»
 */
public class EvaluateClustering implements Evaluator {
  /**
   * Logger for debug output.
   */
  protected static final Logging logger = Logging.getLogger(EvaluateClustering.class);


  /**
   * Parameter to obtain the reference clustering. Defaults to a flat label
   * clustering.
   */
  public static final OptionID REFERENCE_ID = OptionID.getOrCreateOptionID("paircounting.reference", "Reference clustering to compare with. Defaults to a by-label clustering.");


  /**
   * Parameter flag for special noise handling.
   */
  public static final OptionID NOISE_ID = OptionID.getOrCreateOptionID("paircounting.noisespecial", "Use special handling for noise clusters.");


  /**
   * Parameter flag to disable self-pairing
   */
  public static final OptionID SELFPAIR_ID = OptionID.getOrCreateOptionID("paircounting.selfpair", "Enable self-pairing for cluster comparison.");


  /**
   * Reference algorithm.
   */
  private ClusteringAlgorithm<?> referencealg;


  /**
   * Apply special handling to noise "clusters".
   */
  private boolean noiseSpecialHandling;


  /**
   * Use self-pairing in pair-counting measures
   */
  private boolean selfPairing;


  /**
   * Constructor.
   * 
   * @param referencealg Reference clustering
   * @param noiseSpecialHandling Noise handling flag
   * @param selfPairing Self-pairing flag
   */
  public EvaluateClustering(ClusteringAlgorithm<?> referencealg, boolean noiseSpecialHandling, boolean selfPairing) {
    super();
    this.referencealg = referencealg;
    this.noiseSpecialHandling = noiseSpecialHandling;
    this.selfPairing = selfPairing;
  }


  @Override
  public void processNewResult(HierarchicalResult baseResult, Result result) {
    Database db = ResultUtil.findDatabase(baseResult);
    List<Clustering<?>> crs = ResultUtil.getClusteringResults(result);
    if(crs == null || crs.size() < 1) {
      return;
    }
    // Compute the reference clustering
    Clustering<?> refc = null;
    // Try to find an existing reference clustering (globally)
    if(refc == null) {
      Iterator<Clustering<?>> cs = ResultUtil.filteredResults(baseResult, Clustering.class);
      while(cs.hasNext()) {
        Clustering<?> test = cs.next();
        if(isReferenceResult(test)) {
          refc = test;
          break;
        }
      }
    }
    // Try to find an existing reference clustering (locally)
    if(refc == null) {
      Iterator<Clustering<?>> cs = ResultUtil.filteredResults(result, Clustering.class);
      while(cs.hasNext()) {
        Clustering<?> test = cs.next();
        if(isReferenceResult(test)) {
          refc = test;
          break;
        }
      }
    }
    if(refc == null) {
      logger.debug("Generating a new reference clustering.");
      Result refres = referencealg.run(db);
      List<Clustering<?>> refcrs = ResultUtil.getClusteringResults(refres);
      if(refcrs.size() == 0) {
        logger.warning("Reference algorithm did not return a clustering result!");
        return;
      }
      if(refcrs.size() > 1) {
        logger.warning("Reference algorithm returned more than one result!");
      }
      refc = refcrs.get(0);
    }
    else {
      logger.debug("Using existing clustering: " + refc.getLongName() + " " + refc.getShortName());
    }
    for(Clustering<?> c : crs) {
      if(c == refc) {
        continue;
      }
      ClusterContingencyTable contmat = new ClusterContingencyTable(selfPairing, noiseSpecialHandling);
      contmat.process(refc, c);


      db.getHierarchy().add(c, new ScoreResult(contmat));
    }
  }


  private boolean isReferenceResult(Clustering<?> t) {
    // FIXME: don't hard-code strings
    if(t.getShortName().startsWith("bylabel-")) {
      return true;
    }
    if(t.getShortName().startsWith("bymodel-")) {
      return true;
    }
    return false;
  }


  /**
   * Result object for outlier score judgements.
   * 
   * @author Erich Schubert
   * 
   * @apiviz.composedOf ClusterContingencyTable
   */
  public static class ScoreResult extends BasicResult implements TextWriteable {
    /**
     * Cluster contingency table
     */
    protected ClusterContingencyTable contmat;


    /**
     * Constructor.
     * 
     * @param contmat score result
     */
    public ScoreResult(ClusterContingencyTable contmat) {
      super("Cluster-Evalation", "cluster-evaluation");
      this.contmat = contmat;
    }


    /**
     * Get the contingency table
     * 
     * @return the contingency table
     */
    public ClusterContingencyTable getContingencyTable() {
      return contmat;
    }


    @Override
    public void writeToText(TextWriterStream out, String label) {
      out.commentPrint("Pair-F1, ");
      out.commentPrint("Pair-Precision, ");
      out.commentPrint("Pair-Recall, ");
      out.commentPrint("Pair-Rand, ");
      out.commentPrint("Pair-AdjustedRand, ");
      out.commentPrint("Pair-FowlkesMallows, ");
      out.commentPrint("Pair-Jaccard, ");
      out.commentPrint("Pair-Mirkin, ");
      out.commentPrint("Entropy-VI, ");
      out.commentPrint("Entropy-NormalizedVI, ");
      out.commentPrint("Entropy-F1, ");
      out.commentPrint("Edit-F1, ");
      out.commentPrint("SM-InvPurity, ");
      out.commentPrint("SM-Purity, ");
      out.commentPrint("SM-F1, ");
      out.commentPrint("BCubed-Precision, ");
      out.commentPrint("BCubed-Recall, ");
      out.commentPrint("BCubed-F1");
      out.flush();
      out.inlinePrint(contmat.getPaircount().f1Measure());
      out.inlinePrint(contmat.getPaircount().precision());
      out.inlinePrint(contmat.getPaircount().recall());
      out.inlinePrint(contmat.getPaircount().randIndex());
      out.inlinePrint(contmat.getPaircount().adjustedRandIndex());
      out.inlinePrint(contmat.getPaircount().fowlkesMallows());
      out.inlinePrint(contmat.getPaircount().jaccard());
      out.inlinePrint(contmat.getPaircount().mirkin());
      out.inlinePrint(contmat.getEntropy().variationOfInformation());
      out.inlinePrint(contmat.getEntropy().normalizedVariationOfInformation());
      out.inlinePrint(contmat.getEdit().f1Measure());
      out.inlinePrint(contmat.getSetMatching().inversePurity());
      out.inlinePrint(contmat.getSetMatching().purity());
      out.inlinePrint(contmat.getSetMatching().f1Measure());
      out.inlinePrint(contmat.getBCubed().precision());
      out.inlinePrint(contmat.getBCubed().recall());
      out.inlinePrint(contmat.getBCubed().f1Measure());
      out.flush();
    }
  }


  /**
   * Parameterization class.
   * 
   * @author Erich Schubert
   * 
   * @apiviz.exclude
   */
  public static class Parameterizer extends AbstractParameterizer {
    protected ClusteringAlgorithm<?> referencealg = null;


    protected boolean noiseSpecialHandling = false;


    protected boolean selfPairing = false;


    @Override
    protected void makeOptions(Parameterization config) {
      super.makeOptions(config);
      ObjectParameter<ClusteringAlgorithm<?>> referencealgP = new ObjectParameter<ClusteringAlgorithm<?>>(REFERENCE_ID, ClusteringAlgorithm.class, ByLabelClustering.class);
      if(config.grab(referencealgP)) {
        referencealg = referencealgP.instantiateClass(config);
      }


      Flag noiseSpecialHandlingF = new Flag(NOISE_ID);
      if(config.grab(noiseSpecialHandlingF)) {
        noiseSpecialHandling = noiseSpecialHandlingF.getValue();
      }


      Flag selfPairingF = new Flag(SELFPAIR_ID);
      if(config.grab(selfPairingF)) {
        selfPairing = selfPairingF.getValue();
      }
    }


    @Override
    protected EvaluateClustering makeInstance() {
      return new EvaluateClustering(referencealg, noiseSpecialHandling, !selfPairing);
    }
  }
}
Source Code of de.lmu.ifi.dbs.elki.evaluation.clustering.EvaluateClustering$Parameterizer

Related Classes of de.lmu.ifi.dbs.elki.evaluation.clustering.EvaluateClustering$Parameterizer