Package edu.ucla.sspace.clustering.criterion

Source Code of edu.ucla.sspace.clustering.criterion.E1Function

/*
* Copyright 2011 Keith Stevens
*
* This file is part of the S-Space package and is covered under the terms and
* conditions therein.
*
* The S-Space package is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation and distributed hereunder to you.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
* EXPRESS OR IMPLIED ARE MADE.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
* NO REPRESENTATIONS OR WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY
* PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
* WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
* RIGHTS.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package edu.ucla.sspace.clustering.criterion;

import edu.ucla.sspace.common.Similarity;

import edu.ucla.sspace.matrix.Matrix;

import edu.ucla.sspace.vector.DenseDynamicMagnitudeVector;
import edu.ucla.sspace.vector.DoubleVector;
import edu.ucla.sspace.vector.VectorMath;

import java.util.List;


/**
* This {@link CriterionFunction} measures the external differences between
* clusters.  It gives a better score to clustering solutions with centroids
* that are more distant from the centroid for the data set as a whole.
*
* @author Keith Stevens
*/
public class E1Function extends BaseFunction {

    /**
     * The centroid for all data points if they were assigned to a single
     * cluster.
     */
    private DoubleVector completeCentroid;

    /**
     * The the dot product between each cluster and {@code completeCentroid}.
     */
    private double[] simToComplete;

    /**
     * Constructs a new {@link E1Function}.
     */
    public E1Function() {
    }

    /**
     * A package private constructor for all {@link CriterionFunction}s
     * subclassing from this {@link BaseFunction}.  This is to facilitate the
     * implementation of {@link HybridBaseFunction}.  The provided objects are
     * intended to replace those that would have been computed by {@link
     * #setup(Matrix, int[], int) setup} so that one class can do this work once
     * and then share the computed values with other functions.
     *
     * @param matrix The list of normalized data points that are to be
     *        clustered
     * @param centroids The set of centroids associated with the dataset.
     * @param costs The set of costs for each centroid.
     * @param assignments The initial assignments for each cluster.
     * @param clusterSizes The size of each cluster.
     * @param completeCentroid The new summation vector of all data points
     * @param simToComplete The distance from each cluster to {@code
     *        completeCentroid}
     */
    E1Function(List<DoubleVector> matrix,
               DoubleVector[] centroids,
               double[] costs,
               int[] assignments,
               int[] clusterSizes,
               DoubleVector completeCentroid,
               double[] simToComplete) {
        super(matrix, centroids, costs, assignments, clusterSizes);
        this.completeCentroid = completeCentroid;
        this.simToComplete = simToComplete;
    }

    /**
     * {@inheritDoc}
     */
    protected void subSetup(Matrix m) {
        completeCentroid = new DenseDynamicMagnitudeVector(m.rows());
        for (DoubleVector v : matrix)
            VectorMath.add(completeCentroid, v);

        simToComplete = new double[centroids.length];
        for (int c = 0; c < centroids.length; ++c)
            simToComplete[c] = VectorMath.dotProduct(
                    centroids[c], completeCentroid);
    }

    /**
     * {@inheritDoc}
     */
    protected double getOldCentroidScore(DoubleVector vector,
                                         int oldCentroidIndex,
                                         int altClusterSize) {
        double newScore = simToComplete[oldCentroidIndex];
        newScore -= VectorMath.dotProduct(completeCentroid, vector);
        newScore /= subtractedMagnitude(centroids[oldCentroidIndex], vector);
        newScore *= altClusterSize;
        return newScore;
    }

    /**
     * {@inheritDoc}
     */
    protected double getNewCentroidScore(int newCentroidIndex,
                                         DoubleVector dataPoint) {
        double newScore = VectorMath.dotProduct(completeCentroid, dataPoint);
        newScore += simToComplete[newCentroidIndex];
        newScore /= modifiedMagnitude(centroids[newCentroidIndex], dataPoint);
        newScore *= (clusterSizes[newCentroidIndex] + 1);
        return newScore;
    }

    /**
     * {@inheritDoc}
     */
    public boolean isMaximize() {
        return false;
    }

    /**
     * {@inheritDoc}
     */
    protected void updateScores(int newCentroidIndex,
                                int oldCentroidIndex,
                                DoubleVector vector) {
        simToComplete[newCentroidIndex] += VectorMath.dotProduct(
                completeCentroid, vector);
        simToComplete[oldCentroidIndex] -= VectorMath.dotProduct(
                completeCentroid, vector);
    }
}
TOP

Related Classes of edu.ucla.sspace.clustering.criterion.E1Function

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle. Contact coftware#gmail.com.