Package edu.ucla.sspace.matrix

Source Code of edu.ucla.sspace.matrix.RowComparator$Comparison

/*
* Copyright 2009 David Jurgens
*
* This file is part of the S-Space package and is covered under the terms and
* conditions therein.
*
* The S-Space package is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation and distributed hereunder to you.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
* EXPRESS OR IMPLIED ARE MADE.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
* NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
* PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
* WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
* RIGHTS.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package edu.ucla.sspace.matrix;

import edu.ucla.sspace.common.Similarity;

import edu.ucla.sspace.util.BoundedSortedMultiMap;
import edu.ucla.sspace.util.MultiMap;
import edu.ucla.sspace.util.SortedMultiMap;
import edu.ucla.sspace.util.WorkQueue;

import edu.ucla.sspace.similarity.SimilarityFunction;

import edu.ucla.sspace.vector.Vector;


/**
* A utility class for finding the {@code k} most-similar words to a provided
* row in a {@link Matrix}.  The comparisons required for generating the list
* maybe be run in parallel by configuring an instance of this class to use
* multiple threads. <p>
*
* All instances of this class are thread-safe.
*
* @author David Jurgens
*/
public class RowComparator {

    /**
     * The work used by all {@code LinkClustering} instances to perform
     * multi-threaded operations.
     */
    private final WorkQueue workQueue;

    /**
     * Creates this {@code WordComparator} with as many threads as processors.
     */
    public RowComparator() {
        this(Runtime.getRuntime().availableProcessors());
    }
   
    /**
     * Creates this {@code WordComparator} with the specified number of threads.
     */
    public RowComparator(int numProcs) {
        this.workQueue = WorkQueue.getWorkQueue(numProcs);
    }

    /**
     * Compares the specified row to all other rows, returning the k-nearest
     * rows according to the similarity metric.
     *
     * @param m The {@link Matrix} containing data points to be compared
     * @param row The current row in {@code m} to be compared against all other
     *        rows
     * @param kNearestRows The number of most similar rows to retain
     * @param similarityType The similarity method to use when comparing rows
     *
     * @return a mapping from the similarity to the {@code kNearestRows} most
     *         similar rows
     */
    public SortedMultiMap<Double,Integer> getMostSimilar(
            Matrix m, int row,
            int kNearestRows, Similarity.SimType similarityType) {
        return getMostSimilar(m, row, kNearestRows,
                              Similarity.getSimilarityFunction(similarityType));
    }

    /**
     * Compares the specified row to all other rows, returning the k-nearest
     * rows according to the similarity metric.
     *
     * @param m The {@link Matrix} containing data points to be compared
     * @param row The current row in {@code m} to be compared against all other
     *        rows
     * @param kNearestRows The number of most similar rows to retain
     * @param simFunction The {@link SimilarityFunction} to use when comparing
     *        rows
     * @return a mapping from the similarity to the k most similar rows
     */
    public SortedMultiMap<Double,Integer> getMostSimilar(
            Matrix m, int row,
            int kNearestRows, SimilarityFunction simFunction) {
       
       
        Object key = workQueue.registerTaskGroup(m.rows() - 1);

        // the most-similar set will automatically retain only a fixed number of
        // elements
        final SortedMultiMap<Double,Integer> mostSimilar =
            new BoundedSortedMultiMap<Double,Integer>(kNearestRows, false);

        // loop through all the other words computing their similarity
        int rows = m.rows();
        Vector v = m.getRowVector(row);
        for (int i = 0; i < rows; ++i) {
            // skip same row
            if (i == row)
                continue;

            workQueue.add(key,
                          new Comparison(m, v, i, simFunction, mostSimilar));
        }
       
        // Wait for all the partition densities to be calculated
        workQueue.await(key);
       
        return mostSimilar;
    }

    /**
     * A comparison task that compares the row vector and updates the mapping
     * from similarity to row.
     */
    private static class Comparison implements Runnable {
       
        private final Matrix m;
        private final Vector row;
        private final int otherRow;
        private final SimilarityFunction simFunction;
        private final MultiMap<Double,Integer> mostSimilar;

        public Comparison(Matrix m,
                          Vector row,
                          int otherRow,
                          SimilarityFunction simFunction,
                          MultiMap<Double,Integer> mostSimilar) {
            this.m = m;
            this.row = row;
            this.otherRow = otherRow;
            this.simFunction = simFunction;
            this.mostSimilar = mostSimilar;
        }

        public void run() {
            Double similarity = simFunction.sim(
                    row, m.getRowVector(otherRow));
           
            // lock on the Map, as it is not thread-safe
            synchronized(mostSimilar) {
                mostSimilar.put(similarity, otherRow);
            }
        }
    }
}
TOP

Related Classes of edu.ucla.sspace.matrix.RowComparator$Comparison

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.