Package edu.ucla.sspace.matrix

Source Code of edu.ucla.sspace.matrix.MinSimilarityAffinityMatrixCreator

package edu.ucla.sspace.matrix;

import edu.ucla.sspace.similarity.SimilarityFunction;

import edu.ucla.sspace.vector.DoubleVector;
import edu.ucla.sspace.vector.SparseDoubleVector;

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOError;
import java.io.IOException;
import java.io.PrintWriter;

import java.util.Locale;

import java.util.logging.Logger;


/**
* This {@link AffinityMatrixCreator} adds an edge between two data points, i
* and j, if the similarity between them is above a certain threshold.  This
* relationship is symmetric.
*
* @author David Jurgens
* @author Keith Stevens
*/
public class MinSimilarityAffinityMatrixCreator
        implements AffinityMatrixCreator {

    private static final Logger LOG =
        Logger.getLogger(MinSimilarityAffinityMatrixCreator.class.getName());

    private SimilarityFunction edgeSim;

    private SimilarityFunction kernelSim;

    private double edgeSimThreshold;

    public void setParams(double... params) {
        this.edgeSimThreshold = params[0];
    }

    /**
     * {@inheritDoc}
     */
    public void setFunctions(SimilarityFunction edgeSim,
                             SimilarityFunction kernelSim) {
        this.edgeSim = edgeSim;
        this.kernelSim = kernelSim;
    }

    /**
     * {@inheritDoc}
     */
    public MatrixFile calculate(Matrix input) {
        try {
            File affMatrixFile = File.createTempFile("affinty-matrix",".dat");
            PrintWriter affMatrixWriter = new PrintWriter(affMatrixFile);
           
            int rows = input.rows();

            // Iterate through each row, i, in the data matrix and compare row i
            // to each proceeding row, j.  If the similarity is above the edge
            // similarity threshold, emit an edge between row i and row j and
            // between row j and row i, assuming that the edge similarity metric
            // is symmetric.  Each edge is written in the Matlab Sparse matrix
            // format.
            for (int i = 0; i < rows; ++i) {
                LOG.fine("computing affinity for row " + i);
                DoubleVector row1 = input.getRowVector(i);
                for (int j = i+1; j < rows; ++j) {
                    DoubleVector row2 = input.getRowVector(j);

                    double dataSimilarity = edgeSim.sim(row1, row2);

                    // If the edge similarity is above the threshold, compute
                    // the kernel similarity for each new edge.
                    if (dataSimilarity > edgeSimThreshold) {
                        double edgeWeight = kernelSim.sim(row1, row2);
                        affMatrixWriter.printf("%d %d %f\n",i+1,j+1,edgeWeight);

                        // If the kernel metric is symmetric, just reuse the
                        // previously calculated edge weight.  Otherwise
                        // recalculate it.
                        edgeWeight = (kernelSim.isSymmetric())
                            ? edgeWeight
                            : kernelSim.sim(row2, row1);
                        affMatrixWriter.printf("%d %d %f\n",j+1,i+1,edgeWeight);
                    }
                }
            }

            affMatrixWriter.close();   
            return new MatrixFile(affMatrixFile, MatrixIO.Format.MATLAB_SPARSE);
        } catch (IOException ioe) {
            throw new IOError(ioe);
        }
    }


    /**
     * {@inheritDoc}
     */
    public MatrixFile calculate(MatrixFile input) {
        return calculate(input, false);
    }

    /**
     * {@inheritDoc}
     */
    public MatrixFile calculate(MatrixFile input, boolean useColumns) {
        File matrixFile = input.getFile();
        MatrixIO.Format format = input.getFormat();

        // IMPLEMENTATION NOTE: since the user has requested the matrix be dealt
        // with as a file, we need to keep the matrix on disk.  However, the
        // input matrix format may not be conducive to efficiently comparing
        // rows with each other (e.g. MATLAB_SPARSE is inefficient), so convert
        // the matrix to a better format.
        try {
            LOG.fine("Converting input matrix to new format for faster " +
                        "calculation of the affinity matrix");
            // Keep the matrix on disk, but convert it to a transposed SVDLIBC
            // sparse binary, which allows for easier efficient row-by-row
            // comparisons (which are really columns).  Note that if the data is
            // already in this format, the conversion is a no-op.
            //
            // NOTE: the !useColumns is used for the transpose because if we
            // want to use the rows, we need the data transposed to begin with
            // since the SVDLIBC sparse binary will give us column information
            // to start with
            File converted =
                MatrixIO.convertFormat(matrixFile, format,
                                       MatrixIO.Format.SVDLIBC_SPARSE_BINARY,
                                       !useColumns);
            LOG.fine("Calculating the affinity matrix");

            // Read off the matrix dimensions
            DataInputStream dis = new DataInputStream(
                new BufferedInputStream(new FileInputStream(converted)));
            // CRITICAL NOTE: because we are interpreting the columns as rows,
            // the dimensions are read in *reverse order* from how they are
            // stored in the file.
            int cols = dis.readInt();
            int rows = dis.readInt();
            dis.close();
           
            // Once we know the matrix dimensions, create an iterator over the
            // data, and repeatedly loop through the columns (which are really
            // rows in the original matrix) to create the affinity matrix.
            File affMatrixFile = File.createTempFile("affinity-matrix",".dat");
            PrintWriter affMatrixWriter = new PrintWriter(affMatrixFile);

            // Keep track of the first row and have a reference to the next row.
            // The nextRow reference avoid us having to advance into data
            // unnecessarily to retrieval the vector for processing to start
            SparseDoubleVector curRow = null;
            SparseDoubleVector nextRow = null;

            SvdlibcSparseBinaryFileRowIterator matrixIter =
                new SvdlibcSparseBinaryFileRowIterator(converted);
           
            for (int row = 0; row < rows; ++row) {
                LOG.fine("computing affinity for row " + row);

                // Loop through each of the rows, gathering the statistics
                // necessary to compute the affinity matrix.
                for (int other = 0; other < rows; ++other) {

                    // Special case for the very first row
                    if (row == 0 && curRow == null) {
                        curRow = matrixIter.next();
                        continue;
                    }
                   
                    SparseDoubleVector otherRow = matrixIter.next();

                    // Special case for the similarity threshold, which is
                    // symmetric.  In this case, we can skip over processing any
                    // rows that occur before the current row
                    if (other < row)
                        continue;

                    // Save the row that will be used next so we have it to do
                    // comparisons with for earlier rows in the file
                    if (other == row + 1)
                        nextRow = otherRow;

                    // Determine if the current row and the other row should be
                    // linked in the affinity matrix.  For code simplicity, both
                    // the k-nearest neighbors and the similarity threshold code
                    // are supported within the I/O, with the caller specifying
                    // which to use.
                    double dataSimilarity = edgeSim.sim(curRow, otherRow);
                   
                    if (dataSimilarity > edgeSimThreshold) {
                        double edgeWeight = kernelSim.sim(curRow, otherRow);
                        affMatrixWriter.printf("%d %d %f\n",
                                               row+1, other+1, edgeWeight);

                        // If the kernel metric is symmetric, just reuse the
                        // previously calculated edge weight.  Otherwise
                        // recalculate it.
                        edgeWeight = (kernelSim.isSymmetric())
                            ? edgeWeight
                            : kernelSim.sim(otherRow, curRow);

                        affMatrixWriter.printf("%d %d %f\n",
                                               other+1, row+1, edgeWeight);
                    }
                }
                curRow = nextRow;
                matrixIter.reset();
            }

            // Finish writing the matrix
            affMatrixWriter.close();
            return new MatrixFile(affMatrixFile, MatrixIO.Format.MATLAB_SPARSE);
        } catch (IOException ioe) {
            throw new IOError(ioe);
        }
    }
}
TOP

Related Classes of edu.ucla.sspace.matrix.MinSimilarityAffinityMatrixCreator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.