Package edu.ucla.sspace.vector

Examples of edu.ucla.sspace.vector.DoubleVector


            // Choose the smaller of the two to use in computing the dot
            // product.  Because it would be more expensive to compute the
            // intersection of the two sets, we assume that any potential
            // misses would be less of a performance hit.
            if (useA) {
                DoubleVector t = a;
                a = b;
                b = t;
            }

            for (DoubleEntry e : ((Iterable<DoubleEntry>)b)) {
                int index = e.index();                   
                double aValue = a.get(index);
                double bValue = e.value();
                dotProduct += aValue * bValue;
            }
        }

        // Check whether both vectors are sparse.  If so, use only the non-zero
        // indices to speed up the computation by avoiding zero multiplications
        else if (a instanceof SparseVector && b instanceof SparseVector) {
            SparseVector svA = (SparseVector)a;
            SparseVector svB = (SparseVector)b;
            int[] nzA = svA.getNonZeroIndices();
            int[] nzB = svB.getNonZeroIndices();

            // Choose the smaller of the two to use in computing the dot
            // product.  Because it would be more expensive to compute the
            // intersection of the two sets, we assume that any potential
            // misses would be less of a performance hit.
            if (a.length() < b.length() ||
                nzA.length < nzB.length) {
                DoubleVector t = a;
                a = b;
                b = t;
            }

            for (int nz : nzB) {
                double aValue = a.get(nz);
                double bValue = b.get(nz);
                dotProduct += aValue * bValue;
            }
        }

        // Check if the second vector is sparse.  If so, use only the non-zero
        // indices of b to speed up the computation by avoiding zero
        // multiplications.
        else if (b instanceof SparseVector) {
            SparseVector svB = (SparseVector)b;
            for (int nz : svB.getNonZeroIndices())
                dotProduct += b.get(nz) * a.get(nz);
        }

        // Check if the first vector is sparse.  If so, use only the non-zero
        // indices of a to speed up the computation by avoiding zero
        // multiplications.
        else if (a instanceof SparseVector) {
            SparseVector svA = (SparseVector)a;
            for (int nz : svA.getNonZeroIndices())
                dotProduct += b.get(nz) * a.get(nz);
        }

        // Otherwise, just assume both are dense and compute the full amount
        else {
            // Swap the vectors such that b is the shorter vector and a is
            // the longer vector, or of equal length.  In the case that the two
            // vectors are of unequal length, this will prevent any calls to out
            // of bounds values in the smaller vector.
            if (a.length() < b.length()) {
                DoubleVector t = a;
                a = b;
                b = t;
            }

            for (int i = 0; i < b.length(); i++) {
View Full Code Here


            sb.append('|');
            Vector vector = sspace.getVector(word);           
            int length = vector.length();
            // Special case for the types just to make writing go a bit faster
            if (vector instanceof DoubleVector) {
                DoubleVector dv = (DoubleVector)vector;
                for (int i = 0; i < length - 1; ++i)
                    sb.append(dv.get(i)).append(" ");
                sb.append(dv.get(length - 1));
            }
            else if (vector instanceof IntegerVector) {
                IntegerVector iv = (IntegerVector)vector;
                for (int i = 0; i < length - 1; ++i)
                    sb.append(iv.get(i)).append(" ");
View Full Code Here

        protected DoubleVector computeSecondEigenVector(Matrix matrix,
                                                        int vectorLength) {
           // Compute pi and D.  Pi is the normalized form of rho.  D is a
           // diagonal matrix with sqrt(pi) as the values along the diagonal.
           // Also compute pi * D^-1.
            DoubleVector pi = new DenseVector(vectorLength);
            DoubleVector D = new DenseVector(vectorLength);
            DoubleVector piDInverse = new DenseVector(vectorLength);
            for (int i = 0; i < vectorLength; ++i) {
                double piValue = rho.get(i)/pSum;
                pi.set(i, piValue);
                if (piValue > 0d) {
                    D.set(i, Math.sqrt(piValue));
                    piDInverse.set(i, piValue / D.get(i));
                }
            }

            // Create the second largest eigenvector of a scaled form of the
            // row normalized affinity matrix.  The computation uses the
            // power method such that the affinity matrix is never explicitly
            // computed.
            // piDInverse serves as a vector which is similar to the first eigen
            // vector.  The second eigen vector is assumed to be orthogonal to
            // piDInverse.  This algorithm makes O(log(matrix.rows())) passes
            // through the data matrix.
        
            // Step 1, generate a random vector, v,  that is orthogonal to
            // pi*D-Inverse.
            DoubleVector v = new DenseVector(vectorLength);
            for (int i = 0; i < v.length(); ++i)
                v.set(i, Math.random());

            // Make log(matrix.rows()) passes.
            int log = (int) Statistics.log2(vectorLength);
            for (int k = 0; k < log; ++k) {
                // Start by orthonormalizing the eigen vector.
                v = orthonormalize(v, piDInverse);

                // Step 2, repeated, (a) normalize v (b) set v = Q*v, where Q =
                // D * R-Inverse * matrix * matrix-Transpose * D-Inverse.

                // v = Q*v is broken into 4 sub steps that allow for sparse
                // multiplications.
                // Step 2b-1) v = D-Inverse*v.
                for (int i = 0; i < vectorLength; ++ i)
                    if (D.get(i) != 0d)
                        v.set(i, v.get(i) / D.get(i));

                // Step 2b-2) v = matrix-Transpose * v.
                DoubleVector newV = computeMatrixTransposeV(matrix, v);

                // Step 2b-3) v = matrix * v.
                computeMatrixDotV(matrix, newV, v);

                // Step 2b-4) v = D*R-Inverse * v. Note that R is a diagonal
View Full Code Here

    /**
     * {@inheritDoc}
     */
    public DoubleVector getColumnVector(int column) {
        DoubleVector col = new DenseVector(rows());
        for (int r = 0; r < rows(); ++r)
            col.set(r, getRowVector(r).get(column));
        return col;
    }
View Full Code Here

        dataMatrix = matrix;
        int numRows = matrix.rows();

        // Compute the centroid of the entire data set.
        int vectorLength = matrix.rows();
        DoubleVector matrixRowSums = computeRhoSum(matrix);
        LOGGER.info("Computing the second eigen vector");
        // Compute the second largest eigenvector of the normalized affinity
        // matrix with respect to pi*D^-1, which is similar to its first eigen
        // vector.
        DoubleVector v = computeSecondEigenVector(matrix, vectorLength);

        // Sort the rows of the original matrix based on the values of the eigen
        // vector.
        Index[] elementIndices = new Index[v.length()];
        for (int i = 0; i < v.length(); ++i)
            elementIndices[i] = new Index(v.get(i), i);
        Arrays.sort(elementIndices);

        // Create a reordering mapping for the indices in the original data
        // matrix and of rho.  The ith data point and rho value will be ordered
        // based on the position of the ith value in the second eigen vector
        // after it has been sorted.
        DoubleVector sortedRho = new DenseVector(matrix.rows());
        int[] reordering = new int[v.length()];
        for (int i = 0; i < v.length(); ++i) {
            reordering[i] = elementIndices[i].index;
            sortedRho.set(i, rho.get(elementIndices[i].index));
        }

        // Create the sorted matrix based on the reordering.  Note that both row
        // masked matrices internally handle masking a masked matrix to avoid
        // recursive calls to the index lookups.
View Full Code Here

            verbose(LOGGER, "Computing principle vectors (round %d/%d)", iter+1, numIters);
            // Compute the centroids
            for (Map.Entry<Integer,Set<Integer>> e
                     : clusterAssignment.asMap().entrySet()) {
                int cluster = e.getKey();
                DoubleVector principle = new DenseVector(sspace.getVectorLength());
                principles[cluster] = principle;
                for (Integer row : e.getValue())
                    VectorMath.add(principle, termVectors.get(row));
            }

            // Reassign each element to the centroid to which it is closest
            clusterAssignment.clear();
            final int numThreads = workQueue.availableThreads();
            Object key = workQueue.registerTaskGroup(numThreads);

            for (int threadId_ = 0; threadId_ < numThreads; ++threadId_) {
                final int threadId = threadId_;
                workQueue.add(key, new Runnable() {
                        public void run() {
                            // Thread local cache of all the cluster assignments
                            MultiMap<Integer,Integer> clusterAssignment_ =
                                new HashMultiMap<Integer,Integer>();
                            // For each of the vectors that this thread is
                            // responsible for, find the principle vector to
                            // which it is closest
                            for (int i = threadId; i < numTerms; i += numThreads) {
                                DoubleVector v = termVectors.get(i);
                                double highestSim = -Double.MAX_VALUE;
                                int pVec = -1;
                                for (int j = 0; j < principles.length; ++j) {
                                    DoubleVector principle = principles[j];
                                    double sim = Similarity.cosineSimilarity(
                                        v, principle);
                                    assert sim >= -1 && sim <= 1 : "similarity "
                                        + " to principle vector " + j + " is "
                                        + "outside the expected range: " + sim;
View Full Code Here

    public SortedMultiMap<Double,String> getMostSimilar(
             Set<String> terms, int numberOfSimilarWords) {
        if (terms.isEmpty())
            return null;
        // Compute the mean vector for all the terms
        DoubleVector mean = new DenseVector(sspace.getVectorLength());
        int found = 0;
        for (String term : terms) {
            Vector v = sspace.getVector(term);
            if (v == null)
                info(LOGGER, "No vector for term " + term);
View Full Code Here

        final SortedMultiMap<Double,Map.Entry<DoubleVector,Set<String>>>
            mostSimilarPrincipleVectors = new BoundedSortedMultiMap
                <Double,Map.Entry<DoubleVector,Set<String>>>(k, false);       
        for (Map.Entry<DoubleVector,Set<String>> e :
                 principleVectorToNearestTerms.asMap().entrySet()) {
            DoubleVector pVec = e.getKey();
            double sim = Similarity.cosineSimilarity(v, pVec);
            mostSimilarPrincipleVectors.put(sim, e);
        }
       
        // Create a global map to store the results of the principle vectors'
View Full Code Here

    /**
     * {@inheritDoc}
     */
    public double getKMeansObjective() {
        // Note that the scaled vector handles any recursive scaling.
        DoubleVector centroid = new ScaledDoubleVector(
                matrixRowSums, 1/((double) dataMatrix.rows()));
        double score = 0;
        for (int r = 0; r < dataMatrix.rows(); ++r)
            score += VectorMath.dotProduct(
                    centroid, dataMatrix.getRowVector(r));
View Full Code Here

        DoubleVector[] centroids = new DoubleVector[numComparisons.length];
        int[] centroidSizes = new int[numComparisons.length];
        double intraClusterScore = 0;
        for (int i = 0; i < assignments.length; ++i) {
            int assignment = assignments[i];
            DoubleVector v = m.getRowVector(i);
            if (centroids[assignment] == null)
                centroids[assignment] = Vectors.copyOf(v);
            else {
                DoubleVector centroid = centroids[assignment];
                intraClusterScore += (centroidSizes[assignment] -
                                      VectorMath.dotProduct(v, centroid));
                VectorMath.add(centroid, v);
                numComparisons[assignment] += centroidSizes[assignment];
            }
View Full Code Here

TOP

Related Classes of edu.ucla.sspace.vector.DoubleVector

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.