Examples of edu.ucla.sspace.vector.DoubleVector

edu.ucla.sspace.vector.DoubleVector
A generalized interface for vectors. This interface allows implementations to implement the vector with any kind of underlying data type, but the input and output data types must be doubles.
Methods which modify the state of a {@code Vector} are optional. Implementations that are not modifiable should throw an {@code UnsupportedOperationException} if such methods are called. These methods are marked as "optional" in the specification for the interface. @author Keith Stevens

    
    private static IntPair pickFirstTwo(Matrix dataPoints, 
                                        SimilarityFunction simFunc, 
                                        int[] weights, double[] inverseSimilarities) {
  double OPT_1 = 0; // optimal 1-means cost.
  DoubleVector centerOfMass = new DenseVector(dataPoints.columns());
  double sum = 0d;
  int rows = dataPoints.rows();
        int cols = dataPoints.columns();
        double[] probs = new double[rows];
  int totalWeight = 0; 


  for (int i = 0; i < rows; i++) {
            DoubleVector v = dataPoints.getRowVector(i);
            int weight = weights[i];
            // Update the center of mass for the entire solution based on
            // this point scaled by its weight
            VectorMath.add(centerOfMass, new ScaledDoubleVector(v, weight));
            totalWeight += weight;
        }
         
        // Then rescale the center of mass based on the total weight
        for (int j = 0; j < cols; j++) 
            centerOfMass.set(j, centerOfMass.get(j) / totalWeight);
        
        
        for (int i = 0; i < rows; i++) {
            double sim = simFunc.sim(centerOfMass, dataPoints.getRowVector(i));
            sim = invertSim(sim);
            inverseSimilarities[i] = sim;
            OPT_1 += sim * weights[i];
        }


        // Compute the probability mass of picking the first mean 
        for (int i = 0; i < rows; i++) {
            probs[i] = (OPT_1 + totalWeight * inverseSimilarities[i])
                / (2 * totalWeight * OPT_1);
            sum += probs[i];
        }
  
        // Normalize the relative mass assigned to each point to create a true
        // probability distribution that sums to 1.
        for (int i = 0; i < rows; i++) 
            probs[i] = probs[i] / sum;


  // Select the first center with probability proportional to its
  // dissimilarity from the center of mass
  int c1 = selectWithProb(probs);
        DoubleVector y = dataPoints.getRowVector(c1);


        // Keep the inverse similarity from the first center to the center of
        // mass
  double invSimFromCtrToC1 = invertSim(simFunc.sim(y, centerOfMass));
        
  // Recalculate inverseSimilarities and probs for selecting the second point.  Also
  // reset the probability of picking the first center again to 0
  sum = 0.0; 
  probs[c1] = 0;
        for (int i = 0; i < rows; i++) {
            // Skip assigning any probability mass to the first center's index
            // since it has already been selected
            if (i == c1) 
                continue;
            
            double sim = invertSim(simFunc.sim(dataPoints.getRowVector(i), y))
                * weights[i];
            inverseSimilarities[i] = sim;
            probs[i] = sim / ( OPT_1 + totalWeight * invSimFromCtrToC1);
            sum += probs[i];
        }


        // Normalize the probability masses to be probabilities
        for (int i = 0; i < rows; i++) 
    probs[i] = probs[i] / sum;
  
        // Select a second center
  int c2 = selectWithProb(probs);
        DoubleVector z = dataPoints.getRowVector(c2);


  inverseSimilarities[c1] = 0;
  inverseSimilarities[c2] = 0;
  
        // For each of the non-center points, assign its initial inverse
        // similarity (i.e., distance) to be the minimum to either of the two
        // centers
        for (int i = 0; i < rows; i++) {
            DoubleVector v = dataPoints.getRowVector(i);
            double sim1 = simFunc.sim(v, y); // center 1
            sim1 = invertSim(sim1);
            double sim2 = simFunc.sim(v, z); // center 2
            sim2 = invertSim(sim2);
            inverseSimilarities[i] = Math.min(sim1, sim2);

View Full Code Here

     */
    private static void updateNearestCenter(double inverseSimilarities[], Matrix dataPoints,
                                            int newlyChosen, 
                                            SimilarityFunction simFunc) {
        
        DoubleVector chosenVec = dataPoints.getRowVector(newlyChosen);
  for (int i = 0; i < inverseSimilarities.length; i++) {
            double sim = simFunc.sim(dataPoints.getRowVector(i), chosenVec);
            sim = invertSim(sim);
            inverseSimilarities[i] = Math.min(inverseSimilarities[i], sim);
        }

View Full Code Here


        double bestDelta = (isMaximize()) ? 0 : Double.MAX_VALUE;
        int bestDeltaIndex = -1;


        // Get the current vector.
        DoubleVector vector = matrix.get(currentVectorIndex);


        // Get the current centroid without the current data point assigned to
        // it.  Compute the cost delta with that point removed from the cluster.
        //DoubleVector altCurrentCentroid = subtract(
        //        centroids[currentClusterIndex], vector);

View Full Code Here

     * DenseDynamicMagnitudeVector} can be used to represent the difference.
     * This vector type is optimized for when many calls to magnitude are
     * interleaved with updates to a few dimensions in the vector.
     */
    protected static DoubleVector subtract(DoubleVector c, DoubleVector v) {
        DoubleVector newCentroid = new DenseDynamicMagnitudeVector(c.length());


        // Special case sparse double vectors so that we don't incure a possibly
        // log n get operation for each zero value, as that's the common case
        // for CompactSparseVector.
        if (v instanceof SparseDoubleVector) {
            SparseDoubleVector sv = (SparseDoubleVector) v;
            int[] nonZeros = sv.getNonZeroIndices();
            int sparseIndex = 0;
            for (int i = 0; i < c.length(); ++i) {
                double value = c.get(i);
                if (sparseIndex < nonZeros.length &&
                    i == nonZeros[sparseIndex])
                    value -= sv.get(nonZeros[sparseIndex++]);


                newCentroid.set(i, value);
            }
        } else
            for (int i = 0; i < c.length(); ++i)
                newCentroid.set(i, c.get(i) - v.get(i));
        return newCentroid;
    }

View Full Code Here

        DoubleVector[] centroids = new DoubleVector[numCentroids];


        // Compute the centroid assuming that there is only one cluster to be
        // found.  This is required for computing the optimal cost of a single
        // cluster solution.
        DoubleVector singleCentroid = new DenseVector(dataPoints.columns());
        for (int i = 0; i < numDataPoints; ++i)
            VectorMath.add(singleCentroid, dataPoints.getRowVector(i));
        singleCentroid = new ScaledDoubleVector(
                singleCentroid, 1/((double)numDataPoints));

View Full Code Here

        /**
         * {@inheritDoc}
         */
        protected DoubleVector computeSecondEigenVector(Matrix matrix,
                                                        int vectorLength) {
            DoubleVector Rinv = new DenseVector(vectorLength);
            DoubleVector baseVector = new DenseVector(vectorLength);
            for (int i = 0; i < vectorLength; ++i) {
                Rinv.set(i, 1/Math.sqrt(rho.get(i)));
                baseVector.set(i, rho.get(i) * Rinv.get(i));
            }
     
            // Step 1, generate a random vector, v,  that is orthogonal to
            // pi*D-Inverse.
            DoubleVector v = new DenseVector(vectorLength);
            for (int i = 0; i < v.length(); ++i)
                v.set(i, Math.random());


            Matrix RinvData = (matrix instanceof SparseMatrix)
                ? new RowScaledSparseMatrix((SparseMatrix) matrix, Rinv)
                : new RowScaledMatrix(matrix, Rinv);


            // Make log(matrix.rows()) passes.
            int log = (int) Statistics.log2(vectorLength);
            for (int k = 0; k < log; ++k) {
                // start the orthonormalizing the eigen vector.
                v = orthonormalize(v, baseVector);
                DoubleVector newV = computeMatrixTransposeV(RinvData, v);
                computeMatrixDotV(RinvData, newV, v);
            }


            return v;
        }

View Full Code Here


    private BitSet calculateTermFeatures(String term, int corpusSize) {
        int termIndex = termToIndex.get(term);
        LOGGER.fine(String.format("Calculating feature set for %6d/%d: %s",
                                  termIndex, cooccurrenceMatrix.rows(), term));
        DoubleVector cooccurrences = cooccurrenceMatrix.getRowVector(termIndex);
        int termCount = termCounts.get(termIndex).get();
        BitSet validFeatures = new BitSet(wordIndexCounter);
        
        // For each of the co-occurring terms, calculate the log-likelihood
        // value for that term's occurrences.  Only terms whose value is above
        // 3.841 will be counted as features
        for (int co = 0; co < cooccurrences.length(); ++co) {
            // Form the contingency table:
            //  a   b
            //  c   d
            double count = cooccurrences.get(co);
            // Don't include words that never co-occur as features
            if (count == 0)
                continue;
            
            // a = the number of times they both co-occur

View Full Code Here

            if (clusterAssignment.get(row).assignments().length == 0)
                continue;                
            int assignment = clusterAssignment.get(row).assignments()[0];                
            clusterSize[assignment]++;


            DoubleVector contextVector = contexts.getRowVector(row);
            VectorMath.add(meanSenseVectors[assignment], contextVector);
        }
        
        // For each of the clusters with more than 2% of the contexts, generate
        // an average sense vector.  For those clusters with less than that

View Full Code Here

        // Setup the best cost and index for the best cost.
        double bestTotal = totalCost;
        int bestDeltaIndex = -1;


        // Get the current vector.
        DoubleVector vector = matrix.get(currentVectorIndex);


        // Get the base cost for removing the data point from the current
        // cluster.
        double baseE1Delta = 0;
        double baseI1Delta = 0;

View Full Code Here

            for (int i = 0; i < rows; ++i) {
                LOG.fine("computing affinity for row " + i);
                MultiMap<Double,Integer> neighborMap = rc.getMostSimilar(
                        input, i, numNearestNeighbors, edgeSim);


                DoubleVector row = input.getRowVector(i);
                for (int n : neighborMap.values()) {
                    double edgeWeight = kernelSim.sim(
                            row, input.getRowVector(n));
                    affMatrixWriter.printf("%d %d %f\n", i+1 ,n+1, edgeWeight);
                }

View Full Code Here

0 1 2 3 4 5

TOP

Related Classes of edu.ucla.sspace.vector.DoubleVector

edu.ucla.sspace.beagle.Beagle

edu.ucla.sspace.clustering.Assignments

edu.ucla.sspace.clustering.BaseSpectralCut

edu.ucla.sspace.clustering.CKVWSpectralClustering03$SpectralCut

edu.ucla.sspace.clustering.CKVWSpectralClustering06$SuperSpectralCut

edu.ucla.sspace.clustering.criterion.BaseFunction

edu.ucla.sspace.clustering.criterion.HybridBaseFunction

edu.ucla.sspace.clustering.DirectClustering

edu.ucla.sspace.clustering.FastStreamingKMeans

edu.ucla.sspace.clustering.LinkClustering$LazySimilarityMatrix

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.