Package edu.ucla.sspace.vector

Examples of edu.ucla.sspace.vector.DoubleVector


   
    private static IntPair pickFirstTwo(Matrix dataPoints,
                                        SimilarityFunction simFunc,
                                        int[] weights, double[] inverseSimilarities) {
  double OPT_1 = 0; // optimal 1-means cost.
  DoubleVector centerOfMass = new DenseVector(dataPoints.columns());
  double sum = 0d;
  int rows = dataPoints.rows();
        int cols = dataPoints.columns();
        double[] probs = new double[rows];
  int totalWeight = 0;

  for (int i = 0; i < rows; i++) {
            DoubleVector v = dataPoints.getRowVector(i);
            int weight = weights[i];
            // Update the center of mass for the entire solution based
            VectorMath.add(centerOfMass, new ScaledDoubleVector(v, weight));
            totalWeight += weight;
        }
        
        // Then rescale the center of mass based on the total weight
        for (int j = 0; j < cols; j++)
            centerOfMass.set(j, centerOfMass.get(j) / totalWeight);
       
       
        for (int i = 0; i < rows; i++) {
            double sim = simFunc.sim(centerOfMass, dataPoints.getRowVector(i));
            sim = invertSim(sim);
            inverseSimilarities[i] = sim;
            OPT_1 += sim * weights[i];
        }

        // Compute the probability mass of picking the first mean
        for (int i = 0; i < rows; i++) {
            probs[i] = (OPT_1 + totalWeight * inverseSimilarities[i])
                / (2 * totalWeight * OPT_1);
            sum += probs[i];
        }
 
        // Normalize the relative mass assigned to each point to create a true
        // probability distribution that sums to 1.
        for (int i = 0; i < rows; i++)
            probs[i] = probs[i] / sum;

  // Select the first center with probability proportional to its
  // dissimilarity from the center of mass
  int c1 = selectWithProb(probs);
        DoubleVector y = dataPoints.getRowVector(c1);

        // Keep the inverse similarity from the first center to the center of
        // mass
  double invSimFromCtrToC1 = invertSim(simFunc.sim(y, centerOfMass));
       
  // Recalculate inverseSimilarities and probs for selecting the second point.  Also
  // reset the probability of picking the first center again to 0
  sum = 0.0;
  probs[c1] = 0;
        for (int i = 0; i < rows; i++) {
            // Skip assigning any probability mass to the first center's index
            // since it has already been selected
            if (i == c1)
                continue;
           
            double sim = invertSim(simFunc.sim(dataPoints.getRowVector(i), y))
                * weights[i];
            inverseSimilarities[i] = sim;
            probs[i] = sim / ( OPT_1 + totalWeight * invSimFromCtrToC1);
            sum += probs[i];
        }

        // Normalize the probability masses to be probabilities
        for (int i = 0; i < rows; i++)
    probs[i] = probs[i] / sum;
 
        // Select a second center
  int c2 = selectWithProb(probs);
        DoubleVector z = dataPoints.getRowVector(c2);

  inverseSimilarities[c1] = 0;
  inverseSimilarities[c2] = 0;
 
        // For each of the non-center points, assign its initial inverse
        // similarity (i.e., distance) to be the minimum to either of the two
        // centers
        for (int i = 0; i < rows; i++) {
            DoubleVector v = dataPoints.getRowVector(i);
            double sim1 = simFunc.sim(v, y); // center 1
            sim1 = invertSim(sim1);
            double sim2 = simFunc.sim(v, z); // center 2
            sim2 = invertSim(sim2);
            inverseSimilarities[i] = Math.min(sim1, sim2);
View Full Code Here


     */
    private static void updateNearestCenter(double inverseSimilarities[], Matrix dataPoints,
                                            int newlyChosen,
                                            SimilarityFunction simFunc) {
       
        DoubleVector chosenVec = dataPoints.getRowVector(newlyChosen);
  for (int i = 0; i < inverseSimilarities.length; i++) {
            double sim = simFunc.sim(dataPoints.getRowVector(i), chosenVec);
            sim = invertSim(sim);
            inverseSimilarities[i] = Math.min(inverseSimilarities[i], sim);
        }
View Full Code Here

        double bestDelta = (isMaximize()) ? 0 : Double.MAX_VALUE;
        int bestDeltaIndex = -1;

        // Get the current vector.
        DoubleVector vector = matrix.get(currentVectorIndex);

        // Get the current centroid without the current data point assigned to
        // it.  Compute the cost delta with that point removed from the cluster.
        //DoubleVector altCurrentCentroid = subtract(
        //        centroids[currentClusterIndex], vector);
View Full Code Here

     * DenseDynamicMagnitudeVector} can be used to represent the difference.
     * This vector type is optimized for when many calls to magnitude are
     * interleaved with updates to a few dimensions in the vector.
     */
    protected static DoubleVector subtract(DoubleVector c, DoubleVector v) {
        DoubleVector newCentroid = new DenseDynamicMagnitudeVector(c.length());

        // Special case sparse double vectors so that we don't incure a possibly
        // log n get operation for each zero value, as that's the common case
        // for CompactSparseVector.
        if (v instanceof SparseDoubleVector) {
            SparseDoubleVector sv = (SparseDoubleVector) v;
            int[] nonZeros = sv.getNonZeroIndices();
            int sparseIndex = 0;
            for (int i = 0; i < c.length(); ++i) {
                double value = c.get(i);
                if (sparseIndex < nonZeros.length &&
                    i == nonZeros[sparseIndex])
                    value -= sv.get(nonZeros[sparseIndex++]);

                newCentroid.set(i, value);
            }
        } else
            for (int i = 0; i < c.length(); ++i)
                newCentroid.set(i, c.get(i) - v.get(i));
        return newCentroid;
    }
View Full Code Here

        DoubleVector[] centroids = new DoubleVector[numCentroids];

        // Compute the centroid assuming that there is only one cluster to be
        // found.  This is required for computing the optimal cost of a single
        // cluster solution.
        DoubleVector singleCentroid = new DenseVector(dataPoints.columns());
        for (int i = 0; i < numDataPoints; ++i)
            VectorMath.add(singleCentroid, dataPoints.getRowVector(i));
        singleCentroid = new ScaledDoubleVector(
                singleCentroid, 1/((double)numDataPoints));
View Full Code Here

        /**
         * {@inheritDoc}
         */
        protected DoubleVector computeSecondEigenVector(Matrix matrix,
                                                        int vectorLength) {
            DoubleVector Rinv = new DenseVector(vectorLength);
            DoubleVector baseVector = new DenseVector(vectorLength);
            for (int i = 0; i < vectorLength; ++i) {
                Rinv.set(i, 1/Math.sqrt(rho.get(i)));
                baseVector.set(i, rho.get(i) * Rinv.get(i));
            }
    
            // Step 1, generate a random vector, v,  that is orthogonal to
            // pi*D-Inverse.
            DoubleVector v = new DenseVector(vectorLength);
            for (int i = 0; i < v.length(); ++i)
                v.set(i, Math.random());

            Matrix RinvData = (matrix instanceof SparseMatrix)
                ? new RowScaledSparseMatrix((SparseMatrix) matrix, Rinv)
                : new RowScaledMatrix(matrix, Rinv);

            // Make log(matrix.rows()) passes.
            int log = (int) Statistics.log2(vectorLength);
            for (int k = 0; k < log; ++k) {
                // start the orthonormalizing the eigen vector.
                v = orthonormalize(v, baseVector);
                DoubleVector newV = computeMatrixTransposeV(RinvData, v);
                computeMatrixDotV(RinvData, newV, v);
            }

            return v;
        }
View Full Code Here

    private BitSet calculateTermFeatures(String term, int corpusSize) {
        int termIndex = termToIndex.get(term);
        LOGGER.fine(String.format("Calculating feature set for %6d/%d: %s",
                                  termIndex, cooccurrenceMatrix.rows(), term));
        DoubleVector cooccurrences = cooccurrenceMatrix.getRowVector(termIndex);
        int termCount = termCounts.get(termIndex).get();
        BitSet validFeatures = new BitSet(wordIndexCounter);
       
        // For each of the co-occurring terms, calculate the log-likelihood
        // value for that term's occurrences.  Only terms whose value is above
        // 3.841 will be counted as features
        for (int co = 0; co < cooccurrences.length(); ++co) {
            // Form the contingency table:
            //  a   b
            //  c   d
            double count = cooccurrences.get(co);
            // Don't include words that never co-occur as features
            if (count == 0)
                continue;
           
            // a = the number of times they both co-occur
View Full Code Here

            if (clusterAssignment.get(row).assignments().length == 0)
                continue;               
            int assignment = clusterAssignment.get(row).assignments()[0];               
            clusterSize[assignment]++;

            DoubleVector contextVector = contexts.getRowVector(row);
            VectorMath.add(meanSenseVectors[assignment], contextVector);
        }
       
        // For each of the clusters with more than 2% of the contexts, generate
        // an average sense vector.  For those clusters with less than that
View Full Code Here

        // Setup the best cost and index for the best cost.
        double bestTotal = totalCost;
        int bestDeltaIndex = -1;

        // Get the current vector.
        DoubleVector vector = matrix.get(currentVectorIndex);

        // Get the base cost for removing the data point from the current
        // cluster.
        double baseE1Delta = 0;
        double baseI1Delta = 0;
View Full Code Here

            for (int i = 0; i < rows; ++i) {
                LOG.fine("computing affinity for row " + i);
                MultiMap<Double,Integer> neighborMap = rc.getMostSimilar(
                        input, i, numNearestNeighbors, edgeSim);

                DoubleVector row = input.getRowVector(i);
                for (int n : neighborMap.values()) {
                    double edgeWeight = kernelSim.sim(
                            row, input.getRowVector(n));
                    affMatrixWriter.printf("%d %d %f\n", i+1 ,n+1, edgeWeight);
                }
View Full Code Here

TOP

Related Classes of edu.ucla.sspace.vector.DoubleVector

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.