Package edu.ucla.sspace.matrix

Examples of edu.ucla.sspace.matrix.SparseMatrix


        for (Integer index : indicesToKeep)
            indexMap.put(index, newIndex++);

        // Create a reduced matrix that will have only the selected columns in
        // the final space.
        SparseMatrix reduced = new YaleSparseMatrix(
                words, indicesToKeep.size());

        // Iterate over the sparse values in the matrix for added efficiency.
        for (int row = 0; row < words; ++ row) {
            SparseDoubleVector sv = cooccurrenceMatrix.getRowVector(row);
            for (int col : sv.getNonZeroIndices()) {
                double v = cooccurrenceMatrix.get(row, col);

                // If the original column was retained, get its new index
                // value and add it to the reduced matrix.
                Integer newColIndex = indexMap.get(col);
                if (newColIndex != null)
                    reduced.set(row, newColIndex, v);

                // If the transposed row column was retained, get its new index
                // value and add it to the reduced matrix.  This turns the col
                // value into the row and the new index as the column.
                newColIndex = indexMap.get(row + words);
                if (newColIndex != null)
                    reduced.set(col, newColIndex, v);
            }
        }

        return reduced;
    }
View Full Code Here


            new BufferedInputStream(new FileInputStream(compressedDocuments)));

        int documents = documentCounter.get();
        // Use the number of times the term occurred in the corpus to determine
        // how many rows (contexts) in the matrix.
        SparseMatrix contextsForCurTerm = new YaleSparseMatrix(
            termCounts.get(termIndex).get(), termToIndex.size());
        int contextsSeen = 0;
        for (int d = 0; d < documents; ++d) {
            final int docId = d;

            int tokensInDoc = corpusReader.readInt();
            int unfilteredTokens = corpusReader.readInt();
            // Read in the document
            int[] doc = new int[tokensInDoc];
            for (int i = 0; i < tokensInDoc; ++i)
                doc[i] = corpusReader.readInt();

            int contextsInDoc =
                processIntDocument(termIndex, doc, contextsForCurTerm,
                                   contextsSeen, termFeatures);
            contextsSeen += contextsInDoc;
        }
        corpusReader.close();

        // If the term is to be processed using fewer than all of its contexts,
        // then randomly select the maximum allowable contexts from the matrix
        if (maxContextsPerWord < Integer.MAX_VALUE &&
                contextsForCurTerm.rows() > maxContextsPerWord) {
            BitSet randomContexts = Statistics.randomDistribution(
                maxContextsPerWord, contextsForCurTerm.rows());
            contextsForCurTerm =
                new SparseRowMaskedMatrix(contextsForCurTerm, randomContexts);
        }
       
        return contextsForCurTerm;
View Full Code Here

        SparseDoubleVector[] vectorList =
            new SparseDoubleVector[wordToSemantics.size()];
        for (Map.Entry<String, SparseDoubleVector> e :
                wordToSemantics.entrySet())
            vectorList[getIndexFor(e.getKey())] = e.getValue();
        SparseMatrix matrix = Matrices.asSparseMatrix(
                Arrays.asList(vectorList));

        // If maxwords was set to 0, save all words.
        if (maxWords == 0 || maxWords > wordToSemantics.size())
            maxWords = wordToSemantics.size();
View Full Code Here

            nonZeroFeatures = new HashSet<Integer>();
            int[] numNonZeros = new int[m.rows()];
            double averageNumNonZeros = 0;

            if (m instanceof SparseMatrix) {
                SparseMatrix sm = (SparseMatrix) m;
                for (int r = 0; r < m.rows(); ++r) {
                    SparseDoubleVector v = sm.getRowVector(r);
                    int[] nonZeros = v.getNonZeroIndices();
                    numNonZeros[r] += nonZeros.length;
                    averageNumNonZeros += nonZeros.length;
                    for (int column : nonZeros) {
                        nonZeroFeatures.add(column);
View Full Code Here

        // itself.  This will make further calculations easier since the dot
        // product distributes when the cosine similarity does not.
        if (matrix instanceof SparseMatrix) {
            List<SparseDoubleVector> scaledVectors =
                new ArrayList<SparseDoubleVector>(matrix.rows());
            SparseMatrix sm = (SparseMatrix) matrix;
            for (int r = 0; r < matrix.rows(); ++r) {
                SparseDoubleVector v = sm.getRowVector(r);
                scaledVectors.add(new ScaledSparseDoubleVector(
                            v, 1/v.magnitude()));
            }
            return Matrices.asSparseMatrix(scaledVectors);
        } else {
View Full Code Here

        // Check that the input is a sparse matrix
        if (!(m instanceof SparseMatrix))
            throw new IllegalArgumentException("CBC only accepts sparse matrices");

        SparseMatrix sm = (SparseMatrix)m;
       
        // Create a bit set with the number of bits equal to the number of rows.
        // This serves as input to phase 2 where we indicate that all rows
        // should be considered for clustering at first.
        BitSet allRows = new BitSet(sm.rows());
        allRows.set(0, sm.rows());
        LOGGER.info("CBC begining Phase 2");
        List<Committee> committees = phase2(
            sm, allRows, avgLinkMergeThresh,
            maxCommitteeSimThresh, residueSimThresh);
       
        LOGGER.info("CBC begining Phase 3");
        // PHASE 3: Assign elements to clusters
        Assignment[] result = new Assignment[m.rows()];
        for (int r = 0; r < m.rows(); ++r) {
            LOGGER.fine("Computing Phase 3 for row " + r);
            SparseDoubleVector row = sm.getRowVector(r);
            // Determine to which committees the row belongs
            List<Integer> committeeIds = phase3(
                row, committees, useHardClustering, softClusteringThresh);
            int[] assignments = new int[committeeIds.size()];
            for (int i = 0; i < committeeIds.size(); ++i) {
View Full Code Here

     */
    private void clusterTerm(String senseName,
                             List<SparseDoubleVector> contextSet,
                             Properties props) {
        // Convert the data points to a sparse matrix.
        SparseMatrix contexts = Matrices.asSparseMatrix(contextSet);

        // Cluster the context set.
        LOG.info("Clustering term: " + senseName);
        Assignments assignments = (numClusters > 0)
            ? clustering.cluster(contexts, numClusters, props)
View Full Code Here

                "denote an edge from row i to row j");
        if (!(matrix instanceof SparseMatrix)) {
            throw new IllegalArgumentException("Input matrix must be a " +
                "sparse matrix.");
        }
        SparseMatrix sm = (SparseMatrix)matrix;

        String inMemProp =
            props.getProperty(KEEP_SIMILARITY_MATRIX_IN_MEMORY_PROPERTY);
        boolean keepSimMatrixInMem = (inMemProp != null)
            ? Boolean.parseBoolean(inMemProp) : true;

        // IMPLEMENTATION NOTE: Ahn et al. used single-linkage HAC, which can be
        // efficiently implemented in O(n^2) time as a special case of HAC.
        // However, we currently don't optimize for this special case and
        // instead use our HAC class.  Because of the complexity of the edge
        // similarity function, we build our own similarity matrix and then pass
        // it in, rather than passing in the edge matrix directly.

        final int rows = sm.rows();
        numRows = rows;
        LOGGER.fine("Generating link similarity matrix for " + rows + " nodes");

        //  Rather than create an O(row^3) matrix for representing the edges,
        // compress the edge matrix by getting a mapping for each edge to a row
        // in the new matrix.
        final List<Edge> edgeList = new ArrayList<Edge>();
        this.edgeList = edgeList;

        for (int r = 0; r < rows; ++r) {
            SparseDoubleVector row = sm.getRowVector(r);
            int[] edges = row.getNonZeroIndices();
            for (int col : edges) {
                // Always add edges from the upper triangular
                if (r > col)
                    edgeList.add(new Edge(r, col));
                // Otherwise, we only add the edge from the lower triangular if
                // it wasn't present in the upper.  This avoids counting
                // duplicate edges.
                else if (r < col && sm.get(col, r) == 0)
                    edgeList.add(new Edge(r, col));
            }
        }

        final int numEdges = edgeList.size();
View Full Code Here

     */
    protected static DoubleVector computeMatrixTransposeV(Matrix matrix,
                                                          DoubleVector v) {
        DoubleVector newV = new DenseVector(matrix.columns());
        if (matrix instanceof SparseMatrix) {
            SparseMatrix smatrix = (SparseMatrix) matrix;
            for (int r = 0; r < smatrix.rows(); ++r) {
                SparseDoubleVector row = smatrix.getRowVector(r);
                int[] nonZeros = row.getNonZeroIndices();
                for (int c : nonZeros)
                    newV.add(c, row.get(c) * v.get(r));
            }
        } else {
View Full Code Here

    protected static void computeMatrixDotV(Matrix matrix,
                                            DoubleVector newV,
                                            DoubleVector v) {
        // Special case for sparse matrices.
        if (matrix instanceof SparseMatrix) {
            SparseMatrix smatrix = (SparseMatrix) matrix;
            for (int r = 0; r < smatrix.rows(); ++r) {
                double vValue = 0;
                SparseDoubleVector row = smatrix.getRowVector(r);
                int[] nonZeros = row.getNonZeroIndices();
                for (int c : nonZeros)
                    vValue += row.get(c) * newV.get(c);
                v.set(r, vValue);
            }
View Full Code Here

TOP

Related Classes of edu.ucla.sspace.matrix.SparseMatrix

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.