Package edu.ucla.sspace.vector

Examples of edu.ucla.sspace.vector.SparseIntegerVector


     * @param word a word
     *
     * @return the {@code SemanticVector} for the provided word.
     */
    private SparseIntegerVector getSemanticVector(String word) {
        SparseIntegerVector v = wordToSemantics.get(word);
        if (v == null) {
            // lock on the word in case multiple threads attempt to add it at
            // once
            synchronized(this) {
                // recheck in case another thread added it while we were waiting
View Full Code Here


     * to the number of dimensions <i>at the time of this call</i> and therefore
     * may be less than the number of dimensions for the same word when obtained
     * at a later time.
     */
    public SparseIntegerVector getVector(String word) {
        SparseIntegerVector v = wordToSemantics.get(word);
        if (v == null) {
            return null;
        }
        // Note that because the word space is potentially ever growing, we wrap
        // the return vectors with the size of the semantic space at the time of
View Full Code Here

            boolean calculateSemantics =
                (semanticFilter.isEmpty() || semanticFilter.contains(focusWord))
                && !focusWord.equals(IteratorFactory.EMPTY_TOKEN);
           
            if (calculateSemantics) {
                SparseIntegerVector focusSemantics =
                    getSemanticVector(focusWord);

                // Keep track of the relative position of the focus word in case
                // word ordering is being used.
                int position = -prevWords.size(); // first word is furthest
                for (String word : prevWords) {
                    // Skip the addition of any words that are excluded from the
                    // filter set.  Note that by doing the exclusion here, we
                    // ensure that the token stream maintains its existing
                    // ordering, which is necessary when word order is taken
                    // into account.
                    if (word.equals(IteratorFactory.EMPTY_TOKEN)) {
                        position++;
                        continue;
                    }
                   
                    int dimension = basisMapping.getDimension(
                        new Duple<String,Integer>(word, position));
                    synchronized(focusSemantics) {
                        focusSemantics.add(dimension, 1);
                    }
                    position++;
                }
           
                // Repeat for the words in the forward window.
                position = 1;
                for (String word : nextWords) {
                    // Skip the addition of any words that are excluded from the
                    // filter set.  Note that by doing the exclusion here, we
                    // ensure that the token stream maintains its existing
                    // ordering, which is necessary when word order is taken
                    // into account.
                    if (word.equals(IteratorFactory.EMPTY_TOKEN)) {
                        ++position;
                        continue;
                    }
               
                    int dimension = basisMapping.getDimension(
                        new Duple<String,Integer>(word, position));
                    synchronized(focusSemantics) {
                        focusSemantics.add(dimension, 1);
                    }
                    position++;
                }
            }
View Full Code Here

        int length = a.length();
        double numerator = 0;
       
        // For both a and b, keep track of how many times each position i tied
        // with some other position for rank.
        SparseIntegerVector tiesInA = new CompactSparseIntegerVector(length);
        SparseIntegerVector tiesInB = new CompactSparseIntegerVector(length);
        boolean foundTies = false;

        int concordant = 0;
        int discordant = 0;

        // For all pairs, track how many pairs satisfy the ordering
        for (int i = 0; i < length; ++i) {
            for (int j = i+1; j < length; ++j) {
                // NOTE: this value will be 1 if there exists a match or
                // "concordance" in the ordering of the two pairs.  Otherwise,
                // it will be a -1 if the pairs are not matched or are
                // "discordant".
                double ai = a.get(i);
                double aj = a.get(j);
                double bi = b.get(i);
                double bj = b.get(j);

                // Check for ties
                boolean atie = ai == aj;
                if (ai == aj) {
                    tiesInA.add(i, 1);
                    foundTies = true;
                }
                if (bi == bj) {
                    tiesInB.add(i, 1);
                    foundTies = true;
                }
                // If there was a tied rank, don't count the comparisons towards
                // the concordance totals
                if (ai != aj && bi != bj) {
                    if ((ai < aj && bi < bj) || (ai > aj && bi > bj))
                        concordant++;
                    else
                        discordant++;
                }
            }
        }

        int n = concordant - discordant;
        double d = (.5 * (length * (length-1)));

        if (foundTies) {
            // IMPORTANT NOTE: for the summations, add 1 to the number of ties,
            // rather than subtract 1.  All the online pseudo code has (ties *
            // (ties - 1)) / 2, which assumes that for a tied rank, ties will
            // always have a value of 2 or more.  I think they're double
            // counting ties somehow, so we add 1 to account for this.  Most
            // importantly, adding 1 causes all the online Kendall's tau
            // calculators to agree with our result.
            double aSum = 0;
            for (int i : tiesInA.getNonZeroIndices()) {
                int ties = tiesInA.get(i);
                aSum += (ties * (ties + 1) * .5);
            }

            double bSum = 0;
            for (int i : tiesInB.getNonZeroIndices()) {
                int ties = tiesInB.get(i);
                bSum += (ties * (ties + 1) * .5);
            }

            return n / Math.sqrt((d - aSum) * (d - bSum));
        }
View Full Code Here

        int length = a.length();
        double numerator = 0;
       
        // For both a and b, keep track of how many times each position i tied
        // with some other position for rank.
        SparseIntegerVector tiesInA = new CompactSparseIntegerVector(length);
        SparseIntegerVector tiesInB = new CompactSparseIntegerVector(length);
        boolean foundTies = false;

        int concordant = 0;
        int discordant = 0;

        // For all pairs, track how many pairs satisfy the ordering
        for (int i = 0; i < length; ++i) {
            for (int j = i+1; j < length; ++j) {
                // NOTE: this value will be 1 if there exists a match or
                // "concordance" in the ordering of the two pairs.  Otherwise,
                // it will be a -1 if the pairs are not matched or are
                // "discordant".
                int ai = a.get(i);
                int aj = a.get(j);
                int bi = b.get(i);
                int bj = b.get(j);

                // Check for ties
                boolean atie = ai == aj;
                if (ai == aj) {
                    tiesInA.add(i, 1);
                    foundTies = true;
                }
                if (bi == bj) {
                    tiesInB.add(i, 1);
                    foundTies = true;
                }
                // If there was a tied rank, don't count the comparisons towards
                // the concordance totals
                if (ai != aj && bi != bj) {
                    if ((ai < aj && bi < bj) || (ai > aj && bi > bj))
                        concordant++;
                    else
                        discordant++;
                }
            }
        }

        int n = concordant - discordant;
        double d = (.5 * (length * (length-1)));

        if (foundTies) {
            // IMPORTANT NOTE: for the summations, add 1 to the number of ties,
            // rather than subtract 1.  All the online pseudo code has (ties *
            // (ties - 1)) / 2, which assumes that for a tied rank, ties will
            // always have a value of 2 or more.  I think they're double
            // counting ties somehow, so we add 1 to account for this.  Most
            // importantly, adding 1 causes all the online Kendall's tau
            // calculators to agree with our result.
            double aSum = 0;
            for (int i : tiesInA.getNonZeroIndices()) {
                int ties = tiesInA.get(i);
                aSum += (ties * (ties + 1) * .5);
            }

            double bSum = 0;
            for (int i : tiesInB.getNonZeroIndices()) {
                int ties = tiesInB.get(i);
                bSum += (ties * (ties + 1) * .5);
            }

            return n / Math.sqrt((d - aSum) * (d - bSum));
        }
View Full Code Here

TOP

Related Classes of edu.ucla.sspace.vector.SparseIntegerVector

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.