Package com.carrotsearch.hppc

Examples of com.carrotsearch.hppc.IntStack


        if (browser.isDisposed())
        {
            return Status.OK_STATUS;
        }

        IntStack ids = IntStack.newInstanceWithCapacity(selected.size());
        for (Cluster cluster : selected)
        {
            ids.push(cluster.getId());
        }
        browser.execute("javascript:selectGroupsById(" + Arrays.toString(ids.toArray()) + ");");

        return Status.OK_STATUS;
    }
View Full Code Here


        // A byte set for word fields tracking
        final BitSet fieldIndices = new BitSet(context.allFields.name.length);

        // A stack for pushing information about the term's documents.
        final IntStack wordDocuments = new IntStack();

        if (documentIndexesArray[tokenImagesOrder[0]] >= 0)
        {
            wordDocuments.push(documentIndexesArray[tokenImagesOrder[0]]);
        }

        // Go through the ordered token images
        for (int i = 0; i < tokenImagesOrder.length - 1; i++)
        {
            final char [] image = tokenImages[tokenImagesOrder[i]];
            final char [] nextImage = tokenImages[tokenImagesOrder[i + 1]];
            final int tokenType = tokenTypesArray[tokenImagesOrder[i]];
            final int documentIndex = documentIndexesArray[tokenImagesOrder[i + 1]];

            // Reached the end of non-null tokens?
            if (image == null)
            {
                break;
            }

            // Check if we want to index this token at all
            if (isNotIndexed(tokenType))
            {
                variantStartIndex = i + 1;
                maxTfVariantIndex = tokenImagesOrder[i + 1];

                resetForNewTokenImage(documentIndexesArray, tokenImagesOrder,
                    fieldIndices, wordDocuments, i);
                continue;
            }

            fieldIndices.set(tokensFieldIndex[tokenImagesOrder[i]]);

            // Now check if image case is changing
            final boolean sameCase = CharArrayComparators.FAST_CHAR_ARRAY_COMPARATOR
                .compare(image, nextImage) == 0;
            if (sameCase)
            {
                // Case has not changed, just increase counters
                tf++;
                totalTf++;
                wordDocuments.push(documentIndex);
                continue;
            }

            // Case (or even token image) has changed. Update most frequent case
            // variant
            if (maxTf < tf)
            {
                maxTf = tf;
                maxTfVariantIndex = tokenImagesOrder[i];
                tf = 1;
            }

            final boolean sameImage = CharArrayComparators.CASE_INSENSITIVE_CHAR_ARRAY_COMPARATOR
                .compare(image, nextImage) == 0;

            // Check if token image has changed
            if (sameImage)
            {
                totalTf++;
                wordDocuments.push(documentIndex);
            }
            else
            {
                // The image has changed completely.
                // Before we start processing the new image, we need to
                // see if we want to store the previous image, and if so
                // we need add some data about it to the arrays
               
                // wordDocuments.size() may contain duplicate entries from the same document,
                // but this check is faster than deduping, so we do it first. 
                if (wordDocuments.size() >= dfThreshold)
                {
                    // Flatten the list of documents this term occurred in.
                    final int [] sparseEncoding = SparseArray.toSparseEncoding(wordDocuments);
                    final int df = (sparseEncoding.length >> 1);
                    if (df >= dfThreshold)
View Full Code Here

        // Trim to only include shifted merged candidates.
        candidates.subList(j, candidates.size()).clear();

        // Recalculate score after merging.
        IntStack scratch = new IntStack();
        for (ClusterCandidate cc : candidates)
        {
            if (cc.phrases.size() > 1)
            {
                cc.cardinality = (int) cc.documents.cardinality();
View Full Code Here

         * each base cluster.
         */

        // [i] - next neighbor or END, [i + 1] - neighbor cluster index.
        final int END = -1;
        final IntStack neighborList = new IntStack();
        neighborList.push(END);
        final int [] neighbors = new int [baseClusters.size()];
        final float m = (float) mergeThreshold;
        for (int i = 0; i < baseClusters.size(); i++)
        {
            for (int j = i + 1; j < baseClusters.size(); j++)
            {
                final ClusterCandidate c1 = baseClusters.get(i);
                final ClusterCandidate c2 = baseClusters.get(j);

                final float a = c1.cardinality;
                final float b = c2.cardinality;
                final float c = BitSet.intersectionCount(c1.documents, c2.documents);

                if (c / a > m && c / b > m)
                {
                    neighborList.push(neighbors[i], j);
                    neighbors[i] = neighborList.size() - 2;
                    neighborList.push(neighbors[j], i);
                    neighbors[j] = neighborList.size() - 2;
                }
            }
        }

        /*
         * Find connected components in the similarity graph using Tarjan's algorithm
         * (flattened to use the stack instead of recursion).
         */

        final int NO_INDEX = -1;
        final int [] merged = new int [baseClusters.size()];
        Arrays.fill(merged, NO_INDEX);

        final ArrayList<ClusterCandidate> mergedClusters =
            Lists.newArrayListWithCapacity(baseClusters.size());
        final IntStack stack = new IntStack(baseClusters.size());
        final IntStack mergeList = new IntStack(baseClusters.size());
        int mergedIndex = 0;
        for (int v = 0; v < baseClusters.size(); v++)
        {
            if (merged[v] != NO_INDEX) continue;

            // Recursively mark all connected components from an unmerged cluster.
            stack.push(v);
            while (stack.size() > 0)
            {
                final int c = stack.pop();

                assert merged[c] == NO_INDEX || merged[c] == mergedIndex;
                if (merged[c] == mergedIndex) continue;

                merged[c] = mergedIndex;
                mergeList.push(c);

                for (int i = neighbors[c]; neighborList.get(i) != END;)
                {
                    final int neighbor = neighborList.get(i + 1);
                    if (merged[neighbor] == NO_INDEX)
                    {
                        stack.push(neighbor);
                    }
                    else
                    {
                        assert merged[neighbor] == mergedIndex;
                    }
                    i = neighborList.get(i);
                }
            }
            mergedIndex++;

            /*
             * Aggregate documents from each base cluster of the current merge, compute
             * the score and labels.
             */
            mergedClusters.add(merge(mergeList, baseClusters));
            mergeList.clear();
        }

        /*
         * Sort merged clusters.
         */
 
View Full Code Here

    private void markSubSuperPhrases(ArrayList<PhraseCandidate> phrases)
    {
        final int max = phrases.size();

        // A list of all words for each candidate phrase.
        final IntStack words = new IntStack(
            maxDescPhraseLength * phrases.size());

        // Offset pairs in the words list -- a pair [start, length].
        final IntStack offsets = new IntStack(phrases.size() * 2);

        for (PhraseCandidate p : phrases)
        {
            appendWords(words, offsets, p);
        }

        /*
         * Mark phrases that cannot be most specific or most general.
         */
        for (int i = 0; i < max; i++)
        {
            for (int j = 0; j < max; j++)
            {
                if (i == j) continue;

                int index = indexOf(
                    words.buffer, offsets.get(2 * i), offsets.get(2 * i + 1),
                    words.buffer, offsets.get(2 * j), offsets.get(2 * j + 1));
                if (index >= 0)
                {
                    // j is a subphrase of i, hence i cannot be mostGeneral and j
                    // cannot be most specific.
                    phrases.get(i).mostGeneral = false;
                    phrases.get(j).mostSpecific = false;
                }
            }
        }

        /*
         * For most general phrases, do not display them if a more specific phrase
         * exists with pretty much the same coverage.
         */
        for (int i = 0; i < max; i++)
        {
            final PhraseCandidate a = phrases.get(i);
            if (!a.mostGeneral) continue;

            for (int j = 0; j < max; j++)
            {
                final PhraseCandidate b = phrases.get(j);
                if (i == j || !b.mostSpecific) continue;

                int index = indexOf(
                    words.buffer, offsets.get(2 * j), offsets.get(2 * j + 1),
                    words.buffer, offsets.get(2 * i), offsets.get(2 * i + 1));
                if (index >= 0)
                {
                    if (a.coverage - b.coverage < mostGeneralPhraseCoverage)
                    {
                        a.selected = false;
View Full Code Here

    private void markOverlappingPhrases(ArrayList<PhraseCandidate> phrases)
    {
        final int max = phrases.size();

        // A list of all unique words for each candidate phrase.
        final IntStack words = new IntStack(
            maxDescPhraseLength * phrases.size());

        // Offset pairs in the words list -- a pair [start, length].
        final IntStack offsets = new IntStack(phrases.size() * 2);

        for (PhraseCandidate p : phrases)
        {
            appendUniqueWords(words, offsets, p);
        }

        for (int i = 0; i < max; i++)
        {
            for (int j = i + 1; j < max; j++)
            {
                final PhraseCandidate a = phrases.get(i);
                final PhraseCandidate b = phrases.get(j);

                final int a_words = offsets.get(2 * i + 1);
                final int b_words = offsets.get(2 * j + 1);

                final float intersection = computeIntersection(
                    words.buffer, offsets.get(2 * i), a_words,
                    words.buffer, offsets.get(2 * j), b_words);

                if ((intersection / b_words) > maxPhraseOverlap
                    && b.coverage < a.coverage)
                {
                    b.selected = false;
View Full Code Here

  {
    Map<String,IntStack> map = new HashMap<String,IntStack>();
    List<Mention> mentions = new ArrayList<Mention>();
    int i, size = lines.size();
    String corefs, key;
    IntStack stack;
   
    for (i=0; i<size; i++)
    {
      corefs = lines.get(i)[i_coref];
     
      if (corefs.equals("-"))
        continue;
     
      for (String coref : DEPFeat.P_FEATS.split(corefs))
      {
        if (coref.startsWith("("))
        {
          if (coref.endsWith(")"))
          {
            key = coref.substring(1, coref.length()-1);
            mentions.add(new Mention(key, i+1, i+1));
          }
          else
          {
            key = coref.substring(1);
            stack = map.get(key);
           
            if (stack == null)
            {
              stack = new IntStack();
              map.put(key, stack);
            }
           
            stack.push(i+1);
          }
        }
        else //if (coref.endsWith(")"))
        {
          key = coref.substring(0, coref.length()-1);
View Full Code Here

    public static void postorder(final int sequenceLength, int [] sa, int [] lcp,
        IPostOrderVisitor visitor)
    {
        assert sequenceLength <= sa.length && sequenceLength <= lcp.length : "Input sequence length larger than suffix array or the LCP.";

        final IntStack stack = new IntStack();

        // Push the stack bottom marker (sentinel).
        stack.push(-1, -1);

        // Process every leaf.
        int top_h;
        for (int i = 0; i <= sequenceLength; i++)
        {
            final int h = (sequenceLength == i ? -1 : lcp[i]);

            while (true)
            {
                top_h = stack.get(stack.size() - 1);
                if (top_h <= h) break;

                // Visit the node and remove it from the end of the stack.
                final int top_i = stack.get(stack.size() - 2);
                final boolean leaf = (top_i < 0);
                stack.discard(2);

                visitor.visitNode(sa[leaf ? -(top_i + 1) : top_i], top_h, leaf);
            }

            if (top_h < h)
            {
                stack.push(i, h);
            }

            if (i < sequenceLength)
            {
                // Mark leaf nodes in the stack.
                stack.push(-(i + 1), sequenceLength - sa[i]);
            }
        }
    }
View Full Code Here

    public static <E> void postorder(final int sequenceLength, int [] sa, int [] lcp,
        E epsilon, IPostOrderComputingVisitor<E> visitor)
    {
        assert sequenceLength <= sa.length && sequenceLength <= lcp.length : "Input sequence length larger than suffix array or the LCP.";

        final IntStack stack = new IntStack();
        final ArrayList<E> values = new ArrayList<E>();

        // Push the stack bottom marker (sentinel).
        stack.push(-1, -1);
        values.add(epsilon);

        // Process every leaf.
        int top_h;
        E top_c;
        for (int i = 0; i <= sequenceLength; i++)
        {
            final int h = (sequenceLength == i ? -1 : lcp[i]);
            E ci = epsilon;

            while (true)
            {
                top_h = stack.get(stack.size() - 1);
                if (top_h <= h) break;

                // Visit the node and remove it from the end of the stack.
                top_c = values.remove(values.size() - 1);
                final int top_i = stack.get(stack.size() - 2);
                final boolean leaf = (top_i < 0);
                stack.discard(2);

                ci = visitor.aggregate(top_c, ci);
                visitor.visitNode(sa[leaf ? -(top_i + 1) : top_i], top_h, leaf, ci);

                top_c = values.get(values.size() - 1);
            }

            if (top_h < h)
            {
                stack.push(i, h);
                values.add(ci);
            }
            else
            {
                assert top_h == h;
                final int index = values.size() - 1;
                values.set(index, visitor.aggregate(ci, values.get(index)));
            }

            if (i < sequenceLength)
            {
                // Mark leaf nodes in the stack.
                stack.push(-(i + 1), sequenceLength - sa[i]);
                values.add(visitor.leafValue(i, sa[i], sequenceLength - sa[i]));
            }
        }
    }
View Full Code Here

TOP

Related Classes of com.carrotsearch.hppc.IntStack

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.