Package: com.carrotsearch.hppc

Examples of com.carrotsearch.hppc.BitSet


    public static void before()
    {
        final int MB = 1024 * 1024;
        final int bits = 128 * MB * 4;

        hppc = new BitSet(bits);
        jre = new java.util.BitSet(bits);

        // Randomly fill every bits (this is fairly dense distribution).
        for (int i = 0; i < bits; i += 1 + rnd.nextInt(10))
        {
View Full Code Here


                // Add label and score
                cluster.addPhrases(labelFormatter.format(context, labelFeature));
                cluster.setAttribute(Cluster.SCORE, clusterLabelScore[i]);

                // Add documents
                final BitSet bs = clusterDocuments[i];
                for (int bit = bs.nextSetBit(0); bit >= 0; bit = bs.nextSetBit(bit + 1))
                {
                    cluster.addDocuments(documents.get(bit));
                }

                // Add cluster
View Full Code Here

        final BitSet [] labelsDocumentIndices = new BitSet [labelsFeatureIndex.length];

        for (int i = 0; i < labelsFeatureIndex.length; i++)
        {
            final BitSet documentIndices = new BitSet(documentCount);

            final int featureIndex = labelsFeatureIndex[i];
            if (featureIndex < wordCount)
            {
                addTfByDocumentToBitSet(documentIndices,
                    stemsTfByDocument[wordsStemIndex[featureIndex]]);
            }
            else
            {
                final int phraseIndex = featureIndex - wordCount;
                if (exactPhraseAssignment)
                {
                    addTfByDocumentToBitSet(documentIndices,
                        phrasesTfByDocument[phraseIndex]);
                }
                else
                {
                    final int [] wordIndices = phrasesWordIndices[phraseIndex];
                    boolean firstAdded = false;

                    for (int j = 0; j < wordIndices.length; j++)
                    {
                        final int wordIndex = wordIndices[j];
                        if (!TokenTypeUtils.isCommon(wordsTypes[wordIndex]))
                        {
                            if (!firstAdded)
                            {
                                addTfByDocumentToBitSet(documentIndices,
                                    stemsTfByDocument[wordsStemIndex[wordIndex]]);
                                firstAdded = true;
                            }
                            else
                            {
                                final BitSet temp = new BitSet(documentCount);
                                addTfByDocumentToBitSet(temp,
                                    stemsTfByDocument[wordsStemIndex[wordIndex]]);
                                // .retainAll == set intersection
                                documentIndices.and(temp);
                            }
View Full Code Here

        final int [][] phrasesWordIndices = context.allPhrases.wordIndices;
        final int wordCount = wordsStemIndex.length;

        final int [][] stemsTfByDocument = context.allStems.tfByDocument;
        int documentCount = context.documents.size();
        final BitSet requiredStemIndices = new BitSet(labelsFeatureIndex.length);

        for (int i = 0; i < labelsFeatureIndex.length; i++)
        {
            final int featureIndex = labelsFeatureIndex[i];
            if (featureIndex < wordCount)
            {
                addStemIndex(wordsStemIndex, documentCount, stemsTfByDocument,
                    requiredStemIndices, featureIndex);
            }
            else
            {
                final int [] wordIndices = phrasesWordIndices[featureIndex - wordCount];
                for (int j = 0; j < wordIndices.length; j++)
                {
                    final int wordIndex = wordIndices[j];
                    if (!TokenTypeUtils.isCommon(wordsTypes[wordIndex]))
                    {
                        addStemIndex(wordsStemIndex, documentCount, stemsTfByDocument,
                            requiredStemIndices, wordIndex);
                    }
                }
            }
        }

        return requiredStemIndices.asIntLookupContainer().toArray();
    }
View Full Code Here

        int maxTfVariantIndex = tokenImagesOrder[0];
        int totalTf = 1;
        int variantStartIndex = 0;

        // A byte set for word fields tracking
        final BitSet fieldIndices = new BitSet(context.allFields.name.length);

        // A stack for pushing information about the term's documents.
        final IntStack wordDocuments = new IntStack();

        if (documentIndexesArray[tokenImagesOrder[0]] >= 0)
        {
            wordDocuments.push(documentIndexesArray[tokenImagesOrder[0]]);
        }

        // Go through the ordered token images
        for (int i = 0; i < tokenImagesOrder.length - 1; i++)
        {
            final char [] image = tokenImages[tokenImagesOrder[i]];
            final char [] nextImage = tokenImages[tokenImagesOrder[i + 1]];
            final int tokenType = tokenTypesArray[tokenImagesOrder[i]];
            final int documentIndex = documentIndexesArray[tokenImagesOrder[i + 1]];

            // Reached the end of non-null tokens?
            if (image == null)
            {
                break;
            }

            // Check if we want to index this token at all
            if (isNotIndexed(tokenType))
            {
                variantStartIndex = i + 1;
                maxTfVariantIndex = tokenImagesOrder[i + 1];

                resetForNewTokenImage(documentIndexesArray, tokenImagesOrder,
                    fieldIndices, wordDocuments, i);
                continue;
            }

            fieldIndices.set(tokensFieldIndex[tokenImagesOrder[i]]);

            // Now check if image case is changing
            final boolean sameCase = CharArrayComparators.FAST_CHAR_ARRAY_COMPARATOR
                .compare(image, nextImage) == 0;
            if (sameCase)
View Full Code Here

     * set of clusters in Carrot2 format.
     */
    private void postProcessing(List<ClusterCandidate> clusters)
    {
        // Adapt to Carrot2 classes, counting used documents on the way.
        final BitSet all = new BitSet(documents.size());
        final ArrayList<Document> docs = Lists.newArrayListWithCapacity(documents.size());
        final ArrayList<String> phrases = Lists.newArrayListWithCapacity(3);
        for (ClusterCandidate c : clusters)
        {
            final Cluster c2 = new Cluster();
            c2.addPhrases(collectPhrases(phrases, c));
            c2.addDocuments(collectDocuments(docs, c.documents));
            c2.setScore((double) c.score);
            this.clusters.add(c2);

            all.or(c.documents);
            docs.clear();
            phrases.clear();
        }

        Collections.sort(this.clusters,
View Full Code Here

        }
        else
        {
            // For larger hash maps, use a bitset to sort keys.

            final BitSet bset = new BitSet(map.size());
            for (IntIntCursor c : map)
            {
                bset.set(c.key);
            }

            for (int key = bset.nextSetBit(0); key >= 0; key = bset.nextSetBit(key + 1))
            {
                result[index++] = key;
                result[index++] = map.get(key);
            }
        }
View Full Code Here

        private void countDocs(int level, int state)
        {
            assert !stree.isLeaf(state);

            final BitSet me = getBitSet(level);
            for (int edge = stree.firstEdge(state); edge != NO_EDGE; edge = stree.nextEdge(edge))
            {
                final int childState = stree.getToState(edge);
                if (stree.isLeaf(childState))
                {
                    final int documentIndex = sb.stateOriginDocument.get(childState);
                    me.set(documentIndex);
                }
                else
                {
                    final BitSet child = getBitSet(level + 1);
                    child.clear();
                    edges.push(stree.getStartIndex(edge), stree.getEndIndex(edge));
                    countDocs(level + 1, childState);
                    edges.discard(2);
                    me.or(child);
                }
View Full Code Here

        /**
         * Callback invoked for each suffix-tree state reached during traversal.
         *
         * @param state internal suffix-tree state identifier
         * @param cardinality presumably the number of set bits in {@code documents},
         *        precomputed by the caller — TODO confirm at the call site
         * @param documents set of document indices associated with this state
         * @param path stack describing the path taken to reach this state
         */
        protected abstract void visit(int state, int cardinality, BitSet documents, IntStack path);

        private BitSet getBitSet(int level)
        {
            while (bsets.size() <= level) bsets.add(new BitSet());
            return bsets.get(level);
        }
View Full Code Here

    /* For cluster merging. */
    ClusterCandidate()
    {
        this.phrases = Lists.newArrayList();
        this.documents = new BitSet();
    }
View Full Code Here

TOP

Related Classes of com.carrotsearch.hppc.BitSet

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., which is owned by Oracle Inc. Contact coftware#gmail.com.