Package com.carrotsearch.hppc

Examples of com.carrotsearch.hppc.IntArrayList
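
The snippets below come from HPPC's example tests and from projects that use IntArrayList. Several of them call a prepare(n) helper whose body is not shown on this page; the following is a minimal, self-contained sketch of basic IntArrayList usage, with an assumed prepare() that simply fills a list with n consecutive integers (the class name and the helper body are illustrative, not taken from the snippets).

    import com.carrotsearch.hppc.IntArrayList;

    public class IntArrayListBasics
    {
        // Assumed helper: the snippets on this page call prepare(n) without showing it.
        // A plausible minimal version fills a list with n consecutive values.
        static IntArrayList prepare(int size)
        {
            final IntArrayList list = new IntArrayList();
            for (int i = 0; i < size; i++)
            {
                list.add(i);
            }
            return list;
        }

        public static void main(String [] args)
        {
            final IntArrayList list = prepare(10);
            list.add(42);                        // append a primitive int, no boxing
            System.out.println(list.size());     // 11
            System.out.println(list.get(0));     // 0
            final int [] copy = list.toArray();  // exact-size copy of the contents
            System.out.println(copy.length);     // 11
        }
    }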


    @Test
    public void testIterableCursor() throws Exception
    {
        // [[[start:iteration-lists-using-iterator]]]
        // Prepare some list to iterate over
        final IntArrayList list = prepare(10);
       
        // Lists implement the Iterable interface whose iterator returns [type]Cursor elements.
        // The cursor contains the index and value of the current element.
        for (IntCursor c : list)
        {
            System.out.println(c.index + ": " + c.value);
        }
        // [[[end:iteration-lists-using-iterator]]]
    }
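
A caveat worth keeping in mind (an assumption based on HPPC's cursor design, not stated in the snippet above): the iterator may return the same cursor instance for every element, so copy the primitive value out of the cursor rather than retaining the cursor object itself.

        // Hedged sketch: copy c.value instead of storing the cursor reference.
        final IntArrayList copied = new IntArrayList();
        for (IntCursor c : list)
        {
            copied.add(c.value);
        }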


    @Test
    public void testSimpleGetLoop() throws Exception
    {
        // [[[start:iteration-lists-using-get]]]
        final IntArrayList list = prepare(10);
       
        // Another way to iterate over an array list is to access each element
        // of the list using the get() method.
        final int size = list.size();
        for (int i = 0; i < size; i++)
        {
            System.out.println(i + ": " + list.get(i));
        }
        // [[[end:iteration-lists-using-get]]]
    }

    @Test
    public void testWithProcedureClosure()
    {
        // [[[start:iteration-lists-using-procedures]]]
        final IntArrayList list = prepare(10);

        // Lists also support iteration through [type]Procedure interfaces.
        // The apply() method will be called once for each element in the list.
        list.forEach(new IntProcedure()
        {
            public void apply(int value)
            {
                System.out.println(value);
            }
        });
        // [[[end:iteration-lists-using-procedures]]]
    }
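
Since IntProcedure declares a single apply() method, the same loop can be written with a lambda on newer compilers. A sketch, assuming a Java 8+ compiler and that the HPPC version in use keeps IntProcedure as a single-method interface (the cast gives the generic forEach() call an unambiguous target type):

        list.forEach((IntProcedure) value -> System.out.println(value));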

    @Test
    public void testDirectBufferLoop() throws Exception
    {
        // [[[start:iteration-lists-using-direct-buffer-access]]]
        final IntArrayList list = prepare(10);

        // For the fastest iteration, you can access the list's data buffer directly.
        final int [] buffer = list.buffer;
       
        // Make sure you use list.size() and not the length of the data buffer;
        // the backing array is usually longer than the number of stored elements.
        final int size = list.size();
       
        // Iterate over the array as usual.
        for (int i = 0; i < size; i++)
        {
            System.out.println(i + ": " + buffer[i]);
        }
        // [[[end:iteration-lists-using-direct-buffer-access]]]
    }
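
If an exactly sized int[] is needed rather than a view of the internal storage, toArray() returns an independent copy truncated to size() (the internal buffer is typically longer than the list itself):

        final int [] exact = list.toArray();   // length == list.size(), independent copy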

        final CharacterSequence seq = new CharacterSequence("cocoa$");
        final SuffixTree stree = SuffixTreeBuilder.from(seq).build();

        stree.visit(new VisitorAdapter()
        {
            final IntArrayList states = new IntArrayList();

            public void post(int state)
            {
                if (stree.getRootState() != state)
                {
                    final StringBuilder buffer = new StringBuilder();
                    for (int i = 0; i < states.size(); i += 2)
                        for (int j = states.get(i); j <= states.get(i + 1); j++)
                            buffer.append((char) seq.objectAt(j));

                    if (stree.isLeaf(state)) buffer.append(" [leaf]");
                    nodes.add(buffer.toString());

                    states.remove(states.size() - 1);
                    states.remove(states.size() - 1);
                }
            };

            public boolean edge(int fromState, int toState, int startIndex, int endIndex)
            {
                states.add(startIndex);
                states.add(endIndex);
                return true;
            }
        });

        Collections.sort(nodes);
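
In the visitor above, IntArrayList serves as a simple stack of (startIndex, endIndex) pairs: edge() pushes two values with add(), and post() pops them by removing the last two elements. A minimal sketch of that pattern in isolation, using only the calls that appear in the snippet:

        final IntArrayList pairs = new IntArrayList();
        pairs.add(2);                          // push start index
        pairs.add(5);                          // push end index
        final int end = pairs.get(pairs.size() - 1);
        final int start = pairs.get(pairs.size() - 2);
        pairs.remove(pairs.size() - 1);        // pop end
        pairs.remove(pairs.size() - 1);        // pop start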

        }

        // Filter out labels that do not meet the minimum cluster size
        if (minClusterSize > 1)
        {
            final IntArrayList newFeatureIndex = new IntArrayList(
                labelsFeatureIndex.length);
            final ArrayList<BitSet> newDocumentIndices = Lists
                .newArrayListWithExpectedSize(labelsFeatureIndex.length);

            for (int i = 0; i < labelsFeatureIndex.length; i++)
            {
                if (labelsDocumentIndices[i].cardinality() >= minClusterSize)
                {
                    newFeatureIndex.add(labelsFeatureIndex[i]);
                    newDocumentIndices.add(labelsDocumentIndices[i]);
                }
            }
            context.allLabels.documentIndices = newDocumentIndices
                .toArray(new BitSet [newDocumentIndices.size()]);
            context.allLabels.featureIndex = newFeatureIndex.toArray();
            LabelFilterProcessor.updateFirstPhraseIndex(context);
        }
        else
        {
            context.allLabels.documentIndices = labelsDocumentIndices;
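
The fragment above illustrates a common HPPC pattern: size the list up front, append only the surviving entries, and convert to a plain int[] with toArray(). A minimal, self-contained sketch of the same pattern with made-up input data:

        final int [] input = {5, 1, 8, 3};
        final IntArrayList survivors = new IntArrayList(input.length);
        for (int value : input)
        {
            if (value >= 3)
            {
                survivors.add(value);
            }
        }
        final int [] result = survivors.toArray();   // {5, 8, 3}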

        final int [] tokenImagesOrder = IndirectSort.mergesort(tokenImages, 0,
            tokenImages.length, CharArrayComparators.NORMALIZING_CHAR_ARRAY_COMPARATOR);

        // Create holders for new arrays
        final List<char []> normalizedWordImages = Lists.newArrayList();
        final IntArrayList normalizedWordTf = new IntArrayList();
        final List<int []> wordTfByDocumentList = Lists.newArrayList();
        final ByteArrayList fieldIndexList = new ByteArrayList();
        final ShortArrayList types = new ShortArrayList();

        final int [] wordIndexes = new int [tokenCount];
        Arrays.fill(wordIndexes, -1);

        // Initial values for counters
        int tf = 1;
        int maxTf = 1;
        int maxTfVariantIndex = tokenImagesOrder[0];
        int totalTf = 1;
        int variantStartIndex = 0;

        // A bit set for tracking which fields a word occurs in
        final BitSet fieldIndices = new BitSet(context.allFields.name.length);

        // A stack for pushing information about the term's documents.
        final IntStack wordDocuments = new IntStack();

        if (documentIndexesArray[tokenImagesOrder[0]] >= 0)
        {
            wordDocuments.push(documentIndexesArray[tokenImagesOrder[0]]);
        }

        // Go through the ordered token images
        for (int i = 0; i < tokenImagesOrder.length - 1; i++)
        {
            final char [] image = tokenImages[tokenImagesOrder[i]];
            final char [] nextImage = tokenImages[tokenImagesOrder[i + 1]];
            final int tokenType = tokenTypesArray[tokenImagesOrder[i]];
            final int documentIndex = documentIndexesArray[tokenImagesOrder[i + 1]];

            // Reached the end of non-null tokens?
            if (image == null)
            {
                break;
            }

            // Check if we want to index this token at all
            if (isNotIndexed(tokenType))
            {
                variantStartIndex = i + 1;
                maxTfVariantIndex = tokenImagesOrder[i + 1];

                resetForNewTokenImage(documentIndexesArray, tokenImagesOrder,
                    fieldIndices, wordDocuments, i);
                continue;
            }

            fieldIndices.set(tokensFieldIndex[tokenImagesOrder[i]]);

            // Now check if image case is changing
            final boolean sameCase = CharArrayComparators.FAST_CHAR_ARRAY_COMPARATOR
                .compare(image, nextImage) == 0;
            if (sameCase)
            {
                // Case has not changed, just increase counters
                tf++;
                totalTf++;
                wordDocuments.push(documentIndex);
                continue;
            }

            // Case (or even token image) has changed. Update most frequent case
            // variant
            if (maxTf < tf)
            {
                maxTf = tf;
                maxTfVariantIndex = tokenImagesOrder[i];
                tf = 1;
            }

            final boolean sameImage = CharArrayComparators.CASE_INSENSITIVE_CHAR_ARRAY_COMPARATOR
                .compare(image, nextImage) == 0;

            // Check if token image has changed
            if (sameImage)
            {
                totalTf++;
                wordDocuments.push(documentIndex);
            }
            else
            {
                // The image has changed completely.
                // Before we start processing the new image, we need to
                // see if we want to store the previous image, and if so
                // we need to add some data about it to the arrays.
               
                // wordDocuments may contain duplicate entries from the same document,
                // but this size check is cheaper than deduplicating first, so use it as a pre-filter.
                if (wordDocuments.size() >= dfThreshold)
                {
                    // Flatten the list of documents this term occurred in.
                    final int [] sparseEncoding = SparseArray.toSparseEncoding(wordDocuments);
                    final int df = (sparseEncoding.length >> 1);
                    if (df >= dfThreshold)
                    {
                        wordTfByDocumentList.add(sparseEncoding);
   
                        // Add the word to the word list
                        normalizedWordImages.add(tokenImages[maxTfVariantIndex]);
                        types.add(tokenTypesArray[maxTfVariantIndex]);
                        normalizedWordTf.add(totalTf);
                        fieldIndexList.add((byte) fieldIndices.bits[0]);

                        // Add this word's index in AllWords to all its instances
                        // in the AllTokens multiarray
                        for (int j = variantStartIndex; j < i + 1; j++)
                        {
                            wordIndexes[tokenImagesOrder[j]] = normalizedWordImages.size() - 1;
                        }
                    }
                }

                // Reinitialize counters
                totalTf = 1;
                tf = 1;
                maxTf = 1;
                maxTfVariantIndex = tokenImagesOrder[i + 1];
                variantStartIndex = i + 1;

                // Re-initialize int set used for document frequency calculation
                resetForNewTokenImage(documentIndexesArray, tokenImagesOrder,
                    fieldIndices, wordDocuments, i);
            }
        }

        // Mapping from allTokens
        context.allTokens.wordIndex = wordIndexes;

        context.allWords.image = normalizedWordImages
            .toArray(new char [normalizedWordImages.size()] []);
        context.allWords.tf = normalizedWordTf.toArray();
        context.allWords.tfByDocument =
            wordTfByDocumentList.toArray(new int [wordTfByDocumentList.size()] []);
        context.allWords.fieldIndices = fieldIndexList.toArray();
        context.allWords.type = types.toArray();
    }
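
The method above accumulates several parallel, growable primitive lists and only materializes plain arrays at the end. A minimal sketch of that accumulate-then-toArray() pattern, using the same HPPC list types as the snippet (the values are placeholders):

        final IntArrayList tf = new IntArrayList();
        final ShortArrayList types = new ShortArrayList();
        final ByteArrayList fields = new ByteArrayList();

        // One add() per discovered word; placeholder values shown here.
        tf.add(3);
        types.add((short) 1);
        fields.add((byte) 0);

        // Flatten to exact-size parallel arrays once the counts are final.
        final int [] tfArray = tf.toArray();
        final short [] typeArray = types.toArray();
        final byte [] fieldArray = fields.toArray();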

            // Convert word indices to stem indices.
            assert cc.phrases.size() == 1;
            int [] stemIndices = context.allWords.stemIndex;
            int [] phraseWords = cc.phrases.get(0);
            IntArrayList stemList = new IntArrayList(phraseWords.length);
            for (int seqIndex : phraseWords)
            {
                int termIndex = sb.input.get(seqIndex);
                stemList.add(stemIndices[termIndex]);
            }
           
            // Check if we already have a stem-equivalent phrase like this one.
            ClusterCandidate equivalent = merged.get(stemList);
            if (equivalent == null)
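
The merged.get(stemList) lookup above relies on IntArrayList comparing by contents. Assuming the HPPC version in use defines equals() and hashCode() over the stored elements, and that the from() factory is available (both are assumptions about the library, which this code depends on), two lists with the same values behave as the same map key:

        // Hedged sketch: IntArrayList.from(...) and content-based equals/hashCode
        // are assumptions about the HPPC version in use.
        final IntArrayList a = IntArrayList.from(1, 2, 3);
        final IntArrayList b = IntArrayList.from(1, 2, 3);
        final java.util.HashMap<IntArrayList, String> byStems =
            new java.util.HashMap<IntArrayList, String>();
        byStems.put(a, "first");
        System.out.println(byStems.get(b));    // "first" if equality is content-based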

        // Find all subphrases
        List<Substring> rcs = discoverRcs(suffixArray, lcpArray, documentIndexArray);

        List<int []> phraseWordIndexes = Lists.newArrayList();
        IntArrayList phraseTf = new IntArrayList();
        List<int []> phraseTfByDocumentList = Lists.newArrayList();

        if (rcs.size() > 0)
        {
            // Determine most frequent originals and create the final phrase
            // array. Also merge the phrase tf by document maps into flat
            // arrays.
            Collections.sort(rcs, new SubstringComparator(wordIndexesArray, stemIndexes));

            int totalPhraseTf = rcs.get(0).frequency;
            Substring mostFrequentOriginal = rcs.get(0);
            IntIntOpenHashMap phraseTfByDocument = new IntIntOpenHashMap();
            phraseTfByDocument.putAll(mostFrequentOriginal.tfByDocument);

            // Don't change the rcs list type from ArrayList or we'll
            // run into O(n^2) iteration cost :)
            for (int i = 0; i < rcs.size() - 1; i++)
            {
                final Substring substring = rcs.get(i);
                final Substring nextSubstring = rcs.get(i + 1);

                if (substring
                    .isEquivalentTo(nextSubstring, wordIndexesArray, stemIndexes))
                {
                    totalPhraseTf += nextSubstring.frequency;
                    addAllWithOffset(phraseTfByDocument, nextSubstring.tfByDocument, -1);
                    if (mostFrequentOriginal.frequency < nextSubstring.frequency)
                    {
                        mostFrequentOriginal = nextSubstring;
                    }
                }
                else
                {
                    int [] wordIndexes = new int [(mostFrequentOriginal.to - mostFrequentOriginal.from)];
                    for (int j = 0; j < wordIndexes.length; j++)
                    {
                        wordIndexes[j] = wordIndexesArray[mostFrequentOriginal.from + j];
                    }
                    phraseWordIndexes.add(wordIndexes);
                    phraseTf.add(totalPhraseTf);
                    phraseTfByDocumentList.add(IntMapUtils.flatten(phraseTfByDocument));

                    totalPhraseTf = nextSubstring.frequency;
                    mostFrequentOriginal = nextSubstring;
                    phraseTfByDocument.clear();
                    phraseTfByDocument.putAll(nextSubstring.tfByDocument);
                }
            }

            // Add the last substring
            final Substring substring = rcs.get(rcs.size() - 1);
            int [] wordIndexes = new int [(substring.to - substring.from)];
            for (int j = 0; j < wordIndexes.length; j++)
            {
                wordIndexes[j] = wordIndexesArray[mostFrequentOriginal.from + j];
            }
            phraseWordIndexes.add(wordIndexes);
            phraseTf.add(totalPhraseTf);
            phraseTfByDocumentList.add(IntMapUtils.flatten(phraseTfByDocument));
        }

        // Store the results to allPhrases
        context.allPhrases.wordIndices = phraseWordIndexes
            .toArray(new int [phraseWordIndexes.size()] []);
        context.allPhrases.tf = phraseTf.toArray();
        context.allPhrases.tfByDocument = phraseTfByDocumentList
            .toArray(new int [phraseTfByDocumentList.size()] []);
    }

        }

        // Prepare arrays
        images = Lists.newArrayList();
        tokenTypes = new ShortArrayList();
        documentIndices = new IntArrayList();
        fieldIndices = new ByteArrayList();

        final Iterator<Document> docIterator = documents.iterator();
        int documentIndex = 0;
        final ITokenizer ts = context.language.getTokenizer();
