        return;
    }
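
    // What follows is a single pass over all words sorted by their stem
    // images (stemImagesOrder): each run of words sharing a stem is merged
    // into one entry of context.allStems. The sorting and the local array
    // aliases (wordTfArray, wordStemImages, ...) are assumed to be set up
    // earlier in this method.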

    // Lists to accommodate the results
    final ArrayList<char []> stemImages = new ArrayList<char []>(allWordsCount);
    final IntArrayList stemTf = new IntArrayList(allWordsCount);
    final IntArrayList stemMostFrequentWordIndexes = new IntArrayList(allWordsCount);
    final ArrayList<int []> stemTfByDocumentList = new ArrayList<int []>(allWordsCount);
    final ByteArrayList fieldIndexList = new ByteArrayList();
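
    // The five lists above are parallel: entry i of each list describes the
    // i-th distinct stem, in the order defined by stemImagesOrder.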

    // Counters
    int totalTf = wordTfArray[stemImagesOrder[0]];
    int mostFrequentWordFrequency = wordTfArray[stemImagesOrder[0]];
    int mostFrequentWordIndex = stemImagesOrder[0];
    int stemIndex = 0;
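
    // The counters above describe the run of words currently being merged:
    // totalTf accumulates the run's aggregate term frequency, while the
    // mostFrequentWord* pair tracks its most frequent surface form, later
    // exposed as allStems.mostFrequentOriginalWordIndex.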

    // A list of document-term-frequency pairs, by document, for all words
    // with identical stems.
    final ArrayList<int []> stemTfsByDocument = Lists.newArrayList();
    stemTfsByDocument.add(wordTfByDocumentArray[stemImagesOrder[0]]);

    byte fieldIndices = 0;
    fieldIndices |= wordsFieldIndices[stemImagesOrder[0]];
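
    // fieldIndices is a bit mask over document fields: ORing the per-word
    // masks marks every field in which any surface form of the current stem
    // occurs.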

    // For locating query words
    final MutableCharArray buffer = new MutableCharArray(
        wordStemImages[stemImagesOrder[0]]);
    boolean inQuery = queryStems.contains(buffer);
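
    // The single MutableCharArray wrapper is reused (see buffer.reset()
    // below), presumably so queryStems can be probed without allocating a
    // new object for every stem image.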

    // Go through all words in the order of stem images
    for (int i = 0; i < stemImagesOrder.length - 1; i++)
    {
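        // Each iteration compares the current word with the next one in stem
        // order; per-run statistics are flushed only when the stem image
        // changes, so a run of equal stems collapses into a single entry.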
        final int orderIndex = stemImagesOrder[i];
        final char [] stem = wordStemImages[orderIndex];
        final int nextInOrderIndex = stemImagesOrder[i + 1];
        final char [] nextStem = wordStemImages[nextInOrderIndex];
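
        // Every surface form in the current run is assigned the same stem
        // index; stemIndexesArray is the word -> stem mapping.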
        stemIndexesArray[orderIndex] = stemIndex;

        if (inQuery)
        {
            wordsType[orderIndex] |= ITokenizer.TF_QUERY_WORD;
        }

        // Now check if the stem image is changing
        final boolean sameStem = CharArrayComparators.FAST_CHAR_ARRAY_COMPARATOR
            .compare(stem, nextStem) == 0;

        if (sameStem)
        {
            totalTf += wordTfArray[nextInOrderIndex];
            stemTfsByDocument.add(wordTfByDocumentArray[nextInOrderIndex]);
            fieldIndices |= wordsFieldIndices[nextInOrderIndex];
            if (mostFrequentWordFrequency < wordTfArray[nextInOrderIndex])
            {
                mostFrequentWordFrequency = wordTfArray[nextInOrderIndex];
                mostFrequentWordIndex = nextInOrderIndex;
            }
        }
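        // The next word starts a new stem: flush the statistics gathered for
        // the completed run and reset the per-run state. storeTfByDocument()
        // is not shown in this excerpt; presumably it merges the collected
        // per-word (document, tf) arrays into one per-stem array appended to
        // stemTfByDocumentList, summing frequencies for the same document.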
        else
        {
            stemImages.add(stem);
            stemTf.add(totalTf);
            stemMostFrequentWordIndexes.add(mostFrequentWordIndex);
            storeTfByDocument(stemTfByDocumentList, stemTfsByDocument);
            fieldIndexList.add(fieldIndices);

            stemIndex++;
            totalTf = wordTfArray[nextInOrderIndex];
            mostFrequentWordFrequency = wordTfArray[nextInOrderIndex];
            mostFrequentWordIndex = nextInOrderIndex;

            fieldIndices = 0;
            fieldIndices |= wordsFieldIndices[nextInOrderIndex];
            stemTfsByDocument.clear();
            stemTfsByDocument.add(wordTfByDocumentArray[nextInOrderIndex]);

            buffer.reset(wordStemImages[nextInOrderIndex]);
            inQuery = queryStems.contains(buffer);
        }
    }
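
    // The loop looks one word ahead, so the final run never sees a differing
    // next stem; its statistics are flushed here.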

    // Store tf for the last stem in the array
    stemImages.add(wordStemImages[stemImagesOrder[stemImagesOrder.length - 1]]);
    stemTf.add(totalTf);
    stemMostFrequentWordIndexes.add(mostFrequentWordIndex);
    stemIndexesArray[stemImagesOrder[stemImagesOrder.length - 1]] = stemIndex;
    storeTfByDocument(stemTfByDocumentList, stemTfsByDocument);
    fieldIndexList.add(fieldIndices);

    if (inQuery)
    {
        wordsType[stemImagesOrder[stemImagesOrder.length - 1]] |= ITokenizer.TF_QUERY_WORD;
    }

    // Convert lists to arrays and store them in allStems
    context.allStems.image = stemImages.toArray(new char [stemImages.size()] []);
    context.allStems.mostFrequentOriginalWordIndex = stemMostFrequentWordIndexes.toArray();
    context.allStems.tf = stemTf.toArray();
    context.allStems.tfByDocument = stemTfByDocumentList
        .toArray(new int [stemTfByDocumentList.size()] []);
    context.allStems.fieldIndices = fieldIndexList.toArray();
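
    // A hypothetical worked example: if the surface forms "fish" (tf = 3),
    // "fishing" (tf = 2) and "fisher" (tf = 4) all stem to "fish", the pass
    // above produces a single stem entry s with:
    //   context.allStems.image[s]                         == "fish"
    //   context.allStems.tf[s]                            == 9
    //   context.allStems.mostFrequentOriginalWordIndex[s] == index of "fisher"
    // and stemIndexesArray maps all three word indexes to s.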