Package: com.carrotsearch.hppc

Examples of com.carrotsearch.hppc.IntIntOpenHashMap


        // NOTE(review): fragment — the method signature begins above this view.
        // Hidden parameters referenced below: a DoubleMatrix2D `input` and an
        // int `partitions` (number of clusters). Runs a fixed-iteration k-means
        // over the selected columns and returns clusters as lists of original
        // column indices of `input`.
        IntArrayList columns, int iterations)
    {
        // Prepare selected matrix
        // Restrict the input to the requested columns (all rows), copied so the
        // selection view is materialized.
        final DoubleMatrix2D selected = input.viewSelection(null, columns.toArray())
            .copy();
        // Maps a local column index in `selected` back to its column in `input`.
        final IntIntMap selectedToInput = new IntIntOpenHashMap(selected.columns());
        for (int i = 0; i < columns.size(); i++)
        {
            selectedToInput.put(i, columns.get(i));
        }

        // Prepare results holders
        List<IntArrayList> result = Lists.newArrayList();
        List<IntArrayList> previousResult = null;
        for (int i = 0; i < partitions; i++)
        {
            result.add(new IntArrayList(selected.columns()));
        }
        // Initial assignment: round-robin columns across the partitions.
        for (int i = 0; i < selected.columns(); i++)
        {
            result.get(i % partitions).add(i);
        }

        // Matrices for centroids and document-centroid similarities
        // Centroids are seeded from the first `partitions` columns of `selected`.
        final DoubleMatrix2D centroids = new DenseDoubleMatrix2D(selected.rows(),
            partitions).assign(selected.viewPart(0, 0, selected.rows(), partitions));
        final DoubleMatrix2D similarities = new DenseDoubleMatrix2D(partitions,
            selected.columns());

        // Run a fixed number of K-means iterations
        for (int it = 0; it < iterations; it++)
        {
            // Update centroids
            for (int i = 0; i < result.size(); i++)
            {
                final IntArrayList cluster = result.get(i);
                for (int k = 0; k < selected.rows(); k++)
                {
                    double sum = 0;
                    for (int j = 0; j < cluster.size(); j++)
                    {
                        sum += selected.get(k, cluster.get(j));
                    }
                    // NOTE(review): an empty cluster yields 0/0 == NaN centroid
                    // entries here — confirm clusters can never be empty.
                    centroids.setQuick(k, i, sum / cluster.size());
                }
            }

            // Stash the previous assignment and start fresh lists — except on
            // the last pass.
            if (it < iterations - 1)
            {
                previousResult = result;
                result = Lists.newArrayList();
                for (int i = 0; i < partitions; i++)
                {
                    result.add(new IntArrayList(selected.columns()));
                }
            }
            // NOTE(review): on the final pass `result` is NOT reset, so the
            // assignment loop below appends on top of the previous assignment —
            // verify against the full source that this duplication is intended.

            // Calculate similarity to centroids
            // similarities = centroids^T * selected (alpha=1, beta=0, transposeA).
            centroids.zMult(selected, similarities, 1, 0, true, false);

            // Assign documents to the nearest centroid
            for (int c = 0; c < similarities.columns(); c++)
            {
                // Find the centroid (row) with the largest similarity to column c.
                int maxRow = 0;
                double max = similarities.get(0, c);
                for (int r = 1; r < similarities.rows(); r++)
                {
                    if (max < similarities.get(r, c))
                    {
                        max = similarities.get(r, c);
                        maxRow = r;
                    }
                }

                result.get(maxRow).add(c);
            }

            // Early exit when the assignment did not change between passes.
            if (ObjectUtils.equals(previousResult, result))
            {
                // Unchanged result
                break;
            }
        }

        // Map the results back to the global indices
        for (Iterator<IntArrayList> it = result.iterator(); it.hasNext();)
        {
            final IntArrayList cluster = it.next();
            if (cluster.isEmpty())
            {
                // Drop empty clusters from the returned list.
                it.remove();
            }
            else
            {
                // Rewrite local column indices into the original input's indices.
                for (int j = 0; j < cluster.size(); j++)
                {
                    cluster.set(j, selectedToInput.get(cluster.get(j)));
                }
            }
        }

        return result;
View Full Code Here


        // NOTE(review): fragment — the test method's signature and fixture setup
        // (vsmContext, expectedTdMatrixStemIndices, expectedTdMatrixElements) are
        // above this view.
        assertThat(vsmContext.termDocumentMatrix.rows()).as("tdMatrix.rowCount")
            .isEqualTo(expectedTdMatrixStemIndices.length);
        MatrixAssertions.assertThat(vsmContext.termDocumentMatrix).isEquivalentTo(
            expectedTdMatrixElements);

        // Build the expected mapping: stem index -> row index in the matrix.
        final IntIntOpenHashMap expectedStemToRowIndex = new IntIntOpenHashMap();
        for (int i = 0; i < expectedTdMatrixStemIndices.length; i++)
        {
            expectedStemToRowIndex.put(expectedTdMatrixStemIndices[i], i);
        }

        // Cast to Object so the generic equality assertion overload is selected.
        assertThat((Object) vsmContext.stemToRowIndex).isEqualTo(expectedStemToRowIndex);
    }
View Full Code Here

    /**
     * Convert to sparse encoding using a hash map.
     */
    public static int [] toSparseEncodingByHash(IntStack documents)
    {
        final IntIntOpenHashMap map = new IntIntOpenHashMap();

        final int toIndex = documents.size();
        final int [] buffer = documents.buffer;
        for (int i = 0; i < toIndex; i++)
        {
            map.putOrAdd(buffer[i], 1, 1);
        }

        return hashToKeyValuePairs(map);
    }
View Full Code Here

    /**
     * Merge data from one or more sparse arrays.
     */
    public static int [] mergeSparseArrays(Iterable<int []> source)
    {
        final IntIntOpenHashMap m = new IntIntOpenHashMap();
        for (int[] list : source)
        {
            final int max = list.length;
            for (int i = 0; i < max; i += 2)
            {
                final int v = list[i + 1];
                m.putOrAdd(list[i], v, v);
            }
        }

        return hashToKeyValuePairs(m);
    }
View Full Code Here

    // NOTE(review): fragment — the enclosing test method's signature is above
    // this view; `context` is defined there.
    {
        // for each discovered phrase, do manual count and verify if tf and tfByDocument are correct.
        AllPhrases allPhrases = context.allPhrases;
        for (int index = 0; index < allPhrases.size(); index++)
        {
            // Ground truth: document -> term frequency, counted naively over all tokens.
            IntIntOpenHashMap realTfByDocuments = countManually(context, allPhrases.wordIndices[index]);
            // HPPC's forEach returns the procedure instance it was given, so the
            // accumulated `tf` field can be read straight off the anonymous class.
            final int realTf = realTfByDocuments.forEach(new IntIntProcedure()
            {
                int tf;
                public void apply(int key, int value)
                {
                    tf += value;
                }
            }).tf;

            Assertions.assertThat(allPhrases.tf[index]).as("Phrase: " + allPhrases.getPhrase(index))
                .isEqualTo(realTf);
           
            // Phrase extractor does not sort the byDocumentTf, so we need to addAllFromFlattened
            // to a map and then flatten with sorting.
            Assertions
                .assertThat(
                    IntMapUtils.flattenSortedByKey(IntMapUtils.addAllFromFlattened(
                        new IntIntOpenHashMap(), allPhrases.tfByDocument[index])))
                .as("Phrase: " + allPhrases.getPhrase(index))
                .isEqualTo(IntMapUtils.flattenSortedByKey(realTfByDocuments));
        }
    }
View Full Code Here

    /**
     * Manually and naively count doc->tf for the given word sequence.
     */
    private IntIntOpenHashMap countManually(PreprocessingContext context, int [] phraseWordIndices)
    {
        IntIntOpenHashMap tfByDoc = new IntIntOpenHashMap();
        AllTokens allTokens = context.allTokens;
outer:
        for (int i = allTokens.wordIndex.length - phraseWordIndices.length; --i >=0 ;)
        {
            for (int j = 0; j < phraseWordIndices.length; j++)
            {
                int wordInPhrase = phraseWordIndices[j];
                int wordInTokens = allTokens.wordIndex[i + j];
                if (wordInPhrase != wordInTokens)
                    continue outer;
            }
            tfByDoc.putOrAdd(allTokens.documentIndex[i], 1, 1);
        }
        return tfByDoc;
    }
View Full Code Here

    // NOTE(review): fragment — the enclosing method's signature is above this
    // view. `mOrg` appears to map a co-index key to the CTNodes sharing it;
    // this pass re-assigns co-indices so each key keeps a single antecedent —
    // verify against the full source.
    if (mOrg.isEmpty())  return;
   
    // Process keys in ascending order for deterministic re-indexing.
    int[] keys = mOrg.keys().toArray();
    Arrays.sort(keys);
   
    IntIntOpenHashMap mNew = new IntIntOpenHashMap();   
    int coIndex = 1, last, i;
    List<CTNode> list;
    CTNode curr, ec;
    boolean isAnteFound;
   
    for (int key : keys)
    {
      list = mOrg.get(key);
      last = list.size() - 1;
      isAnteFound = false;
     
      // Walk the co-indexed nodes from last to first.
      for (i=last; i>=0; i--)
      {
        curr = list.get(i);
       
        if (curr.isEmptyCategoryRec())
        {
          // The first sub-terminal is the empty-category terminal itself.
          ec = curr.getSubTerminals().get(0);
         
          // Clear the co-index for trailing, already-resolved, ICH/PPA/RNR, or
          // coordinated empty categories; otherwise assign a fresh index.
          if (i == last || isAnteFound || CTLibEn.RE_ICH_PPA_RNR.matcher(ec.form).find() || CTLibEn.containsCoordination(curr.getLowestCommonAncestor(list.get(i+1))))
            curr.coIndex = -1;
          else
            curr.coIndex = coIndex++;

          // Append the current co-index to the empty category's form.
          if (isAnteFound || i > 0)
            ec.form += "-"+coIndex;
        }
        else if (isAnteFound)
        {
          // An antecedent was already chosen; drop this node's co-index.
          curr.coIndex = -1;
        }
        else
        {
          // Last non-empty node (scanning backwards) becomes the antecedent.
          curr.coIndex = coIndex;
          mNew.put(key, coIndex);
          isAnteFound  = true;
        }
      }
     
      coIndex++;
View Full Code Here

            // NOTE(review): fragment — the enclosing method and the loop closed
            // by the brace below begin above this view.
            }
            String[] sValues = uniqueTerms.toArray(String.class);
            uniqueTerms = null;

            BulkRequestBuilder builder = client.prepareBulk();
            // Tracks, per field suffix, how many distinct values have been used so far.
            IntIntOpenHashMap tracker = new IntIntOpenHashMap();
            for (int i = 0; i < COUNT; i++) {
                Map<String, Object> fieldValues = new HashMap<>();
                // Field suffixes are powers of two: 1, 2, 4, ... up to FIELD_LIMIT.
                for (int fieldSuffix = 1; fieldSuffix <= FIELD_LIMIT; fieldSuffix <<= 1) {
                    int index;
                    if (tracker.containsKey(fieldSuffix)) {
                        // lget() reads the value of the key last probed by containsKey.
                        index = tracker.lget();
                    } else {
                        tracker.put(fieldSuffix, index = 0);
                    }
                    if (index >= fieldSuffix) {
                        // Cardinality cap reached for this field: reuse a random
                        // one of the first `fieldSuffix` values; counter stays put.
                        index = random.nextInt(fieldSuffix);
                        fieldValues.put("field_" + fieldSuffix, sValues[index]);
                    } else {
                        // Still filling: use the next value and advance the counter.
                        fieldValues.put("field_" + fieldSuffix, sValues[index]);
                        tracker.put(fieldSuffix, ++index);
                    }
                }
                builder.add(
                        client.prepareIndex(INDEX_NAME, TYPE_NAME, String.valueOf(i))
                        .setSource(fieldValues)
View Full Code Here

        // NOTE(review): fragment — setup (values, iValues, ITERATIONS, REUSE,
        // PUT_OPERATIONS, NUMBER_OF_KEYS) is above this view.
        // Fill the key pool with random ints.
        for (int i = 0; i < values.length; i++) {
            iValues[i] = ThreadLocalRandom.current().nextInt();
        }

        stopWatch = new StopWatch().start();
        IntIntOpenHashMap intMap = new IntIntOpenHashMap();
        for (long iter = 0; iter < ITERATIONS; iter++) {
            // Either clear and reuse the same map, or allocate a fresh one per pass.
            if (REUSE) {
                intMap.clear();
            } else {
                intMap = new IntIntOpenHashMap();
            }
            for (long i = 0; i < PUT_OPERATIONS; i++) {
                int key = iValues[(int) (i % NUMBER_OF_KEYS)];
                // addTo increments the stored value by 1 (inserting 1 when absent).
                intMap.addTo(key, 1);
            }
        }
        stopWatch.stop();
        // NOTE(review): the label says "TIntIntHashMap" (Trove) but the map under
        // test is HPPC's IntIntOpenHashMap — the printed name looks copy-pasted.
        System.out.println("TIntIntHashMap: " + stopWatch.totalTime() + ", " + stopWatch.totalTime().millisFrac() / ITERATIONS + "ms");

        // Release the map before the next benchmark section.
        intMap.clear();
        intMap = null;

        // now test with THashMap
        stopWatch = new StopWatch().start();
        IntObjectOpenHashMap<IntEntry> tIntMap = new IntObjectOpenHashMap<>();
View Full Code Here

TOP

Related Classes of com.carrotsearch.hppc.IntIntOpenHashMap

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.