Package com.carrotsearch.hppc

Source Code of com.carrotsearch.hppc.BigramCountingBase

package com.carrotsearch.hppc;

import gnu.trove.map.hash.TIntIntHashMap;
import it.unimi.dsi.fastutil.ints.Int2IntLinkedOpenHashMap;
import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;

import java.io.IOException;

import org.apache.commons.io.IOUtils;
import org.junit.BeforeClass;
import org.junit.Test;

public class BigramCountingBase
{
    /* Prepare some test data */
    public static char [] DATA;

    /* Prevent dead code removal. */
    public volatile int guard;

    @BeforeClass
    public static void prepareData() throws IOException
    {
        byte [] dta = IOUtils.toByteArray(
            Thread.currentThread().getContextClassLoader().getResourceAsStream("books-polish.txt"));
        DATA = new String(dta, "UTF-8").toCharArray();
    }

    @Test
    public void hppc()
    {
        // [[[start:bigram-counting]]]
        // Some character data
        final char [] CHARS = DATA;
       
        // We'll use a int -> int map for counting. A bigram can be encoded
        // as an int by shifting one of the bigram's characters by 16 bits
        // and then ORing the other character to form a 32-bit int.
        final IntIntOpenHashMap map = new IntIntOpenHashMap(
            IntIntOpenHashMap.DEFAULT_CAPACITY,
            IntIntOpenHashMap.DEFAULT_LOAD_FACTOR);
   
        for (int i = 0; i < CHARS.length - 1; i++)
        {
            final int bigram = CHARS[i] << 16 | CHARS[i+1];
            map.putOrAdd(bigram, 1, 1);
        }
        // [[[end:bigram-counting]]]

        guard = map.size();
    }

    @Test
    public void trove()
    {
        final char [] CHARS = DATA;
        final TIntIntHashMap map = new TIntIntHashMap();
   
        for (int i = 0; i < CHARS.length - 1; i++)
        {
            final int bigram = CHARS[i] << 16 | CHARS[i+1];
            map.adjustOrPutValue(bigram, 1, 1);
        }
   
        guard = map.size();
    }

    @Test
    public void mahoutCollections()
    {
        final char [] CHARS = DATA;
        final org.apache.mahout.math.map.OpenIntIntHashMap map =
            new org.apache.mahout.math.map.OpenIntIntHashMap();
        for (int i = 0; i < CHARS.length - 1; i++)
        {
            final int bigram = CHARS[i] << 16 | CHARS[i+1];
            map.adjustOrPutValue(bigram, 1, 1);
        }
   
        guard = map.size();
    }

    @Test
    public void fastutilOpenHashMap()
    {
        final char [] CHARS = DATA;
        final Int2IntOpenHashMap map = new Int2IntOpenHashMap();
        map.defaultReturnValue(0);
   
        for (int i = 0; i < CHARS.length - 1; i++)
        {
            final int bigram = CHARS[i] << 16 | CHARS[i+1];
            map.addTo(bigram, 1);
        }
   
        guard = map.size();
    }

    @Test
    public void fastutilLinkedOpenHashMap()
    {
        final char [] CHARS = DATA;
        final Int2IntLinkedOpenHashMap map = new Int2IntLinkedOpenHashMap();
        map.defaultReturnValue(0);
   
        for (int i = 0; i < CHARS.length - 1; i++)
        {
            final int bigram = CHARS[i] << 16 | CHARS[i+1];
            map.addTo(bigram, 1);
        }
   
        guard = map.size();
    }
}
TOP

Related Classes of com.carrotsearch.hppc.BigramCountingBase

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.