Package me.shenfeng.mmseg

Source Code of me.shenfeng.mmseg.PerformanceTest

package me.shenfeng.mmseg;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;

public class PerformanceTest {

    private static Logger logger = LoggerFactory
            .getLogger(PerformanceTest.class);


    String datastr;
    Dictionary bs;
    Dictionary trie;
    Dictionary hash;

    @Before
    public void setup() throws IOException {
        InputStream is = PerformanceTest.class.getClassLoader()
                .getResourceAsStream("data/words.dic");

        InputStream is2 = PerformanceTest.class.getClassLoader()
                .getResourceAsStream("data/words.dic");

        InputStream is3 = PerformanceTest.class.getClassLoader()
                .getResourceAsStream("data/words.dic");

        InputStream is4 = PerformanceTest.class.getClassLoader()
                .getResourceAsStream("data/words.dic");


        datastr = getBook();

        hash = new HashSetDictionary(is);
        bs = new BSDictionary(is2);
        trie = new TrieDictionary(is3);
//        new me.shenfeng.mmseg.Trie2Dictionary(is4);
    }

    public static String getBook() throws IOException {
        char[] book1 = Utils.getCharsFromResource("book1.txt");
        char[] book2 = Utils.getCharsFromResource("book2.txt");

        // copy as one array
        char[] data = Arrays.copyOf(book1, book1.length + book2.length);
        System.arraycopy(book2, 0, data, book1.length - 1, book2.length);

        return new String(data);
    }

    @Test
    public void testPerf() throws IOException {
        logger.info("Warm up");
        for (int i = 0; i < 10; i++) { // warm up

            boolean p = true;
            SimpleMMsegTokenizer tokenizer = new SimpleMMsegTokenizer(hash,
                    new StringReader(datastr));
            loopResult(tokenizer, hash, p);

            tokenizer = new SimpleMMsegTokenizer(bs,
                    new StringReader(datastr));
            loopResult(tokenizer, bs, p);

            tokenizer = new SimpleMMsegTokenizer(trie,
                    new StringReader(datastr));
            loopResult(tokenizer, trie, p);

        }

        SimpleMMsegTokenizer tokenizer = new SimpleMMsegTokenizer(hash,
                new StringReader(datastr));

        loopResult(tokenizer, hash, true);
        for (int i = 0; i < 3; ++i) {
            tokenizer = new SimpleMMsegTokenizer(bs,
                    new StringReader(datastr));
            loopResult(tokenizer, bs, true);
        }
    }

    String loopResult(Tokenizer tokenizer, Dictionary dic, boolean print)
            throws IOException {
        long start = System.currentTimeMillis();
        CharTermAttribute termAtt = tokenizer
                .getAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAtt = tokenizer
                .getAttribute(OffsetAttribute.class);
        String i = "";
        while (tokenizer.incrementToken()) {
            String word = new String(termAtt.buffer(), 0, termAtt.length());
            // int s = offsetAtt.startOffset();
            // int e = offsetAtt.endOffset();
            i = word;
        }
        if (print) {
            long time = System.currentTimeMillis() - start;
            logger.info("Dictionary: {} takes {}ms to seg {} char",
                    new Object[]{dic.getClass().getSimpleName(), time,
                            datastr.length()}
            );

        }
        return i; // prevent jit
    }
}
TOP

Related Classes of me.shenfeng.mmseg.PerformanceTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.