Package com.mapr.synth.distributions

Examples of com.mapr.synth.distributions.TermGenerator


    // Single WordGenerator instance shared by the tests shown here; it is passed as the
    // first constructor argument to every TermGenerator they create.
    // NOTE(review): the two string arguments look like names of word-list resources
    // (a frequency-seeded list and a fallback list) — confirm against WordGenerator.
    private static final WordGenerator WORDS = new WordGenerator("word-frequency-seed", "other-words");

    @Test
    public void generateTerms() {
        TermGenerator x = new TermGenerator(WORDS, 1, 0.8);
        final Multiset<String> counts = HashMultiset.create();
        for (int i = 0; i < 10000; i++) {
            counts.add(x.sample());
        }

        assertEquals(10000, counts.size());
        assertTrue("Should have some common words", counts.elementSet().size() < 10000);
        List<Integer> k = Lists.newArrayList(Iterables.transform(counts.elementSet(), new Function<String, Integer>() {
View Full Code Here


        assertTrue(counts.count("the") > 300);
    }

    @Test
    public void distinctVocabularies() {
        TermGenerator x1 = new TermGenerator(WORDS, 1, 0.8);
        final Multiset<String> k1 = HashMultiset.create();
        for (int i = 0; i < 50000; i++) {
            k1.add(x1.sample());
        }

        TermGenerator x2 = new TermGenerator(WORDS, 1, 0.8);
        final Multiset<String> k2 = HashMultiset.create();
        for (int i = 0; i < 50000; i++) {
            k2.add(x2.sample());
        }

        final NormalDistribution normal = new NormalDistribution();
        List<Double> scores = Ordering.natural().sortedCopy(Iterables.transform(k1.elementSet(),
                new Function<String, Double>() {
View Full Code Here

TOP

Related Classes of com.mapr.synth.distributions.TermGenerator

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact coftware#gmail.com.