Examples of edu.cmu.sphinx.linguist.WordSequence

edu.cmu.sphinx.linguist.WordSequence
This class can be used to keep track of a word sequence. It is immutable: it can never be modified once it is created (except, perhaps, for transient, cached things such as a precalculated hash code).

        HashMap<WordSequence, Integer> bigrams = new HashMap<WordSequence, Integer>();
        HashMap<WordSequence, Integer> trigrams = new HashMap<WordSequence, Integer>();
        int wordCount = 0;
        
        if (words.size() > 0) {
            addSequence(unigrams, new WordSequence(words.get(0)));
            wordCount++;
        }
            
        if (words.size() > 1) {
            wordCount++;
            addSequence(unigrams, new WordSequence(words.get(1)));
            addSequence(bigrams, new WordSequence(words.get(0), words.get(1)));
        }


        if (words.size() > 2) {
            addSequence(bigrams, new WordSequence(words.get(1), words.get(2)));
            addSequence(trigrams, new WordSequence(words.get(0), words.get(1), words
                    .get(2)));
        }


        for (int i = 2; i < words.size(); ++i) {
            wordCount++;
            addSequence(unigrams, new WordSequence(words.get(i)));
            addSequence(bigrams, new WordSequence(words.get(i - 1), words.get(i)));
            addSequence(trigrams, new WordSequence(words.get(i - 2),
                                          words.get(i - 1),
                                          words.get(i)));
        }


        float discount = .5f;
        float deflate = 1 - discount;
        Map<WordSequence, Float> uniprobs = new HashMap<WordSequence, Float>();
        for (Map.Entry<WordSequence, Integer> e : unigrams.entrySet()) {
            uniprobs.put(e.getKey(),
                         (float) e.getValue() * deflate / wordCount);
        }


        LogMath lmath = LogMath.getLogMath();
        float logUnigramWeight = lmath.linearToLog(unigramWeight);
        float invLogUnigramWeight = lmath.linearToLog(1 - unigramWeight);
        float logUniformProb = -lmath.linearToLog(uniprobs.size());


        Set<WordSequence> sorted1grams = new TreeSet<WordSequence>(unigrams.keySet());
        Iterator<WordSequence> iter =
                new TreeSet<WordSequence>(bigrams.keySet()).iterator();
        WordSequence ws = iter.hasNext() ? iter.next() : null;
        for (WordSequence unigram : sorted1grams) {
            float p = lmath.linearToLog(uniprobs.get(unigram));
            p += logUnigramWeight;
            p = lmath.addAsLinear(p, logUniformProb + invLogUnigramWeight);
            logProbs.put(unigram, p);


            float sum = 0.f;
            while (ws != null) {
                int cmp = ws.getOldest().compareTo(unigram);
                if (cmp > 0) {
                    break;
                }
                if (cmp == 0) {
                    sum += uniprobs.get(ws.getNewest());
                }
                ws = iter.hasNext() ? iter.next() : null;
            }


            logBackoffs.put(unigram, lmath.linearToLog(discount / (1 - sum)));
        }


        Map<WordSequence, Float> biprobs = new HashMap<WordSequence, Float>();
        for (Map.Entry<WordSequence, Integer> entry : bigrams.entrySet()) {
            int unigramCount = unigrams.get(entry.getKey().getOldest());
            biprobs.put(entry.getKey(),
                        entry.getValue() * deflate / unigramCount);
        }


        Set<WordSequence> sorted2grams = new TreeSet<WordSequence>(bigrams.keySet());
        iter = new TreeSet<WordSequence>(trigrams.keySet()).iterator();
        ws = iter.hasNext() ? iter.next() : null;
        for (WordSequence biword : sorted2grams) {
            logProbs.put(biword, lmath.linearToLog(biprobs.get(biword)));


            float sum = 0.f;
            while (ws != null) {
                int cmp = ws.getOldest().compareTo(biword);
                if (cmp > 0) {
                    break;
                }
                if (cmp == 0) {
                    sum += biprobs.get(ws.getNewest());
                }
                ws = iter.hasNext() ? iter.next() : null;
            }
            logBackoffs.put(biword, lmath.linearToLog(discount / (1 - sum)));
        }

View Full Code Here

                    if (wordObject == null) {
                        wordObject = Word.UNKNOWN;
                    }
                    wordList.add(wordObject);
                }
                WordSequence wordSequence = new WordSequence(wordList);
                if (tok.hasMoreTokens()) {
                    log10Backoff = Float.parseFloat(tok.nextToken());
                }
                float logProb = logMath.log10ToLog(log10Prob);
                float logBackoff = logMath.log10ToLog(log10Backoff);

View Full Code Here

                    wordToClassProb = classProbability.getLogProbability();
                }
            }
        }
        float classBasedProbability =
            classLM.getProbability(new WordSequence(classes));
        return classBasedProbability + wordToClassProb;
    }

View Full Code Here

     *
     * @param t the token to add
     */
    public void add(Token t) {
        stateCount++;
        WordSequence ws = getWordSequence(t);
        WordStats stats = statMap.get(ws);
        if (stats == null) {
            stats = new WordStats(ws);
            statMap.put(ws, stats);
        }

View Full Code Here

     *
     * @param t the token to add
     */
    public void add(Token t) {
        stateCount++;
        WordSequence ws = getWordSequence(t);
        WordStats stats = statMap.get(ws);
        if (stats == null) {
            stats = new WordStats(ws);
            statMap.put(ws, stats);
        }

View Full Code Here

                Word word = wordState.getPronunciation().getWord();
                wordList.add(0, word);
            }
            token = token.getPredecessor();
        }
        return new WordSequence(wordList);
    }

View Full Code Here

0 1

TOP

Related Classes of edu.cmu.sphinx.linguist.WordSequence

edu.cmu.sphinx.decoder.search.stats.StateHistoryTracker

edu.cmu.sphinx.decoder.search.stats.WordTracker

edu.cmu.sphinx.linguist.language.classes.ClassBasedLanguageModel

edu.cmu.sphinx.linguist.language.grammar.LMGrammar

edu.cmu.sphinx.linguist.language.ngram.DynamicTrigramModel

edu.cmu.sphinx.linguist.language.ngram.DynamicTrigramModelTest

edu.cmu.sphinx.linguist.language.ngram.large.LargeNGramModel

edu.cmu.sphinx.linguist.language.ngram.large.LargeNgramTest

edu.cmu.sphinx.linguist.language.ngram.SimpleNGramModel

edu.cmu.sphinx.linguist.lextree.HMMTree

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by ORACLE Inc. Contact coftware#gmail.com.