Package edu.cmu.sphinx.decoder.search.stats

Source Code of edu.cmu.sphinx.decoder.search.stats.WordTracker

package edu.cmu.sphinx.decoder.search.stats;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import edu.cmu.sphinx.decoder.search.Token;
import edu.cmu.sphinx.linguist.WordSearchState;
import edu.cmu.sphinx.linguist.WordSequence;
import edu.cmu.sphinx.linguist.dictionary.Word;

/** A class that keeps track of word histories */

public class WordTracker {

    final Map<WordSequence, WordStats> statMap;
    final int frameNumber;
    int stateCount;
    int maxWordHistories;


    /**
     * Creates a word tracker for the given frame number
     *
     * @param frameNumber the frame number
     */
    public WordTracker(int frameNumber) {
        statMap = new HashMap<WordSequence, WordStats>();
        this.frameNumber = frameNumber;
    }


    /**
     * Adds a word history for the given token to the word tracker
     *
     * @param t the token to add
     */
    public void add(Token t) {
        stateCount++;
        WordSequence ws = getWordSequence(t);
        WordStats stats = statMap.get(ws);
        if (stats == null) {
            stats = new WordStats(ws);
            statMap.put(ws, stats);
        }
        stats.update(t);
    }


    /** Dumps the word histories in the tracker */
    public void dump() {
        dumpSummary();
        List<WordStats> stats = new ArrayList<WordStats>(statMap.values());
        Collections.sort(stats, WordStats.COMPARATOR);
        for (WordStats stat : stats) {
            System.out.println("   " + stat);
        }
    }


    /** Dumps summary information in the tracker */
    void dumpSummary() {
        System.out.println("Frame: " + frameNumber + " states: " + stateCount
                + " histories " + statMap.size());
    }


    /**
     * Given a token, gets the word sequence represented by the token
     *
     * @param token the token of interest
     * @return the word sequence for the token
     */
    private WordSequence getWordSequence(Token token) {
        List<Word> wordList = new LinkedList<Word>();

        while (token != null) {
            if (token.isWord()) {
                WordSearchState wordState = (WordSearchState) token
                        .getSearchState();
                Word word = wordState.getPronunciation().getWord();
                wordList.add(0, word);
            }
            token = token.getPredecessor();
        }
        return new WordSequence(wordList);
    }
   
    /** Keeps track of statistics for a particular word sequence */

    static class WordStats {

        public final static Comparator<WordStats> COMPARATOR = new Comparator<WordStats>() {
            public int compare(WordStats ws1, WordStats ws2) {
                if (ws1.maxScore > ws2.maxScore) {
                    return -1;
                } else if (ws1.maxScore == ws2.maxScore) {
                    return 0;
                } else {
                    return 1;
                }
            }
        };

        private int size;
        private float maxScore;
        private float minScore;
        private final WordSequence ws;

        /**
         * Creates a word statistics for the given sequence
         *
         * @param ws the word sequence
         */
        WordStats(WordSequence ws) {
            size = 0;
            maxScore = -Float.MAX_VALUE;
            minScore = Float.MAX_VALUE;
            this.ws = ws;
        }


        /**
         * Updates the statistics based upon the scores for the given token
         *
         * @param t the token
         */
        void update(Token t) {
            size++;
            if (t.getScore() > maxScore) {
                maxScore = t.getScore();
            }
            if (t.getScore() < minScore) {
                minScore = t.getScore();
            }
        }


        /**
         * Returns a string representation of the statistics
         *
         * @return a string representation
         */
        @Override
        public String toString() {
            return "states:" + size + " max:" + maxScore + " min:" + minScore + ' '
                    + ws;
        }
    }
}
TOP

Related Classes of edu.cmu.sphinx.decoder.search.stats.WordTracker

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.