Package wordcram

Source Code of wordcram.WordCounter

package wordcram;

import java.util.*;
import java.util.Map.Entry;

import cue.lang.Counter;
import cue.lang.WordIterator;
import cue.lang.stop.StopWords;

class WordCounter {

    private StopWords cueStopWords;
    private Set<String> extraStopWords = new HashSet<String>();
    private boolean excludeNumbers;

    public WordCounter() {
        this(null);
    }
    public WordCounter(StopWords cueStopWords) {
        this.cueStopWords = cueStopWords;
    }

    public WordCounter withExtraStopWords(String extraStopWordsString) {
        String[] stopWordsArray = extraStopWordsString.toLowerCase().split(" ");
        extraStopWords = new HashSet<String>(Arrays.asList(stopWordsArray));
        return this;
    }

    public WordCounter shouldExcludeNumbers(boolean shouldExcludeNumbers) {
        excludeNumbers = shouldExcludeNumbers;
        return this;
    }

  public Word[] count(String text, RenderOptions renderOptions) {
    if (cueStopWords == null) {
      cueStopWords = StopWords.guess(text);

            if (cueStopWords == StopWords.Arabic ||
                cueStopWords == StopWords.Farsi ||
                cueStopWords == StopWords.Hebrew) {
                renderOptions.rightToLeft = true;
            }

      tellScripterAboutTheGuess(cueStopWords);
    }
    return countWords(text);
  }

  private void tellScripterAboutTheGuess(StopWords stopWords) {
    // TODO Find a better way to do this; it prints out during the tests. =p
    if (stopWords == null) {
      System.out.println("cue.language can't guess what language your text is in.");
    } else {
      System.out.println("cue.language guesses your text is in " + stopWords);
    }
  }

    private Word[] countWords(String text) {
        Counter<String> counter = new Counter<String>();

        for (String word : new WordIterator(text)) {
            if (shouldCountWord(word)) {
                counter.note(word);
            }
        }

        List<Word> words = new ArrayList<Word>();

        for (Entry<String, Integer> entry : counter.entrySet()) {
            words.add(new Word(entry.getKey(), (int)entry.getValue()));
        }

        return words.toArray(new Word[0]);
    }

    private boolean shouldCountWord(String word) {
        return !isStopWord(word) && !(excludeNumbers && isNumeric(word));
    }

    private boolean isNumeric(String word) {
        try {
            Double.parseDouble(word);
            return true;
        }
        catch (NumberFormatException x) {
            return false;
        }
    }

    private boolean isStopWord(String word) {
      boolean cueSaysStopWord = cueStopWords != null && cueStopWords.isStopWord(word);
      boolean extraSaysStopWord = extraStopWords.contains(word.toLowerCase());
        return  cueSaysStopWord || extraSaysStopWord;
    }

}
TOP

Related Classes of wordcram.WordCounter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.