Examples of SentenceTokenizer


Examples of org.apache.lucene.analysis.cn.smart.SentenceTokenizer

      throws IOException {
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
      setPreviousTokenStream(streams);
      streams.tokenStream = new SentenceTokenizer(reader);
      streams.filteredTokenStream = new WordTokenFilter(streams.tokenStream);
      streams.filteredTokenStream = new PorterStemFilter(streams.filteredTokenStream);
      if (!stopWords.isEmpty()) {
        streams.filteredTokenStream = new StopFilter(matchVersion, streams.filteredTokenStream, stopWords, false);
      }
View Full Code Here

Examples of org.languagetool.tokenizers.SentenceTokenizer

 
  @Override
  public void setUp() {
    tagger = new EnglishTagger();
    tokenizer = new WordTokenizer();
    sentenceTokenizer = new SentenceTokenizer();
    disambiguator = new EnglishRuleDisambiguator();
    disamb2 = new DemoDisambiguator();
  }
View Full Code Here

Examples of org.languagetool.tokenizers.SentenceTokenizer

  @Override
  public void setUp() {
    tagger = new RomanianTagger();
    tokenizer = new RomanianWordTokenizer();
    sentenceTokenizer = new SentenceTokenizer();
    disambiguator = new RomanianRuleDisambiguator();
    disamb2 = new DemoDisambiguator();
  }
View Full Code Here

Examples of org.languagetool.tokenizers.SentenceTokenizer

 
  @Override
  public void setUp() {
    tagger = new FrenchTagger();
    tokenizer = new WordTokenizer();
    sentenceTokenizer = new SentenceTokenizer();
    disambiguator = new FrenchRuleDisambiguator();
    disamb2 = new DemoDisambiguator();   
    try {
      lt = new JLanguageTool(new French());
    } catch (IOException e) {
View Full Code Here

Examples of org.languagetool.tokenizers.SentenceTokenizer

      printMatches(ruleMatches, prevMatches, contents, contextSize);
    }

    //display stats if it's not in a buffered mode
    if (xmlMode == StringTools.XmlPrintMode.NORMAL_XML) {
      SentenceTokenizer sentenceTokenizer = lt.getLanguage().getSentenceTokenizer();
      int sentenceCount = sentenceTokenizer.tokenize(contents).size();
      displayTimeStats(startTime, sentenceCount, apiFormat);
    }
    return ruleMatches.size();
  }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.