Package org.tartarus.snowball.ext

Examples of org.tartarus.snowball.ext.englishStemmer


  public static void main(String[] args) throws Exception {
    LuceneIndexerSearcher lis = new LuceneIndexerSearcher(false);
    IndexSearcher is = lis.getIndexSearcher();
   
    Stemmer stemmerTools = new Stemmer(new EnglishStemmer());
   
    //QueryParser qp = new Oscar3QueryParser("txt", new Oscar3Analyzer(), lis, false);
    //Query q = qp.parse("NaCl");
   
    String queryTerm = "content";
View Full Code Here


   
    return results;
  }
 
  public Map<String,List<String>> ngramsByStem() {
    Stemmer st = new Stemmer(new EnglishStemmer());
    Set<String> terms = new HashSet<String>();
    for(SubstringClass sc : classArray) {
      for(String s : sc.getSuffixStrings(2)) {
        if(!checkTerm(s)) continue;
        terms.add(s);
View Full Code Here

import uk.ac.cam.ch.wwmm.ptclib.string.StringTools;

public class DocClassifier {

  public static Event docToEvent(IndexReader ir, int doc, String cue) throws Exception {
    Stemmer st = new Stemmer(new EnglishStemmer());
    List<String> words = new ArrayList<String>();
    boolean hasCue = false;
    TermFreqVector tvf = ir.getTermFreqVector(doc, "txt");
    String [] termArray = tvf.getTerms();
    int [] termFreqs = tvf.getTermFrequencies();
View Full Code Here

import java.util.Map;

public class TestSnowballPorterFilterFactory extends BaseTokenStreamTestCase {

  public void test() throws IOException {
    EnglishStemmer stemmer = new EnglishStemmer();
    String[] test = {"The", "fledgling", "banks", "were", "counting", "on", "a", "big", "boom", "in", "banking"};
    String[] gold = new String[test.length];
    for (int i = 0; i < test.length; i++) {
      stemmer.setCurrent(test[i]);
      stemmer.stem();
      gold[i] = stemmer.getCurrent();
    }

    SnowballPorterFilterFactory factory = new SnowballPorterFilterFactory();
    Map<String, String> args = new HashMap<String, String>();
    args.put("language", "English");
View Full Code Here

public class TestSnowballPorterFilterFactory extends BaseTokenStreamFactoryTestCase {

  public void test() throws Exception {
    String text = "The fledgling banks were counting on a big boom in banking";
    EnglishStemmer stemmer = new EnglishStemmer();
    String[] test = text.split("\\s");
    String[] gold = new String[test.length];
    for (int i = 0; i < test.length; i++) {
      stemmer.setCurrent(test[i]);
      stemmer.stem();
      gold[i] = stemmer.getCurrent();
    }
   
    Reader reader = new StringReader(text);
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = tokenFilterFactory("SnowballPorter", "language", "English").create(stream);
View Full Code Here

import java.util.Collections;

public class EnglishPorterFilterFactoryTest extends BaseTokenTestCase {

  public void test() throws IOException {
    EnglishStemmer stemmer = new EnglishStemmer();
    String[] test = {"The", "fledgling", "banks", "were", "counting", "on", "a", "big", "boom", "in", "banking"};
    StringBuilder gold = new StringBuilder();
    for (int i = 0; i < test.length; i++) {
      stemmer.setCurrent(test[i]);
      stemmer.stem();
      gold.append(stemmer.getCurrent()).append(' ');
    }

    EnglishPorterFilterFactory factory = new EnglishPorterFilterFactory();
    Map<String, String> args = new HashMap<String, String>();
View Full Code Here

    String out = tsToString(factory.create(new IterTokenStream(test)));
    assertEquals(gold.toString().trim(), out);
  }

  public void testProtected() throws Exception {
    EnglishStemmer stemmer = new EnglishStemmer();
    String[] test = {"The", "fledgling", "banks", "were", "counting", "on", "a", "big", "boom", "in", "banking"};
    StringBuilder gold = new StringBuilder();
    for (int i = 0; i < test.length; i++) {
      if (test[i].equals("fledgling") == false && test[i].equals("banks") == false) {
        stemmer.setCurrent(test[i]);
        stemmer.stem();
        gold.append(stemmer.getCurrent()).append(' ');
      } else {
        gold.append(test[i]).append(' ');
      }
    }
View Full Code Here

    /**
     * {@inheritDoc}
     */
    public String stem(String token) {
        englishStemmer stemmer = new englishStemmer();
        stemmer.setCurrent(token);
        stemmer.stem();
        return stemmer.getCurrent();
    }
View Full Code Here

      // initialize stemmers
      this.stemmers = new HashMap<String, SnowballProgram>();
      this.stemmers.put("da", new danishStemmer());
      this.stemmers.put("nl", new dutchStemmer());
      this.stemmers.put("en", new englishStemmer());
      this.stemmers.put("fi", new finnishStemmer());
      this.stemmers.put("fr", new frenchStemmer());
      this.stemmers.put("de", new germanStemmer());
      this.stemmers.put("hu", new hungarianStemmer());
      this.stemmers.put("it", new italianStemmer());
View Full Code Here

      // initialize stemmers
      this.stemmers = new HashMap();
      this.stemmers.put("da", new danishStemmer());
      this.stemmers.put("nl", new dutchStemmer());
      this.stemmers.put("en", new englishStemmer());
      this.stemmers.put("fi", new finnishStemmer());
      this.stemmers.put("fr", new frenchStemmer());
      this.stemmers.put("de", new germanStemmer());
      this.stemmers.put("hu", new hungarianStemmer());
      this.stemmers.put("it", new italianStemmer());
View Full Code Here

TOP

Related Classes of org.tartarus.snowball.ext.englishStemmer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.