Examples of SnowballStemmer


Examples of org.tartarus.snowball.SnowballStemmer

    for (String stemmerLanguage : stemmers) {
      System.out.println(stemmerLanguage);

      Class<?> stemClass = Class.forName("org.tartarus.snowball.ext."
            + stemmerLanguage + "Stemmer");
      SnowballStemmer stemmer = (SnowballStemmer) stemClass
            .newInstance();

      for (String stopWord : stopList) {
        StringBuilder sb = new StringBuilder();

        for (String part : stopWord.split(" ")) {
          if (sb.length() > 0)
            sb.append(" ");
          stemmer.setCurrent(part);
          stemmer.stem();
          sb.append(stemmer.getCurrent());
        }

        stopSet.add(sb.toString());
      }
    }
View Full Code Here

Examples of org.tartarus.snowball.SnowballStemmer

        fields.set(0, fields.get(0) + fields.get(1));
       
        String text = fields.get(3).toLowerCase();
       
        loadStopWords();
      SnowballStemmer stemmer = new org.tartarus.snowball.ext.englishStemmer();
     
      for(int i=0; i < stopWords.size(); i++)
      {
        text = text.replaceAll("[\\s]"+stopWords.get(i)+"[\\s]", " ");
      }
     
      Pattern p = Pattern.compile("[^a-zA-Z]+");
      String [] words = p.split(text);
      text = "";
      for(int i=0; i < words.length; i++)
      {
        if(words[i].length() < 3)
        {
          continue;
        }
       
        stemmer.setCurrent(words[i]);
        stemmer.stem();
        String stemmed = stemmer.getCurrent();
       
        if(!loadedWords.contains(stemmed))
        {
          continue;
        }
View Full Code Here

Examples of org.tartarus.snowball.SnowballStemmer

   
    int[] wordCounter = new int[loadedWords.size()];
   
    String text = fields.get(3).toLowerCase();
    loadStopWords();
    SnowballStemmer stemmer = new org.tartarus.snowball.ext.englishStemmer();
   
    for(int i=0; i < stopWords.size(); i++)
    {
      text = text.replaceAll("[\\s]"+stopWords.get(i)+"[\\s]", " ");
    }
   
    Pattern p = Pattern.compile("[^a-zA-Z]+");
    String [] words = p.split(text);
   
    for(int i=0; i < words.length; i++)
    {
      if(words[i].length() < 3)
      {
        continue;
      }
     
      stemmer.setCurrent(words[i]);
      stemmer.stem();
      String stemmed = stemmer.getCurrent();
     
      if(!loadedWords.contains(stemmed))
      {
        continue;
      }
View Full Code Here

Examples of org.tartarus.snowball.SnowballStemmer

  public Object[] getValues(Vector<String> fields,
      HashMap<String, Boolean> hashMap)
  {
    String text = fields.get(3).toLowerCase();
    loadStopWords();
    SnowballStemmer stemmer = new org.tartarus.snowball.ext.englishStemmer();
   
    for(int i=0; i < stopWords.size(); i++)
    {
      text = text.replaceAll("[\\s]"+stopWords.get(i)+"[\\s]", " ");
    }
   
    Pattern p = Pattern.compile("[^a-zA-Z]+");
    String [] words = p.split(text);
    text = "";
    int no_words = 0;
    for(int i=0; i < words.length; i++)
    {
      if(words[i].length() < 3)
      {
        continue;
      }
     
      stemmer.setCurrent(words[i]);
      stemmer.stem();
      String stemmed = stemmer.getCurrent();
     
     
      if(text.indexOf(stemmed) >= 0)
      {
        continue;
View Full Code Here

Examples of org.tartarus.snowball.SnowballStemmer

  public Object[] getValues(Vector<String> fields,
      HashMap<String, Boolean> hashMap)
  {
    String text = fields.get(3).toLowerCase();
    loadStopWords();
    SnowballStemmer stemmer = new org.tartarus.snowball.ext.englishStemmer();
   
    for(int i=0; i < stopWords.size(); i++)
    {
      text = text.replaceAll("[\\s]"+stopWords.get(i)+"[\\s]", " ");
    }
   
    Pattern p = Pattern.compile("[^a-zA-Z]+");
    String [] words = p.split(text);
    text = "";
    for(int i=0; i < words.length; i++)
    {
      if(words[i].length() < 3)
      {
        continue;
      }
     
      stemmer.setCurrent(words[i]);
      stemmer.stem();
      String stemmed = stemmer.getCurrent();
     
      if(VerifyParents.getParent(fields.get(1)) == null)
      {
        if(stemWordsTotal.containsKey(stemmed))
        {
View Full Code Here

Examples of weka.core.stemmers.SnowballStemmer

        private SnowballStemmer snowball;
        private final Pattern nonsensePattern;

        public NoNonsenseStemmer(boolean useSnowball) {
            if (useSnowball) {
                this.snowball = new SnowballStemmer();
            }

            this.nonsensePattern = Pattern.compile("^[\\p{Digit}\\p{Punct}]*$");
        }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.