Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.CharArraySet$CharArraySetIterator


  private final CharArraySet words;
  private final TermAttribute termAtt;

  public KeepWordFilter(TokenStream in, Set<String> words, boolean ignoreCase ) {
    super(in);
    this.words = new CharArraySet(words, ignoreCase);
    this.termAtt = (TermAttribute)addAttribute(TermAttribute.class);
  }
View Full Code Here


      try {
        File protectedWordFiles = new File(wordFiles);
        if (protectedWordFiles.exists()) {
          List<String> wlist = loader.getLines(wordFiles);
          //This cast is safe in Lucene
          protectedWords = new CharArraySet(wlist, false);//No need to go through StopFilter as before, since it just uses a List internally
        } else  {
          List<String> files = StrUtils.splitFileNames(wordFiles);
          for (String file : files) {
            List<String> wlist = loader.getLines(file.trim());
            if (protectedWords == null)
              protectedWords = new CharArraySet(wlist, false);
            else
              protectedWords.addAll(wlist);
          }
        }
      } catch (IOException e) {
View Full Code Here

      try {
        List<String> files = StrUtils.splitFileNames(commonWordFiles);
        if (commonWords == null && files.size() > 0) {
          // default stopwords list has 35 or so words, but maybe don't make it
          // that big to start
          commonWords = new CharArraySet(files.size() * 10, ignoreCase);
        }
        for (String file : files) {
          List<String> wlist = loader.getLines(file.trim());
          // TODO: once StopFilter.makeStopSet(List) method is available, switch
          // to using that so we can avoid a toArray() call
View Full Code Here

          new String[] { "знан", "эт", "хран", "тайн" });
    }
   
   
    public void testWithStemExclusionSet() throws Exception {
      CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
      set.add("представление");
      Analyzer a = new RussianAnalyzer(TEST_VERSION_CURRENT, RussianAnalyzer.getDefaultStopSet() , set);
      assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
          new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представление" });
    
    }
View Full Code Here

   * @param articles a set of stopword articles
   */
  public ElisionFilter(Version matchVersion, TokenStream input, Set<?> articles) {
    super(input);
    this.articles = CharArraySet.unmodifiableSet(
        new CharArraySet(matchVersion, articles, true));
  }
View Full Code Here

   * @deprecated use {@link #ElisionFilter(Version, TokenStream, Set)} instead
   */
  @Deprecated
  public ElisionFilter(TokenStream input, String[] articles) {
    this(Version.LUCENE_CURRENT, input,
        new CharArraySet(Version.LUCENE_CURRENT,
            Arrays.asList(articles), true));
  }
View Full Code Here

  }
 
  public void testExclusionTableBWCompat() throws IOException {
    GermanStemFilter filter = new GermanStemFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT,
        new StringReader("Fischen Trinken")));
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("fischen");
    filter.setExclusionSet(set);
    assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
  }
View Full Code Here

    filter.setExclusionSet(set);
    assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
  }

  public void testWithKeywordAttribute() throws IOException {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("fischen");
    GermanStemFilter filter = new GermanStemFilter(
        new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
            "Fischen Trinken")), set));
    assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
  }
View Full Code Here

            "Fischen Trinken")), set));
    assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
  }

  public void testWithKeywordAttributeAndExclusionTable() throws IOException {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("fischen");
    CharArraySet set1 = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set1.add("trinken");
    set1.add("fischen");
    GermanStemFilter filter = new GermanStemFilter(
        new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
            "Fischen Trinken")), set));
    filter.setExclusionSet(set1);
    assertTokenStreamContents(filter, new String[] { "fischen", "trinken" });
View Full Code Here

    assertAnalyzesTo(cz, "e", new String[] { "e" });
    assertAnalyzesTo(cz, "zi", new String[] { "zi" });
  }
 
  public void testWithKeywordAttribute() throws IOException {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("hole");
    CzechStemFilter filter = new CzechStemFilter(new KeywordMarkerFilter(
        new MockTokenizer(new StringReader("hole desek"), MockTokenizer.WHITESPACE, false), set));
    assertTokenStreamContents(filter, new String[] { "hole", "desk" });
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.CharArraySet$CharArraySetIterator

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.