Package org.apache.lucene.analysis.miscellaneous

Examples of org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter


    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("quilométricas"), false);
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
        return new TokenStreamComponents(source, new PortugueseMinimalStemFilter(sink));
      }
    };
    checkOneTerm(a, "quilométricas", "quilométricas");
  }
View Full Code Here


    assertTokenStreamContents(filter, new String[]{"lucene", "lucen", "is", "awesome"}, new int[] {1, 0, 1, 1});
   
    // assert with keywork marker
    tokenizer = new MockTokenizer(new StringReader("lucene is awesome"), MockTokenizer.WHITESPACE, true);
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList("Lucene"), true);
    filter = new HunspellStemFilter(new SetKeywordMarkerFilter(tokenizer, set), DICTIONARY, _TestUtil.nextInt(random(), 1, 3));
    assertTokenStreamContents(filter, new String[]{"lucene", "is", "awesome"}, new int[] {1, 1, 1});
  }
View Full Code Here

    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("elefantes"), false);
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
        return new TokenStreamComponents(source, new GalicianMinimalStemFilter(sink));
      }
    };
    checkOneTerm(a, "elefantes", "elefantes");
  }
View Full Code Here

        final CharArraySet keywords = new CharArraySet(version, 1, false);
        keywords.add("liście");

        final Tokenizer src = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
        TokenStream result = new StandardFilter(TEST_VERSION_CURRENT, src);
        result = new SetKeywordMarkerFilter(result, keywords);
        result = new MorfologikFilter(result, dictionary, TEST_VERSION_CURRENT);

        return new TokenStreamComponents(src, result);
      }
    };
View Full Code Here

 
  public void testWithKeywordAttribute() throws IOException {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("Brasília");
    BrazilianStemFilter filter = new BrazilianStemFilter(
        new SetKeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
            "Brasília Brasilia")), set));
    assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
  }
View Full Code Here

    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sekretæren"), false);
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
        return new TokenStreamComponents(source, new NorwegianMinimalStemFilter(sink));
      }
    };
    checkOneTerm(a, "sekretæren", "sekretæren");
  }
View Full Code Here

    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sekretæren"), false);
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
        return new TokenStreamComponents(source, new NorwegianLightStemFilter(sink));
      }
    };
    checkOneTerm(a, "sekretæren", "sekretæren");
  }
View Full Code Here

  public void testWithKeywordAttribute() throws IOException {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("ساهدهات");
    ArabicLetterTokenizer tokenStream  = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader("ساهدهات"));

    ArabicStemFilter filter = new ArabicStemFilter(new SetKeywordMarkerFilter(tokenStream, set));
    assertTokenStreamContents(filter, new String[]{"ساهدهات"});
  }
View Full Code Here

 
  public void testWithKeywordAttribute() throws IOException {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("yourselves");
    Tokenizer tokenizer = new MockTokenizer(new StringReader("yourselves yours"), MockTokenizer.WHITESPACE, false);
    TokenStream filter = new PorterStemFilter(new SetKeywordMarkerFilter(tokenizer, set));  
    assertTokenStreamContents(filter, new String[] {"yourselves", "your"});
  }
View Full Code Here

    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("chevaux"), false);
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
        return new TokenStreamComponents(source, new FrenchMinimalStemFilter(sink));
      }
    };
    checkOneTerm(a, "chevaux", "chevaux");
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.