Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.KeywordTokenizer


    this.collator = collator;
  }

  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new KeywordTokenizer(reader);
    result = new ICUCollationKeyFilter(result, collator);
    return result;
  }
View Full Code Here


    throws IOException {
   
    SavedStreams streams = (SavedStreams)getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
      streams.source = new KeywordTokenizer(reader);
      streams.result = new ICUCollationKeyFilter(streams.source, collator);
      setPreviousTokenStream(streams);
    } else {
      streams.source.reset(reader);
    }
View Full Code Here

  public void testStem() throws IOException {
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName,
          Reader reader) {
        Tokenizer t = new KeywordTokenizer(reader);
        return new TokenStreamComponents(t, new RussianStemFilter(t));
      }
    };
    InputStream voc = getClass().getResourceAsStream("wordsUTF8.txt");
    InputStream out = getClass().getResourceAsStream("stemsUTF8.txt");
View Full Code Here

  public void testStemming() throws Exception {
    Analyzer analyzer = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName,
          Reader reader) {
        Tokenizer t = new KeywordTokenizer(reader);
        return new TokenStreamComponents(t,
            new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, t)));
      }
    };
   
View Full Code Here

   
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName,
          Reader reader) {
        Tokenizer t = new KeywordTokenizer(reader);
        return new TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
     
    };
   
    assertVocabulary(a, getDataFile("TestSnowballVocabData.zip"),
View Full Code Here

 
  public void testOptimizer() throws Exception {
    String rules = "a > b; b > c;"; // convert a's to b's and b's to c's
    Transliterator custom = Transliterator.createFromRules("test", rules, Transliterator.FORWARD);
    assertTrue(custom.getFilter() == null);
    new ICUTransformFilter(new KeywordTokenizer(new StringReader("")), custom);
    assertTrue(custom.getFilter().equals(new UnicodeSet("[ab]")));
  }
View Full Code Here

 
  public void testOptimizerSurrogate() throws Exception {
    String rules = "\\U00020087 > x;"; // convert CJK UNIFIED IDEOGRAPH-20087 to an x
    Transliterator custom = Transliterator.createFromRules("test", rules, Transliterator.FORWARD);
    assertTrue(custom.getFilter() == null);
    new ICUTransformFilter(new KeywordTokenizer(new StringReader("")), custom);
    assertTrue(custom.getFilter().equals(new UnicodeSet("[\\U00020087]")));
  }
View Full Code Here

    new ICUTransformFilter(new KeywordTokenizer(new StringReader("")), custom);
    assertTrue(custom.getFilter().equals(new UnicodeSet("[\\U00020087]")));
  }

  private void checkToken(Transliterator transform, String input, String expected) throws IOException {
    TokenStream ts = new ICUTransformFilter(new KeywordTokenizer((new StringReader(input))), transform);
    assertTokenStreamContents(ts, new String[] { expected });
  }
View Full Code Here

    // lets make booked stem to books
    // the override filter will convert "booked" to "books",
    // but also mark it with KeywordAttribute so Porter will not change it.
    Map<String,String> dictionary = new HashMap<String,String>();
    dictionary.put("booked", "books");
    Tokenizer tokenizer = new KeywordTokenizer(new StringReader("booked"));
    TokenStream stream = new PorterStemFilter(
        new StemmerOverrideFilter(TEST_VERSION_CURRENT, tokenizer, dictionary));
    assertTokenStreamContents(stream, new String[] { "books" });
  }
View Full Code Here

      _collator = collator;
    }

    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      TokenStream result = new KeywordTokenizer(reader);
      result = new CollationKeyFilter(result, _collator);
      return result;
    }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.KeywordTokenizer

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.