Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.ReusableAnalyzerBase


    String tf = args.get("tokenizerFactory");

    final TokenizerFactory factory = tf == null ? null : loadTokenizerFactory(loader, tf, args);
   
    Analyzer analyzer = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_31, reader) : factory.create(reader);
        TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_31, tokenizer) : tokenizer;
        return new TokenStreamComponents(tokenizer, stream);
View Full Code Here


  }
 
  /** blast some random strings through the analyzer */
  public void testRandomStrings() throws Exception {
    final Transliterator transform = Transliterator.getInstance("Any-Latin");
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
        return new TokenStreamComponents(tokenizer, new ICUTransformFilter(tokenizer, transform));
      }
View Full Code Here

*/
@Test
public class UniqueTokenFilterTests {

    @Test public void simpleTest() throws IOException {
        Analyzer analyzer = new ReusableAnalyzerBase() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName,
                                                             Reader reader) {
                Tokenizer t = new WhitespaceTokenizer(Lucene.VERSION, reader);
                return new TokenStreamComponents(t, new UniqueTokenFilter(t));
            }
        };

        TokenStream test = analyzer.reusableTokenStream("test", new StringReader("this test with test"));
        CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class);
        assertThat(test.incrementToken(), equalTo(true));
        assertThat(termAttribute.toString(), equalTo("this"));

        assertThat(test.incrementToken(), equalTo(true));
View Full Code Here

*/
@Test
public class TruncateTokenFilterTests {

    @Test public void simpleTest() throws IOException {
        Analyzer analyzer = new ReusableAnalyzerBase() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName,
                                                             Reader reader) {
                Tokenizer t = new WhitespaceTokenizer(Lucene.VERSION, reader);
                return new TokenStreamComponents(t, new TruncateTokenFilter(t, 3));
            }
        };

        TokenStream test = analyzer.reusableTokenStream("test", new StringReader("a bb ccc dddd eeeee"));
        CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class);
        assertThat(test.incrementToken(), equalTo(true));
        assertThat(termAttribute.toString(), equalTo("a"));

        assertThat(test.incrementToken(), equalTo(true));
View Full Code Here

      "a\\=>a => b\\=>b\n" +
      "a\\,a => b\\,b";
    SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random, MockTokenizer.KEYWORD, false));
    parser.add(new StringReader(testFile));
    final SynonymMap map = parser.build();
    Analyzer analyzer = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false));
      }
View Full Code Here

        add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
      }
      final SynonymMap map = b.build();
      final boolean ignoreCase = random.nextBoolean();
     
      final Analyzer analyzer = new ReusableAnalyzerBase() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
          return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
        }
View Full Code Here

     
    SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random));
    parser.add(new StringReader(testFile));
    final SynonymMap map = parser.build();
     
    Analyzer analyzer = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
      }
View Full Code Here

    add("a", "aa", keepOrig);
    add("b", "bb", keepOrig);
    add("z x c v", "zxcv", keepOrig);
    add("x c", "xc", keepOrig);
    final SynonymMap map = b.build();
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
      }
View Full Code Here

    final boolean keepOrig = false;
    add("a b", "ab", keepOrig);
    add("a b", "ab", keepOrig);
    add("a b", "ab", keepOrig);
    final SynonymMap map = b.build();
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
      }
View Full Code Here

    final boolean keepOrig = false;
    add("a b", "ab", keepOrig);
    add("a b", "ab", keepOrig);
    add("a b", "ab", keepOrig);
    final SynonymMap map = b.build();
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
      }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.ReusableAnalyzerBase

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.