Examples of KeywordTokenizer
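
KeywordTokenizer emits the entire input as a single token, which is why the test snippets below use it as the base tokenizer whenever exactly one term has to flow through a filter chain. As a quick orientation, here is a minimal sketch of that behavior (the class name is hypothetical; it assumes the Lucene 4.x Reader-based API used throughout this page):

    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.KeywordTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class KeywordTokenizerSketch {
      public static void main(String[] args) throws Exception {
        // KeywordTokenizer does not split on whitespace or punctuation:
        // the whole input becomes one token.
        TokenStream ts = new KeywordTokenizer(new StringReader("Hello World"));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          System.out.println(term.toString()); // prints "Hello World" once
        }
        ts.end();
        ts.close();
      }
    }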


Examples of org.apache.lucene.analysis.core.KeywordTokenizer

    args.put("custom", "rules.txt");
    args.put("strength", "primary");
    ICUCollationKeyFilterFactory factory = new ICUCollationKeyFilterFactory(args);
    factory.inform(new StringMockResourceLoader(tailoredRules));
    TokenStream tsUmlaut = factory.create(
        new KeywordTokenizer(new StringReader(germanUmlaut)));
    TokenStream tsOE = factory.create(
        new KeywordTokenizer(new StringReader(germanOE)));

    // Both spellings must produce identical collation keys under the tailored rules.
    assertCollatesToSame(tsUmlaut, tsOE);
  }

Examples of org.apache.lucene.analysis.core.KeywordTokenizer

      // Randomized SynonymFilter test: KeywordTokenizer turns the whole field
      // value into one token before synonym lookup (random and map come from
      // the enclosing test).
      final boolean ignoreCase = random.nextBoolean();
     
      final Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          Tokenizer tokenizer = new KeywordTokenizer(reader);
          return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
        }
      };

      checkAnalysisConsistency(random, analyzer, random.nextBoolean(), "");

Examples of org.apache.lucene.analysis.core.KeywordTokenizer

  // EdgeNGramTokenFilter must handle the empty term, for both front grams
  // (current version) and the deprecated Lucene 4.3 back grams.
  public void testEmptyTerm() throws Exception {
    Random random = random();
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer,
            new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, EdgeNGramTokenFilter.Side.FRONT, 2, 15));
      }   
    };
    checkAnalysisConsistency(random, a, random.nextBoolean(), "");
   
    Analyzer b = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer,
            new EdgeNGramTokenFilter(Version.LUCENE_43, tokenizer, EdgeNGramTokenFilter.Side.BACK, 2, 15));
      }   
    };
    checkAnalysisConsistency(random, b, random.nextBoolean(), "");
  }

Examples of org.apache.lucene.analysis.core.KeywordTokenizer

  // Edge n-grams are measured in code points, so supplementary characters are
  // never split; offsets always span the whole original token.
  public void testSupplementaryCharacters() throws IOException {
    final String s = _TestUtil.randomUnicodeString(random(), 10);
    final int codePointCount = s.codePointCount(0, s.length());
    final int minGram = _TestUtil.nextInt(random(), 1, 3);
    final int maxGram = _TestUtil.nextInt(random(), minGram, 10);
    TokenStream tk = new KeywordTokenizer(new StringReader(s));
    tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram);
    final CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class);
    final OffsetAttribute offsetAtt = tk.addAttribute(OffsetAttribute.class);
    tk.reset();
    for (int i = minGram; i <= Math.min(codePointCount, maxGram); ++i) {
      assertTrue(tk.incrementToken());
      assertEquals(0, offsetAtt.startOffset());
      assertEquals(s.length(), offsetAtt.endOffset());
      final int end = Character.offsetByCodePoints(s, 0, i);
      assertEquals(s.substring(0, end), termAtt.toString());
    }
    assertFalse(tk.incrementToken());
  }

Examples of org.apache.lucene.analysis.core.KeywordTokenizer

 
  // CodepointCountFilter with a range that includes 0 must keep the empty term.
  public void testEmptyTerm() throws IOException {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new CodepointCountFilter(TEST_VERSION_CURRENT, tokenizer, 0, 5));
      }
    };
    checkOneTerm(a, "", "");
  }

Examples of org.apache.lucene.analysis.core.KeywordTokenizer

      // Randomized check: the single KeywordTokenizer token is kept only when its
      // code point count falls within [min, max] (the enclosing loop is elided here).
      String text = _TestUtil.randomUnicodeString(random(), 100);
      int min = _TestUtil.nextInt(random(), 0, 100);
      int max = _TestUtil.nextInt(random(), 0, 100);
      int count = text.codePointCount(0, text.length());
      boolean expected = count >= min && count <= max;
      TokenStream stream = new KeywordTokenizer(new StringReader(text));
      stream = new CodepointCountFilter(TEST_VERSION_CURRENT, stream, min, max);
      stream.reset();
      assertEquals(expected, stream.incrementToken());
      stream.end();
      stream.close();
    }
  }

Examples of org.apache.lucene.analysis.core.KeywordTokenizer

 
  // ASCIIFoldingFilter must pass the empty term through unchanged.
  public void testEmptyTerm() throws IOException {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new ASCIIFoldingFilter(tokenizer));
      }
    };
    checkOneTerm(a, "", "");
  }

Examples of org.apache.lucene.analysis.core.KeywordTokenizer

 
  // ShingleFilter must pass the empty term through unchanged.
  public void testEmptyTerm() throws IOException {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new ShingleFilter(tokenizer));
      }
    };
    checkOneTerm(a, "", "");
  }

Examples of org.apache.lucene.analysis.core.KeywordTokenizer

      // flags and protectedWords are randomized by the enclosing test loop.
      Analyzer a = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          Tokenizer tokenizer = new KeywordTokenizer(reader);
          return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, protectedWords));
        }
      };
      // depending upon options, this thing may or may not preserve the empty term
      checkAnalysisConsistency(random, a, random.nextBoolean(), "");

Examples of org.apache.lucene.analysis.core.KeywordTokenizer

 
  // HunspellStemFilter must handle the empty term (DICTIONARY is loaded by the
  // enclosing test class).
  public void testEmptyTerm() throws IOException {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, DICTIONARY, _TestUtil.nextInt(random(), 1, 3)));
      }
    };
    checkOneTerm(a, "", "");
  }