Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.TokenFilter
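
The snippets below are truncated excerpts from Lucene test code; each one cuts off where the original page linked to the full source. For orientation, here is a minimal, self-contained sketch of a custom TokenFilter. The class name SimpleLowerCaseFilter is illustrative and not taken from any snippet; it assumes the Lucene 4.x analysis API used throughout this page.

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// A TokenFilter wraps another TokenStream ("input") and rewrites its attributes
// in incrementToken() before passing each token downstream.
public final class SimpleLowerCaseFilter extends TokenFilter {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

  public SimpleLowerCaseFilter(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false; // upstream stream is exhausted
    }
    // Lower-case the term text in place; offsets and positions pass through unchanged.
    // (Simplified sketch: ignores supplementary characters.)
    final char[] buffer = termAtt.buffer();
    final int length = termAtt.length();
    for (int i = 0; i < length; i++) {
      buffer[i] = Character.toLowerCase(buffer[i]);
    }
    return true;
  }
}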


  public void testInvalidOffsets() throws Exception {
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
        filters = new EdgeNGramTokenFilter(filters, EdgeNGramTokenFilter.Side.FRONT, 2, 15);
        return new TokenStreamComponents(tokenizer, filters);
      }
    };
    assertAnalyzesTo(analyzer, "mosfellsbær",
View Full Code Here
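
The assertAnalyzesTo call above is cut off before its expected-token arguments. Outside the test framework, a chain like this can be consumed directly; a rough sketch (reusing the analyzer built above, with an illustrative field name and text, and assuming the usual imports of StringReader, TokenStream, CharTermAttribute and OffsetAttribute) is:

    TokenStream ts = analyzer.tokenStream("field", new StringReader("mosfellsbær"));
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
    try {
      ts.reset();                 // required before the first incrementToken()
      while (ts.incrementToken()) {
        System.out.println(term + " [" + offset.startOffset() + "-" + offset.endOffset() + "]");
      }
      ts.end();                   // records the final offset state
    } finally {
      ts.close();
    }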


         
          // Build a random filter chain: skip constructors known to be broken and,
          // for the filters that are kept, track whether offsets remain correct.
          final Object args[] = newFilterArgs(random, spec.stream, ctor.getParameterTypes());
          if (broken(ctor, args)) {
            continue;
          }
          final TokenFilter flt = createComponent(ctor, args, descr);
          if (flt != null) {
            spec.offsetsAreCorrect &= !brokenOffsets(ctor, args);
            spec.stream = flt;
            break;
          }
View Full Code Here

    Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader);
        if (fieldName.contains("payloadsFixed")) {
          TokenFilter filter = new MockFixedLengthPayloadFilter(new Random(0), tokenizer, 1);
          return new TokenStreamComponents(tokenizer, filter);
        } else if (fieldName.contains("payloadsVariable")) {
          TokenFilter filter = new MockVariableLengthPayloadFilter(new Random(0), tokenizer);
          return new TokenStreamComponents(tokenizer, filter);
        } else {
          return new TokenStreamComponents(tokenizer);
        }
      }
View Full Code Here
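
The per-field analyzer above attaches payloads only to fields whose names contain payloadsFixed or payloadsVariable. A hedged sketch of reading those payloads back out of the token stream (illustrative field name and text; assumes PayloadAttribute and BytesRef from the Lucene 4.x API) could look like:

    TokenStream ts = analyzer.tokenStream("payloadsFixed", new StringReader("some indexed text"));
    PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      BytesRef payload = payloadAtt.getPayload(); // null for tokens that carry no payload
      if (payload != null) {
        // payload.bytes / payload.offset / payload.length hold the raw payload data
      }
    }
    ts.end();
    ts.close();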

      @Override
      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
        MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
        return new TokenStreamComponents(tokenizer, new TokenFilter(tokenizer) {
          private int count = 0;

          @Override
          public boolean incrementToken() throws IOException {
            if (count++ == 5) {
View Full Code Here

  public void testElision() throws Exception {
    String test = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
    Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, newAttributeFactory(), new StringReader(test));
    CharArraySet articles = new CharArraySet(TEST_VERSION_CURRENT, asSet("l", "M"), false);
    TokenFilter filter = new ElisionFilter(tokenizer, articles);
    List<String> tas = filter(filter);
    assertEquals("embrouille", tas.get(4));
    assertEquals("O'brian", tas.get(6));
    assertEquals("enfin", tas.get(7));
  }
View Full Code Here
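
The filter(...) helper used by testElision is not shown in the excerpt. A minimal reconstruction, assuming it simply drains the stream and collects the term texts, might look like this (hypothetical, not the original test code):

  private static List<String> filter(TokenStream stream) throws IOException {
    List<String> terms = new ArrayList<String>();
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      terms.add(termAtt.toString());
    }
    stream.end();
    stream.close();
    return terms;
  }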

    Analyzer analyzer = new Analyzer() {

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        TokenFilter filter = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict);
        return new TokenStreamComponents(tokenizer, filter);
      }

      @Override
      protected Reader initReader(String fieldName, Reader reader) {
View Full Code Here

    Analyzer b = new Analyzer() {

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        TokenFilter filter = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, hyphenator);
        return new TokenStreamComponents(tokenizer, filter);
      }
    };
    checkRandomData(random(), b, 1000*RANDOM_MULTIPLIER);
  }
View Full Code Here

    Analyzer b = new Analyzer() {

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        TokenFilter filter = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, hyphenator);
        return new TokenStreamComponents(tokenizer, filter);
      }
    };
    checkOneTerm(b, "", "");
  }
View Full Code Here

  public void testFirstPosInc() throws Exception {
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        TokenFilter filter = new MockSynonymFilter(tokenizer);
        StopFilter stopfilter = new StopFilter(Version.LUCENE_4_3, filter, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
        stopfilter.setEnablePositionIncrements(false); // pre-4.4 behavior: removed stopwords leave no position gap
        return new TokenStreamComponents(tokenizer, stopfilter);
      }
    };
View Full Code Here

         
