Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.MockTokenizer


      started = false;
    }
  }

  public void testFirstTokenPositionIncrement() throws Exception {
    TokenStream ts = new MockTokenizer(new StringReader("a abc"), MockTokenizer.WHITESPACE, false);
    ts = new PositionFilter(ts); // All but first token will get 0 position increment
    EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, ts, EdgeNGramTokenFilter.Side.FRONT, 2, 3);
    // The first token "a" will not be output, since it's smaller than the mingram size of 2.
    // The second token on input to EdgeNGramTokenFilter will have position increment of 0,
    // which should be increased to 1, since this is the first output token in the stream.
View Full Code Here


        new int[]    {    1,     0 }
    );
  }
 
  public void testSmallTokenInStream() throws Exception {
    input = new MockTokenizer(new StringReader("abc de fgh"), MockTokenizer.WHITESPACE, false);
    EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 3, 3);
    assertTokenStreamContents(tokenizer, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10});
  }
View Full Code Here

  // so in this case we behave like WDF, and preserve any modified offsets
  public void testInvalidOffsets() throws Exception {
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
        filters = new EdgeNGramTokenFilter(Version.LUCENE_43, filters, EdgeNGramTokenFilter.Side.FRONT, 2, 15);
        return new TokenStreamComponents(tokenizer, filters);
      }
    };
View Full Code Here

      final int max = _TestUtil.nextInt(random(), min, 20);
   
      Analyzer a = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
          return new TokenStreamComponents(tokenizer,
            new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, min, max));
        }   
      };
      checkRandomData(random(), a, 100*RANDOM_MULTIPLIER);
    }
   
    Analyzer b = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer,
            new EdgeNGramTokenFilter(Version.LUCENE_43, tokenizer, EdgeNGramTokenFilter.Side.BACK, 2, 4));
      }   
    };
    checkRandomData(random(), b, 1000*RANDOM_MULTIPLIER, 20, false, false);
View Full Code Here

    return Arrays.asList(arr);
  }

  static void assertTokenizesTo(SlowSynonymMap dict, String input,
      String expected[]) throws IOException {
    Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
    SlowSynonymFilter stream = new SlowSynonymFilter(tokenizer, dict);
    assertTokenStreamContents(stream, expected);
  }
View Full Code Here

    assertTokenStreamContents(stream, expected);
  }
 
  static void assertTokenizesTo(SlowSynonymMap dict, String input,
      String expected[], int posIncs[]) throws IOException {
    Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
    SlowSynonymFilter stream = new SlowSynonymFilter(tokenizer, dict);
    assertTokenStreamContents(stream, expected, posIncs);
  }
View Full Code Here

  public void testEndingHole() throws Exception {
    // Just deletes "of"
    Analyzer a = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String field, Reader reader) {
          Tokenizer tokenizer = new MockTokenizer(reader);
          CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "of");
          return new TokenStreamComponents(tokenizer, new StopFilter(TEST_VERSION_CURRENT, tokenizer, stopSet));
        }
      };
View Full Code Here

  public void testTwoEndingHoles() throws Exception {
    // Just deletes "of"
    Analyzer a = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String field, Reader reader) {
          Tokenizer tokenizer = new MockTokenizer(reader);
          CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "of");
          return new TokenStreamComponents(tokenizer, new StopFilter(TEST_VERSION_CURRENT, tokenizer, stopSet));
        }
      };
View Full Code Here

    if (random.nextBoolean()) {
      // no payloads
     analyzer = new Analyzer() {
        @Override
        public TokenStream tokenStream(String fieldName, Reader reader) {
          return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
        }
      };
    } else {
      // fixed length payloads
      final int length = random.nextInt(200);
      analyzer = new Analyzer() {
        @Override
        public TokenStream tokenStream(String fieldName, Reader reader) {
          return new MockFixedLengthPayloadFilter(random,
              new MockTokenizer(reader, MockTokenizer.WHITESPACE, true),
              length);
        }
      };
    }
   
View Full Code Here

  // other StopFilter functionality is already tested by TestStopAnalyzer

  public void testExactCase() throws IOException {
    StringReader reader = new StringReader("Now is The Time");
    Set<String> stopWords = new HashSet<String>(Arrays.asList("is", "the", "Time"));
    TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopWords, false);
    assertTokenStreamContents(stream, new String[] { "Now", "The" });
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.MockTokenizer

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.