Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.TokenFilter


    Analyzer analyzer = new Analyzer(new Analyzer.PerFieldReuseStrategy()) {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader);
        if (fieldName.contains("payloadsFixed")) {
          TokenFilter filter = new MockFixedLengthPayloadFilter(new Random(0), tokenizer, 1);
          return new TokenStreamComponents(tokenizer, filter);
        } else if (fieldName.contains("payloadsVariable")) {
          TokenFilter filter = new MockVariableLengthPayloadFilter(new Random(0), tokenizer);
          return new TokenStreamComponents(tokenizer, filter);
        } else {
          return new TokenStreamComponents(tokenizer);
        }
      }
View Full Code Here


  public void testInvalidOffsets() throws Exception {
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
        filters = new EdgeNGramTokenFilter(filters, EdgeNGramTokenFilter.Side.FRONT, 2, 15);
        return new TokenStreamComponents(tokenizer, filters);
      }
    };
    assertAnalyzesTo(analyzer, "mosfellsbær",
View Full Code Here

    Analyzer analyzer = new Analyzer() {

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        TokenFilter filter = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict);
        return new TokenStreamComponents(tokenizer, filter);
      }

      @Override
      protected Reader initReader(String fieldName, Reader reader) {
View Full Code Here

    Analyzer b = new Analyzer() {

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        TokenFilter filter = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, hyphenator);
        return new TokenStreamComponents(tokenizer, filter);
      }
    };
    checkRandomData(random(), b, 1000*RANDOM_MULTIPLIER);
  }
View Full Code Here

    Analyzer b = new Analyzer() {

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        TokenFilter filter = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, hyphenator);
        return new TokenStreamComponents(tokenizer, filter);
      }
    };
    checkOneTermReuse(b, "", "");
  }
View Full Code Here

    expect(mapFileReaderFactory.createMapFileReader(TEST_FILE_1)).andReturn(mapFileReader);
    expect(mapFileReader.readMap()).andReturn(mapping);
    replay(mapFileReaderFactory);
    replay(mapFileReader);
   
    TokenFilter tokenFilter = replaceFilterFactory.createTokenFilter(tokenStream, properties);
    assertEquals(ReplaceFilter.class, tokenFilter.getClass());
    ReplaceFilter replaceFilter = (ReplaceFilter) tokenFilter;
    assertEquals(mapping, replaceFilter.getMapping());
    verify(mapFileReaderFactory);
    verify(mapFileReader);
   
View Full Code Here

    Collection<Token> tokens = new ArrayList<Token>();
    tokens.add(newToken("token1 token2 token3", 0, 6));
    tokens.add(newToken("token4 token5 token6", 7, 13));
   
    TokenStream tokenStream = new CollectionTokenStream(tokens);
    TokenFilter filter = new SplitterFilter(tokenStream, " ");
   
    Token nextToken = new Token();
    filter.next(nextToken);
    assertNotNull(nextToken);
    assertEquals("token1", new String(nextToken.termBuffer(), 0, nextToken.termLength()));
    assertEquals(0, nextToken.startOffset());
    assertEquals(6, nextToken.endOffset());
   
    nextToken = filter.next(nextToken);
    assertNotNull(nextToken);
    assertEquals("token2", new String(nextToken.termBuffer(), 0, nextToken.termLength()));
    assertEquals(0, nextToken.startOffset());
    assertEquals(6, nextToken.endOffset());
   
    nextToken = filter.next(nextToken);
    assertNotNull(nextToken);
    assertEquals("token3", new String(nextToken.termBuffer(), 0, nextToken.termLength()));
    assertEquals(0, nextToken.startOffset());
    assertEquals(6, nextToken.endOffset());

    nextToken = filter.next(nextToken);
    assertNotNull(nextToken);
    assertEquals("token4", new String(nextToken.termBuffer(), 0, nextToken.termLength()));
    assertEquals(7, nextToken.startOffset());
    assertEquals(13, nextToken.endOffset());

    nextToken = filter.next(nextToken);
    assertNotNull(nextToken);
    assertEquals("token5", new String(nextToken.termBuffer(), 0, nextToken.termLength()));
    assertEquals(7, nextToken.startOffset());
    assertEquals(13, nextToken.endOffset());

    nextToken = filter.next(nextToken);
    assertNotNull(nextToken);
    assertEquals("token6", new String(nextToken.termBuffer(), 0, nextToken.termLength()));
    assertEquals(7, nextToken.startOffset());
    assertEquals(13, nextToken.endOffset());
  }
View Full Code Here

  private static class ThrowingAnalyzer extends Analyzer {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader input) {
      Tokenizer tokenizer = new MockTokenizer(input);
      if (fieldName.equals("distinctiveFieldName")) {
        TokenFilter tosser = new TokenFilter(tokenizer) {
          @Override
          public boolean incrementToken() throws IOException {
            throw new BadNews("Something is icky.");
          }
        };
View Full Code Here

  }

  public void testTokenReuse() throws IOException {
    Analyzer analyzer = new Analyzer() {
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new TokenFilter(new WhitespaceTokenizer(reader)) {
          boolean first=true;
          Token buffered;

          public Token next(final Token reusableToken) throws IOException {
            if (buffered != null) {
View Full Code Here

  public void testExceptionFromTokenStream() throws IOException {
    RAMDirectory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new Analyzer() {

      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new TokenFilter(new StandardTokenizer(reader)) {
          private int count = 0;

          public Token next(final Token reusableToken) throws IOException {
            if (count++ == 5) {
              throw new IOException();
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.TokenFilter

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.