Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.CharFilter


    List<String> mappingRules = new ArrayList<>();
    mappingRules.add( "\"&uuml;\" => \"ü\"" );
    NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    builder.add("&uuml;", "ü");
    NormalizeCharMap normMap = builder.build();
    CharFilter charStream = new MappingCharFilter( normMap, new StringReader( INPUT ) );

    // create PatternTokenizer
    Tokenizer stream = new PatternTokenizer(newAttributeFactory(), charStream, Pattern.compile("[,;/\\s]+"), -1);
    assertTokenStreamContents(stream,
        new String[] { "Günther", "Günther", "is", "here" },
View Full Code Here


      throw new UnsupportedOperationException("read(char[], int, int)");
    }
  };
 
  public void testWrapping() throws Exception {
    CharFilter cs = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(wrappedStream);
    try {
      cs.mark(1);
      fail();
    } catch (Exception e) {
      assertEquals("mark(int)", e.getMessage());
    }
   
    try {
      cs.markSupported();
      fail();
    } catch (Exception e) {
      assertEquals("markSupported()", e.getMessage());
    }
   
    try {
      cs.read();
      fail();
    } catch (Exception e) {
      assertEquals("read()", e.getMessage());
    }
   
    try {
      cs.read(new char[0]);
      fail();
    } catch (Exception e) {
      assertEquals("read(char[])", e.getMessage());
    }
   
    try {
      cs.read(CharBuffer.wrap(new char[0]));
      fail();
    } catch (Exception e) {
      assertEquals("read(CharBuffer)", e.getMessage());
    }
   
    try {
      cs.reset();
      fail();
    } catch (Exception e) {
      assertEquals("reset()", e.getMessage());
    }
   
    try {
      cs.skip(1);
      fail();
    } catch (Exception e) {
      assertEquals("skip(long)", e.getMessage());
    }
   
    try {
      cs.correctOffset(1);
      fail();
    } catch (Exception e) {
      assertEquals("correct(int)", e.getMessage());
    }
   
    try {
      cs.close();
      fail();
    } catch (Exception e) {
      assertEquals("close()", e.getMessage());
    }
   
    try {
      cs.read(new char[0], 0, 0);
      fail();
    } catch (Exception e) {
      assertEquals("read(char[], int, int)", e.getMessage());
    }
  }
View Full Code Here

      throw new UnsupportedOperationException("read(char[], int, int)");
    }
  };
 
  public void testWrapping() throws Exception {
    CharFilter cs = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(wrappedStream);
    try {
      cs.mark(1);
      fail();
    } catch (Exception e) {
      assertEquals("mark(int)", e.getMessage());
    }
   
    try {
      cs.markSupported();
      fail();
    } catch (Exception e) {
      assertEquals("markSupported()", e.getMessage());
    }
   
    try {
      cs.read();
      fail();
    } catch (Exception e) {
      assertEquals("read()", e.getMessage());
    }
   
    try {
      cs.read(new char[0]);
      fail();
    } catch (Exception e) {
      assertEquals("read(char[])", e.getMessage());
    }
   
    try {
      cs.read(CharBuffer.wrap(new char[0]));
      fail();
    } catch (Exception e) {
      assertEquals("read(CharBuffer)", e.getMessage());
    }
   
    try {
      cs.reset();
      fail();
    } catch (Exception e) {
      assertEquals("reset()", e.getMessage());
    }
   
    try {
      cs.skip(1);
      fail();
    } catch (Exception e) {
      assertEquals("skip(long)", e.getMessage());
    }
   
    try {
      cs.correctOffset(1);
      fail();
    } catch (Exception e) {
      assertEquals("correct(int)", e.getMessage());
    }
   
    try {
      cs.close();
      fail();
    } catch (Exception e) {
      assertEquals("close()", e.getMessage());
    }
   
    try {
      cs.read(new char[0], 0, 0);
      fail();
    } catch (Exception e) {
      assertEquals("read(char[], int, int)", e.getMessage());
    }
  }
View Full Code Here

public class TestJapaneseIterationMarkCharFilterFactory extends BaseTokenStreamTestCase {

  public void testIterationMarksWithKeywordTokenizer() throws IOException {
    final String text = "時々馬鹿々々しいところゞゝゝミスヾ";
    JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory(new HashMap<String,String>());
    CharFilter filter = filterFactory.create(new StringReader(text));
    TokenStream tokenStream = new MockTokenizer(filter, MockTokenizer.KEYWORD, false);
    assertTokenStreamContents(tokenStream, new String[]{"時時馬鹿馬鹿しいところどころミスズ"});
  }
View Full Code Here

  public void testIterationMarksWithJapaneseTokenizer() throws IOException {
    JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
    tokenizerFactory.inform(new StringMockResourceLoader(""));

    JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory(new HashMap<String,String>());
    CharFilter filter = filterFactory.create(
        new StringReader("時々馬鹿々々しいところゞゝゝミスヾ")
    );
    TokenStream tokenStream = tokenizerFactory.create(filter);
    assertTokenStreamContents(tokenStream, new String[]{"時時", "馬鹿馬鹿しい", "ところどころ", "ミ", "スズ"});
  }
View Full Code Here

    Map<String, String> filterArgs = new HashMap<>();
    filterArgs.put("normalizeKanji", "true");
    filterArgs.put("normalizeKana", "false");
    JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory(filterArgs);
   
    CharFilter filter = filterFactory.create(
        new StringReader("時々馬鹿々々しいところゞゝゝミスヾ")
    );
    TokenStream tokenStream = tokenizerFactory.create(filter);
    assertTokenStreamContents(tokenStream, new String[]{"時時", "馬鹿馬鹿しい", "ところ", "ゞ", "ゝ", "ゝ", "ミス", "ヾ"});
  }
View Full Code Here

    Map<String, String> filterArgs = new HashMap<>();
    filterArgs.put("normalizeKanji", "false");
    filterArgs.put("normalizeKana", "true");
    JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory(filterArgs);

    CharFilter filter = filterFactory.create(
        new StringReader("時々馬鹿々々しいところゞゝゝミスヾ")
    );
    TokenStream tokenStream = tokenizerFactory.create(filter);
    assertTokenStreamContents(tokenStream, new String[]{"時々", "馬鹿", "々", "々", "しい", "ところどころ", "ミ", "スズ"});
  }
View Full Code Here

  // 012345678901234567890123456789012345678
  //  a bb - ccc . --- bb a . ccc ccc bb
  //  aa b - c . --- b aa . c c b
  public void testChain() throws IOException {
    final String BLOCK = " a bb - ccc . --- bb a . ccc ccc bb";
    CharFilter cs = new PatternReplaceCharFilter( pattern("a"), "aa",
        new StringReader( BLOCK ) );
    cs = new PatternReplaceCharFilter( pattern("bb"), "b", cs );
    cs = new PatternReplaceCharFilter( pattern("ccc"), "c", cs );
    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
    assertTokenStreamContents(ts,
View Full Code Here

    List<String> mappingRules = new ArrayList<>();
    mappingRules.add( "\"&uuml;\" => \"ü\"" );
    NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    builder.add("&uuml;", "ü");
    NormalizeCharMap normMap = builder.build();
    CharFilter charStream = new MappingCharFilter( normMap, new StringReader( INPUT ) );

    // create PatternTokenizer
    TokenStream stream = new PatternTokenizer(charStream, Pattern.compile("[,;/\\s]+"), -1);
    assertTokenStreamContents(stream,
        new String[] { "Günther", "Günther", "is", "here" },
View Full Code Here

      throw new UnsupportedOperationException("read(char[], int, int)");
    }
  };
 
  public void testWrapping() throws Exception {
    CharFilter cs = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(wrappedStream);
    try {
      cs.mark(1);
      fail();
    } catch (Exception e) {
      assertEquals("mark(int)", e.getMessage());
    }
   
    try {
      cs.markSupported();
      fail();
    } catch (Exception e) {
      assertEquals("markSupported()", e.getMessage());
    }
   
    try {
      cs.read();
      fail();
    } catch (Exception e) {
      assertEquals("read()", e.getMessage());
    }
   
    try {
      cs.read(new char[0]);
      fail();
    } catch (Exception e) {
      assertEquals("read(char[])", e.getMessage());
    }
   
    try {
      cs.read(CharBuffer.wrap(new char[0]));
      fail();
    } catch (Exception e) {
      assertEquals("read(CharBuffer)", e.getMessage());
    }
   
    try {
      cs.reset();
      fail();
    } catch (Exception e) {
      assertEquals("reset()", e.getMessage());
    }
   
    try {
      cs.skip(1);
      fail();
    } catch (Exception e) {
      assertEquals("skip(long)", e.getMessage());
    }
   
    try {
      cs.correctOffset(1);
      fail();
    } catch (Exception e) {
      assertEquals("correct(int)", e.getMessage());
    }
   
    try {
      cs.close();
      fail();
    } catch (Exception e) {
      assertEquals("close()", e.getMessage());
    }
   
    try {
      cs.read(new char[0], 0, 0);
      fail();
    } catch (Exception e) {
      assertEquals("read(char[], int, int)", e.getMessage());
    }
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.CharFilter

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.