Package org.apache.lucene.analysis.cjk

Examples of org.apache.lucene.analysis.cjk.CJKWidthFilter


  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer tokenizer = new JapaneseTokenizer(reader, userDict, true, mode);
    TokenStream stream = new JapaneseBaseFormFilter(tokenizer);
    stream = new JapanesePartOfSpeechStopFilter(matchVersion, stream, stoptags);
    stream = new CJKWidthFilter(stream);
    stream = new StopFilter(matchVersion, stream, stopwords);
    stream = new JapaneseKatakanaStemFilter(stream);
    stream = new LowerCaseFilter(matchVersion, stream);
    return new TokenStreamComponents(tokenizer, stream);
  }
View Full Code Here


public class CJKWidthFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
 
  @Override
  public TokenStream create(TokenStream input) {
    return new CJKWidthFilter(input);
  }
View Full Code Here

  public void testKatakanaReadingsHalfWidth() throws IOException {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), reader, null, true, JapaneseTokenizer.Mode.SEARCH);
        TokenStream stream = new CJKWidthFilter(tokenizer);
        return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(stream, false));
      }
    };
    assertAnalyzesTo(a, "今夜はロバート先生と話した",
        new String[] { "コンヤ", "ハ", "ロバート", "センセイ", "ト", "ハナシ", "タ" }
View Full Code Here

  public void testRomajiReadingsHalfWidth() throws IOException {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), reader, null, true, JapaneseTokenizer.Mode.SEARCH);
        TokenStream stream = new CJKWidthFilter(tokenizer);
        return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(stream, true));
      }
    };
    assertAnalyzesTo(a, "今夜はロバート先生と話した",
        new String[] { "kon'ya", "ha", "robato", "sensei", "to", "hanashi", "ta" }
View Full Code Here

  public void testKatakanaReadingsHalfWidth() throws IOException {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, JapaneseTokenizer.Mode.SEARCH);
        TokenStream stream = new CJKWidthFilter(tokenizer);
        return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(stream, false));
      }
    };
    assertAnalyzesTo(a, "今夜はロバート先生と話した",
        new String[] { "コンヤ", "ハ", "ロバート", "センセイ", "ト", "ハナシ", "タ" }
View Full Code Here

  public void testRomajiReadingsHalfWidth() throws IOException {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, JapaneseTokenizer.Mode.SEARCH);
        TokenStream stream = new CJKWidthFilter(tokenizer);
        return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(stream, true));
      }
    };
    assertAnalyzesTo(a, "今夜はロバート先生と話した",
        new String[] { "kon'ya", "ha", "robato", "sensei", "to", "hanashi", "ta" }
View Full Code Here

  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer tokenizer = new JapaneseTokenizer(reader, userDict, true, mode);
    TokenStream stream = new JapaneseBaseFormFilter(tokenizer);
    stream = new JapanesePartOfSpeechStopFilter(true, stream, stoptags);
    stream = new CJKWidthFilter(stream);
    stream = new StopFilter(matchVersion, stream, stopwords);
    stream = new JapaneseKatakanaStemFilter(stream);
    stream = new LowerCaseFilter(matchVersion, stream);
    return new TokenStreamComponents(tokenizer, stream);
  }
View Full Code Here

    }
  }
 
  @Override
  public TokenStream create(TokenStream input) {
    return new CJKWidthFilter(input);
  }
View Full Code Here

  public void testKatakanaReadingsHalfWidth() throws IOException {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, JapaneseTokenizer.Mode.SEARCH);
        TokenStream stream = new CJKWidthFilter(tokenizer);
        return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(stream, false));
      }
    };
    assertAnalyzesTo(a, "今夜はロバート先生と話した",
        new String[] { "コンヤ", "ハ", "ロバート", "センセイ", "ト", "ハナシ", "タ" }
View Full Code Here

  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer tokenizer = new JapaneseTokenizer(reader, userDict, true, mode);
    TokenStream stream = new JapaneseBaseFormFilter(tokenizer);
    stream = new JapanesePartOfSpeechStopFilter(matchVersion, stream, stoptags);
    stream = new CJKWidthFilter(stream);
    stream = new StopFilter(matchVersion, stream, stopwords);
    stream = new JapaneseKatakanaStemFilter(stream);
    stream = new LowerCaseFilter(matchVersion, stream);
    return new TokenStreamComponents(tokenizer, stream);
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.cjk.CJKWidthFilter

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.