Examples of CharFilter


Examples of org.apache.lucene.analysis.CharFilter

    assertCharFilterEquals(filter, "時時、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。");
  }

  public void testKanaOnly() throws IOException {
    // Test kana only repetition marks
    CharFilter filter = new JapaneseIterationMarkCharFilter(
        new StringReader("時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。"),
        false, // no kanji
        true   // kana
    );
    assertCharFilterEquals(filter, "時々、おおのさんと一緒にお寿司が食べたいです。abcところどころ。");
View Full Code Here

Examples of org.apache.lucene.analysis.CharFilter

    assertCharFilterEquals(filter, "時々、おおのさんと一緒にお寿司が食べたいです。abcところどころ。");
  }

  public void testNone() throws IOException {
    // Test no repetition marks
    CharFilter filter = new JapaneseIterationMarkCharFilter(
        new StringReader("時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。"),
        false, // no kanji
        false  // no kana
    );
    assertCharFilterEquals(filter, "時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。");
View Full Code Here

Examples of org.apache.lucene.analysis.CharFilter

      throw new UnsupportedOperationException("read(char[], int, int)");
    }
  };
 
  public void testWrapping() throws Exception {
    CharFilter cs = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(wrappedStream);
    try {
      cs.mark(1);
      fail();
    } catch (Exception e) {
      assertEquals("mark(int)", e.getMessage());
    }
   
    try {
      cs.markSupported();
      fail();
    } catch (Exception e) {
      assertEquals("markSupported()", e.getMessage());
    }
   
    try {
      cs.read();
      fail();
    } catch (Exception e) {
      assertEquals("read()", e.getMessage());
    }
   
    try {
      cs.read(new char[0]);
      fail();
    } catch (Exception e) {
      assertEquals("read(char[])", e.getMessage());
    }
   
    try {
      cs.read(CharBuffer.wrap(new char[0]));
      fail();
    } catch (Exception e) {
      assertEquals("read(CharBuffer)", e.getMessage());
    }
   
    try {
      cs.reset();
      fail();
    } catch (Exception e) {
      assertEquals("reset()", e.getMessage());
    }
   
    try {
      cs.skip(1);
      fail();
    } catch (Exception e) {
      assertEquals("skip(long)", e.getMessage());
    }
   
    try {
      cs.correctOffset(1);
      fail();
    } catch (Exception e) {
      assertEquals("correct(int)", e.getMessage());
    }
   
    try {
      cs.close();
      fail();
    } catch (Exception e) {
      assertEquals("close()", e.getMessage());
    }
   
    try {
      cs.read(new char[0], 0, 0);
      fail();
    } catch (Exception e) {
      assertEquals("read(char[], int, int)", e.getMessage());
    }
  }
View Full Code Here

Examples of org.apache.lucene.analysis.CharFilter

    List<String> mappingRules = new ArrayList<String>();
    mappingRules.add( "\"&uuml;\" => \"ü\"" );
    NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    builder.add("&uuml;", "ü");
    NormalizeCharMap normMap = builder.build();
    CharFilter charStream = new MappingCharFilter( normMap, new StringReader( INPUT ) );

    // create PatternTokenizer
    TokenStream stream = new PatternTokenizer(charStream, Pattern.compile("[,;/\\s]+"), -1);
    assertTokenStreamContents(stream,
        new String[] { "Günther", "Günther", "is", "here" },
View Full Code Here

Examples of org.apache.lucene.analysis.CharFilter

      throw new UnsupportedOperationException("read(char[], int, int)");
    }
  };
 
  public void testWrapping() throws Exception {
    CharFilter cs = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(wrappedStream);
    try {
      cs.mark(1);
      fail();
    } catch (Exception e) {
      assertEquals("mark(int)", e.getMessage());
    }
   
    try {
      cs.markSupported();
      fail();
    } catch (Exception e) {
      assertEquals("markSupported()", e.getMessage());
    }
   
    try {
      cs.read();
      fail();
    } catch (Exception e) {
      assertEquals("read()", e.getMessage());
    }
   
    try {
      cs.read(new char[0]);
      fail();
    } catch (Exception e) {
      assertEquals("read(char[])", e.getMessage());
    }
   
    try {
      cs.read(CharBuffer.wrap(new char[0]));
      fail();
    } catch (Exception e) {
      assertEquals("read(CharBuffer)", e.getMessage());
    }
   
    try {
      cs.reset();
      fail();
    } catch (Exception e) {
      assertEquals("reset()", e.getMessage());
    }
   
    try {
      cs.skip(1);
      fail();
    } catch (Exception e) {
      assertEquals("skip(long)", e.getMessage());
    }
   
    try {
      cs.correctOffset(1);
      fail();
    } catch (Exception e) {
      assertEquals("correct(int)", e.getMessage());
    }
   
    try {
      cs.close();
      fail();
    } catch (Exception e) {
      assertEquals("close()", e.getMessage());
    }
   
    try {
      cs.read(new char[0], 0, 0);
      fail();
    } catch (Exception e) {
      assertEquals("read(char[], int, int)", e.getMessage());
    }
  }
View Full Code Here

Examples of org.apache.lucene.analysis.CharFilter

public class TestJapaneseIterationMarkCharFilterFactory extends BaseTokenStreamTestCase {

  public void testIterationMarksWithKeywordTokenizer() throws IOException {
    final String text = "時々馬鹿々々しいところゞゝゝミスヾ";
    JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory();
    CharFilter filter = filterFactory.create(new StringReader(text));
    TokenStream tokenStream = new MockTokenizer(filter, MockTokenizer.KEYWORD, false);
    assertTokenStreamContents(tokenStream, new String[]{"時時馬鹿馬鹿しいところどころミスズ"});
  }
View Full Code Here

Examples of org.apache.lucene.analysis.CharFilter

    JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory();
    Map<String, String> filterArgs = Collections.emptyMap();
    filterFactory.init(filterArgs);

    CharFilter filter = filterFactory.create(
        new StringReader("時々馬鹿々々しいところゞゝゝミスヾ")
    );
    TokenStream tokenStream = tokenizerFactory.create(filter);
    assertTokenStreamContents(tokenStream, new String[]{"時時", "馬鹿馬鹿しい", "ところどころ", "ミ", "スズ"});
  }
View Full Code Here

Examples of org.apache.lucene.analysis.CharFilter

    Map<String, String> filterArgs = new HashMap<String, String>();
    filterArgs.put("normalizeKanji", "true");
    filterArgs.put("normalizeKana", "false");
    filterFactory.init(filterArgs);
   
    CharFilter filter = filterFactory.create(
        new StringReader("時々馬鹿々々しいところゞゝゝミスヾ")
    );
    TokenStream tokenStream = tokenizerFactory.create(filter);
    assertTokenStreamContents(tokenStream, new String[]{"時時", "馬鹿馬鹿しい", "ところ", "ゞ", "ゝ", "ゝ", "ミス", "ヾ"});
  }
View Full Code Here

Examples of org.apache.lucene.analysis.CharFilter

    Map<String, String> filterArgs = new HashMap<String, String>();
    filterArgs.put("normalizeKanji", "false");
    filterArgs.put("normalizeKana", "true");
    filterFactory.init(filterArgs);

    CharFilter filter = filterFactory.create(
        new StringReader("時々馬鹿々々しいところゞゝゝミスヾ")
    );
    TokenStream tokenStream = tokenizerFactory.create(filter);
    assertTokenStreamContents(tokenStream, new String[]{"時々", "馬鹿", "々", "々", "しい", "ところどころ", "ミ", "スズ"});
  }
View Full Code Here

Examples of org.apache.lucene.analysis.CharFilter

    List<String> mappingRules = new ArrayList<String>();
    mappingRules.add( "\"&uuml;\" => \"ü\"" );
    NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    builder.add("&uuml;", "ü");
    NormalizeCharMap normMap = builder.build();
    CharFilter charStream = new MappingCharFilter( normMap, new StringReader( INPUT ) );

    // create PatternTokenizer
    TokenStream stream = new PatternTokenizer(charStream, Pattern.compile("[,;/\\s]+"), -1);
    assertTokenStreamContents(stream,
        new String[] { "Günther", "Günther", "is", "here" },
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.