Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.TokenFilter


      @Override
      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
        MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
        return new TokenStreamComponents(tokenizer, new TokenFilter(tokenizer) {
          private int count = 0;

          @Override
          public boolean incrementToken() throws IOException {
            if (count++ == 5) {
View Full Code Here


  public void testFirstPosInc() throws Exception {
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        TokenFilter filter = new MockSynonymFilter(tokenizer);
        StopFilter stopfilter = new StopFilter(Version.LUCENE_43, filter, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
        stopfilter.setEnablePositionIncrements(false);
        return new TokenStreamComponents(tokenizer, stopfilter);
      }
    };
View Full Code Here

   
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer t = new MockTokenizer(new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(reader), MockTokenFilter.ENGLISH_STOPSET, false, -65);
        TokenFilter f = new CommonGramsFilter(TEST_VERSION_CURRENT, t, cas);
        return new TokenStreamComponents(t, f);
      }

      @Override
      protected Reader initReader(String fieldName, Reader reader) {
View Full Code Here

  private static class ThrowingAnalyzer extends Analyzer {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader input) {
      Tokenizer tokenizer = new MockTokenizer(input);
      if (fieldName.equals("distinctiveFieldName")) {
        TokenFilter tosser = new TokenFilter(tokenizer) {
          @Override
          public boolean incrementToken() throws IOException {
            throw new BadNews("Something is icky.");
          }
        };
View Full Code Here

    String test = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
    Tokenizer tokenizer = new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(test));
    Set articles = new HashSet();
    articles.add("l");
    articles.add("M");
    TokenFilter filter = new ElisionFilter(tokenizer, articles);
    List tas = filtre(filter);
    assertEquals("embrouille", tas.get(4));
    assertEquals("O'brian", tas.get(6));
    assertEquals("enfin", tas.get(7));
  }
View Full Code Here

    RAMDirectory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new Analyzer() {

      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new TokenFilter(new StandardTokenizer(Version.LUCENE_CURRENT, reader)) {
          private int count = 0;

          @Override
          public boolean incrementToken() throws IOException {
            if (count++ == 5) {
View Full Code Here

   */
  public void testCaseSensitive() throws Exception {
    final String input = "How The s a brown s cow d like A B thing?";
    MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
    Set common = CommonGramsFilter.makeCommonSet(commonWords);
    TokenFilter cgf = new CommonGramsFilter(wt, common, false);
    assertTokenStreamContents(cgf, new String[] {"How", "The", "The_s", "s",
        "s_a", "a", "a_brown", "brown", "brown_s", "s", "s_cow", "cow",
        "cow_d", "d", "d_like", "like", "A", "B", "thing?"});
  }
View Full Code Here

   */
  public void testLastWordisStopWord() throws Exception {
    final String input = "dog the";
    MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
    CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
    TokenFilter nsf = new CommonGramsQueryFilter(cgf);
    assertTokenStreamContents(nsf, new String[] { "dog_the" });
  }
View Full Code Here

   */
  public void testFirstWordisStopWord() throws Exception {
    final String input = "the dog";
    MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
    CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
    TokenFilter nsf = new CommonGramsQueryFilter(cgf);
    assertTokenStreamContents(nsf, new String[] { "the_dog" });
  }
View Full Code Here

   */
  public void testOneWordQueryStopWord() throws Exception {
    final String input = "the";
    MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
    CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
    TokenFilter nsf = new CommonGramsQueryFilter(cgf);
    assertTokenStreamContents(nsf, new String[] { "the" });
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.TokenFilter

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.