Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.CannedTokenStream
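CannedTokenStream lives in Lucene's test framework and simply replays a fixed array of pre-built Token instances, which makes it convenient for feeding hand-crafted token sequences (terms, offsets, position increments, even trailing holes) into filters and indexes. A minimal, self-contained sketch of building and consuming one by hand, assuming the Lucene 4.x-era API used in the excerpts below:

  import java.io.IOException;
  import org.apache.lucene.analysis.CannedTokenStream;
  import org.apache.lucene.analysis.Token;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

  public class CannedTokenStreamDemo {
    public static void main(String[] args) throws IOException {
      // Two pre-built tokens with explicit start/end offsets.
      TokenStream ts = new CannedTokenStream(new Token("purple", 0, 6),
                                             new Token("wizard", 7, 13));
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
      ts.reset();                  // standard TokenStream contract: reset, consume, end, close
      while (ts.incrementToken()) {
        System.out.println(termAtt + " [" + offsetAtt.startOffset() + "-" + offsetAtt.endOffset() + "]");
      }
      ts.end();
      ts.close();
    }
  }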


  public void testTwoTrailingHoles() throws IOException {
    // Analyzing "purple wizard of the", where "of" and "the" are removed as
    // stopwords, leaving two trailing holes:
    Token[] inputTokens = new Token[] {createToken("purple", 0, 6),
                                       createToken("wizard", 7, 13)};
    ShingleFilter filter = new ShingleFilter(new CannedTokenStream(2, 20, inputTokens), 2, 2);

    assertTokenStreamContents(filter,
                              new String[] {"purple", "purple wizard", "wizard", "wizard _"},
                              new int[] {0, 0, 7, 7},
                              new int[] {6, 13, 13, 20},
                              new int[] {1, 0, 1, 0},
                              20);
  }
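The createToken(...) calls above rely on a private helper defined in the test class rather than on CannedTokenStream itself; a plausible stand-in (the exact upstream helper may differ slightly) is:

  // Hypothetical stand-in for the test class's createToken(term, startOffset, endOffset) helper:
  // build a Token carrying the given text and offsets, with the default position increment of 1.
  private static Token createToken(String term, int start, int end) {
    Token token = new Token(term, start, end);
    token.setPositionIncrement(1);
    return token;
  }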


  public void testTwoTrailingHolesTriShingle() throws IOException {
    // Analyzing "purple wizard of the", where "of" and "the" are removed as
    // stopwords, leaving two trailing holes:
    Token[] inputTokens = new Token[] {createToken("purple", 0, 6),
                                       createToken("wizard", 7, 13)};
    ShingleFilter filter = new ShingleFilter(new CannedTokenStream(2, 20, inputTokens), 2, 3);

    assertTokenStreamContents(filter,
                              new String[] {"purple", "purple wizard", "purple wizard _", "wizard", "wizard _", "wizard _ _"},
                              new int[] {0, 0, 0, 7, 7, 7},
                              new int[] {6, 13, 20, 13, 20, 20},
                              new int[] {1, 0, 0, 1, 0, 0},
                              20);
  }

  public void testTwoTrailingHolesTriShingleWithTokenFiller() throws IOException {
    // Analyzing "purple wizard of the", where "of" and "the" are removed as
    // stopwords, leaving two trailing holes:
    Token[] inputTokens = new Token[] {createToken("purple", 0, 6), createToken("wizard", 7, 13)};
    ShingleFilter filter = new ShingleFilter(new CannedTokenStream(2, 20, inputTokens), 2, 3);
    filter.setFillerToken("--");

    assertTokenStreamContents(filter,
        new String[]{"purple", "purple wizard", "purple wizard --", "wizard", "wizard --", "wizard -- --"},
        new int[]{0, 0, 0, 7, 7, 7},
        new int[]{6, 13, 20, 13, 20, 20},
        new int[]{1, 0, 0, 1, 0, 0},
        20);

    filter = new ShingleFilter(new CannedTokenStream(2, 20, inputTokens), 2, 3);
    filter.setFillerToken("");

    assertTokenStreamContents(filter,
        new String[]{"purple", "purple wizard", "purple wizard ", "wizard", "wizard ", "wizard  "},
        new int[]{0, 0, 0, 7, 7, 7},
        new int[]{6, 13, 20, 13, 20, 20},
        new int[]{1, 0, 0, 1, 0, 0},
        20);


    filter = new ShingleFilter(new CannedTokenStream(2, 20, inputTokens), 2, 3);
    filter.setFillerToken(null);

    assertTokenStreamContents(filter,
        new String[] {"purple", "purple wizard", "purple wizard ", "wizard", "wizard ", "wizard  "},
        new int[] {0, 0, 0, 7, 7, 7},
        new int[] {6, 13, 20, 13, 20, 20},
        new int[] {1, 0, 0, 1, 0, 0},
        20);


    filter = new ShingleFilter(new CannedTokenStream(2, 20, inputTokens), 2, 3);
    filter.setFillerToken(null);
    filter.setTokenSeparator(null);

    assertTokenStreamContents(filter,
        new String[] {"purple", "purplewizard", "purplewizard", "wizard", "wizard", "wizard"},
        new int[] {0, 0, 0, 7, 7, 7},
        new int[] {6, 13, 20, 13, 20, 20},
        new int[] {1, 0, 0, 1, 0, 0},
        20);
  }
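Outside the assertTokenStreamContents harness, the same CannedTokenStream + ShingleFilter pipeline can be consumed directly; a short sketch under the same assumptions (two surviving tokens followed by two holes, i.e. a final position increment of 2 and a final offset of 20):

    TokenStream shingles = new ShingleFilter(
        new CannedTokenStream(2, 20, new Token("purple", 0, 6), new Token("wizard", 7, 13)),
        2, 3);                                  // bigrams and trigrams
    CharTermAttribute term = shingles.addAttribute(CharTermAttribute.class);
    shingles.reset();
    while (shingles.incrementToken()) {
      System.out.println(term.toString());      // unigrams plus shingles, holes shown as "_" fillers
    }
    shingles.end();
    shingles.close();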
 
  // LUCENE-4880
  public void testEmptyString() throws IOException {
    MemoryIndex memory = new MemoryIndex();
    memory.addField("foo", new CannedTokenStream(new Token("", 0, 5)));
    IndexSearcher searcher = memory.createSearcher();
    TopDocs docs = searcher.search(new TermQuery(new Term("foo", "")), 10);
    assertEquals(1, docs.totalHits);
  }

        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
       
        return new TokenStreamComponents(tokenizer) {
          int tokenStreamCounter = 0;
          final TokenStream[] tokenStreams = new TokenStream[] {
            new CannedTokenStream(new Token[] {
                token("wifi",1,1),
                token("hotspot",0,2),
                token("network",1,1),
                token("is",1,1),
                token("slow",1,1)
              }),
            new CannedTokenStream(new Token[] {
                token("wi",1,1),
                token("hotspot",0,3),
                token("fi",1,1),
                token("network",1,1),
                token("is",1,1),
                token("fast",1,1)

              }),
            new CannedTokenStream(new Token[] {
                token("wifi",1,1),
                token("hotspot",0,2),
                token("network",1,1)
              }),
          };

        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
       
        return new TokenStreamComponents(tokenizer) {
          int tokenStreamCounter = 0;
          final TokenStream[] tokenStreams = new TokenStream[] {
            new CannedTokenStream(new Token[] {
                token("ab",1,1),
                token("ba",0,1),
                token("xc",1,1)
              }),
            new CannedTokenStream(new Token[] {
                token("ba",1,1),         
                token("xd",1,1)
              }),
            new CannedTokenStream(new Token[] {
                token("ab",1,1),
                token("ba",0,1),
                token("x",1,1)
              })
          };

          @Override
          public TokenStream getTokenStream() {
            // 4th time we are called, return tokens a b,
            // else just a:
            if (count++ != 3) {
              return new CannedTokenStream(new Token[] {
                  token("a", 1, 1),
                });
            } else {
              // After that "a b":
              return new CannedTokenStream(new Token[] {
                  token("a", 1, 1),
                  token("b", 1, 1),
                });
            }
          }
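The token(text, posIncrement, posLength) helper in the suggester-style excerpts above is likewise defined by the enclosing test class (which, in the full tests, also overrides getTokenStream() so each analysis receives the next canned stream from the tokenStreams array). A plausible stand-in for the helper, assuming the offsets are irrelevant to those tests:

  // Hypothetical stand-in for the test class's token(text, posInc, posLength) helper:
  // the position length lets a single token such as "hotspot" span the two positions
  // occupied by "wi" and "fi".
  private static Token token(String text, int posInc, int posLength) {
    Token t = new Token(text, 0, text.length());
    t.setPositionIncrement(posInc);
    t.setPositionLength(posLength);
    return t;
  }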

