Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.CannedTokenStream
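
CannedTokenStream (from Lucene's test-framework module) replays a fixed array of pre-built Token objects as a TokenStream. That makes it a convenient way to feed exact tokens, with hand-picked offsets and position increments, into a TokenFilter or other consumer without running a real tokenizer, and the excerpts below are all taken from Lucene's own tests. As a quick orientation, here is a minimal sketch of consuming a CannedTokenStream directly; the class name CannedTokenStreamDemo is invented for illustration, and the sketch assumes the Lucene 4.x-era API used throughout this page (the Token(String, int, int) constructor and the varargs CannedTokenStream constructor).

import java.io.IOException;

import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

public class CannedTokenStreamDemo {
  public static void main(String[] args) throws IOException {
    // Pre-built tokens with explicit start/end offsets; the stream just replays them.
    Token[] tokens = new Token[] {
        new Token("purple", 0, 6),
        new Token("wizard", 7, 13)
    };

    TokenStream ts = new CannedTokenStream(tokens);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);

    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term + " [" + offset.startOffset() + "," + offset.endOffset() + ")");
    }
    ts.end();   // reports the final offset and any trailing position increment
    ts.close();
  }
}

Run against the two tokens above, this prints "purple [0,6)" followed by "wizard [7,13)".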


  protected void shingleFilterTest(int minSize, int maxSize, Token[] tokensToShingle,
                                   Token[] tokensToCompare, int[] positionIncrements,
                                   String[] types, boolean outputUnigrams,
                                   boolean outputUnigramsIfNoShingles)
    throws IOException {
    ShingleFilter filter
      = new ShingleFilter(new CannedTokenStream(tokensToShingle), minSize, maxSize);
    filter.setOutputUnigrams(outputUnigrams);
    filter.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
    shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types);
  }
View Full Code Here


  protected void shingleFilterTest(String tokenSeparator, int minSize, int maxSize, Token[] tokensToShingle,
                                   Token[] tokensToCompare, int[] positionIncrements,
                                   String[] types, boolean outputUnigrams)
    throws IOException {
    ShingleFilter filter
      = new ShingleFilter(new CannedTokenStream(tokensToShingle), minSize, maxSize);
    filter.setTokenSeparator(tokenSeparator);
    filter.setOutputUnigrams(outputUnigrams);
    shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types);
  }
View Full Code Here
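
Both helpers above do the same wiring: wrap the canned tokens in a ShingleFilter and hand the expected terms, offsets, and position increments to shingleFilterTestCommon. As a self-contained sketch of that wiring (the class name, test name, and the "please divide" tokens are invented here, and the assertion uses assertTokenStreamContents from BaseTokenStreamTestCase rather than the local helper):

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.shingle.ShingleFilter;

public class CannedShingleSketch extends BaseTokenStreamTestCase {
  public void testPleaseDivideBigrams() throws Exception {
    // Two canned tokens stand in for the analyzed text "please divide".
    Token[] input = new Token[] {
        new Token("please", 0, 6),
        new Token("divide", 7, 13)
    };
    ShingleFilter shingles = new ShingleFilter(new CannedTokenStream(input), 2, 2);
    shingles.setOutputUnigrams(true);

    // With unigrams on and the default " " separator, each unigram is followed
    // by the bigram that starts at it; position increment 0 means "stacked".
    assertTokenStreamContents(shingles,
        new String[] {"please", "please divide", "divide"},
        new int[]    {0, 0, 7},       // start offsets
        new int[]    {6, 13, 13},     // end offsets
        new int[]    {1, 0, 1});      // position increments
  }
}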

  public void testTrailingHole1() throws IOException {
    // Analyzing "wizard of", where of is removed as a
    // stopword leaving a trailing hole:
    Token[] inputTokens = new Token[] {createToken("wizard", 0, 6)};
    ShingleFilter filter = new ShingleFilter(new CannedTokenStream(1, 9, inputTokens), 2, 2);

    assertTokenStreamContents(filter,
                              new String[] {"wizard", "wizard _"},
                              new int[] {0, 0},
                              new int[] {6, 9},
View Full Code Here

  public void testTrailingHole2() throws IOException {
    // Analyzing "purple wizard of", where of is removed as a
    // stopword leaving a trailing hole:
    Token[] inputTokens = new Token[] {createToken("purple", 0, 6),
                                       createToken("wizard", 7, 13)};
    ShingleFilter filter = new ShingleFilter(new CannedTokenStream(1, 16, inputTokens), 2, 2);

    assertTokenStreamContents(filter,
                              new String[] {"purple", "purple wizard", "wizard", "wizard _"},
                              new int[] {0, 0, 7, 7},
                              new int[] {6, 13, 13, 16},
View Full Code Here

  public void testTwoTrailingHoles() throws IOException {
    // Analyzing "purple wizard of the", where of and the are removed as a
    // stopwords, leaving two trailing holes:
    Token[] inputTokens = new Token[] {createToken("purple", 0, 6),
                                       createToken("wizard", 7, 13)};
    ShingleFilter filter = new ShingleFilter(new CannedTokenStream(2, 20, inputTokens), 2, 2);

    assertTokenStreamContents(filter,
                              new String[] {"purple", "purple wizard", "wizard", "wizard _"},
                              new int[] {0, 0, 7, 7},
                              new int[] {6, 13, 13, 20},
View Full Code Here

  public void testTwoTrailingHolesTriShingle() throws IOException {
    // Analyzing "purple wizard of the", where of and the are removed as a
    // stopwords, leaving two trailing holes:
    Token[] inputTokens = new Token[] {createToken("purple", 0, 6),
                                       createToken("wizard", 7, 13)};
    ShingleFilter filter = new ShingleFilter(new CannedTokenStream(2, 20, inputTokens), 2, 3);

    assertTokenStreamContents(filter,
                              new String[] {"purple", "purple wizard", "purple wizard _", "wizard", "wizard _", "wizard _ _"},
                              new int[] {0, 0, 0, 7, 7, 7},
                              new int[] {6, 13, 20, 13, 20, 20},
View Full Code Here
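
The four trailing-hole tests above all rely on the three-argument constructor CannedTokenStream(finalPosInc, finalOffset, tokens): the extra arguments describe stopwords stripped from the end of the text, and they are reported through end() rather than through incrementToken(). A short sketch of reading those values back (the class name is invented; the expected values follow from the constructor arguments used in the tests above):

import java.io.IOException;

import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

public class TrailingHoleSketch {
  public static void main(String[] args) throws IOException {
    // "purple wizard of the" with "of" and "the" removed: two trailing holes
    // and a final offset of 20 (the length of the original text).
    TokenStream ts = new CannedTokenStream(2, 20, new Token[] {
        new Token("purple", 0, 6),
        new Token("wizard", 7, 13)
    });
    PositionIncrementAttribute posInc = ts.addAttribute(PositionIncrementAttribute.class);
    OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);

    ts.reset();
    while (ts.incrementToken()) {
      // consumes "purple" and then "wizard"
    }
    ts.end();
    // After end(): position increment 2 and end offset 20, i.e. the trailing
    // hole that lets ShingleFilter emit the "_" filler shingles seen above.
    System.out.println(posInc.getPositionIncrement() + " " + offset.endOffset());
    ts.close();
  }
}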

        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
       
        return new TokenStreamComponents(tokenizer) {
          int tokenStreamCounter = 0;
          final TokenStream[] tokenStreams = new TokenStream[] {
            new CannedTokenStream(new Token[] {
                token("wifi",1,1),
                token("hotspot",0,2),
                token("network",1,1),
                token("is",1,1),
                token("slow",1,1)
              }),
            new CannedTokenStream(new Token[] {
                token("wi",1,1),
                token("hotspot",0,3),
                token("fi",1,1),
                token("network",1,1),
                token("is",1,1),
                token("fast",1,1)

              }),
            new CannedTokenStream(new Token[] {
                token("wifi",1,1),
                token("hotspot",0,2),
                token("network",1,1)
              }),
          };
View Full Code Here

        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
       
        return new TokenStreamComponents(tokenizer) {
          int tokenStreamCounter = 0;
          final TokenStream[] tokenStreams = new TokenStream[] {
            new CannedTokenStream(new Token[] {
                token("ab",1,1),
                token("ba",0,1),
                token("xc",1,1)
              }),
            new CannedTokenStream(new Token[] {
                token("ba",1,1),         
                token("xd",1,1)
              }),
            new CannedTokenStream(new Token[] {
                token("ab",1,1),
                token("ba",0,1),
                token("x",1,1)
              })
          };
View Full Code Here

          @Override
          public TokenStream getTokenStream() {
            // Only on the 4th call (count == 3) do we return tokens "a b";
            // every other call returns just "a":
            if (count++ != 3) {
              return new CannedTokenStream(new Token[] {
                  token("a", 1, 1),
                });
            } else {
              // After that "a b":
              return new CannedTokenStream(new Token[] {
                  token("a", 1, 1),
                  token("b", 1, 1),
                });
            }
          }
View Full Code Here

       
        return new TokenStreamComponents(tokenizer) {

          @Override
          public TokenStream getTokenStream() {
            return new CannedTokenStream(new Token[] {
                token("hairy", 1, 1),
                token("smelly", 0, 1),
                token("dog", 1, 1),
              });
          }
View Full Code Here
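
The remaining fragments all share one pattern: an Analyzer whose TokenStreamComponents ignores its Tokenizer and returns a canned stream from getTokenStream(), so whatever consumes the analyzer sees exactly the tokens the test author wants, including stacked tokens such as "smelly" on top of "hairy". A self-contained version of that pattern might look like the sketch below; the class name is invented, the token(...) helper mirrors the one the fragments rely on, and the whole thing assumes the Lucene 4.x Analyzer/MockTokenizer API:

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;

public class CannedAnalyzerSketch {

  // Same shape as the token(...) helper used in the fragments above.
  static Token token(String term, int posInc, int posLength) {
    Token t = new Token(term, 0, 0);
    t.setPositionIncrement(posInc);
    t.setPositionLength(posLength);
    return t;
  }

  static Analyzer cannedAnalyzer() {
    return new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        return new TokenStreamComponents(tokenizer) {
          @Override
          public TokenStream getTokenStream() {
            // Ignore the tokenizer's output and replay fixed tokens; "smelly"
            // has position increment 0, so it is stacked on "hairy".
            return new CannedTokenStream(new Token[] {
                token("hairy", 1, 1),
                token("smelly", 0, 1),
                token("dog", 1, 1),
              });
          }

          @Override
          protected void setReader(Reader reader) throws IOException {
            // No-op: the canned stream never reads from the Reader.
          }
        };
      }
    };
  }
}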
