Package org.apache.lucene.analysis.ngram

Examples of org.apache.lucene.analysis.ngram.EdgeNGramTokenizer


  }

  private class NGramAnalyzer extends Analyzer {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      return new TokenStreamComponents(new EdgeNGramTokenizer(reader, EdgeNGramTokenizer.Side.BACK,
          10, 20));
    }
View Full Code Here


            side = EdgeNGramTokenizer.Side.FRONT.getLabel();
        }
    }

    public TokenStream create(Reader input) {
        return new EdgeNGramTokenizer(input, side, minGramSize, maxGramSize);
    }
View Full Code Here

  // LUCENE-5269
  public void testUnicodeShinglesAndNgrams() throws Exception {
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, reader, 2, 94);
        //TokenStream stream = new SopTokenFilter(tokenizer);
        TokenStream stream = new ShingleFilter(tokenizer, 5);
        //stream = new SopTokenFilter(stream);
        stream = new NGramTokenFilter(TEST_VERSION_CURRENT, stream, 55, 83);
        //stream = new SopTokenFilter(stream);
View Full Code Here

                        + " To obtain the same behavior as the previous version please use \"edgeNGram\" filter which still supports side=back"
                        + " in combination with a \"keyword\" tokenizer");
            }
            final Version version = this.version == Version.LUCENE_4_3 ? Version.LUCENE_4_4 : this.version; // always use 4.4 or higher
            if (matcher == null) {
                return new EdgeNGramTokenizer(minGram, maxGram);
            } else {
                return new EdgeNGramTokenizer(minGram, maxGram) {
                    @Override
                    protected boolean isTokenChar(int chr) {
                        return matcher.isTokenChar(chr);
                    }
                };
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.ngram.EdgeNGramTokenizer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.