Package org.apache.lucene.analysis.ngram

Examples of org.apache.lucene.analysis.ngram.NGramTokenizer


    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source;
        TokenStream stream;

        if("_name_prefix".equals(fieldName)) {
            source = new NGramTokenizer(reader,mingram,maxgram);
            stream = new WordDelimiterFilter(new LowerCaseFilter(Version.LUCENE_43,source), WordDelimiterFilter.ALPHANUM, CharArraySet.EMPTY_SET);
        } else if(fieldName.startsWith("_ngram_")) {
            source = new NGramTokenizer(reader,3,4);
            stream = null;
        } else {
            source = new WhitespaceTokenizer(Version.LUCENE_43,reader);
            stream = new WordDelimiterFilter(new LowerCaseFilter(Version.LUCENE_43,source), WordDelimiterFilter.ALPHANUM, CharArraySet.EMPTY_SET);
        }
View Full Code Here


             * We added this in 0.90.2 but 0.90.1 used LUCENE_43 already so we can not rely on the lucene version.
             * Yet if somebody uses 0.90.2 or higher with a prev. lucene version we should also use the deprecated version.
             */
            final Version version = this.version == Version.LUCENE_4_3 ? Version.LUCENE_4_4 : this.version; // always use 4.4 or higher
            if (matcher == null) {
                return new NGramTokenizer(minGram, maxGram);
            } else {
                return new NGramTokenizer(minGram, maxGram) {
                    @Override
                    protected boolean isTokenChar(int chr) {
                        return matcher.isTokenChar(chr);
                    }
                };
View Full Code Here

        minGramSize = (minArg != null ? Integer.parseInt(minArg) : NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
    }

    /** Creates the {@link TokenStream} of n-grams from the given {@link Reader}. */
    public TokenStream create(Reader input) {
        return new NGramTokenizer(input, minGramSize, maxGramSize);
    }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.ngram.NGramTokenizer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.