Package: org.apache.lucene.analysis.standard

Usage examples of org.apache.lucene.analysis.standard.StandardTokenizer


   *         provided and {@link SnowballFilter}.
   */
  // Builds the analysis chain: StandardTokenizer -> StandardFilter, optional
  // elision stripping, then lowercasing. NOTE(review): snippet is truncated
  // here; the remaining filters and the return statement are not visible.
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(matchVersion, source);
    // Elision filtering only for 3.2+ match versions, preserving the pre-3.2
    // token stream for backward compatibility with existing indexes.
    if (matchVersion.onOrAfter(Version.LUCENE_32)) {
      result = new ElisionFilter(matchVersion, result, DEFAULT_ARTICLES);
    }
    result = new LowerCaseFilter(matchVersion, result);
View Full Code Here


    // Test setup: an IndexWriter over an in-memory directory, using an inline
    // Analyzer whose token filter counts tokens and special-cases the sixth.
    RAMDirectory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new Analyzer() {

      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new TokenFilter(new StandardTokenizer(Version.LUCENE_CURRENT, reader)) {
          // number of incrementToken() calls made so far
          private int count = 0;

          @Override
          public boolean incrementToken() throws IOException {
            // Branch body is cut off in this snippet -- presumably injects a
            // failure on the 6th token for the test; TODO confirm in full source.
            if (count++ == 5) {
View Full Code Here

* @version $Id: StandardTokenizerFactory.java 591158 2007-11-01 22:37:42Z hossman $
*/

public class StandardTokenizerFactory extends BaseTokenizerFactory {
  // Factory method: builds a StandardTokenizer directly over the input reader.
  // NOTE(review): uses the no-Version StandardTokenizer constructor; any
  // match-version handling is not visible in this snippet.
  public StandardTokenizer create(Reader input) {
    return new StandardTokenizer(input);
  }
View Full Code Here

/**
* @version $Id: HTMLStripStandardTokenizerFactory.java 555343 2007-07-11 17:46:25Z hossman $
*/
public class HTMLStripStandardTokenizerFactory extends BaseTokenizerFactory {
  // Strips HTML markup from the input via HTMLStripReader before handing the
  // cleaned character stream to StandardTokenizer.
  public TokenStream create(Reader input) {
    return new StandardTokenizer(new HTMLStripReader(input));
  }
View Full Code Here

            // Registry key for this tokenizer factory.
            @Override public String name() {
                return "standard";
            }

            // Builds a StandardTokenizer pinned to the library-wide analyzer version.
            @Override public Tokenizer create(Reader reader) {
                return new StandardTokenizer(Lucene.ANALYZER_VERSION, reader);
            }
        }));

        tokenizerFactories.put("uax_url_email", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
            @Override public String name() {
View Full Code Here

        super(index, indexSettings, name, settings);
        // Read the "max_token_length" setting, falling back to Lucene's
        // default when the setting is absent.
        maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
    }

    @Override public Tokenizer create(Reader reader) {
        StandardTokenizer tokenizer = new StandardTokenizer(version, reader);
        tokenizer.setMaxTokenLength(maxTokenLength);
        return tokenizer;
    }
View Full Code Here

    // Delegates to the stop-word-aware base constructor using the default
    // English stop word set.
    public StandardHtmlStripAnalyzer(Version version) {
        super(version, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    }

    @Override protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
        final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
        src.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
        TokenStream tok = new StandardFilter(matchVersion, src);
        tok = new LowerCaseFilter(matchVersion, tok);
        tok = new StopFilter(matchVersion, tok, stopwords);
        return new TokenStreamComponents(src, tok) {
            @Override
            protected boolean reset(final Reader reader) throws IOException {
                src.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
                return super.reset(reader);
            }
        };
    }
View Full Code Here

        } else if (!(query instanceof LazyParsedQuery)) {
            // TODO FIXME
            // Unsupported query type: log and skip making suggestions.
            logger.debug("can not make suggestions for queries of type " + query.getClass());
        } else {
            // Re-tokenize the raw query string with StandardTokenizer so each
            // word can be looked up individually for suggestions.
            String originalString = ((LazyParsedQuery)query).getQueryString();
            StandardTokenizer tokenizer = new StandardTokenizer(new StringReader(originalString));
            List<String> tokens = new ArrayList<String>();
            try {
                // NOTE(review): uses the old reusable-Token next(Token) API,
                // superseded by incrementToken() in later Lucene versions.
                Token token = new Token();
                while(true) {
                    token = tokenizer.next(token);
                    if (null == token) { break; }
                    // clone before extracting text: the Token instance is reused
                    tokens.add(TokenUtil.termText((Token)token.clone()));
                }

                // for every word, suggest something
View Full Code Here

    super(Version.LUCENE_31, stopSet);
  }
 
  // Analysis chain pinned to Lucene 3.1 semantics: StandardTokenizer ->
  // StandardFilter -> lowercase -> ASCII folding -> length cap -> stop word
  // removal. NOTE(review): snippet is truncated before the return statement.
  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer tokenizer = new StandardTokenizer(Version.LUCENE_31, reader);
    TokenStream result = new StandardFilter(Version.LUCENE_31, tokenizer);
    result = new LowerCaseFilter(Version.LUCENE_31, result);
    // fold accented characters to their ASCII equivalents
    result = new ASCIIFoldingFilter(result);
    // AlphaNumericMaxLengthFilter -- presumably limits alphanumeric token
    // length; TODO confirm against its definition
    result = new AlphaNumericMaxLengthFilter(result);
    result = new StopFilter(Version.LUCENE_31, result, stopwords);
View Full Code Here

* @deprecated Use {@link HTMLStripCharFilterFactory} and {@link StandardTokenizerFactory}
*/
@Deprecated
public class HTMLStripStandardTokenizerFactory extends BaseTokenizerFactory {
  // Wraps the input in an HTMLStripReader so markup is removed before
  // tokenizing; reset() re-wraps the new reader so reuse keeps stripping HTML.
  public Tokenizer create(Reader input) {
    return new StandardTokenizer(new HTMLStripReader(input)) {
      @Override
      public void reset(Reader reader) throws IOException {
        super.reset(new HTMLStripReader(reader));
      }
    };
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.standard.StandardTokenizer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.