Package org.apache.lucene.analysis.standard

Examples of org.apache.lucene.analysis.standard.StandardTokenizer
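The snippets below are drawn from analyzers that build their token chains on top of StandardTokenizer. As a starting point, here is a minimal sketch of driving a StandardTokenizer directly with the attribute-based API that the first snippet's matchVersion/reader style implies; the class name, Version constant, and sample text are illustrative, so adjust them for your Lucene release.

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class StandardTokenizerDemo {
  public static void main(String[] args) throws IOException {
    // Assumed Lucene 4.x; the (matchVersion, reader) constructor mirrors the first snippet below.
    StandardTokenizer tokenizer =
        new StandardTokenizer(Version.LUCENE_47, new StringReader("Lucene in Action, 2nd Edition"));
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    tokenizer.reset();                      // must be called before the first incrementToken()
    while (tokenizer.incrementToken()) {    // advance to the next token
      System.out.println(term.toString());  // Lucene, in, Action, 2nd, Edition
    }
    tokenizer.end();
    tokenizer.close();
  }
}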


  /**
   * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link SoraniNormalizationFilter},
   *         {@link LowerCaseFilter}, {@link StopFilter},
   *         {@link SetKeywordMarkerFilter} if a stem exclusion set is
   *         provided and {@link SoraniStemFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(matchVersion, source);
    result = new SoraniNormalizationFilter(result);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if (!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new SoraniStemFilter(result);
    return new TokenStreamComponents(source, result);
  }
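Once an analyzer like this is built, its filter chain is consumed through Analyzer.tokenStream rather than by calling createComponents directly. A minimal sketch, assuming the same Lucene 4.x API as above (PrintTokens and print are illustrative names, not part of the snippet):

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class PrintTokens {
  // Prints every term the given analyzer produces for the given text.
  static void print(Analyzer analyzer, String field, String text) throws IOException {
    TokenStream ts = analyzer.tokenStream(field, new StringReader(text));
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();                             // prepare the (possibly reused) stream
    while (ts.incrementToken()) {
      System.out.println(term.toString());  // one filtered term per line
    }
    ts.end();
    ts.close();
  }
}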


    /**
     * Constructs a {@link StandardTokenizer} filtered by a {@link
     * StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}.
     */
    public TokenStream tokenStream(String fieldName, Reader reader) {
        TokenStream result = new StandardTokenizer(reader);
        result = new StandardFilter(result);
        result = new LowerCaseFilter(result);  // lowercase before stop-word removal, as documented above
        result = new StopFilter(result, stopSet);
        return result;
    }

    /**
     * @return  A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
     *          {@link GreekLowerCaseFilter} and {@link StopFilter}
     */
    public TokenStream tokenStream(String fieldName, Reader reader)
    {
        TokenStream result = new StandardTokenizer(reader);
        result = new GreekLowerCaseFilter(result, charset);
        result = new StopFilter(result, stopSet);
        return result;
    }

    public TokenStream reusableTokenStream(String fieldName, Reader reader)
      throws IOException {
      SavedStreams streams = (SavedStreams) getPreviousTokenStream();
      if (streams == null) {
        streams = new SavedStreams();
        streams.source = new StandardTokenizer(reader);
        streams.result = new GreekLowerCaseFilter(streams.source, charset);
        streams.result = new StopFilter(streams.result, stopSet);
        setPreviousTokenStream(streams);
      } else {
        streams.source.reset(reader);   // reuse the saved tokenizer on the new reader
      }
      return streams.result;
    }
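The SavedStreams object used by these reusableTokenStream implementations is just a small holder that keeps the tokenizer and the end of its filter chain between calls, so the chain is built once and only reset afterwards. A sketch of its usual shape (the exact declaration varies by analyzer; field names follow the snippets above):

    private class SavedStreams {
      Tokenizer source;    // the reusable StandardTokenizer
      TokenStream result;  // the last filter in the chain built on top of source
    }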

  public final TokenStream tokenStream(String fieldName, Reader reader) {

    if (fieldName == null) throw new IllegalArgumentException("fieldName must not be null");
    if (reader == null) throw new IllegalArgumentException("reader must not be null");

    TokenStream result = new StandardTokenizer(reader);
    result = new StandardFilter(result);
    result = new StopFilter(result, stoptable);
    result = new FrenchStemFilter(result, excltable);
    // Convert to lowercase after stemming!
    result = new LowerCaseFilter(result);
    return result;
  }

  public TokenStream reusableTokenStream(String fieldName, Reader reader)
      throws IOException {
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
      streams.source = new StandardTokenizer(reader);
      streams.result = new StandardFilter(streams.source);
      streams.result = new StopFilter(streams.result, stoptable);
      streams.result = new FrenchStemFilter(streams.result, excltable);
      // Convert to lowercase after stemming!
      streams.result = new LowerCaseFilter(streams.result);
      setPreviousTokenStream(streams);
    } else {
      streams.source.reset(reader);
    }
    return streams.result;
  }

  /**
   * @return  A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
   *       {@link StandardFilter}, {@link LowerCaseFilter}, and {@link StopFilter}
   */
  public final TokenStream tokenStream( String fieldName, Reader reader ) {
    TokenStream result = new StandardTokenizer( reader );
    result = new StandardFilter( result );
    result = new LowerCaseFilter( result );
    result = new StopFilter( result, stoptable );
    return result;
  }

  public TokenStream reusableTokenStream(String fieldName, Reader reader)
      throws IOException {
      SavedStreams streams = (SavedStreams) getPreviousTokenStream();
      if (streams == null) {
        streams = new SavedStreams();
        streams.source = new StandardTokenizer(reader);
        streams.result = new StandardFilter(streams.source);
        streams.result = new LowerCaseFilter(streams.result);
        streams.result = new StopFilter(streams.result, stoptable);
        setPreviousTokenStream(streams);
      } else {
        streams.source.reset(reader);   // reuse the saved tokenizer on the new reader
      }
      return streams.result;
  }

  /**
   * @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}, and
   *         {@link GermanStemFilter}
   */
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer(reader);
    result = new StandardFilter(result);
    result = new LowerCaseFilter(result);
    result = new StopFilter(result, stopSet);
    result = new GermanStemFilter(result, exclusionSet);
    return result;
  }

  public TokenStream reusableTokenStream(String fieldName, Reader reader)
      throws IOException {
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
      streams.source = new StandardTokenizer(reader);
      streams.result = new StandardFilter(streams.source);
      streams.result = new LowerCaseFilter(streams.result);
      streams.result = new StopFilter(streams.result, stopSet);
      streams.result = new GermanStemFilter(streams.result, exclusionSet);
      setPreviousTokenStream(streams);
    } else {
      streams.source.reset(reader);
    }
    return streams.result;
  }
