Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.TokenStream


    // Ideally the Analyzer superclass should have a method with the same signature,
    // with a default impl that simply delegates to the StringReader flavour.
    if (text == null)
      throw new IllegalArgumentException("text must not be null");
   
    TokenStream stream;
    if (pattern == NON_WORD_PATTERN) { // fast path
      stream = new FastStringTokenizer(text, true, toLowerCase, stopWords);
    }
    else if (pattern == WHITESPACE_PATTERN) { // fast path
      stream = new FastStringTokenizer(text, false, toLowerCase, stopWords);
    }
    else { // general case: tokenize with the supplied regex pattern
      stream = new PatternTokenizer(text, pattern, toLowerCase);
      if (stopWords != null)
        stream = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                                stream, stopWords);
    }
    return stream;
  }
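A minimal usage sketch, assuming the Lucene 2.9 contrib class PatternAnalyzer these lines come from; its preconfigured DEFAULT_ANALYZER uses NON_WORD_PATTERN and therefore takes the first fast path above:

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.index.memory.PatternAnalyzer;

    // DEFAULT_ANALYZER: non-word splitting, lowercasing, English stop words.
    TokenStream stream =
        PatternAnalyzer.DEFAULT_ANALYZER.tokenStream("content", "The quick brown fox");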


  /**
   * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}.
   *
   * @return  A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
   *          {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}, and
   *          {@link BrazilianStemFilter}.
   */
  @Override
  public final TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer( matchVersion, reader );
    result = new LowerCaseFilter( result );
    result = new StandardFilter( result );
    result = new StopFilter( StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                                         result, stoptable );
    result = new BrazilianStemFilter( result, excltable );
    return result;
  }
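A sketch of driving such a chain end to end with the Lucene 2.9 attribute API; the field name and sample text are illustrative:

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.br.BrazilianAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.TermAttribute;
    import org.apache.lucene.util.Version;

    static void printTokens(String text) throws IOException {
      TokenStream ts = new BrazilianAnalyzer(Version.LUCENE_29)
          .tokenStream("body", new StringReader(text));
      TermAttribute term = ts.addAttribute(TermAttribute.class);
      while (ts.incrementToken()) {
        System.out.println(term.term()); // lowercased, stop-filtered, stemmed
      }
      ts.close();
    }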

    this.collator = collator;
  }

  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new KeywordTokenizer(reader);
    result = new CollationKeyFilter(result, collator);
    return result;
  }
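This matches the contrib CollationKeyAnalyzer: KeywordTokenizer emits the whole field value as a single token, and CollationKeyFilter replaces its text with an indexable collation key, enabling locale-aware sorting and range queries. A minimal setup sketch; the locale is illustrative:

    import java.text.Collator;
    import java.util.Locale;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.collation.CollationKeyAnalyzer;

    // Index time and query time must use the same Collator, or keys will not compare.
    Collator collator = Collator.getInstance(new Locale("fr", "FR"));
    Analyzer analyzer = new CollationKeyAnalyzer(collator);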

  private class PayloadAnalyzer extends Analyzer {


    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      TokenStream result = new LowerCaseTokenizer(reader);
      result = new PayloadFilter(result, fieldName);
      return result;
    }
  }
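PayloadFilter here is a test-local class whose body is not shown; a minimal sketch of such a filter under the Lucene 2.9 attribute API (the class name and payload contents are hypothetical):

    import java.io.IOException;
    import org.apache.lucene.analysis.TokenFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
    import org.apache.lucene.index.Payload;

    // Hypothetical: attaches the field name as a payload to every token.
    final class FieldNamePayloadFilter extends TokenFilter {
      private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
      private final byte[] bytes;

      FieldNamePayloadFilter(TokenStream input, String fieldName) {
        super(input);
        this.bytes = fieldName.getBytes();
      }

      @Override
      public boolean incrementToken() throws IOException {
        if (!input.incrementToken()) return false;
        payloadAtt.setPayload(new Payload(bytes));
        return true;
      }
    }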


  public QueryTermVector(String queryString, Analyzer analyzer) {   
    if (analyzer != null)
    {
      TokenStream stream = analyzer.tokenStream("", new StringReader(queryString));
      if (stream != null)
      {
        List<String> terms = new ArrayList<String>();
        try {
          boolean hasMoreTokens = false;
         
          stream.reset();
          TermAttribute termAtt = stream.addAttribute(TermAttribute.class);

          hasMoreTokens = stream.incrementToken();
          while (hasMoreTokens) {
            terms.add(termAtt.term());
            hasMoreTokens = stream.incrementToken();
          }
          processTerms(terms.toArray(new String[terms.size()]));
        } catch (IOException e) {
          // ignore - analyzing a StringReader should not throw
        }
      }
    }
  }
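QueryTermVector implements TermFreqVector, so the terms collected by this loop can be read back (sorted and de-duplicated by processTerms) along with their frequencies. A usage sketch; the analyzer and query string are illustrative:

    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.search.QueryTermVector;

    QueryTermVector vector = new QueryTermVector("foo bar foo", new WhitespaceAnalyzer());
    String[] terms = vector.getTerms();        // ["bar", "foo"]
    int[] freqs = vector.getTermFrequencies(); // [1, 2]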

      FieldQueryNode fieldNode = (FieldQueryNode) node;
      String text = fieldNode.getTextAsString();
      String field = fieldNode.getFieldAsString();

      TokenStream source = this.analyzer.tokenStream(field, new StringReader(text));
      CachingTokenFilter buffer = new CachingTokenFilter(source);

      PositionIncrementAttribute posIncrAtt = null;
      int numTokens = 0;
      int positionCount = 0;
      boolean severalTokensAtSamePosition = false;

      if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
        posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
      }

      try {
        while (buffer.incrementToken()) {
          numTokens++;
          int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
          if (positionIncrement != 0) {
            positionCount += positionIncrement;
          } else {
            severalTokensAtSamePosition = true;
          }
        }
      } catch (IOException e) {
        // ignore
      }

      try {
        // rewind the buffer stream
        buffer.reset();

        // close original stream - all tokens buffered
        source.close();
      } catch (IOException e) {
        // ignore
      }

      if (!buffer.hasAttribute(TermAttribute.class)) {
        return new NoTokenFoundQueryNode(); // nothing was tokenized
      }
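The buffer-count-rewind pattern works because CachingTokenFilter records every token from its input on the first pass and replays the cache after reset(). A compact sketch of that behaviour; the analyzer, field name, and text are illustrative:

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.CachingTokenFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.WhitespaceAnalyzer;

    static void replay() throws IOException {
      TokenStream source =
          new WhitespaceAnalyzer().tokenStream("f", new StringReader("a b c"));
      CachingTokenFilter buffer = new CachingTokenFilter(source);

      int first = 0;
      while (buffer.incrementToken()) first++;  // fills the cache: 3 tokens

      buffer.reset();                           // rewind to the start of the cache
      int second = 0;
      while (buffer.incrementToken()) second++; // replays the same 3 tokens
    }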

    if (text == null)
      throw new IllegalArgumentException("text must not be null");
    if (analyzer == null)
      throw new IllegalArgumentException("analyzer must not be null");
   
    TokenStream stream = analyzer.tokenStream(fieldName,
        new StringReader(text));

    addField(fieldName, stream);
  }
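These lines are from MemoryIndex, which analyzes the text into a transient, single-document index that can be queried immediately. A usage sketch; the analyzer, field, and query are illustrative:

    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.index.memory.MemoryIndex;
    import org.apache.lucene.search.TermQuery;

    MemoryIndex index = new MemoryIndex();
    index.addField("content", "quick brown fox", new WhitespaceAnalyzer());
    float score = index.search(new TermQuery(new Term("content", "fox")));
    // score > 0.0f: the single in-memory document matches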

  public <T> TokenStream keywordTokenStream(final Collection<T> keywords) {
    // TODO: deprecate & move this method into AnalyzerUtil?
    if (keywords == null)
      throw new IllegalArgumentException("keywords must not be null");
   
    return new TokenStream() {
      private Iterator<T> iter = keywords.iterator();
      private int start = 0;
      private TermAttribute termAtt = addAttribute(TermAttribute.class);
      private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
     
      public boolean incrementToken() {
        if (!iter.hasNext()) return false;
        T obj = iter.next();
        if (obj == null)
          throw new IllegalArgumentException("keyword must not be null");
        String term = obj.toString();
        clearAttributes();
        termAtt.setTermBuffer(term);
        offsetAtt.setOffset(start, start + term.length());
        start += term.length() + 1; // separate words by one (blank) character
        return true;
      }
    };
  }
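A usage sketch for this stream: feeding a collection of untokenized keys into a MemoryIndex field via the TokenStream flavour of addField (the field name and keys are illustrative):

    import java.util.Arrays;
    import org.apache.lucene.index.memory.MemoryIndex;

    MemoryIndex index = new MemoryIndex();
    index.addField("id", index.keywordTokenStream(Arrays.asList("doc-17", "doc-42")));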

  /**
   * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}.
   *
   * @return  A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with
   *          {@link LowerCaseFilter}, {@link StopFilter}, {@link ArabicNormalizationFilter}
   *          and {@link ArabicStemFilter}.
   */
  @Override
  public final TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new ArabicLetterTokenizer( reader );
    result = new LowerCaseFilter(result);
    // the order here is important: the stopword list is not normalized!
    result = new StopFilter( StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                             result, stoptable );
    result = new ArabicNormalizationFilter( result );
    result = new ArabicStemFilter( result );
    return result;
  }

