Package org.apache.lucene.analysis.standard

Examples of org.apache.lucene.analysis.standard.StandardTokenizer


    public TokenStream reusableTokenStream(String fieldName, Reader reader)
      throws IOException {
      SavedStreams streams = (SavedStreams) getPreviousTokenStream();
      if (streams == null) {
        streams = new SavedStreams();
        streams.source = new StandardTokenizer(matchVersion, reader);
        streams.result = new GreekLowerCaseFilter(streams.source);
        streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                                        streams.result, stopSet);
        setPreviousTokenStream(streams);
      } else {
View Full Code Here


   * @return  A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
   *       {@link StandardFilter}, {@link LowerCaseFilter}, and {@link StopFilter}
   */
  @Override
  public final TokenStream tokenStream( String fieldName, Reader reader ) {
                TokenStream result = new StandardTokenizer( matchVersion, reader );
    result = new StandardFilter( result );
    result = new LowerCaseFilter( result );
    result = new StopFilter( StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                                         result, stoptable );
    return result;
View Full Code Here

  public TokenStream reusableTokenStream(String fieldName, Reader reader)
      throws IOException {
      SavedStreams streams = (SavedStreams) getPreviousTokenStream();
      if (streams == null) {
        streams = new SavedStreams();
        streams.source = new StandardTokenizer(matchVersion, reader);
        streams.result = new StandardFilter(streams.source);
        streams.result = new LowerCaseFilter(streams.result);
        streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                                        streams.result, stoptable);
        setPreviousTokenStream(streams);
View Full Code Here

    /** No-argument constructor; the body is empty, so no setup is performed here. */
    public MultiAnalyzer() {
    }

    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
      result = new TestFilter(result);
      result = new LowerCaseFilter(result);
      return result;
    }
View Full Code Here

    /** No-argument constructor; the body is empty, so no setup is performed here. */
    public PosIncrementAnalyzer() {
    }

    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
      result = new TestPosIncrementFilter(result);
      result = new LowerCaseFilter(result);
      return result;
    }
View Full Code Here

      System.out.println("-----Tokens: " + tokCount[k] + "-----");
      for (int i = 0; i < tokCount[k]; i++) {
        buffer.append(English.intToEnglish(i).toUpperCase()).append(' ');
      }
      //make sure we produce the same tokens
      TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(buffer.toString()))));
      TokenStream sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(100));
      teeStream.consumeAllTokens();
      TokenStream stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(buffer.toString()))), 100);
      TermAttribute tfTok = stream.addAttribute(TermAttribute.class);
      TermAttribute sinkTok = sink.addAttribute(TermAttribute.class);
      for (int i=0; stream.incrementToken(); i++) {
        assertTrue(sink.incrementToken());
        assertTrue(tfTok + " is not equal to " + sinkTok + " at token: " + i, tfTok.equals(sinkTok) == true);
      }
     
      //simulate two fields, each being analyzed once, for 20 documents
      for (int j = 0; j < modCounts.length; j++) {
        int tfPos = 0;
        long start = System.currentTimeMillis();
        for (int i = 0; i < 20; i++) {
          stream = new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(buffer.toString())));
          PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
          while (stream.incrementToken()) {
            tfPos += posIncrAtt.getPositionIncrement();
          }
          stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(buffer.toString()))), modCounts[j]);
          posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
          while (stream.incrementToken()) {
            tfPos += posIncrAtt.getPositionIncrement();
          }
        }
        long finish = System.currentTimeMillis();
        System.out.println("ModCount: " + modCounts[j] + " Two fields took " + (finish - start) + " ms");
        int sinkPos = 0;
        //simulate one field with one sink
        start = System.currentTimeMillis();
        for (int i = 0; i < 20; i++) {
          teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(buffer.toString()))));
          sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(modCounts[j]));
          PositionIncrementAttribute posIncrAtt = teeStream.getAttribute(PositionIncrementAttribute.class);
          while (teeStream.incrementToken()) {
            sinkPos += posIncrAtt.getPositionIncrement();
          }
View Full Code Here

  /** No-argument constructor; the body is empty, so no setup is performed here. */
  public ASCIIAnalyzer() {
  }

  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
    result = new StandardFilter(result);
    result = new ASCIIFoldingFilter(result);
    result = new LowerCaseFilter(result);
    return result;
  }
View Full Code Here

    this.matchVersion = matchVersion;
  }
 
  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream ts = new StandardTokenizer(matchVersion, reader);
    ts = new StandardFilter(ts);
    ts = new ThaiWordFilter(ts);
    ts = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                        ts, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    return ts;
View Full Code Here

    }
   
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
      streams.source = new StandardTokenizer(matchVersion, reader);
      streams.result = new StandardFilter(streams.source);
      streams.result = new ThaiWordFilter(streams.result);
      streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                                      streams.result, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
      setPreviousTokenStream(streams);
View Full Code Here

    }
   
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
      streams.source = new StandardTokenizer(matchVersion, reader);
      streams.result = new StandardFilter(streams.source);
      streams.result = new LowerCaseFilter(streams.result);
      streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                                      streams.result, stopSet);
      streams.result = new GermanStemFilter(streams.result, exclusionSet);
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.standard.StandardTokenizer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.