Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.WhitespaceTokenizer


        tokenBuilder.append((String) groupIter.next());
        tokenBuilder.append(" ");
      }
   
      //doc.add(new Field("groups", new IteratorTokenStream(groupIter)));
      doc.add(new Field("groups", new WhitespaceTokenizer(new StringReader(tokenBuilder.toString()))));
    }

    // Add the URL of the document
    doc.add(new Field("url", url, Field.Store.YES, Field.Index.NOT_ANALYZED));
   
    // Add the file name (without protocol, drive-letter and path)
    String filenameWithVariants = RegainToolkit.urlToWhitespacedFileName(url);
    doc.add(new Field("filename", new WhitespaceTokenizer(new StringReader(filenameWithVariants))));
    PathFilenamePair pfPair = RegainToolkit.fragmentUrl(url);

    // Add the filename field for sorting
    doc.add(new Field("filename_sort", pfPair.getFilename(), Field.Store.YES, Field.Index.NOT_ANALYZED));
View Full Code Here


   * @see org.apache.lucene.analysis.KeywordAnalyzer#tokenStream(java.lang.String,
   * java.io.Reader)
   */
  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new WhitespaceTokenizer(reader);
    result = new LowerCaseFilter(result);

    return result;
  }
View Full Code Here

  private TokenStream luceneTokenStream(String text, boolean letters, boolean toLowerCase, Set stopWords) {
    TokenStream stream;
    if (letters)
      stream = new LetterTokenizer(new StringReader(text));
    else
      stream = new WhitespaceTokenizer(new StringReader(text));
    if (toLowerCasestream = new LowerCaseFilter(stream);
    if (stopWords != null) stream = new StopFilter(stream, stopWords);
    return stream;           
  }
View Full Code Here

  public void testPositionIncrementGap() throws IOException {
    Analyzer analyzer = new Analyzer() {
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
      }

      @Override
      public int getPositionIncrementGap(String fieldName) {
        return 500;
View Full Code Here

  public void testTokenReuse() throws IOException {
    Analyzer analyzer = new Analyzer() {
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new TokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader)) {
          boolean first=true;
          AttributeSource.State state;

          @Override
          public boolean incrementToken() throws IOException {
View Full Code Here

      super(matchVersion, new WhitespaceAnalyzer());
    }
   
    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      return new WhitespaceTokenizer(reader);
    }   
View Full Code Here

  private class NonreusableAnalyzer extends Analyzer {
    int invocationCount = 0;
    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      if (++invocationCount % 2 == 0)
        return new WhitespaceTokenizer(reader);
      else
        return new LetterTokenizer(reader);
    }
View Full Code Here

    /**
     * Creates the analyzer by forwarding {@code matchVersion} unchanged to the
     * superclass constructor; no other state is initialized here.
     *
     * @param matchVersion the version value handed to the superclass
     */
    public DutchSubclassAnalyzer(Version matchVersion) {
      super(matchVersion);
    }
    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      return new WhitespaceTokenizer(reader);
    }
View Full Code Here

          /**
           * Creates the analyzer by forwarding {@code matchVersion} unchanged to
           * the superclass constructor; no other state is initialized here.
           *
           * @param matchVersion the version value handed to the superclass
           */
          public ThaiSubclassAnalyzer(Version matchVersion) {
            super(matchVersion);
          }
    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      return new WhitespaceTokenizer(reader);
    }
View Full Code Here

        }
       
        @Override
        public TokenStream tokenStream(String fieldName, Reader reader) {
            PayloadData payload = (PayloadData) fieldToData.get(fieldName);
            TokenStream ts = new WhitespaceTokenizer(reader);
            if (payload != null) {
                if (payload.numFieldInstancesToSkip == 0) {
                    ts = new PayloadFilter(ts, payload.data, payload.offset, payload.length);
                } else {
                    payload.numFieldInstancesToSkip--;
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.WhitespaceTokenizer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.