Examples of addAttribute()


Examples of org.apache.lucene.analysis.PorterStemFilter.addAttribute()

  public TokenStream tokenStream(String fieldName, Reader reader) {
    final TokenStream result = new PorterStemFilter(new StopFilter(
        true, new StandardTokenizer(Version.LUCENE_CURRENT, reader),
        StandardAnalyzer.STOP_WORDS_SET));
   
    TermAttribute termAtt = (TermAttribute) result
        .addAttribute(TermAttribute.class);
    StringBuilder buf = new StringBuilder();
    try {
      while (result.incrementToken()) {
        String word = new String(termAtt.termBuffer(), 0, termAtt

Examples of org.apache.lucene.analysis.TeeSinkTokenFilter.addAttribute()

    TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))));
    SinkTokenStream sink = ttf.newSinkTokenStream(sinkFilter);
   
    boolean seenDogs = false;

    TermAttribute termAtt = ttf.addAttribute(TermAttribute.class);
    TypeAttribute typeAtt = ttf.addAttribute(TypeAttribute.class);
    ttf.reset();
    while (ttf.incrementToken()) {
      if (termAtt.term().equals("dogs")) {
        seenDogs = true;

Examples of org.apache.lucene.analysis.TokenStream.addAttribute()

          String asString = writer.toString();

          // Analyze the call
          TokenStream stream = nestedAnalyzer.tokenStream(fieldName,
                  new StringReader(asString));
          TermAttribute termAtt = (TermAttribute) stream.addAttribute(TermAttribute.class);

          System.out.println("Tokens for '" + asString + "':");
          while (stream.incrementToken()) {
            System.out.println(" '" + termAtt.term() + "'");
          }

Examples of org.apache.lucene.analysis.Tokenizer.addAttribute()

        boolean foundWildcard = false;
        //Lucene tokenizers are really low-level ...
        try {
            while(tokenizer.incrementToken()){
                //only interested in the start/end indexes of tokens
                OffsetAttribute offset = tokenizer.addAttribute(OffsetAttribute.class);
                if(lastAdded < 0){ //rest with this token
                    lastAdded = offset.startOffset();
                }
                if(foundWildcard){ //wildcard present in the current token
                    //two cases: "wildcar? at the end", "wild?ard within the word"

Examples of org.apache.lucene.analysis.WhitespaceTokenizer.addAttribute()

    final String input = "How the s a brown s cow d like A B thing?";
    WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
    CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
    CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf);
   
    CharTermAttribute term = wt.addAttribute(CharTermAttribute.class);
    assertTrue(nsf.incrementToken());
    assertEquals("How_the", term.toString());
    assertTrue(nsf.incrementToken());
    assertEquals("the_s", term.toString());
   

Examples of org.apache.lucene.analysis.cn.smart.SentenceTokenizer.addAttribute()

        if(!at.getSentences().hasNext()) { //no sentences  ... use this engine to detect
            //first the sentences
            TokenStream sentences = new SentenceTokenizer(new CharSequenceReader(at.getText()));
            try {
                while(sentences.incrementToken()){
                    OffsetAttribute offset = sentences.addAttribute(OffsetAttribute.class);
                    Sentence s = at.addSentence(offset.startOffset(), offset.endOffset());
                    if(log.isTraceEnabled()) {
                        log.trace("detected {}:{}",s,s.getSpan());
                    }
                }

Examples of org.apache.lucene.analysis.cn.smart.WordTokenFilter.addAttribute()

        //now the tokens
        TokenStream tokens = new WordTokenFilter(new AnalyzedTextSentenceTokenizer(at));
        try {
          tokens.reset();
            while(tokens.incrementToken()){
                OffsetAttribute offset = tokens.addAttribute(OffsetAttribute.class);
                Token t = at.addToken(offset.startOffset(), offset.endOffset());
                log.trace("detected {}",t);
            }
        } catch (IOException e) {
            String message = String.format("IOException while reading from "

Examples of org.apache.lucene.analysis.core.KeywordTokenizer.addAttribute()

    final int codePointCount = s.codePointCount(0, s.length());
    final int minGram = _TestUtil.nextInt(random(), 1, 3);
    final int maxGram = _TestUtil.nextInt(random(), minGram, 10);
    TokenStream tk = new KeywordTokenizer(new StringReader(s));
    tk = new NGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram);
    final CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class);
    final OffsetAttribute offsetAtt = tk.addAttribute(OffsetAttribute.class);
    tk.reset();
    for (int start = 0; start < codePointCount; ++start) {
      for (int end = start + minGram; end <= Math.min(codePointCount, start + maxGram); ++end) {
        assertTrue(tk.incrementToken());

Examples of org.apache.lucene.analysis.core.WhitespaceTokenizer.addAttribute()

    final String input = "How the s a brown s cow d like A B thing?";
    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
    CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf);
   
    CharTermAttribute term = wt.addAttribute(CharTermAttribute.class);
    nsf.reset();
    assertTrue(nsf.incrementToken());
    assertEquals("How_the", term.toString());
    assertTrue(nsf.incrementToken());
    assertEquals("the_s", term.toString());

Examples of org.apache.lucene.analysis.icu.segmentation.ICUTokenizer.addAttribute()

        //Lucene tokenizers are really low-level ...
        try {
          tokenizer.reset(); //starting with Solr4 reset MUST BE called before using
            while(tokenizer.incrementToken()){
                //only interested in the start/end indexes of tokens
                OffsetAttribute offset = tokenizer.addAttribute(OffsetAttribute.class);
                if(lastAdded < 0){ //rest with this token
                    lastAdded = offset.startOffset();
                }
                if(foundWildcard){ //wildcard present in the current token
                    //two cases: "wildcar? at the end", "wild?ard within the word"
TOP
Copyright © 2015 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact coftware#gmail.com.