Examples of incrementToken()


Examples of org.apache.lucene.analysis.TokenStream.incrementToken()

    // Test fragment: wrap the stream in an edge n-gram filter, then check each
    // leading gram of minGram..maxGram code points against the expected
    // substring (tk, s, minGram, maxGram and codePointCount come from the test).
    tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram);
    final CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class);
    final OffsetAttribute offsetAtt = tk.addAttribute(OffsetAttribute.class);
    tk.reset();
    for (int i = minGram; i <= Math.min(codePointCount, maxGram); ++i) {
      assertTrue(tk.incrementToken());
      // every gram reports the offsets of the original token
      assertEquals(0, offsetAtt.startOffset());
      assertEquals(s.length(), offsetAtt.endOffset());
      final int end = Character.offsetByCodePoints(s, 0, i);
      assertEquals(s.substring(0, end), termAtt.toString());
    }
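
The fragment above is a test body and leaves tk, s, minGram, maxGram and codePointCount to the surrounding class. Below is a minimal self-contained sketch of the same reset()/incrementToken()/end() contract, assuming a Lucene 4.x-era classpath (the Version-taking constructors in these snippets point to that era); the input string "banana" and the class name EdgeNGramDemo are illustrative only.

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.KeywordTokenizer;
    import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
    import org.apache.lucene.util.Version;

    public class EdgeNGramDemo {
      public static void main(String[] args) throws IOException {
        // KeywordTokenizer emits the whole input as one token; the filter
        // then yields its leading 1..3-code-point grams: b, ba, ban
        TokenStream ts = new EdgeNGramTokenFilter(Version.LUCENE_47,
            new KeywordTokenizer(new StringReader("banana")), 1, 3);
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        ts.reset();                     // mandatory before the first incrementToken()
        while (ts.incrementToken()) {
          System.out.println(term + " [" + offset.startOffset() + "," + offset.endOffset() + ")");
        }
        ts.end();                       // records the final offset state
        ts.close();
      }
    }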

Examples of org.apache.lucene.analysis.TokenStream.incrementToken()

      // ... tail of the per-gram loop shown above ...
      assertEquals(0, offsetAtt.startOffset());
      assertEquals(s.length(), offsetAtt.endOffset());
      final int end = Character.offsetByCodePoints(s, 0, i);
      assertEquals(s.substring(0, end), termAtt.toString());
    }
    // once the grams are exhausted, incrementToken() must return false
    assertFalse(tk.incrementToken());
  }

}

Examples of org.apache.lucene.analysis.TokenStream.incrementToken()

   
    // TermAttribute is the pre-3.1 token API (CharTermAttribute replaces it);
    // the cast is redundant because addAttribute() is generic.
    TermAttribute termAtt = result.addAttribute(TermAttribute.class);
    StringBuilder buf = new StringBuilder();
    try {
      result.reset(); // required before the first incrementToken()
      while (result.incrementToken()) {
        String word = new String(termAtt.termBuffer(), 0, termAtt.termLength());
        buf.append(filter.encode(word)).append(" ");
      }
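
TermAttribute and its termBuffer()/termLength() accessors were deprecated in Lucene 3.1 in favor of CharTermAttribute. Here is a sketch of the same encode-every-token loop against the newer attribute, assuming a Lucene 4.x WhitespaceTokenizer; the lower-casing encode() is a stand-in for the snippet's filter.encode(), not its real implementation.

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class EncodeTokensDemo {
      // stand-in for the snippet's filter.encode(word); any String -> String encoder fits
      static String encode(String word) {
        return word.toLowerCase();
      }

      public static void main(String[] args) throws IOException {
        TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_47,
            new StringReader("Hello Token Streams"));
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        StringBuilder buf = new StringBuilder();
        ts.reset();
        while (ts.incrementToken()) {
          buf.append(encode(termAtt.toString())).append(' ');
        }
        ts.end();
        ts.close();
        System.out.println(buf); // hello token streams
      }
    }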

Examples of org.apache.lucene.analysis.Tokenizer.incrementToken()

      System.out.print("Text: ");
      String line = in.readLine();
      Tokenizer tokenizer = new NutchDocumentTokenizer(new StringReader(line));
      TermAttribute termAtt = tokenizer.getAttribute(TermAttribute.class);
      System.out.print("Tokens: ");
      while (tokenizer.incrementToken()) {
        System.out.print(termAtt.term());
        System.out.print(" ");
      }
      System.out.println();
    }
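
The Nutch loop above tokenizes one console line at a time; the original also skipped reset(), which older Lucene tolerated but current versions reject. A sketch of the same read-tokenize-print loop with the full stream lifecycle, substituting a plain WhitespaceTokenizer for the Nutch-specific NutchDocumentTokenizer (an assumption, purely so the sketch compiles on a stock Lucene 4.x classpath):

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.io.StringReader;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class TokenizeRepl {
      public static void main(String[] args) throws IOException {
        BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
        System.out.print("Text: ");
        String line;
        while ((line = in.readLine()) != null) {
          Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_47, new StringReader(line));
          CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
          tokenizer.reset();            // full lifecycle: reset -> incrementToken* -> end -> close
          System.out.print("Tokens: ");
          while (tokenizer.incrementToken()) {
            System.out.print(termAtt + " ");
          }
          tokenizer.end();
          tokenizer.close();
          System.out.println();
          System.out.print("Text: ");
        }
      }
    }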

Examples of org.apache.lucene.analysis.ar.ArabicStemFilter.incrementToken()

    TokenStream tokenStream = new ArabicStemFilter(new ArabicNormalizationFilter(tokenizer));
    CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
    tokenStream.reset(); // reset(), not clearAttributes(), readies a stream for consumption
    StringBuilder stemmed = new StringBuilder();
    try {
      while (tokenStream.incrementToken()) {
        String curToken = termAtt.toString();
        // skip tokens that are absent from (or zero-count in) the vocabulary
        if (vocab != null && vocab.get(curToken) <= 0) {
          continue;
        }
        stemmed.append(curToken).append(' ');
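
The chain above depends on a tokenizer and a vocab built elsewhere. Here is a self-contained sketch of the same normalize-then-stem pipeline, assuming Lucene 4.x with StandardTokenizer as the source; the vocabulary filter from the snippet is omitted and the sample text is illustrative.

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
    import org.apache.lucene.analysis.ar.ArabicStemFilter;
    import org.apache.lucene.analysis.standard.StandardTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class ArabicStemDemo {
      public static void main(String[] args) throws IOException {
        // normalize first, then stem -- the same order as the snippet above
        TokenStream ts = new ArabicStemFilter(new ArabicNormalizationFilter(
            new StandardTokenizer(Version.LUCENE_47, new StringReader("كتاب الكتب"))));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        StringBuilder stemmed = new StringBuilder();
        ts.reset();
        while (ts.incrementToken()) {
          stemmed.append(term).append(' ');
        }
        ts.end();
        ts.close();
        System.out.println(stemmed);
      }
    }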

Examples of org.apache.lucene.analysis.cn.smart.SentenceTokenizer.incrementToken()

        }
        if (!at.getSentences().hasNext()) { // no sentences yet; use this engine to detect them
            // first: the sentences
            TokenStream sentences = new SentenceTokenizer(new CharSequenceReader(at.getText()));
            // addAttribute() always returns the same instance, so hoist it out of the loop
            OffsetAttribute offset = sentences.addAttribute(OffsetAttribute.class);
            try {
                sentences.reset(); // required before the first incrementToken()
                while (sentences.incrementToken()) {
                    Sentence s = at.addSentence(offset.startOffset(), offset.endOffset());
                    if (log.isTraceEnabled()) {
                        log.trace("detected {}:{}", s, s.getSpan());
                    }

Examples of org.apache.lucene.analysis.cn.smart.WordTokenFilter.incrementToken()

        }
        // now the tokens
        TokenStream tokens = new WordTokenFilter(new AnalyzedTextSentenceTokenizer(at));
        // hoist the attribute lookup; addAttribute() returns the same instance each call
        OffsetAttribute offset = tokens.addAttribute(OffsetAttribute.class);
        try {
            tokens.reset();
            while (tokens.incrementToken()) {
                Token t = at.addToken(offset.startOffset(), offset.endOffset());
                log.trace("detected {}", t);
            }
        } catch (IOException e) {
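
The two snippets above run SmartChinese sentence detection and word segmentation as separate passes over an AnalyzedText. A minimal sketch chaining the two stages directly, assuming the lucene-analyzers-smartcn module from the same era; the sample sentence is illustrative only.

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.cn.smart.SentenceTokenizer;
    import org.apache.lucene.analysis.cn.smart.WordTokenFilter;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

    public class SmartChineseDemo {
      public static void main(String[] args) throws IOException {
        // SentenceTokenizer splits on sentence boundaries; WordTokenFilter then
        // segments each sentence into words
        TokenStream ts = new WordTokenFilter(
            new SentenceTokenizer(new StringReader("我是中国人。你好！")));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          System.out.println(term + " [" + offset.startOffset() + "," + offset.endOffset() + ")");
        }
        ts.end();
        ts.close();
      }
    }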

Examples of org.apache.lucene.analysis.core.KeywordTokenizer.incrementToken()

    final CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class);
    final OffsetAttribute offsetAtt = tk.addAttribute(OffsetAttribute.class);
    tk.reset();
    // enumerate every (start, end) gram window, measured in code points
    for (int start = 0; start < codePointCount; ++start) {
      for (int end = start + minGram; end <= Math.min(codePointCount, start + maxGram); ++end) {
        assertTrue(tk.incrementToken());
        // every gram reports the offsets of the original token
        assertEquals(0, offsetAtt.startOffset());
        assertEquals(s.length(), offsetAtt.endOffset());
        final int startIndex = Character.offsetByCodePoints(s, 0, start);
        final int endIndex = Character.offsetByCodePoints(s, 0, end);
        assertEquals(s.substring(startIndex, endIndex), termAtt.toString());
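
The double loop above mirrors how NGramTokenFilter orders its output since Lucene 4.4: all grams for one start position, then the next start. A sketch of the corresponding producer, assuming a Lucene 4.x classpath; with input "abc" and grams of 1..2 code points it emits a, ab, b, bc, c.

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.KeywordTokenizer;
    import org.apache.lucene.analysis.ngram.NGramTokenFilter;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class NGramDemo {
      public static void main(String[] args) throws IOException {
        // KeywordTokenizer emits the whole input as one token; the filter
        // then produces every 1..2-gram of it
        TokenStream ts = new NGramTokenFilter(Version.LUCENE_47,
            new KeywordTokenizer(new StringReader("abc")), 1, 2);
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          System.out.println(term);
        }
        ts.end();
        ts.close();
      }
    }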

Examples of org.apache.lucene.analysis.core.KeywordTokenizer.incrementToken()

        final int startIndex = Character.offsetByCodePoints(s, 0, start);
        final int endIndex = Character.offsetByCodePoints(s, 0, end);
        assertEquals(s.substring(startIndex, endIndex), termAtt.toString());
      }
    }
    // once every gram window has been consumed, the stream must be exhausted
    assertFalse(tk.incrementToken());
  }

}
