Examples of org.apache.lucene.analysis.tokenattributes.CharTermAttribute.buffer()

Package org.apache.lucene.analysis.tokenattributes

Class org.apache.lucene.analysis.tokenattributes.CharTermAttribute

Examples of org.apache.lucene.analysis.tokenattributes.CharTermAttribute.buffer()

org.apache.lucene.analysis.tokenattributes.CharTermAttribute.buffer()
Returns the internal termBuffer character array which you can then directly alter. If the array is too small for your token, use {@link #resizeBuffer(int)} to increase it. After altering the buffer, be sure to call {@link #setLength} to record the number of valid characters that were placed into the termBuffer.
NOTE: The returned buffer may be larger than the valid {@link #length()}.

    // Collect every non-empty term produced by the stream into `terms`.
    // NOTE(review): the catch/finally clause for this try is outside this excerpt.
    try {
      CharTermAttribute attribute = stream.addAttribute( CharTermAttribute.class );
      stream.reset();
      while ( stream.incrementToken() ) {
        if ( attribute.length() > 0 ) {
          // buffer() may be larger than the valid length, so copy only the
          // first length() characters.
          String term = new String( attribute.buffer(), 0, attribute.length() );
          terms.add( term );
        }
      }
      stream.end();
    }

View Full Code Here

        if ( termsAtSamePosition == null ) {
          termsAtSamePosition = new ArrayList<Term>();
          termsPerPosition.put( position, termsAtSamePosition );
        }


        String termString = new String( termAttribute.buffer(), 0, termAttribute.length() );
        termsAtSamePosition.add( new Term( fieldName, termString ) );
        if ( termsAtSamePosition.size() > 1 ) {
          isMultiPhrase = true;
        }
      }

View Full Code Here

        int end = reuse.offset + reuse.length;
        if (reuse.length > 0) {
          reuse.chars[end++] = SynonymMap.WORD_SEPARATOR;
          reuse.length++;
        }
        System.arraycopy(termAtt.buffer(), 0, reuse.chars, end, length);
        reuse.length += length;
      }
      ts.end();
      ts.close();
      if (reuse.length == 0) {

View Full Code Here

                CharTermAttribute termAtt = tokenizer
                        .getAttribute(CharTermAttribute.class);
                OffsetAttribute offsetAtt = tokenizer
                        .getAttribute(OffsetAttribute.class);
                if (tokenizer.incrementToken()) {
                    String word = new String(termAtt.buffer(), 0,
                            termAtt.length());
                    int start = offsetAtt.startOffset();
                    int end = offsetAtt.endOffset();
                    if (prevWord == null) {
                        prevWord = word;

View Full Code Here

                new StringReader(input));


        CharTermAttribute termAtt = tokenizer
                .getAttribute(CharTermAttribute.class);
        while (tokenizer.incrementToken()) {
            String word = new String(termAtt.buffer(), 0, termAtt.length());
            sb.append(word).append("|");
        }
        System.out.println(input + " => " + sb.toString());
    }

View Full Code Here

                .getAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAtt = tokenizer
                .getAttribute(OffsetAttribute.class);
        String i = "";
        while (tokenizer.incrementToken()) {
            String word = new String(termAtt.buffer(), 0, termAtt.length());
            // int s = offsetAtt.startOffset();
            // int e = offsetAtt.endOffset();
            i = word;
        }
        if (print) {

View Full Code Here

                            .getAttribute(CharTermAttribute.class);
                    OffsetAttribute offsetAtt = tokenizer
                            .getAttribute(OffsetAttribute.class);
                    String w = "";
                    while (tokenizer.incrementToken()) {
                        String word = new String(termAtt.buffer(), 0, termAtt.length());
                        // int s = offsetAtt.startOffset();
                        // int e = offsetAtt.endOffset();
                        w = word;
                    }
                    long segs = System.currentTimeMillis() - start;

View Full Code Here

    // Clear the reusable token, then copy over whichever attributes are present.
    reusableToken.clear();
    if(termAtt != null) {
      // Lucene 3.0 form, kept for reference:
      //reusableToken.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
      // Lucene 3.1 form: copy only the first length() chars of the term buffer.
      reusableToken.copyBuffer(termAtt.buffer(), 0, termAtt.length());
    }
    if(offsetAtt != null) {
      // Carry the start and end offsets over to the token.
      reusableToken.setStartOffset(offsetAtt.startOffset());
      reusableToken.setEndOffset(offsetAtt.endOffset());
    }

View Full Code Here

        if ( termsAtSamePosition == null ) {
          termsAtSamePosition = new ArrayList<Term>();
          termsPerPosition.put( position, termsAtSamePosition );
        }


        String termString = new String( termAttribute.buffer(), 0, termAttribute.length() );
        termsAtSamePosition.add( new Term( fieldName, termString ) );
        if ( termsAtSamePosition.size() > 1 ) {
          isMultiPhrase = true;
        }
      }

View Full Code Here

        ts.reset();


        Set<String> terms = new HashSet<>();
        while (ts.incrementToken()) {
            CharTermAttribute att = ts.getAttribute(CharTermAttribute.class);
            terms.add(new String(att.buffer(), 0, att.length()));
            //System.out.println(new String(att.buffer(), 0, att.length()));
        }
    }


    @SuppressWarnings("StatementWithEmptyBody")

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.