Examples of org.apache.lucene.analysis.tokenattributes.CharTermAttribute.buffer()

Package org.apache.lucene.analysis.tokenattributes

Class org.apache.lucene.analysis.tokenattributes.CharTermAttribute

Examples of org.apache.lucene.analysis.tokenattributes.CharTermAttribute.buffer()

org.apache.lucene.analysis.tokenattributes.CharTermAttribute.buffer()
Returns the internal termBuffer character array which you can then directly alter. If the array is too small for your token, use {@link #resizeBuffer(int)} to increase it. After altering the buffer, be sure to call {@link #setLength} to record the number of valid characters that were placed into the termBuffer.
NOTE: The returned buffer may be larger than the valid {@link #length()}.

    CharTermAttribute termAtt = nptf.getAttribute(CharTermAttribute.class);
    TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
    nptf.reset();
    while (nptf.incrementToken()) {
      assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.buffer()[0]))));
      assertTrue("nextToken.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null);
      String type = new String(payloadAtt.getPayload().getData(), "UTF-8");
      assertTrue(type + " is not equal to " + typeAtt.type(), type.equals(typeAtt.type()) == true);
      count++;
    }

View Full Code Here

        int end = reuse.offset + reuse.length;
        if (reuse.length > 0) {
          reuse.chars[end++] = SynonymMap.WORD_SEPARATOR;
          reuse.length++;
        }
        System.arraycopy(termAtt.buffer(), 0, reuse.chars, end, length);
        reuse.length += length;
      }
      ts.end();
      ts.close();
      if (reuse.length == 0) {

View Full Code Here

      @Override
      public int nextPosition() throws IOException {
        if (stream != null) {
          while (stream.incrementToken()) {
            for (int i = 0; i < matchers.length; i++) {
              if (matchers[i].run(charTermAtt.buffer(), 0, charTermAtt.length())) {
                currentStartOffset = offsetAtt.startOffset();
                currentEndOffset = offsetAtt.endOffset();
                currentMatch = i;
                return 0;
              }

View Full Code Here

          int end = reuse.offset + reuse.length;
          if (reuse.length > 0) {
            reuse.chars[end++] = SynonymMap.WORD_SEPARATOR;
            reuse.length++;
          }
          System.arraycopy(termAtt.buffer(), 0, reuse.chars, end, length);
          reuse.length += length;
        }
        ts.end();
      }
      if (reuse.length == 0) {

View Full Code Here

    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    StringTuple document = new StringTuple();
    while (stream.incrementToken()) {
      if (termAtt.length() > 0) {
        document.add(new String(termAtt.buffer(), 0, termAtt.length()));
      }
    }
    stream.end();
    Closeables.close(stream, true);
    context.write(key, document);

View Full Code Here

      StringBuilder contents = new StringBuilder(1000);
      TokenStream stream = analyzer.tokenStream(catMatch, new StringReader(document));
      CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      while (stream.incrementToken()) {
        contents.append(termAtt.buffer(), 0, termAtt.length()).append(' ');
      }
      context.write(
          new Text(SPACE_NON_ALPHA_PATTERN.matcher(catMatch).replaceAll("_")),
          new Text(contents.toString()));
      stream.end();

View Full Code Here

        int end = reuse.offset + reuse.length;
        if (reuse.length > 0) {
          reuse.chars[end++] = SynonymMap.WORD_SEPARATOR;
          reuse.length++;
        }
        System.arraycopy(termAtt.buffer(), 0, reuse.chars, end, length);
        reuse.length += length;
      }
      ts.end();
      ts.close();
      if (reuse.length == 0) {

View Full Code Here

                           //t = field.tokenStreamValue().next(t);
                           field.tokenStreamValue().incrementToken();
                           CharTermAttribute term = field.tokenStreamValue().getAttribute(CharTermAttribute.class);
                           PayloadAttribute payload = field.tokenStreamValue().getAttribute(PayloadAttribute.class);


                           String value = new String(term.buffer(), 0, term.length());


                           if (value.startsWith(namePrefix))
                           {
                              // extract value
                              value = value.substring(namePrefix.length());

View Full Code Here

    final TokenStream stream = analyzer.tokenStream( field, new StringReader( text ) );
    try {
      CharTermAttribute term = stream.addAttribute( CharTermAttribute.class );
      stream.reset();
      while ( stream.incrementToken() ) {
        String s = new String( term.buffer(), 0, term.length() );
        tokenList.add( s );
      }
      stream.end();
    }
    finally {

View Full Code Here

    try {
      CharTermAttribute term = stream.addAttribute( CharTermAttribute.class );
      stream.reset();
      while ( stream.incrementToken() ) {
        Token token = new Token();
        token.copyBuffer( term.buffer(), 0, term.length() );
        tokenList.add( token );
      }
      stream.end();
    }
    finally {

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.