Examples of org.apache.lucene.analysis.tokenattributes.CharTermAttribute.buffer()

Package org.apache.lucene.analysis.tokenattributes

Class org.apache.lucene.analysis.tokenattributes.CharTermAttribute

Examples of org.apache.lucene.analysis.tokenattributes.CharTermAttribute.buffer()

org.apache.lucene.analysis.tokenattributes.CharTermAttribute.buffer()
Returns the internal termBuffer character array, which you can then directly alter. If the array is too small for your token, use {@link #resizeBuffer(int)} to increase it. After altering the buffer, be sure to call {@link #setLength} to record the number of valid characters that were placed into the termBuffer.
NOTE: The returned buffer may be larger than the valid {@link #length()}.

        if ( termsAtSamePosition == null ) {
          termsAtSamePosition = new ArrayList<Term>();
          termsPerPosition.put( position, termsAtSamePosition );
        }


        String termString = new String( termAttribute.buffer(), 0, termAttribute.length() );
        termsAtSamePosition.add( new Term( fieldName, termString ) );
        if ( termsAtSamePosition.size() > 1 ) {
          isMultiPhrase = true;
        }
      }

View Full Code Here

    CharTermAttribute attribute = stream.addAttribute( CharTermAttribute.class );
    stream.reset();


    while ( stream.incrementToken() ) {
      if ( attribute.length() > 0 ) {
        String term = new String( attribute.buffer(), 0, attribute.length() );
        terms.add( term );
      }
    }
    stream.end();
    stream.close();

View Full Code Here

  public static List<String> tokenizedTermValues(Analyzer analyzer, String field, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream( field, new StringReader( text ) );
    CharTermAttribute term = stream.addAttribute( CharTermAttribute.class );
    List<String> tokenList = new ArrayList<String>();
    while ( stream.incrementToken() ) {
      String s = new String( term.buffer(), 0, term.length() );
      tokenList.add( s );
    }
    return tokenList;
  }

View Full Code Here

    TokenStream stream = analyzer.tokenStream( field, new StringReader( text ) );
    CharTermAttribute term = stream.addAttribute( CharTermAttribute.class );
    List<Token> tokenList = new ArrayList<Token>();
    while ( stream.incrementToken() ) {
      Token token = new Token();
      token.copyBuffer( term.buffer(), 0, term.length() );
      tokenList.add( token );
    }


    return tokenList.toArray( new Token[tokenList.size()] );
  }

View Full Code Here

    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    StringTuple document = new StringTuple();
    while (stream.incrementToken()) {
      if (termAtt.length() > 0) {
        document.add(new String(termAtt.buffer(), 0, termAtt.length()));
      }
    }
    stream.end();
    Closeables.close(stream, true);
    context.write(key, document);

View Full Code Here

    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    StringTuple document = new StringTuple();
    stream.reset();
    while (stream.incrementToken()) {
      if (termAtt.length() > 0) {
        document.add(new String(termAtt.buffer(), 0, termAtt.length()));
      }
    }
    context.write(key, document);
  }

View Full Code Here

      StringBuilder contents = new StringBuilder(1000);
      TokenStream stream = analyzer.reusableTokenStream(catMatch, new StringReader(document));
      CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      while (stream.incrementToken()) {
        contents.append(termAtt.buffer(), 0, termAtt.length()).append(' ');
      }
      context.write(
          new Text(SPACE_NON_ALPHA_PATTERN.matcher(catMatch).replaceAll("_")),
          new Text(contents.toString()));
    }

View Full Code Here

        int end = reuse.offset + reuse.length;
        if (reuse.length > 0) {
          reuse.chars[end++] = SynonymMap.WORD_SEPARATOR;
          reuse.length++;
        }
        System.arraycopy(termAtt.buffer(), 0, reuse.chars, end, length);
        reuse.length += length;
      }
      ts.end();
      ts.close();
      if (reuse.length == 0) {

View Full Code Here

        Analyzer a = new HebrewQueryLightAnalyzer();
        TokenStream ts = a.tokenStream("foo", word);
        ts.reset();
        while (ts.incrementToken()) {
            CharTermAttribute cta = ts.getAttribute(CharTermAttribute.class);
            ret.add(new String(cta.buffer(), 0, cta.length()));
        }
        ts.close();
        a.close();
        return ret;
    }

View Full Code Here

                builder.startArray("lemmas");
                Analyzer a = new HebrewQueryLightAnalyzer();
                TokenStream ts = a.tokenStream("foo", word);
                while (ts.incrementToken()) {
                    CharTermAttribute cta = ts.getAttribute(CharTermAttribute.class);
                    builder.value(new String(cta.buffer(), 0, cta.length()));
                }
                a.close();
                builder.endArray();
            }
            builder.endObject();

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.