Examples of org.apache.lucene.analysis.tokenattributes.CharTermAttribute.buffer()

Package org.apache.lucene.analysis.tokenattributes

Class org.apache.lucene.analysis.tokenattributes.CharTermAttribute

Examples of org.apache.lucene.analysis.tokenattributes.CharTermAttribute.buffer()

org.apache.lucene.analysis.tokenattributes.CharTermAttribute.buffer()
Returns the internal termBuffer character array which you can then directly alter. If the array is too small for your token, use {@link #resizeBuffer(int)} to increase it. After altering the buffer, be sure to call {@link #setLength} to record the number of valid characters that were placed into the termBuffer.
NOTE: The returned buffer may be larger than the valid {@link #length()}.

          PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
          PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
          stream.reset();
          while (stream.incrementToken()) {
            Token token = new Token();
            token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
            token.setStartOffset(matcher.start());
            token.setEndOffset(matcher.end());
            token.setFlags(flagsAtt.getFlags());
            token.setType(typeAtt.type());
            token.setPayload(payloadAtt.getPayload());

View Full Code Here


      // common case fast-path of first token not matching anything
      AttributeSource firstTok = nextTok();
      if (firstTok == null) return false;
      CharTermAttribute termAtt = firstTok.addAttribute(CharTermAttribute.class);
      SynonymMap result = map.submap!=null ? map.submap.get(termAtt.buffer(), 0, termAtt.length()) : null;
      if (result == null) {
        copy(this, firstTok);
        return true;
      }

View Full Code Here

        // clone ourselves.
        if (tok == this)
          tok = cloneAttributes();
        // check for positionIncrement!=1?  if>1, should not match, if==0, check multiple at this level?
        CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
        SynonymMap subMap = map.submap.get(termAtt.buffer(), 0, termAtt.length());


        if (subMap != null) {
          // recurse
          result = match(subMap);
        }

View Full Code Here


            // common case fast-path of first token not matching anything
            AttributeSource firstTok = nextTok();
            if (firstTok == null) return false;
            CharTermAttribute termAtt = firstTok.addAttribute(CharTermAttribute.class);
            SynonymMap result = map.submap != null ? map.submap.get(termAtt.buffer(), 0, termAtt.length()) : null;
            if (result == null) {
                copy(this, firstTok);
                return true;
            }

View Full Code Here

                // clone ourselves.
                if (tok == this)
                    tok = cloneAttributes();
                // check for positionIncrement!=1?  if>1, should not match, if==0, check multiple at this level?
                CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
                SynonymMap subMap = map.submap.get(termAtt.buffer(), 0, termAtt.length());


                if (subMap != null) {
                    // recurse
                    result = match(subMap);
                }

View Full Code Here

      writer.write('\t'); // edit: Inorder to match Hadoop standard
      // TextInputFormat
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        char[] termBuffer = termAtt.buffer();
        int termLen = termAtt.length();
        writer.write(termBuffer, 0, termLen);
        writer.write(' ');
      }
    } finally {

View Full Code Here

    
    List<String> coll = Lists.newArrayList();
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      char[] termBuffer = termAtt.buffer();
      int termLen = termAtt.length();
      String val = new String(termBuffer, 0, termLen);
      coll.add(val);
    }
    return coll.toArray(new String[coll.size()]);

View Full Code Here

      StringBuilder contents = new StringBuilder(1000);
      TokenStream stream = analyzer.reusableTokenStream(catMatch, new StringReader(document));
      CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      while (stream.incrementToken()) {
        contents.append(termAtt.buffer(), 0, termAtt.length()).append(' ');
      }
      context.write(
          new Text(SPACE_NON_ALPHA_PATTERN.matcher(catMatch).replaceAll("_")),
          new Text(contents.toString()));
    }

View Full Code Here

        Analyzer a = new HebrewQueryLightAnalyzer();
        TokenStream ts = a.tokenStream("foo", word);
        ts.reset();
        while (ts.incrementToken()) {
            CharTermAttribute cta = ts.getAttribute(CharTermAttribute.class);
            ret.add(new String(cta.buffer(), 0, cta.length()));
        }
        ts.close();
        a.close();
        return ret;
    }

View Full Code Here

        int end = reuse.offset + reuse.length;
        if (reuse.length > 0) {
          reuse.chars[end++] = SynonymMap.WORD_SEPARATOR;
          reuse.length++;
        }
        System.arraycopy(termAtt.buffer(), 0, reuse.chars, end, length);
        reuse.length += length;
      }
      ts.end();
      ts.close();
      if (reuse.length == 0) {

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.