Package org.apache.lucene.analysis.tokenattributes

Examples of org.apache.lucene.analysis.tokenattributes.CharTermAttribute.buffer()


        try (TokenStream ts = analyzer.tokenStream("full", text)) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
            ts.reset();
            while(ts.incrementToken()) {
                SToken t=new SToken(term.buffer(),0,term.length(),offset.startOffset(),offset.endOffset());
                result.add(t);
            }
            ts.end();
        }
        return result.toArray(new SToken[result.size()]);
View Full Code Here


      // common case fast-path of first token not matching anything
      AttributeSource firstTok = nextTok();
      if (firstTok == null) return false;
      CharTermAttribute termAtt = firstTok.addAttribute(CharTermAttribute.class);
      SlowSynonymMap result = map.submap!=null ? map.submap.get(termAtt.buffer(), 0, termAtt.length()) : null;
      if (result == null) {
        copy(this, firstTok);
        return true;
      }
View Full Code Here

        // clone ourselves.
        if (tok == this)
          tok = cloneAttributes();
        // check for positionIncrement!=1?  if>1, should not match, if==0, check multiple at this level?
        CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
        SlowSynonymMap subMap = map.submap.get(termAtt.buffer(), 0, termAtt.length());

        if (subMap != null) {
          // recurse
          result = match(subMap);
        }
View Full Code Here

      @Override
      public int nextPosition() throws IOException {
        if (stream != null) {
          while (stream.incrementToken()) {
            for (int i = 0; i < matchers.length; i++) {
              if (matchers[i].run(charTermAtt.buffer(), 0, charTermAtt.length())) {
                currentStartOffset = offsetAtt.startOffset();
                currentEndOffset = offsetAtt.endOffset();
                currentMatch = i;
                return 0;
              }
View Full Code Here

          int end = reuse.offset + reuse.length;
          if (reuse.length > 0) {
            reuse.chars[end++] = SynonymMap.WORD_SEPARATOR;
            reuse.length++;
          }
          System.arraycopy(termAtt.buffer(), 0, reuse.chars, end, length);
          reuse.length += length;
        }
        ts.end();
      } catch (IOException e) {
        priorException = e;
View Full Code Here

      document = StringEscapeUtils.unescapeHtml(WikipediaDatasetCreatorMapper.CLOSE_TEXT_TAG_PATTERN.matcher(
          WikipediaDatasetCreatorMapper.OPEN_TEXT_TAG_PATTERN.matcher(document).replaceFirst("")).replaceAll(""));
      TokenStream stream = analyzer.tokenStream(catMatch, new StringReader(document));
      CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
      while (stream.incrementToken()) {
        contents.append(termAtt.buffer(), 0, termAtt.length()).append(' ');
      }
      context.write(
          new Text(WikipediaDatasetCreatorMapper.SPACE_NON_ALPHA_PATTERN.matcher(catMatch).replaceAll("_")),
          new Text(contents.toString()));
    }
View Full Code Here

    TokenStream stream = analyzer.tokenStream(key.toString(), new StringReader(value.toString()));
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    StringTuple document = new StringTuple();
    while (stream.incrementToken()) {
      if (termAtt.length() > 0) {
        document.add(new String(termAtt.buffer(), 0, termAtt.length()));
      }
    }
    context.write(key, document);
  }
View Full Code Here

    CharTermAttribute termAtt = nptf.getAttribute(CharTermAttribute.class);
    TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
    nptf.reset();
    while (nptf.incrementToken()) {
      assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.buffer()[0]))));
      assertTrue("nextToken.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null);
      String type = payloadAtt.getPayload().utf8ToString();
      assertTrue(type + " is not equal to " + typeAtt.type(), type.equals(typeAtt.type()));
      count++;
    }
View Full Code Here

    CharTermAttribute attribute = stream.addAttribute( CharTermAttribute.class );
    stream.reset();

    while ( stream.incrementToken() ) {
      if ( attribute.length() > 0 ) {
        String term = new String( attribute.buffer(), 0, attribute.length() );
        terms.add( term );
      }
    }
    stream.end();
    stream.close();
View Full Code Here

        if (termsAtSamePosition == null) {
          termsAtSamePosition = new ArrayList<Term>();
          termsPerPosition.put( position, termsAtSamePosition  );
        }

        String termString = new String( termAttribute.buffer(), 0, termAttribute.length() );
        termsAtSamePosition.add( new Term( fieldName, termString ) );
        if ( termsAtSamePosition.size() > 1 ) {
          isMultiPhrase = true;
        }
      }
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.