Package org.apache.lucene.analysis.tokenattributes

Examples of org.apache.lucene.analysis.tokenattributes.CharTermAttribute
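
CharTermAttribute exposes the text of the current token as a mutable character buffer (buffer() plus length()), and is the attribute most analysis code reads after each successful TokenStream.incrementToken() call. Before the per-project snippets below, here is a minimal, self-contained sketch of the usual consume loop: reset the stream, iterate, then end and close it. It assumes a 5.x-era Lucene where StandardAnalyzer no longer takes a Version argument; the field name "body" and the sample text are placeholders.

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class CharTermAttributeExample {

  public static void main(String[] args) throws IOException {
    Analyzer analyzer = new StandardAnalyzer();
    List<String> terms = new ArrayList<String>();

    TokenStream stream = analyzer.tokenStream("body", new StringReader("Examples of CharTermAttribute"));
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    try {
      stream.reset();                    // mandatory before the first incrementToken()
      while (stream.incrementToken()) {
        terms.add(termAtt.toString());   // toString() copies the current buffer contents
      }
      stream.end();                      // records final offset/position state
    } finally {
      stream.close();                    // releases the stream so the analyzer can reuse it
    }

    System.out.println(terms);           // e.g. [examples, chartermattribute]
  }
}

The snippets that follow use the same pattern, some of them against older APIs such as Analyzer.reusableTokenStream().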


  public static final Log log = LoggerFactory.make();

  public static List<String> tokenizedTermValues(Analyzer analyzer, String field, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream( field, new StringReader( text ) );
    CharTermAttribute term = stream.addAttribute( CharTermAttribute.class );
    List<String> tokenList = new ArrayList<String>();
    stream.reset(); // the stream must be reset before the first incrementToken() call
    while ( stream.incrementToken() ) {
      // copy the current term text out of the attribute's char buffer
      String s = new String( term.buffer(), 0, term.length() );
      tokenList.add( s );
    }
    stream.end();
    stream.close();
    return tokenList;
  }

  public static Token[] tokensFromAnalysis(Analyzer analyzer, String field, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream( field, new StringReader( text ) );
    CharTermAttribute term = stream.addAttribute( CharTermAttribute.class );
    List<Token> tokenList = new ArrayList<Token>();
    stream.reset(); // drive the full reset / incrementToken / end / close lifecycle
    while ( stream.incrementToken() ) {
      Token token = new Token();
      // copyBuffer() copies the attribute's characters into the standalone Token
      token.copyBuffer( term.buffer(), 0, term.length() );
      tokenList.add( token );
    }
    stream.end();
    stream.close();

    return tokenList.toArray( new Token[tokenList.size()] );
  }

      record.put( "positions", fullPositions );
      return record;
    }
    else if (attr instanceof CharTermAttributeImpl) {
      GenericRecord record = new GenericData.Record( protocol.getType( "CharTermAttribute" ) );
      CharTermAttribute charAttr = (CharTermAttribute) attr;
      record.put( "sequence", charAttr.toString() );
      return record;
    }
    else if (attr instanceof PayloadAttribute) {
      GenericRecord record = new GenericData.Record( protocol.getType( "PayloadAttribute" ) );
      PayloadAttribute payloadAttr = (PayloadAttribute) attr;

    final String sentence = phraseContext.getSentence();
    try {
      Reader reader = new StringReader( sentence );
      stream = queryContext.getQueryAnalyzer().reusableTokenStream( fieldName, reader);

      CharTermAttribute termAttribute = stream.addAttribute( CharTermAttribute.class );
      PositionIncrementAttribute positionAttribute = stream.addAttribute( PositionIncrementAttribute.class );

      stream.reset();
      int position = -1; //start at -1 since we apply at least one increment
      List<Term> termsAtSamePosition = null;
      while ( stream.incrementToken() ) {
        int positionIncrement = 1;
        if ( positionAttribute != null ) {
          positionIncrement = positionAttribute.getPositionIncrement();
        }

        if ( positionIncrement > 0 ) {
          position += positionIncrement;
          termsAtSamePosition = termsPerPosition.get( position );
        }

        if ( termsAtSamePosition == null ) {
          termsAtSamePosition = new ArrayList<Term>();
          termsPerPosition.put( position, termsAtSamePosition );
        }

        String termString = new String( termAttribute.buffer(), 0, termAttribute.length() );
        termsAtSamePosition.add( new Term( fieldName, termString ) );
        if ( termsAtSamePosition.size() > 1 ) {
          isMultiPhrase = true;
        }
      }
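
The loop above groups, for every token position, all terms the analyzer emitted at that position (a position increment of 0 signals an alternative such as a synonym), and flags isMultiPhrase once any position holds more than one term. The snippet stops before the query is actually built; below is a hedged sketch of that last step, using the pre-6.0 mutable PhraseQuery/MultiPhraseQuery API to match the reusableTokenStream() call above. The helper class and method are made up for illustration; only termsPerPosition and isMultiPhrase come from the snippet.

import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;

// Hypothetical helper: turns the per-position term lists collected above into a query.
final class PhraseQueryAssembler {

  static Query build(Map<Integer, List<Term>> termsPerPosition, boolean isMultiPhrase) {
    // iterate positions in ascending order
    Map<Integer, List<Term>> sorted = new TreeMap<Integer, List<Term>>( termsPerPosition );
    if ( isMultiPhrase ) {
      MultiPhraseQuery query = new MultiPhraseQuery();
      for ( Map.Entry<Integer, List<Term>> entry : sorted.entrySet() ) {
        List<Term> terms = entry.getValue();
        // all alternatives analyzed at this position match interchangeably
        query.add( terms.toArray( new Term[terms.size()] ), entry.getKey() );
      }
      return query;
    }
    PhraseQuery query = new PhraseQuery();
    for ( Map.Entry<Integer, List<Term>> entry : sorted.entrySet() ) {
      query.add( entry.getValue().get( 0 ), entry.getKey() );
    }
    return query;
  }
}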

          "Alternatively, apply the ignoreFieldBridge() option to " +
          "pass String parameters" );
    }
    Reader reader = new StringReader(localText);
    TokenStream stream = analyzer.reusableTokenStream( fieldName, reader);
    CharTermAttribute attribute = stream.addAttribute( CharTermAttribute.class );
    stream.reset();

    while ( stream.incrementToken() ) {
      if ( attribute.length() > 0 ) {
        String term = new String( attribute.buffer(), 0, attribute.length() );
        terms.add( term );
      }
    }
    stream.end();
    stream.close();

  public void testDefaults() throws IOException {
    assertNotNull(stop);
    TokenStream stream = stop.tokenStream("test", "This is a test of the english stop analyzer");
    try {
      assertNotNull(stream);
      CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
      stream.reset();
   
      while (stream.incrementToken()) {
        assertFalse(inValidTokens.contains(termAtt.toString()));
      }
      stream.end();
    } finally {
      IOUtils.closeWhileHandlingException(stream);
    }

    CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
    StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);
    TokenStream stream = newStop.tokenStream("test", "This is a good test of the english stop analyzer");
    try {
      assertNotNull(stream);
      CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
   
      stream.reset();
      while (stream.incrementToken()) {
        String text = termAtt.toString();
        assertFalse(stopWordsSet.contains(text));
      }
      stream.end();
    } finally {
      IOUtils.closeWhileHandlingException(stream);
