Package org.apache.lucene.analysis.tokenattributes

Examples of org.apache.lucene.analysis.tokenattributes.CharTermAttribute
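
CharTermAttribute exposes the text of the current token as a mutable character buffer (buffer() plus length()), and is the attribute most analysis code reads after each successful TokenStream.incrementToken() call. Before the per-project snippets below, here is a minimal, self-contained sketch of the usual consume loop: reset the stream, iterate, then end and close it. It assumes a 5.x-era Lucene where StandardAnalyzer no longer takes a Version argument; the field name "body" and the sample text are placeholders.

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class CharTermAttributeExample {

  public static void main(String[] args) throws IOException {
    Analyzer analyzer = new StandardAnalyzer();
    List<String> terms = new ArrayList<String>();

    TokenStream stream = analyzer.tokenStream("body", new StringReader("Examples of CharTermAttribute"));
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    try {
      stream.reset();                    // mandatory before the first incrementToken()
      while (stream.incrementToken()) {
        terms.add(termAtt.toString());   // toString() copies the current buffer contents
      }
      stream.end();                      // records final offset/position state
    } finally {
      stream.close();                    // releases the stream so the analyzer can reuse it
    }

    System.out.println(terms);           // e.g. [examples, chartermattribute]
  }
}

The snippets that follow use the same pattern, some of them against older APIs such as Analyzer.reusableTokenStream().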


  public static final Log log = LoggerFactory.make();

  public static List<String> tokenizedTermValues(Analyzer analyzer, String field, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream( field, new StringReader( text ) );
    CharTermAttribute term = stream.addAttribute( CharTermAttribute.class );
    List<String> tokenList = new ArrayList<String>();
    stream.reset(); // the stream must be reset before the first incrementToken() call
    while ( stream.incrementToken() ) {
      // copy the current term text out of the attribute's char buffer
      String s = new String( term.buffer(), 0, term.length() );
      tokenList.add( s );
    }
    stream.end();
    stream.close();
    return tokenList;
  }

  public static Token[] tokensFromAnalysis(Analyzer analyzer, String field, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream( field, new StringReader( text ) );
    CharTermAttribute term = stream.addAttribute( CharTermAttribute.class );
    List<Token> tokenList = new ArrayList<Token>();
    stream.reset(); // drive the full reset / incrementToken / end / close lifecycle
    while ( stream.incrementToken() ) {
      Token token = new Token();
      // copyBuffer() copies the attribute's characters into the standalone Token
      token.copyBuffer( term.buffer(), 0, term.length() );
      tokenList.add( token );
    }
    stream.end();
    stream.close();

    return tokenList.toArray( new Token[tokenList.size()] );
  }

      record.put( "positions", fullPositions );
      return record;
    }
    else if (attr instanceof CharTermAttributeImpl) {
      GenericRecord record = new GenericData.Record( protocol.getType( "CharTermAttribute" ) );
      CharTermAttribute charAttr = (CharTermAttribute) attr;
      record.put( "sequence", charAttr.toString() );
      return record;
    }
    else if (attr instanceof PayloadAttribute) {
      GenericRecord record = new GenericData.Record( protocol.getType( "PayloadAttribute" ) );
      PayloadAttribute payloadAttr = (PayloadAttribute) attr;

    final String sentence = phraseContext.getSentence();
    try {
      Reader reader = new StringReader( sentence );
      stream = queryContext.getQueryAnalyzer().reusableTokenStream( fieldName, reader);

      CharTermAttribute termAttribute = stream.addAttribute( CharTermAttribute.class );
      PositionIncrementAttribute positionAttribute = stream.addAttribute( PositionIncrementAttribute.class );

      stream.reset();
      int position = -1; //start at -1 since we apply at least one increment
      List<Term> termsAtSamePosition = null;
      while ( stream.incrementToken() ) {
        int positionIncrement = 1;
        if ( positionAttribute != null ) {
          positionIncrement = positionAttribute.getPositionIncrement();
        }

        if ( positionIncrement > 0 ) {
          position += positionIncrement;
          termsAtSamePosition = termsPerPosition.get( position );
        }

        if ( termsAtSamePosition == null ) {
          termsAtSamePosition = new ArrayList<Term>();
          termsPerPosition.put( position, termsAtSamePosition );
        }

        String termString = new String( termAttribute.buffer(), 0, termAttribute.length() );
        termsAtSamePosition.add( new Term( fieldName, termString ) );
        if ( termsAtSamePosition.size() > 1 ) {
          isMultiPhrase = true;
        }
      }
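
The loop above groups, for every token position, all terms the analyzer emitted at that position (a position increment of 0 signals an alternative such as a synonym), and flags isMultiPhrase once any position holds more than one term. The snippet stops before the query is actually built; below is a hedged sketch of that last step, using the pre-6.0 mutable PhraseQuery/MultiPhraseQuery API to match the reusableTokenStream() call above. The helper class and method are made up for illustration; only termsPerPosition and isMultiPhrase come from the snippet.

import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;

// Hypothetical helper: turns the per-position term lists collected above into a query.
final class PhraseQueryAssembler {

  static Query build(Map<Integer, List<Term>> termsPerPosition, boolean isMultiPhrase) {
    // iterate positions in ascending order
    Map<Integer, List<Term>> sorted = new TreeMap<Integer, List<Term>>( termsPerPosition );
    if ( isMultiPhrase ) {
      MultiPhraseQuery query = new MultiPhraseQuery();
      for ( Map.Entry<Integer, List<Term>> entry : sorted.entrySet() ) {
        List<Term> terms = entry.getValue();
        // all alternatives analyzed at this position match interchangeably
        query.add( terms.toArray( new Term[terms.size()] ), entry.getKey() );
      }
      return query;
    }
    PhraseQuery query = new PhraseQuery();
    for ( Map.Entry<Integer, List<Term>> entry : sorted.entrySet() ) {
      query.add( entry.getValue().get( 0 ), entry.getKey() );
    }
    return query;
  }
}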

          "Alternatively, apply the ignoreFieldBridge() option to " +
          "pass String parameters" );
    }
    Reader reader = new StringReader(localText);
    TokenStream stream = analyzer.reusableTokenStream( fieldName, reader);
    CharTermAttribute attribute = stream.addAttribute( CharTermAttribute.class );
    stream.reset();

    while ( stream.incrementToken() ) {
      if ( attribute.length() > 0 ) {
        String term = new String( attribute.buffer(), 0, attribute.length() );
        terms.add( term );
      }
    }
    stream.end();
    stream.close();

  public void testDefaults() throws IOException {
    assertNotNull(stop);
    TokenStream stream = stop.tokenStream("test", "This is a test of the english stop analyzer");
    try {
      assertNotNull(stream);
      CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
      stream.reset();
   
      while (stream.incrementToken()) {
        assertFalse(inValidTokens.contains(termAtt.toString()));
      }
      stream.end();
    } finally {
      IOUtils.closeWhileHandlingException(stream);
    }

    CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
    StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);
    TokenStream stream = newStop.tokenStream("test", "This is a good test of the english stop analyzer");
    try {
      assertNotNull(stream);
      CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
   
      stream.reset();
      while (stream.incrementToken()) {
        String text = termAtt.toString();
        assertFalse(stopWordsSet.contains(text));
      }
      stream.end();
    } finally {
      IOUtils.closeWhileHandlingException(stream);
