Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.TokenStream.addAttribute()


          String asString = writer.toString();

          // Analyze the call
          TokenStream stream = nestedAnalyzer.tokenStream(fieldName,
                  new StringReader(asString));
          TermAttribute termAtt = (TermAttribute) stream.addAttribute(TermAttribute.class);

          System.out.println("Tokens for '" + asString + "':");
          while (stream.incrementToken()) {
            System.out.println(" '" + termAtt.term() + "'");
          }
View Full Code Here


 
  protected Set<String> getHighlightWords(String searchString) {
    try {
      Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
      TokenStream stream = analyzer.tokenStream("content", new StringReader(searchString));
      TermAttribute termAtt = (TermAttribute) stream.addAttribute(TermAttribute.class);
      for (boolean next = stream.incrementToken(); next; next = stream.incrementToken()) {
        String term = termAtt.term();
        if(log.isDebug()) log.debug(term);
      }
    } catch (IOException e) {
View Full Code Here

      int wordsCount = 0;
     
      //collect token
      TokenStream ts = analyzer.tokenStream("", reader);
      ts.reset();
      TermAttribute termAtt = (TermAttribute) ts
          .addAttribute(TermAttribute.class);
      while (ts.incrementToken()) {
        if (printGate != null && printGate.filter(wordsCount)) {
          list.add(new CToken(termAtt.term(), wordsCount));
        }
View Full Code Here

        List<String> terms = new ArrayList<String>();
        try {
          boolean hasMoreTokens = false;
         
          stream.reset();
          TermAttribute termAtt = stream.addAttribute(TermAttribute.class);

          hasMoreTokens = stream.incrementToken();
          while (hasMoreTokens) {
            terms.add(termAtt.term());
            hasMoreTokens = stream.incrementToken();
View Full Code Here

    final String field = ( f == null) ? "contents" : f;
    if ( a == null) a = new StandardAnalyzer(Version.LUCENE_CURRENT);

    // [1] Parse query into separate words so that when we expand we can avoid dups
    TokenStream ts = a.tokenStream( field, new StringReader( query));
    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
   
    while (ts.incrementToken()) {
      String word = termAtt.term();
      if ( already.add( word))
        top.add( word);
View Full Code Here

    final Set<String> already = new HashSet<String>(); // avoid dups   
    List<String> top = new LinkedList<String>(); // needs to be separately listed..

    // [1] Parse query into separate words so that when we expand we can avoid dups
    TokenStream ts = a.tokenStream( field, new StringReader( query));
    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
   
    while (ts.incrementToken()) {
      String word = termAtt.term();
      if ( already.add( word))
        top.add( word);
View Full Code Here

              ("field must have either String or Reader value");

          int position = 0;
          // Tokenize field and add to postingTable
          TokenStream stream = analyzer.tokenStream(fieldName, reader);
          TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
          PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
         
          try {
            while (stream.incrementToken()) {
              position += (posIncrAtt.getPositionIncrement() - 1);
View Full Code Here

          int position = 0;
          // Tokenize field and add to postingTable
          TokenStream stream = analyzer.tokenStream(fieldName, reader);
          TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
          PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
         
          try {
            while (stream.incrementToken()) {
              position += (posIncrAtt.getPositionIncrement() - 1);
              position++;
View Full Code Here

    if (limit <= 0) limit = Integer.MAX_VALUE;
   
    // compute frequencies of distinct terms
    HashMap<String,MutableInteger> map = new HashMap<String,MutableInteger>();
    TokenStream stream = analyzer.tokenStream("", new StringReader(text));
    TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
    try {
      while (stream.incrementToken()) {
        MutableInteger freq = map.get(termAtt.term());
        if (freq == null) {
          freq = new MutableInteger(1);
View Full Code Here

    if (terms == null) {
      return;
    }
    TokenStream ts = analyzer.tokenStream(f.fieldName, f.queryString);
    try {
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);

      int corpusNumDocs = reader.numDocs();
      HashSet<String> processedTerms = new HashSet<String>();
      ts.reset();
      while (ts.incrementToken()) {
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.