Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.Token.termText()


          //
          // Now grab the hit-element, if present
          //
          Token t = tokens[j];
          if (highlight.contains(t.termText())) {
            excerpt.addToken(t.termText());
            excerpt.add(new Fragment(text.substring(offset, t.startOffset())));
            excerpt.add(new Highlight(text.substring(t.startOffset(),t.endOffset())));
            offset = t.endOffset();
            endToken = Math.min(j + sumContext, tokens.length);
          }
View Full Code Here


      String line = in.readLine();
      Tokenizer tokenizer = new NutchDocumentTokenizer(new StringReader(line));
      Token token;
      System.out.print("Tokens: ");
      while ((token = tokenizer.next()) != null) {
        System.out.print(token.termText());
        System.out.print(" ");
      }
      System.out.println();
    }
  }
View Full Code Here

          if (LOG.isWarnEnabled()) {
            LOG.warn("Line does not contain a field name: " + line);
          }
          continue;
        }
        String field = token.termText();
        token = ts.next();
        if (token == null) {
          if (LOG.isWarnEnabled()) {
            LOG.warn("Line contains only a field name, no word: " + line);
          }
View Full Code Here

          if (LOG.isWarnEnabled()) {
            LOG.warn("Line contains only a field name, no word: " + line);
          }
          continue;
        }
        String gram = token.termText();
        while ((token = ts.next()) != null) {
          gram = gram + SEPARATOR + token.termText();
        }
        HashSet table = (HashSet)commonTerms.get(field);
        if (table == null) {
View Full Code Here

          }
          continue;
        }
        String gram = token.termText();
        while ((token = ts.next()) != null) {
          gram = gram + SEPARATOR + token.termText();
        }
        HashSet table = (HashSet)commonTerms.get(field);
        if (table == null) {
          table = new HashSet();
          commonTerms.put(field, table);
View Full Code Here

      while ((token = ts.next()) != null) {
        if (token.getPositionIncrement() != 0 && prev != null)
          result.add(prev.termText());
        prev = token;
        position += token.getPositionIncrement();
        if ((position + arity(token.termText())) == phrase.getTerms().length)
          break;
      }
    } catch (IOException e) {
      throw new RuntimeException(e.toString());
    }
View Full Code Here

                // of statement will change
                StringBuffer sb = new StringBuffer(statement);
                for (int i = suggestions.length - 1; i >= 0; i--) {
                    Token t = (Token) tokens.get(i);
                    // only replace if word actually changed
                    if (!t.termText().equalsIgnoreCase(suggestions[i])) {
                        sb.replace(t.startOffset(), t.endOffset(), suggestions[i]);
                    }
                }
                return sb.toString();
            } else {
View Full Code Here

            try {
                Token t;
                while ((t = ts.next()) != null) {
                    String origWord = statement.substring(t.startOffset(), t.endOffset());
                    if (t.getPositionIncrement() > 0) {
                        words.add(t.termText());
                        tokens.add(t);
                    } else {
                        // very simple implementation: use termText with length
                        // closer to original word
                        Token current = (Token) tokens.get(tokens.size() - 1);
View Full Code Here

                        tokens.add(t);
                    } else {
                        // very simple implementation: use termText with length
                        // closer to original word
                        Token current = (Token) tokens.get(tokens.size() - 1);
                        if (Math.abs(origWord.length() - current.termText().length()) > Math.abs(origWord.length()
                                - t.termText().length())) {
                            // replace current token and word
                            words.set(words.size() - 1, t.termText());
                            tokens.set(tokens.size() - 1, t);
                        }
View Full Code Here

        while ((j < endToken) && (j - startToken < SUM_LENGTH)) {
          //
          // Now grab the hit-element, if present
          //
          Token t = tokens[j];
          if (highlight.contains(t.termText())) {
            excerpt.addToken(t.termText());
            excerpt.add(new Fragment(text.substring(offset, t.startOffset())));
            excerpt.add(new Highlight(text.substring(t.startOffset(),t.endOffset())));
            offset = t.endOffset();
            endToken = Math.min(j+SUM_CONTEXT, tokens.length);
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.