Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.TokenStream
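Most of the fragments below follow the same pattern: ask an Analyzer for a TokenStream over some text, then consume the stream token by token. As a point of reference, here is a minimal self-contained sketch against the Lucene 2.9/3.0-era API these examples use; the field name and sample text are arbitrary:

  import java.io.IOException;
  import java.io.StringReader;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.analysis.tokenattributes.TermAttribute;

  public class TokenStreamDemo
  {
    public static void main(String[] args) throws IOException
    {
      // The analyzer produces the TokenStream; attributes expose the token data
      TokenStream stream = new StandardAnalyzer()
          .tokenStream("contents", new StringReader("The quick brown fox"));
      TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
      while (stream.incrementToken())
        System.out.println(termAtt.term());
      stream.close();
    }
  }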


      if (mHighlighter != null)
      {
        try
        {
          TokenStream tokenStream = new StandardAnalyzer().tokenStream("message", new StringReader(encoded_message));
          String highlighted = mHighlighter.getBestFragments(tokenStream, encoded_message, 25, "...");

          if (!highlighted.equals(""))
          {
            encoded_message = highlighted;


          String encoded_message = encodeHtml(message_buf.toString());

          if (highlighter != null)
          {
            TokenStream tokenStream = new StandardAnalyzer().tokenStream("message", new StringReader(encoded_message));
            String highlighted = highlighter.getBestFragments(tokenStream, encoded_message, 25, "...");

            if (!highlighted.equals(""))
            {
              encoded_message = highlighted;
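Both fragments above assume a Highlighter built elsewhere. For context, a minimal sketch of how such a highlighter is commonly constructed; the query string, field name, and method wrapper here are placeholders, not part of the original code:

  import java.io.StringReader;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.queryParser.QueryParser;
  import org.apache.lucene.search.Query;
  import org.apache.lucene.search.highlight.Highlighter;
  import org.apache.lucene.search.highlight.QueryScorer;

  public static String highlight(String encodedMessage) throws Exception
  {
    // The scorer ranks fragments by how well they match the query
    Query query = new QueryParser("message", new StandardAnalyzer()).parse("lucene");
    Highlighter highlighter = new Highlighter(new QueryScorer(query));
    TokenStream tokenStream = new StandardAnalyzer()
        .tokenStream("message", new StringReader(encodedMessage));
    // Up to 25 fragments joined with "...", as in the snippets above
    return highlighter.getBestFragments(tokenStream, encodedMessage, 25, "...");
  }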

          StringWriter writer = new java.io.StringWriter();
          pipe(reader, writer);
          String asString = writer.toString();

          // Analyze the text once, printing each token for debugging
          TokenStream stream = nestedAnalyzer.tokenStream(fieldName,
                  new StringReader(asString));
          TermAttribute termAtt = stream.addAttribute(TermAttribute.class);

          System.out.println("Tokens for '" + asString + "':");
          while (stream.incrementToken()) {
            System.out.println(" '" + termAtt.term() + "'");
          }
          stream.close();

          // The stream above is now consumed and reset() does not rewind it,
          // so run the analysis a second time and return that fresh stream.
          return nestedAnalyzer.tokenStream(fieldName, new StringReader(asString));
        } catch (IOException exc) {

        String resHighlSummary = null;
        // Escape '<' and '>'; this works the same way as PageResponse.printNoHTML()
        text = RegainToolkit.replace(text, "<", "&lt;");
        text = RegainToolkit.replace(text, ">", "&gt;");

        TokenStream tokenStream = mAnalyzer.tokenStream("content",
                new StringReader(text));
        // Get the 3 best fragments and separate them with " ... "
        resHighlSummary = highlighter.getBestFragments(tokenStream, text, 3, " ... ");

        if (resHighlSummary != null) {
          //System.out.println("Highlighted summary: " + resHighlSummary);
          // Write the result back to the document: index the summary in a
          // new field and additionally store it in compressed binary form
          document.add(new Field("highlightedSummary", resHighlSummary, Field.Store.NO, Field.Index.NOT_ANALYZED));
          document.add(new Field("highlightedSummary", CompressionTools.compressString(resHighlSummary), Field.Store.YES));
        }
      }
      // Highlight the title
      text = document.get("title");
      String resHighlTitle = null;
      if (text != null) {
        TokenStream tokenStream = mAnalyzer.tokenStream("content",
                new StringReader(text));
        // Get the best fragment
        resHighlTitle = highlighter.getBestFragment(tokenStream, text);
      }
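At search time the compressed summary can be read back by reversing the steps above; a sketch, assuming the field was stored as in the snippet (the helper method is made up for illustration):

  import java.util.zip.DataFormatException;
  import org.apache.lucene.document.CompressionTools;
  import org.apache.lucene.document.Document;

  static String readSummary(Document document) throws DataFormatException
  {
    // getBinaryValue returns the stored bytes, or null if the field is absent
    byte[] compressed = document.getBinaryValue("highlightedSummary");
    return (compressed != null) ? CompressionTools.decompressString(compressed) : null;
  }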

  /**
   * Use the passed analyzer to get a list of tokens from the text
   */
  private static Token[] tokensFromAnalysis(Analyzer analyzer, String text) throws IOException
  {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    ArrayList<Token> tokenList = new ArrayList<Token>();
    Token token = null;
    while ( (token = stream.next()) != null) tokenList.add(token);
    return tokenList.toArray(new Token[tokenList.size()]);
  }
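A possible call site for the helper above; the analyzer choice and sample text are illustrative, and termText() is the same deprecated accessor the other snippets on this page use:

  Token[] tokens = tokensFromAnalysis(new StandardAnalyzer(), "a TokenStream usage example");
  for (Token t : tokens)
    System.out.println(t.termText());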

  synBuffer.append( (adjectives.size() > 0) ? wnetTools.getSynonyms(adjectives.get(0), "a"):"");
  String[] synonyms = synBuffer.toString().trim().split(" ");
*/
  //*-- tokenize the question
  StandardBgramAnalyzer analyzer = new StandardBgramAnalyzer();
  analyzer.setExtractEntities(true);
  TokenStream stream = analyzer.tokenStream("contents", new StringReader(question));
  ArrayList<Token> tokenList = new ArrayList<Token>();
  Token token = null;
  entities = new ArrayList<String>();    //*-- list of entities in the question
  while ( (token = stream.next()) != null)
  { tokenList.add(token);
    if (token.type().equals("<ENTITY>")) entities.add(token.termText());
  }
  //*-------------------------------------------------------------------
  //*-- build the query with the five components
  //*--
 
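The same entity scan can also be written with the attribute-based API introduced in Lucene 2.9, which avoids the deprecated Token-returning next() loop; a sketch, assuming the same analyzer and entities list as above:

  TokenStream stream = analyzer.tokenStream("contents", new StringReader(question));
  TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
  TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
  while (stream.incrementToken())
  {
    // The token type carries the entity marker set by the filter chain
    if (typeAtt.type().equals("<ENTITY>")) entities.add(termAtt.term());
  }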

   { int addChars = sentExtractor.getMaxCharsPerSentence() - sentence.length();
     int beginIndex = (prevSentence.length() > addChars) ? prevSentence.length() - addChars: 0;
     sentence = prevSentence.substring(beginIndex, prevSentence.length()) + sentence;
   }
   //*-- build a list of tokens from the sentence
   TokenStream stream = analyzer.tokenStream("contents", new StringReader(sentence));
   ArrayList<Token> tokenList = new ArrayList<Token>();
   Token token = null;
   while ( (token = stream.next()) != null) tokenList.add(token);
  
   //*-- initialize the boolean arrays and scores
   boolean[] foundWords = new boolean[tokenList.size()];
   for (int i = 0; i < nouns.length; i++) foundNouns[i] = false;
   for (int i = 0; i < verbs.length; i++) foundVerbs[i] = false;


    /**
     * Use the passed analyzer to get a list of tokens from the text
     */
    private static Token[] tokensFromAnalysis(Analyzer analyzer, String text)
    {
      TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
      ArrayList<Token> tokenList = new ArrayList<Token>();
      Token token = null;
      try { while ( (token = stream.next()) != null) tokenList.add(token); }
      catch (IOException ie) { System.err.println("Tokenizer problem: " + ie.getMessage()); }
      return tokenList.toArray(new Token[tokenList.size()]);
    }

public StandardBgramAnalyzer() { }

public TokenStream tokenStream (String fieldName, Reader reader)
{
   // Build the chain from the inside out: tokenize, normalize,
   // lowercase, add bigrams, and optionally tag entities
   TokenStream ts = new StandardFilter( new StandardTokenizer(reader) );
   ts = new LowerCaseFilter(ts);
   ts = new BgramFilter(ts);
   if (extractEntities) ts = new EntFilter(ts);
   return (ts);
}
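BgramFilter and EntFilter are project-specific classes; each is a TokenFilter wrapping the stream below it. A minimal pass-through filter, sketched against the same API, shows the decorator pattern this chain relies on (the class name is made up):

  import java.io.IOException;
  import org.apache.lucene.analysis.TokenFilter;
  import org.apache.lucene.analysis.TokenStream;

  public class PassThroughFilter extends TokenFilter
  {
    public PassThroughFilter(TokenStream input)
    {
      super(input);
    }

    public boolean incrementToken() throws IOException
    {
      // Forward each token unchanged; a real filter would inspect or
      // rewrite the attributes here before returning
      return input.incrementToken();
    }
  }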
