Package org.apache.lucene.search.highlight

Examples of org.apache.lucene.search.highlight.Highlighter


        String content=doc.get("content");
       
        Scorer qs=new QueryScorer(q);
       
        SimpleHTMLFormatter formatter=new SimpleHTMLFormatter("<span class=\"hl\">","</span>");
        Highlighter hl=new Highlighter(formatter,qs);
        String[] fragments=hl.getBestFragments(analyzer, "content",content, 1);
       
        Map<String,String[]> fields=convert(doc);
        fields.put("fragment",fragments);
        hit.setFields(fields);
        hitList.add(hit);
View Full Code Here


        String content=doc.get("content");
       
        Scorer qs=new QueryScorer(q);
       
        SimpleHTMLFormatter formatter=new SimpleHTMLFormatter("<span class=\"hl\">","</span>");
        Highlighter hl=new Highlighter(formatter,qs);
        String[] fragments=hl.getBestFragments(analyzer, "content",content, 1);
       
        Map<String,String[]> fields=convert(doc);
        fields.put("fragment",fragments);
        hit.setFields(fields);
        hitList.add(hit);
View Full Code Here

  {
   //*-- when a query string is present, parse it and highlight the matching parts of text
   if (queryString.length() > 0)
   { Query query = qp.parse(queryString.toString());
     QueryScorer qScorer = new QueryScorer(query);
     //*-- matched terms are wrapped in a span with the "hlight" CSS class
     SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"hlight\">", "</span>");
     Highlighter highlighter = new Highlighter(formatter, qScorer);
     Fragmenter fragmenter = new SimpleFragmenter(80);        //*-- use fragments of size 80 each
     highlighter.setTextFragmenter(fragmenter);
     TokenStream tokenStream = sAnalyzer.tokenStream("contents", new StringReader(text));
     result = highlighter.getBestFragments(tokenStream, text, 3, "...<br>")//*-- collect up to three fragments, separated by "...<br>"
   }
  }
  catch (ParseException pe) { logger.error("Query parse error " + pe.getMessage() ); }

  //*-- if no tokens were extracted, then return the original string
View Full Code Here

     *     Then call getSnippet(...) to get the resulting snippet
     */   
    private void addSnippets (GroupedSearchResults res, String snippetOfField,
            int snippetLength, QueryScorer scorer, Formatter simpleHtmlFormatter) throws IOException {      

        Highlighter highlighter = new Highlighter(simpleHtmlFormatter, scorer);
        highlighter.setTextFragmenter(NULL_FRAGMENTER);
        highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE); // make sure the whole text will be analyzed
        // Here we store every phrase seen so far. It is used to give a lower
        // score to recurring phrases
        Set<String> usedSnippets= new HashSet<String>();

        for (int j = 0; j < res.groups() ; j++) {  // for each group
            Vector<Document> resDocs = res.getGroup(j).last();
            int docsLen= resDocs.size();
            for (int i = 0; i < docsLen; i++) { // for each document on that group              
                Document doc = resDocs.get(i); // get the document i
                String text = doc.get(snippetOfField)// text to be snippeted
                if (null == text){
                    logger.warn("Asked to snippet an unexisting field: " + snippetOfField );
                    continue;
                }

                TokenStream tokenStream = queryParser.tokenStream(snippetOfField, new StringReader(text));              
                TextFragment[] fragments = highlighter.getBestTextFragments(tokenStream, text, false, 1);

                String result= null;
                if ( null != fragments  && 0 < fragments.length) {
                    result= getSnippet(fragments[0].toString(), snippetLength, scorer, usedSnippets);
                }
View Full Code Here

                new CachingTokenFilter(new DomainSearchAnalyzer().tokenStream(field, new StringReader(text)));

      QueryScorer scorer = new QueryScorer(query, field, "");
      scorer.init(tokenStream);
     
            Highlighter highlighter =
                new Highlighter(new DomainSearchFormatter(), scorer);
            highlighter.setTextFragmenter(fragmenter);
            tokenStream.reset();

            try {
                return highlighter.getBestFragments(tokenStream, text, 2, " ... ");

            } catch (InvalidTokenOffsetsException e) {
                // could not create fragments, return empty string
            }
View Full Code Here

        if (shouldNumberBeHighlighted(name, numericValue, fieldFixedQuery)) {
          String numberHighlight = preTag + text + postTag;
          result.add(new StringField(name, numberHighlight, Store.YES));
        }
      } else {
        Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(fieldFixedQuery, name));
        TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, docId, name, analyzer);
        TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
        for (int j = 0; j < frag.length; j++) {
          if ((frag[j] != null) && (frag[j].getScore() > 0)) {
            result.add(new StringField(name, frag[j].toString(), Store.YES));
          }
        }
View Full Code Here

    public Collection findPages( String query, int flags )
        throws ProviderException
    {
        IndexSearcher  searcher = null;
        ArrayList<SearchResult> list = null;
        Highlighter highlighter = null;

        try
        {
            String[] queryfields = { LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS };
            QueryParser qp = new MultiFieldQueryParser( Version.LUCENE_46, queryfields, getLuceneAnalyzer() );

            //QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() );
            Query luceneQuery = qp.parse( query );

            if( (flags & FLAG_CONTEXTS) != 0 )
            {
                highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"),
                                              new SimpleHTMLEncoder(),
                                              new QueryScorer(luceneQuery));
            }

            try
            {
                File dir = new File(m_luceneDirectory);
                Directory luceneDir = new SimpleFSDirectory(dir, null);
                IndexReader reader = DirectoryReader.open(luceneDir);
                searcher = new IndexSearcher(reader);
            }
            catch( Exception ex )
            {
                log.info("Lucene not yet ready; indexing not started",ex);
                return null;
            }

            ScoreDoc[] hits = searcher.search(luceneQuery, MAX_SEARCH_HITS).scoreDocs;

            list = new ArrayList<SearchResult>(hits.length);
            for ( int curr = 0; curr < hits.length; curr++ )
            {
                int docID = hits[curr].doc;
                Document doc = searcher.doc( docID );
                String pageName = doc.get(LUCENE_ID);
                WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION);

                if(page != null)
                {
                    if(page instanceof Attachment)
                    {
                        // Currently attachments don't look nice on the search-results page
                        // When the search-results are cleaned up this can be enabled again.
                    }

                    int score = (int)(hits[curr].score * 100);


                    // Get highlighted search contexts
                    String text = doc.get(LUCENE_PAGE_CONTENTS);

                    String[] fragments = new String[0];
                    if( text != null && highlighter != null )
                    {
                        TokenStream tokenStream = getLuceneAnalyzer()
                        .tokenStream(LUCENE_PAGE_CONTENTS, new StringReader(text));
                        fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS);

                    }

                    SearchResult result = new SearchResultImpl( page, score, fragments );    
                    list.add(result);
View Full Code Here

        String prefixPlaceholder = "$HIGHLIGHT_PREFIX$";
        String suffixPlaceholder = "$HIGHLIGHT_SUFFIX$";       
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(prefixPlaceholder, suffixPlaceholder);

        // create highlighter
        Highlighter highlighter = getwgacore().getLuceneManager().createHighlighter(name, query, formatter);
       
        // retrieve itemtext
        String originalText = itemTextValue(name, encode);       
        if (originalText == null) {
            return null;
        }
       
        // create text to analyze
        LuceneConfiguration config = getwgacore().getLuceneManager().retrieveLuceneConfig(content().getDatabase().getDbReference());
        LuceneIndexItemRule rule = config.getMatchingItemRule(name);
        String analyzeText = rule.parseItemValue(originalText);

        // create tokenstream
        TokenStream tokenStream = getwgacore().getLuceneManager().createTokenStream(analyzeText, content());
       
        // create fragmenter and set fragment size to itemText.length to ensure that only one fragment containing the whole itemText is returned
        Fragmenter fragmenter = new SimpleFragmenter(originalText.length() + 1); // if analyzeText.length == originalText.length we might get two fragments from lucene without the +1 (possible lucene bug)
        highlighter.setTextFragmenter(fragmenter);
               
        try {
            String highlighted = highlighter.getBestFragment(tokenStream, originalText.toString());
            if (highlighted != null) {
              // replace highlight placeholders with correct prefix and suffix
              highlighted = WGUtils.strReplace(highlighted, prefixPlaceholder, prefix, true);
              highlighted = WGUtils.strReplace(highlighted, suffixPlaceholder, suffix, true);
View Full Code Here

        String prefixPlaceholder = "$HIGHLIGHT_PREFIX$";
        String suffixPlaceholder = "$HIGHLIGHT_SUFFIX$";
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(prefixPlaceholder, suffixPlaceholder);

        // create highlighter
        Highlighter highlighter = getwgacore().getLuceneManager().createHighlighter(name.toUpperCase(), query, formatter);
       
        // retrieve metatext
        String originalText = metaTextValue(name, encode);       
        if (originalText == null) {
            return metalist(name);
        }
       
        // create tokenstream
        TokenStream tokenStream = getwgacore().getLuceneManager().createTokenStream(originalText, content());
       
        // create fragmenter and set fragment size to metaText.length to ensure that only one fragment containing the whole metaText is returned
        Fragmenter fragmenter = new SimpleFragmenter(originalText.length() + 1); // +1 is necessary here
        highlighter.setTextFragmenter(fragmenter);
               
        try {
            String highlighted = highlighter.getBestFragment(tokenStream, originalText);
            if (highlighted != null) {

                // replace highlight placeholders with correct prefix and suffix
              highlighted = WGUtils.strReplace(highlighted, prefixPlaceholder, prefix, true);
              highlighted = WGUtils.strReplace(highlighted, suffixPlaceholder, suffix, true);
View Full Code Here

        String prefixPlaceholder = "$HIGHLIGHT_PREFIX$";
        String suffixPlaceholder = "$HIGHLIGHT_SUFFIX$";
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(prefixPlaceholder, suffixPlaceholder);

        // create highlighter
        Highlighter highlighter = getwgacore().getLuceneManager().createHighlighter(itemname, query, formatter);
       
        // retrieve itemtext
        String text = itemTextValue(itemname, encode);       
        if (text == null) {
            return Collections.EMPTY_LIST;
        }
       
        // remove html/xml from text
        // fragments should not contain html/xml bc. of design issues
        try {
            text = WGUtils.toPlainText(text, " ", false);
            // B000049EA
            // if the item value contains encoded html entities, these entities have been converted to their characters
            // so we should html-encode the text to be safe
            // text = WGUtils.encodeHTML(text); --> has side effects @see B00004BBE
        }
        catch (IOException e) {
            addwarning("Unable to highlight item '" + itemname + "' bc. of exception '" + e.getMessage() + "'.");
            return Collections.EMPTY_LIST;
        }
       
        // create tokenstream
        TokenStream tokenStream = getwgacore().getLuceneManager().createTokenStream(text, content());
               
        // create fragmenter
        Fragmenter fragmenter = new SimpleFragmenter(fragmentSize);
        highlighter.setTextFragmenter(fragmenter);
       
        try {
            String[] highlighted = highlighter.getBestFragments(tokenStream, text, maxFragments);           
            if (highlighted != null) {
                ArrayList list = new ArrayList();
                for (int i=0; i < highlighted.length; i++) {
                  // B00004BBE
                  // evtl. encode fragment
View Full Code Here

TOP

Related Classes of org.apache.lucene.search.highlight.Highlighter

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.