Package com.gentics.cr.lucene.search.highlight

Source Code of com.gentics.cr.lucene.search.highlight.PhraseBolder

package com.gentics.cr.lucene.search.highlight;

import java.io.IOException;
import java.io.StringReader;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenGroup;

import com.gentics.cr.configuration.GenericConfiguration;
import com.gentics.cr.lucene.indexer.index.LuceneAnalyzerFactory;
import com.gentics.cr.monitoring.MonitorFactory;
import com.gentics.cr.monitoring.UseCase;

/**
* PhraseBolder.
* Last changed: $Date: 2009-06-26 15:48:16 +0200 (Fr, 26 Jun 2009) $
* @version $Revision: 105 $
* @author $Author: supnig@constantinopel.at $
*
*/
public class PhraseBolder extends ContentHighlighter implements Formatter {
  /**
    * Log4j logger for error and debug messages.
    */
  private static final Logger LOGGER = Logger.getLogger(PhraseBolder.class);

  /**
   * Default max fragments.
   */
  private static final int DEFAULT_MAX_FRAGMENTS = 3;
  /**
   * Default fragment size.
   */
  private static final int DEFAULT_FRAGMENT_SIZE = 100;
  /**
   * used analyzer.
   */
  private Analyzer analyzer = null;

  /**
   * Create new Instance of PhraseBolder.
   * @param config configuration
   */
  public PhraseBolder(final GenericConfiguration config) {
    super(config);
    analyzer = LuceneAnalyzerFactory.createAnalyzer(config);
  }

  /**
   * Highlights Terms by enclosing them with <b>term</b>.
   * @param originalTermText
   * @param tokenGroup
   * @return highlightedterm
   */
  public final String highlightTerm(final String originalTermText, final TokenGroup tokenGroup) {
    UseCase uc = MonitorFactory.startUseCase("Highlight.PhraseBolder.highlightTerm()");
    if (tokenGroup.getTotalScore() <= 0) {
      uc.stop();
      return originalTermText;
    }
    uc.stop();
    return getHighlightPrefix() + originalTermText + getHighlightPostfix();

  }

  /**
   * @param attribute
   * @param parsedQuery
   * @return highlighted text
   *
   */
  public final String highlight(final String attribute, final Query parsedQuery) {
    UseCase uc = MonitorFactory.startUseCase("Highlight.PhraseBolder.highlight()");
    String result = "";
    if (attribute != null && parsedQuery != null) {
      Highlighter highlighter = new Highlighter(this, new QueryScorer(parsedQuery));
      highlighter.setTextFragmenter(new WordCountFragmenter(getFragmentSize()));

      TokenStream tokenStream = analyzer.tokenStream(this.getHighlightAttribute(), new StringReader(attribute));
      try {
        UseCase ucFragments = MonitorFactory.startUseCase("Highlight.PhraseBolder.highlight()#getFragments");
        TextFragment[] frags = highlighter
            .getBestTextFragments(tokenStream, attribute, true, getMaxFragments());
        ucFragments.stop();
        boolean first = true;
        int startPosition = -1;
        int endPosition = -1;
        for (TextFragment frag : frags) {
          String fragment = frag.toString();
          fragment = fragment.replaceAll(REMOVE_TEXT_FROM_FRAGMENT_REGEX, "");
          startPosition = attribute.indexOf(fragment);
          endPosition = startPosition + fragment.length();
          if (!first || (addSeperatorArroundAllFragments() && startPosition != 0)) {
            result += getFragmentSeperator();
          }
          result += fragment;
        }
        if (addSeperatorArroundAllFragments() && endPosition != attribute.length() && result.length() != 0) {
          result += getFragmentSeperator();
        }
      } catch (IOException e) {
        LOGGER.error("Error getting fragments from highlighter.", e);
      } catch (InvalidTokenOffsetsException e) {
        LOGGER.error("Error getting fragments from highlighter.", e);
      }
    }
    uc.stop();
    return result;
  }

  @Override
  protected final int getDefaultFragmentSize() {
    return DEFAULT_FRAGMENT_SIZE;
  }

  @Override
  protected final int getDefaultMaxFragments() {
    return DEFAULT_MAX_FRAGMENTS;
  }

}
TOP

Related Classes of com.gentics.cr.lucene.search.highlight.PhraseBolder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.