/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.highlight;
import com.google.common.collect.Maps;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.text.StringText;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.internal.AnalyzerMapper;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
/**
*
*/
public class PlainHighlighter implements Highlighter {
private static final String CACHE_KEY = "highlight-plain";
@Override
public String[] names() {
return new String[] { "plain", "highlighter" };
}
public HighlightField highlight(HighlighterContext highlighterContext) {
SearchContextHighlight.Field field = highlighterContext.field;
SearchContext context = highlighterContext.context;
FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
FieldMapper<?> mapper = highlighterContext.mapper;
Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
if (!hitContext.cache().containsKey(CACHE_KEY)) {
Map<FieldMapper<?>, org.apache.lucene.search.highlight.Highlighter> mappers = Maps.newHashMap();
hitContext.cache().put(CACHE_KEY, mappers);
}
@SuppressWarnings("unchecked")
Map<FieldMapper<?>, org.apache.lucene.search.highlight.Highlighter> cache = (Map<FieldMapper<?>, org.apache.lucene.search.highlight.Highlighter>) hitContext.cache().get(CACHE_KEY);
org.apache.lucene.search.highlight.Highlighter entry = cache.get(mapper);
if (entry == null) {
Query query = highlighterContext.query.originalQuery();
QueryScorer queryScorer = new CustomQueryScorer(query, field.fieldOptions().requireFieldMatch() ? mapper.names().indexName() : null);
queryScorer.setExpandMultiTermQuery(true);
Fragmenter fragmenter;
if (field.fieldOptions().numberOfFragments() == 0) {
fragmenter = new NullFragmenter();
} else if (field.fieldOptions().fragmenter() == null) {
fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize());
} else if ("simple".equals(field.fieldOptions().fragmenter())) {
fragmenter = new SimpleFragmenter(field.fieldOptions().fragmentCharSize());
} else if ("span".equals(field.fieldOptions().fragmenter())) {
fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize());
} else {
throw new ElasticsearchIllegalArgumentException("unknown fragmenter option [" + field.fieldOptions().fragmenter() + "] for the field [" + highlighterContext.fieldName + "]");
}
Formatter formatter = new SimpleHTMLFormatter(field.fieldOptions().preTags()[0], field.fieldOptions().postTags()[0]);
entry = new org.apache.lucene.search.highlight.Highlighter(formatter, encoder, queryScorer);
entry.setTextFragmenter(fragmenter);
// always highlight across all data
entry.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);
cache.put(mapper, entry);
}
// a HACK to make highlighter do highlighting, even though its using the single frag list builder
int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? 1 : field.fieldOptions().numberOfFragments();
ArrayList<TextFragment> fragsList = new ArrayList<>();
List<Object> textsToHighlight;
AnalyzerMapper analyzerMapper = context.mapperService().documentMapper(hitContext.hit().type()).analyzerMapper();
Analyzer analyzer = analyzerMapper.setAnalyzer(highlighterContext);
try {
textsToHighlight = HighlightUtils.loadFieldValues(field, mapper, context, hitContext);
for (Object textToHighlight : textsToHighlight) {
String text = textToHighlight.toString();
TokenStream tokenStream = analyzer.tokenStream(mapper.names().indexName(), text);
if (!tokenStream.hasAttribute(CharTermAttribute.class) || !tokenStream.hasAttribute(OffsetAttribute.class)) {
// can't perform highlighting if the stream has no terms (binary token stream) or no offsets
continue;
}
TextFragment[] bestTextFragments = entry.getBestTextFragments(tokenStream, text, false, numberOfFragments);
for (TextFragment bestTextFragment : bestTextFragments) {
if (bestTextFragment != null && bestTextFragment.getScore() > 0) {
fragsList.add(bestTextFragment);
}
}
}
} catch (Exception e) {
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
}
if (field.fieldOptions().scoreOrdered()) {
CollectionUtil.introSort(fragsList, new Comparator<TextFragment>() {
public int compare(TextFragment o1, TextFragment o2) {
return Math.round(o2.getScore() - o1.getScore());
}
});
}
String[] fragments;
// number_of_fragments is set to 0 but we have a multivalued field
if (field.fieldOptions().numberOfFragments() == 0 && textsToHighlight.size() > 1 && fragsList.size() > 0) {
fragments = new String[fragsList.size()];
for (int i = 0; i < fragsList.size(); i++) {
fragments[i] = fragsList.get(i).toString();
}
} else {
// refine numberOfFragments if needed
numberOfFragments = fragsList.size() < numberOfFragments ? fragsList.size() : numberOfFragments;
fragments = new String[numberOfFragments];
for (int i = 0; i < fragments.length; i++) {
fragments[i] = fragsList.get(i).toString();
}
}
if (fragments.length > 0) {
return new HighlightField(highlighterContext.fieldName, StringText.convertFromStringArray(fragments));
}
int noMatchSize = highlighterContext.field.fieldOptions().noMatchSize();
if (noMatchSize > 0 && textsToHighlight.size() > 0) {
// Pull an excerpt from the beginning of the string but make sure to split the string on a term boundary.
String fieldContents = textsToHighlight.get(0).toString();
int end;
try {
end = findGoodEndForNoHighlightExcerpt(noMatchSize, analyzer.tokenStream(mapper.names().indexName(), fieldContents));
} catch (Exception e) {
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
}
if (end > 0) {
return new HighlightField(highlighterContext.fieldName, new Text[] { new StringText(fieldContents.substring(0, end)) });
}
}
return null;
}
private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, TokenStream tokenStream) throws IOException {
try {
if (!tokenStream.hasAttribute(OffsetAttribute.class)) {
// Can't split on term boundaries without offsets
return -1;
}
int end = -1;
tokenStream.reset();
while (tokenStream.incrementToken()) {
OffsetAttribute attr = tokenStream.getAttribute(OffsetAttribute.class);
if (attr.endOffset() >= noMatchSize) {
// Jump to the end of this token if it wouldn't put us past the boundary
if (attr.endOffset() == noMatchSize) {
end = noMatchSize;
}
return end;
}
end = attr.endOffset();
}
// We've exhausted the token stream so we should just highlight everything.
return end;
} finally {
tokenStream.end();
tokenStream.close();
}
}
}