// Builds highlighted snippets for every document in the result set, keyed by the
// document's printable unique key, with one sub-list of snippets per highlighted field.
// NOTE(review): the enclosing method signature is above this chunk — presumably
// Solr's doHighlighting(docs, query, req, defaultFields); confirm against the full file.
SolrParams params = req.getParams();
// Fast exit when highlighting is not requested for this query.
if (!isHighlightingEnabled(params))
return null;
SolrIndexSearcher searcher = req.getSearcher();
IndexSchema schema = searcher.getSchema();
// Top-level response structure: uniqueKey -> (fieldName -> snippet array).
// NOTE(review): raw NamedList/SimpleOrderedMap types — generics would help, left as-is.
NamedList fragments = new SimpleOrderedMap();
String[] fieldNames = getHighlightFields(query, req, defaultFields);
Set<String> fset = new HashSet<String>();
{
// pre-fetch documents using the Searcher's doc cache
for(String f : fieldNames) { fset.add(f); }
// fetch unique key if one exists.
SchemaField keyField = schema.getUniqueKeyField();
if(null != keyField)
fset.add(keyField.getName());
}
// Highlight each document
DocIterator iterator = docs.iterator();
for (int i = 0; i < docs.size(); i++) {
int docId = iterator.nextDoc();
// Only load the fields we actually need (highlight fields + unique key).
Document doc = searcher.doc(docId, fset);
// Per-document accumulator: fieldName -> snippets for that field.
NamedList docSummaries = new SimpleOrderedMap();
for (String fieldName : fieldNames) {
fieldName = fieldName.trim();
String[] docTexts = doc.getValues(fieldName);
// Field absent from this document — nothing to highlight.
if (docTexts == null) continue;
TokenStream tstream = null;
int numFragments = getMaxSnippets(fieldName, params);
boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);
String[] summaries = null;
List<TextFragment> frags = new ArrayList<TextFragment>();
TermOffsetsTokenStream tots = null;
// One pass per stored value of a (possibly multi-valued) field.
// NOTE(review): tstream is only (re)assigned when tots == null, i.e. on the first
// value (or after a term-vector failure). On subsequent values the previous — and
// possibly already-consumed / CachingTokenFilter-wrapped — stream is reused.
// Presumably TermOffsetsTokenStream is designed to span values; verify upstream.
for (int j = 0; j < docTexts.length; j++) {
// create TokenStream
try {
// attempt term vectors
if( tots == null ) {
TokenStream tvStream = TokenSources.getTokenStream(searcher.getReader(), docId, fieldName);
if (tvStream != null) {
tots = new TermOffsetsTokenStream(tvStream);
tstream = tots.getMultiValuedTokenStream( docTexts[j].length() );
} else {
// fall back to analyzer
tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
}
}
}
catch (IllegalArgumentException e) {
// Term vectors exist but lack the positions/offsets the highlighter needs;
// fall back to analyzer
tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
}
Highlighter highlighter;
if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) {
// TODO: this is not always necessary - eventually we would like to avoid this wrap
// when it is not needed.
tstream = new CachingTokenFilter(tstream);
// get highlighter
highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tstream);
// after highlighter initialization, reset tstream since construction of highlighter already used it
tstream.reset();
}
else {
// use "the old way"
highlighter = getHighlighter(query, fieldName, req);
}
int maxCharsToAnalyze = params.getFieldInt(fieldName,
HighlightParams.MAX_CHARS,
Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);
// A negative hl.maxAnalyzedChars means "analyze the whole field value".
if (maxCharsToAnalyze < 0) {
highlighter.setMaxDocCharsToAnalyze(docTexts[j].length());
} else {
highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
}
try {
TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tstream, docTexts[j], mergeContiguousFragments, numFragments);
// Keep only fragments that actually matched (score > 0).
for (int k = 0; k < bestTextFragments.length; k++) {
if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) {
frags.add(bestTextFragments[k]);
}
}
} catch (InvalidTokenOffsetsException e) {
// Offsets from the token stream don't line up with the stored text — surface as 500.
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
}
}
// sort such that the fragments with the highest score come first
// NOTE(review): Math.round(b - a) treats scores that differ by less than 0.5 as
// equal, so ordering among close-scoring fragments is arbitrary. Float.compare(
// arg1.getScore(), arg0.getScore()) would be the correct comparator — TODO fix.
Collections.sort(frags, new Comparator<TextFragment>() {
public int compare(TextFragment arg0, TextFragment arg1) {
return Math.round(arg1.getScore() - arg0.getScore());
}
});
// convert fragments back into text
// TODO: we can include score and position information in output as snippet attributes
if (frags.size() > 0) {
ArrayList<String> fragTexts = new ArrayList<String>();
for (TextFragment fragment: frags) {
if ((fragment != null) && (fragment.getScore() > 0)) {
fragTexts.add(fragment.toString());
}
// Cap the output at hl.snippets fragments per field.
if (fragTexts.size() >= numFragments) break;
}
summaries = fragTexts.toArray(new String[0]);
if (summaries.length > 0)
docSummaries.add(fieldName, summaries);
}
// no summaries made, copy text from alternate field
if (summaries == null || summaries.length == 0) {
String alternateField = req.getParams().getFieldParam(fieldName, HighlightParams.ALTERNATE_FIELD);
if (alternateField != null && alternateField.length() > 0) {
String[] altTexts = doc.getValues(alternateField);
if (altTexts != null && altTexts.length > 0){
// hl.alternateFieldLength <= 0 (or unset) means "no truncation".
int alternateFieldLen = req.getParams().getFieldInt(fieldName, HighlightParams.ALTERNATE_FIELD_LENGTH,0);
if( alternateFieldLen <= 0 ){
docSummaries.add(fieldName, altTexts);
}
else{
// Concatenate alternate-field values until the length budget is spent,
// truncating the last value that crosses the limit.
List<String> altList = new ArrayList<String>();
int len = 0;
for( String altText: altTexts ){
// NOTE(review): new String(substring) looks like the pre-Java-7 trick to
// detach the copy from the large backing char[] — presumably intentional.
altList.add( len + altText.length() > alternateFieldLen ?
new String(altText.substring( 0, alternateFieldLen - len )) : altText );
len += altText.length();
if( len >= alternateFieldLen ) break;
}
docSummaries.add(fieldName, altList);
}
}
}
}
}
// Key the per-document summaries by the printable unique key (null if no key field).
String printId = schema.printableUniqueKey(doc);
fragments.add(printId == null ? null : printId, docSummaries);
}
return fragments;
}