Package org.elasticsearch.search.highlight

Source Code of org.elasticsearch.search.highlight.PostingsHighlighter$MapperHighlighterEntry

/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.highlight;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoringRewrite;
import org.apache.lucene.search.TopTermsRewrite;
import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.postingshighlight.CustomPassageFormatter;
import org.apache.lucene.search.postingshighlight.CustomPostingsHighlighter;
import org.apache.lucene.search.postingshighlight.Snippet;
import org.apache.lucene.search.postingshighlight.WholeBreakIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.text.StringText;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;

public class PostingsHighlighter implements Highlighter {

    private static final String CACHE_KEY = "highlight-postings";

    @Override
    public String[] names() {
        return new String[]{"postings", "postings-highlighter"};
    }

    @Override
    public HighlightField highlight(HighlighterContext highlighterContext) {

        FieldMapper<?> fieldMapper = highlighterContext.mapper;
        SearchContextHighlight.Field field = highlighterContext.field;
        if (fieldMapper.fieldType().indexOptions() != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
            throw new ElasticsearchIllegalArgumentException("the field [" + highlighterContext.fieldName + "] should be indexed with positions and offsets in the postings list to be used with postings highlighter");
        }

        SearchContext context = highlighterContext.context;
        FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;

        if (!hitContext.cache().containsKey(CACHE_KEY)) {
            //get the non rewritten query and rewrite it
            Query query;
            try {
                query = rewrite(highlighterContext, hitContext.topLevelReader());
            } catch (IOException e) {
                throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
            }
            SortedSet<Term> queryTerms = extractTerms(query);
            hitContext.cache().put(CACHE_KEY, new HighlighterEntry(queryTerms));
        }

        HighlighterEntry highlighterEntry = (HighlighterEntry) hitContext.cache().get(CACHE_KEY);
        MapperHighlighterEntry mapperHighlighterEntry = highlighterEntry.mappers.get(fieldMapper);

        if (mapperHighlighterEntry == null) {
            Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
            CustomPassageFormatter passageFormatter = new CustomPassageFormatter(field.fieldOptions().preTags()[0], field.fieldOptions().postTags()[0], encoder);
            BytesRef[] filteredQueryTerms = filterTerms(highlighterEntry.queryTerms, fieldMapper.names().indexName(), field.fieldOptions().requireFieldMatch());
            mapperHighlighterEntry = new MapperHighlighterEntry(passageFormatter, filteredQueryTerms);
        }

        //we merge back multiple values into a single value using the paragraph separator, unless we have to highlight every single value separately (number_of_fragments=0).
        boolean mergeValues = field.fieldOptions().numberOfFragments() != 0;
        List<Snippet> snippets = new ArrayList<>();
        int numberOfFragments;

        try {
            //we manually load the field values (from source if needed)
            List<Object> textsToHighlight = HighlightUtils.loadFieldValues(field, fieldMapper, context, hitContext);
            CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(mapperHighlighterEntry.passageFormatter, textsToHighlight, mergeValues, Integer.MAX_VALUE-1, field.fieldOptions().noMatchSize());

             if (field.fieldOptions().numberOfFragments() == 0) {
                highlighter.setBreakIterator(new WholeBreakIterator());
                numberOfFragments = 1; //1 per value since we highlight per value
            } else {
                numberOfFragments = field.fieldOptions().numberOfFragments();
            }

            //we highlight every value separately calling the highlight method multiple times, only if we need to have back a snippet per value (whole value)
            int values = mergeValues ? 1 : textsToHighlight.size();
            for (int i = 0; i < values; i++) {
                Snippet[] fieldSnippets = highlighter.highlightDoc(fieldMapper.names().indexName(), mapperHighlighterEntry.filteredQueryTerms, hitContext.searcher(), hitContext.docId(), numberOfFragments);
                if (fieldSnippets != null) {
                    for (Snippet fieldSnippet : fieldSnippets) {
                        if (Strings.hasText(fieldSnippet.getText())) {
                            snippets.add(fieldSnippet);
                        }
                    }
                }
            }

        } catch(IOException e) {
            throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
        }

        snippets = filterSnippets(snippets, field.fieldOptions().numberOfFragments());

        if (field.fieldOptions().scoreOrdered()) {
            //let's sort the snippets by score if needed
            CollectionUtil.introSort(snippets, new Comparator<Snippet>() {
                public int compare(Snippet o1, Snippet o2) {
                    return (int) Math.signum(o2.getScore() - o1.getScore());
                }
            });
        }

        String[] fragments = new String[snippets.size()];
        for (int i = 0; i < fragments.length; i++) {
            fragments[i] = snippets.get(i).getText();
        }

        if (fragments.length > 0) {
            return new HighlightField(highlighterContext.fieldName, StringText.convertFromStringArray(fragments));
        }

        return null;
    }

    private static Query rewrite(HighlighterContext highlighterContext, IndexReader reader) throws IOException {

        Query original = highlighterContext.query.originalQuery();

        //we walk the query tree and when we encounter multi term queries we need to make sure the rewrite method
        //supports multi term extraction. If not we temporarily override it (and restore it after the rewrite).
        List<Tuple<MultiTermQuery, MultiTermQuery.RewriteMethod>> modifiedMultiTermQueries = Lists.newArrayList();
        overrideMultiTermRewriteMethod(original, modifiedMultiTermQueries);

        //rewrite is expensive: if the query was already rewritten we try not to rewrite it again
        if (highlighterContext.query.queryRewritten() && modifiedMultiTermQueries.size() == 0) {
            //return the already rewritten query
            return highlighterContext.query.query();
        }

        Query query = original;
        for (Query rewrittenQuery = query.rewrite(reader); rewrittenQuery != query;
             rewrittenQuery = query.rewrite(reader)) {
            query = rewrittenQuery;
        }

        //set back the original rewrite method after the rewrite is done
        for (Tuple<MultiTermQuery, MultiTermQuery.RewriteMethod> modifiedMultiTermQuery : modifiedMultiTermQueries) {
            modifiedMultiTermQuery.v1().setRewriteMethod(modifiedMultiTermQuery.v2());
        }

        return query;
    }

    private static void overrideMultiTermRewriteMethod(Query query, List<Tuple<MultiTermQuery, MultiTermQuery.RewriteMethod>> modifiedMultiTermQueries) {

        if (query instanceof  MultiTermQuery) {
            MultiTermQuery originalMultiTermQuery = (MultiTermQuery) query;
            if (!allowsForTermExtraction(originalMultiTermQuery.getRewriteMethod())) {
                MultiTermQuery.RewriteMethod originalRewriteMethod = originalMultiTermQuery.getRewriteMethod();
                originalMultiTermQuery.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50));
                //we need to rewrite anyway if it is a multi term query which was rewritten with the wrong rewrite method
                modifiedMultiTermQueries.add(Tuple.tuple(originalMultiTermQuery, originalRewriteMethod));
            }
        }

        if (query instanceof BooleanQuery) {
            BooleanQuery booleanQuery = (BooleanQuery) query;
            for (BooleanClause booleanClause : booleanQuery) {
                overrideMultiTermRewriteMethod(booleanClause.getQuery(), modifiedMultiTermQueries);
            }
        }

        if (query instanceof FilteredQuery) {
            overrideMultiTermRewriteMethod(((FilteredQuery) query).getQuery(), modifiedMultiTermQueries);
        }

        if (query instanceof ConstantScoreQuery) {
            overrideMultiTermRewriteMethod(((ConstantScoreQuery) query).getQuery(), modifiedMultiTermQueries);
        }
    }

    private static boolean allowsForTermExtraction(MultiTermQuery.RewriteMethod rewriteMethod) {
        return rewriteMethod instanceof TopTermsRewrite || rewriteMethod instanceof ScoringRewrite;
    }

    private static SortedSet<Term> extractTerms(Query query) {
        SortedSet<Term> queryTerms = new TreeSet<>();
        query.extractTerms(queryTerms);
        return queryTerms;
    }

    private static BytesRef[] filterTerms(SortedSet<Term> queryTerms, String field, boolean requireFieldMatch) {
        SortedSet<Term> fieldTerms;
        if (requireFieldMatch) {
            Term floor = new Term(field, "");
            Term ceiling = new Term(field, UnicodeUtil.BIG_TERM);
            fieldTerms = queryTerms.subSet(floor, ceiling);
        } else {
            fieldTerms = queryTerms;
        }

        BytesRef terms[] = new BytesRef[fieldTerms.size()];
        int termUpto = 0;
        for(Term term : fieldTerms) {
            terms[termUpto++] = term.bytes();
        }

        return terms;
    }

    private static List<Snippet> filterSnippets(List<Snippet> snippets, int numberOfFragments) {

        //We need to filter the snippets as due to no_match_size we could have
        //either highlighted snippets together non highlighted ones
        //We don't want to mix those up
        List<Snippet> filteredSnippets = new ArrayList<>(snippets.size());
        for (Snippet snippet : snippets) {
            if (snippet.isHighlighted()) {
                filteredSnippets.add(snippet);
            }
        }

        //if there's at least one highlighted snippet, we return all the highlighted ones
        //otherwise we return the first non highlighted one if available
        if (filteredSnippets.size() == 0) {
            if (snippets.size() > 0) {
                Snippet snippet = snippets.get(0);
                //if we did discrete per value highlighting using whole break iterator (as number_of_fragments was 0)
                //we need to obtain the first sentence of the first value
                if (numberOfFragments == 0) {
                    BreakIterator bi = BreakIterator.getSentenceInstance(Locale.ROOT);
                    String text = snippet.getText();
                    bi.setText(text);
                    int next = bi.next();
                    if (next != BreakIterator.DONE) {
                        String newText = text.substring(0, next).trim();
                        snippet = new Snippet(newText, snippet.getScore(), snippet.isHighlighted());
                    }
                }
                filteredSnippets.add(snippet);
            }
        }

        return filteredSnippets;
    }

    private static class HighlighterEntry {
        final SortedSet<Term> queryTerms;
        Map<FieldMapper<?>, MapperHighlighterEntry> mappers = Maps.newHashMap();

        private HighlighterEntry(SortedSet<Term> queryTerms) {
            this.queryTerms = queryTerms;
        }
    }

    private static class MapperHighlighterEntry {
        final CustomPassageFormatter passageFormatter;
        final BytesRef[] filteredQueryTerms;

        private MapperHighlighterEntry(CustomPassageFormatter passageFormatter, BytesRef[] filteredQueryTerms) {
            this.passageFormatter = passageFormatter;
            this.filteredQueryTerms = filteredQueryTerms;
        }
    }
}
TOP

Related Classes of org.elasticsearch.search.highlight.PostingsHighlighter$MapperHighlighterEntry

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.