Package org.elasticsearch.search.highlight

Source Code of org.elasticsearch.search.highlight.HighlighterParseElement

/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.highlight;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.lucene.search.vectorhighlight.SimpleBoundaryScanner;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.query.IndexQueryParserService;
import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.SearchParseException;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
import java.util.List;
import java.util.Set;

import static com.google.common.collect.Lists.newArrayList;

/**
* <pre>
* highlight : {
*  tags_schema : "styled",
*  pre_tags : ["tag1", "tag2"],
*  post_tags : ["tag1", "tag2"],
*  order : "score",
*  highlight_filter : true,
*  fields : {
*      field1 : {  },
*      field2 : { fragment_size : 100, number_of_fragments : 2 },
*      field3 : { number_of_fragments : 5, order : "simple", tags_schema : "styled" },
*      field4 : { number_of_fragments: 0, pre_tags : ["openingTagA", "openingTagB"], post_tags : ["closingTag"] }
*  }
* }
* </pre>
*/
public class HighlighterParseElement implements SearchParseElement {

    private static final String[] DEFAULT_PRE_TAGS = new String[]{"<em>"};
    private static final String[] DEFAULT_POST_TAGS = new String[]{"</em>"};

    private static final String[] STYLED_PRE_TAG = {
            "<em class=\"hlt1\">", "<em class=\"hlt2\">", "<em class=\"hlt3\">",
            "<em class=\"hlt4\">", "<em class=\"hlt5\">", "<em class=\"hlt6\">",
            "<em class=\"hlt7\">", "<em class=\"hlt8\">", "<em class=\"hlt9\">",
            "<em class=\"hlt10\">"
    };
    private static final String[] STYLED_POST_TAGS = {"</em>"};

    @Override
    public void parse(XContentParser parser, SearchContext context) throws Exception {
        try {
            context.highlight(parse(parser, context.queryParserService()));
        } catch (ElasticsearchIllegalArgumentException ex) {
            throw new SearchParseException(context, "Error while trying to parse Highlighter element in request");
        }
    }

    public SearchContextHighlight parse(XContentParser parser, IndexQueryParserService queryParserService) throws IOException {
        XContentParser.Token token;
        String topLevelFieldName = null;
        final List<Tuple<String, SearchContextHighlight.FieldOptions.Builder>> fieldsOptions = newArrayList();

        final SearchContextHighlight.FieldOptions.Builder globalOptionsBuilder = new SearchContextHighlight.FieldOptions.Builder()
                .preTags(DEFAULT_PRE_TAGS).postTags(DEFAULT_POST_TAGS).scoreOrdered(false).highlightFilter(false)
                .requireFieldMatch(false).forceSource(false).fragmentCharSize(100).numberOfFragments(5)
                .encoder("default").boundaryMaxScan(SimpleBoundaryScanner.DEFAULT_MAX_SCAN)
                .boundaryChars(SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS)
                .noMatchSize(0).phraseLimit(256);

        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
            if (token == XContentParser.Token.FIELD_NAME) {
                topLevelFieldName = parser.currentName();
            } else if (token == XContentParser.Token.START_ARRAY) {
                if ("pre_tags".equals(topLevelFieldName) || "preTags".equals(topLevelFieldName)) {
                    List<String> preTagsList = Lists.newArrayList();
                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                        preTagsList.add(parser.text());
                    }
                    globalOptionsBuilder.preTags(preTagsList.toArray(new String[preTagsList.size()]));
                } else if ("post_tags".equals(topLevelFieldName) || "postTags".equals(topLevelFieldName)) {
                    List<String> postTagsList = Lists.newArrayList();
                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                        postTagsList.add(parser.text());
                    }
                    globalOptionsBuilder.postTags(postTagsList.toArray(new String[postTagsList.size()]));
                } else if ("fields".equals(topLevelFieldName)) {
                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                        if (token == XContentParser.Token.START_OBJECT) {
                            String highlightFieldName = null;
                            while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
                                if (token == XContentParser.Token.FIELD_NAME) {
                                    if (highlightFieldName != null) {
                                        throw new ElasticsearchIllegalArgumentException("If highlighter fields is an array it must contain objects containing a single field");
                                    }
                                    highlightFieldName = parser.currentName();
                                } else if (token == XContentParser.Token.START_OBJECT) {
                                    fieldsOptions.add(Tuple.tuple(highlightFieldName, parseFields(parser, queryParserService)));
                                }
                            }
                        } else {
                            throw new ElasticsearchIllegalArgumentException("If highlighter fields is an array it must contain objects containing a single field");
                        }
                    }
                }
            } else if (token.isValue()) {
                if ("order".equals(topLevelFieldName)) {
                    globalOptionsBuilder.scoreOrdered("score".equals(parser.text()));
                } else if ("tags_schema".equals(topLevelFieldName) || "tagsSchema".equals(topLevelFieldName)) {
                    String schema = parser.text();
                    if ("styled".equals(schema)) {
                        globalOptionsBuilder.preTags(STYLED_PRE_TAG);
                        globalOptionsBuilder.postTags(STYLED_POST_TAGS);
                    }
                } else if ("highlight_filter".equals(topLevelFieldName) || "highlightFilter".equals(topLevelFieldName)) {
                    globalOptionsBuilder.highlightFilter(parser.booleanValue());
                } else if ("fragment_size".equals(topLevelFieldName) || "fragmentSize".equals(topLevelFieldName)) {
                    globalOptionsBuilder.fragmentCharSize(parser.intValue());
                } else if ("number_of_fragments".equals(topLevelFieldName) || "numberOfFragments".equals(topLevelFieldName)) {
                    globalOptionsBuilder.numberOfFragments(parser.intValue());
                } else if ("encoder".equals(topLevelFieldName)) {
                    globalOptionsBuilder.encoder(parser.text());
                } else if ("require_field_match".equals(topLevelFieldName) || "requireFieldMatch".equals(topLevelFieldName)) {
                    globalOptionsBuilder.requireFieldMatch(parser.booleanValue());
                } else if ("boundary_max_scan".equals(topLevelFieldName) || "boundaryMaxScan".equals(topLevelFieldName)) {
                    globalOptionsBuilder.boundaryMaxScan(parser.intValue());
                } else if ("boundary_chars".equals(topLevelFieldName) || "boundaryChars".equals(topLevelFieldName)) {
                    char[] charsArr = parser.text().toCharArray();
                    Character[] globalBoundaryChars = new Character[charsArr.length];
                    for (int i = 0; i < charsArr.length; i++) {
                        globalBoundaryChars[i] = charsArr[i];
                    }
                    globalOptionsBuilder.boundaryChars(globalBoundaryChars);
                } else if ("type".equals(topLevelFieldName)) {
                    globalOptionsBuilder.highlighterType(parser.text());
                } else if ("fragmenter".equals(topLevelFieldName)) {
                    globalOptionsBuilder.fragmenter(parser.text());
                } else if ("no_match_size".equals(topLevelFieldName) || "noMatchSize".equals(topLevelFieldName)) {
                    globalOptionsBuilder.noMatchSize(parser.intValue());
                } else if ("force_source".equals(topLevelFieldName) || "forceSource".equals(topLevelFieldName)) {
                    globalOptionsBuilder.forceSource(parser.booleanValue());
                } else if ("phrase_limit".equals(topLevelFieldName) || "phraseLimit".equals(topLevelFieldName)) {
                    globalOptionsBuilder.phraseLimit(parser.intValue());
                }
            } else if (token == XContentParser.Token.START_OBJECT && "options".equals(topLevelFieldName)) {
                globalOptionsBuilder.options(parser.map());
            } else if (token == XContentParser.Token.START_OBJECT) {
                if ("fields".equals(topLevelFieldName)) {
                    String highlightFieldName = null;
                    while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
                        if (token == XContentParser.Token.FIELD_NAME) {
                            highlightFieldName = parser.currentName();
                        } else if (token == XContentParser.Token.START_OBJECT) {
                            fieldsOptions.add(Tuple.tuple(highlightFieldName, parseFields(parser, queryParserService)));
                        }
                    }
                } else if ("highlight_query".equals(topLevelFieldName) || "highlightQuery".equals(topLevelFieldName)) {
                    globalOptionsBuilder.highlightQuery(queryParserService.parse(parser).query());
                }
            }
        }

        final SearchContextHighlight.FieldOptions globalOptions = globalOptionsBuilder.build();
        if (globalOptions.preTags() != null && globalOptions.postTags() == null) {
            throw new ElasticsearchIllegalArgumentException("Highlighter global preTags are set, but global postTags are not set");
        }

        final List<SearchContextHighlight.Field> fields = Lists.newArrayList();
        // now, go over and fill all fieldsOptions with default values from the global state
        for (final Tuple<String, SearchContextHighlight.FieldOptions.Builder> tuple : fieldsOptions) {
            fields.add(new SearchContextHighlight.Field(tuple.v1(), tuple.v2().merge(globalOptions).build()));
        }
        return new SearchContextHighlight(fields);
    }

    protected SearchContextHighlight.FieldOptions.Builder parseFields(XContentParser parser, IndexQueryParserService queryParserService) throws IOException {
        XContentParser.Token token;

        final SearchContextHighlight.FieldOptions.Builder fieldOptionsBuilder = new SearchContextHighlight.FieldOptions.Builder();
        String fieldName = null;
        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
            if (token == XContentParser.Token.FIELD_NAME) {
                fieldName = parser.currentName();
            } else if (token == XContentParser.Token.START_ARRAY) {
                if ("pre_tags".equals(fieldName) || "preTags".equals(fieldName)) {
                    List<String> preTagsList = Lists.newArrayList();
                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                        preTagsList.add(parser.text());
                    }
                    fieldOptionsBuilder.preTags(preTagsList.toArray(new String[preTagsList.size()]));
                } else if ("post_tags".equals(fieldName) || "postTags".equals(fieldName)) {
                    List<String> postTagsList = Lists.newArrayList();
                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                        postTagsList.add(parser.text());
                    }
                    fieldOptionsBuilder.postTags(postTagsList.toArray(new String[postTagsList.size()]));
                } else if ("matched_fields".equals(fieldName) || "matchedFields".equals(fieldName)) {
                    Set<String> matchedFields = Sets.newHashSet();
                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                        matchedFields.add(parser.text());
                    }
                    fieldOptionsBuilder.matchedFields(matchedFields);
                }
            } else if (token.isValue()) {
                if ("fragment_size".equals(fieldName) || "fragmentSize".equals(fieldName)) {
                    fieldOptionsBuilder.fragmentCharSize(parser.intValue());
                } else if ("number_of_fragments".equals(fieldName) || "numberOfFragments".equals(fieldName)) {
                    fieldOptionsBuilder.numberOfFragments(parser.intValue());
                } else if ("fragment_offset".equals(fieldName) || "fragmentOffset".equals(fieldName)) {
                    fieldOptionsBuilder.fragmentOffset(parser.intValue());
                } else if ("highlight_filter".equals(fieldName) || "highlightFilter".equals(fieldName)) {
                    fieldOptionsBuilder.highlightFilter(parser.booleanValue());
                } else if ("order".equals(fieldName)) {
                    fieldOptionsBuilder.scoreOrdered("score".equals(parser.text()));
                } else if ("require_field_match".equals(fieldName) || "requireFieldMatch".equals(fieldName)) {
                    fieldOptionsBuilder.requireFieldMatch(parser.booleanValue());
                } else if ("boundary_max_scan".equals(fieldName) || "boundaryMaxScan".equals(fieldName)) {
                    fieldOptionsBuilder.boundaryMaxScan(parser.intValue());
                } else if ("boundary_chars".equals(fieldName) || "boundaryChars".equals(fieldName)) {
                    char[] charsArr = parser.text().toCharArray();
                    Character[] boundaryChars = new Character[charsArr.length];
                    for (int i = 0; i < charsArr.length; i++) {
                        boundaryChars[i] = charsArr[i];
                    }
                    fieldOptionsBuilder.boundaryChars(boundaryChars);
                } else if ("type".equals(fieldName)) {
                    fieldOptionsBuilder.highlighterType(parser.text());
                } else if ("fragmenter".equals(fieldName)) {
                    fieldOptionsBuilder.fragmenter(parser.text());
                } else if ("no_match_size".equals(fieldName) || "noMatchSize".equals(fieldName)) {
                    fieldOptionsBuilder.noMatchSize(parser.intValue());
                } else if ("force_source".equals(fieldName) || "forceSource".equals(fieldName)) {
                    fieldOptionsBuilder.forceSource(parser.booleanValue());
                } else if ("phrase_limit".equals(fieldName) || "phraseLimit".equals(fieldName)) {
                    fieldOptionsBuilder.phraseLimit(parser.intValue());
                }
            } else if (token == XContentParser.Token.START_OBJECT) {
                if ("highlight_query".equals(fieldName) || "highlightQuery".equals(fieldName)) {
                    fieldOptionsBuilder.highlightQuery(queryParserService.parse(parser).query());
                } else if ("options".equals(fieldName)) {
                    fieldOptionsBuilder.options(parser.map());
                }
            }
        }
        return fieldOptionsBuilder;
    }
}
TOP

Related Classes of org.elasticsearch.search.highlight.HighlighterParseElement

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.