Package io.crate.metadata

Source Code of io.crate.metadata.FulltextAnalyzerResolver

/*
* Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
* license agreements.  See the NOTICE file distributed with this work for
* additional information regarding copyright ownership.  Crate licenses
* this file to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.  You may
* obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
* License for the specific language governing permissions and limitations
* under the License.
*
* However, if you have executed another commercial license agreement
* with Crate these terms will supersede the license and you may use the
* software solely pursuant to the terms of the relevant commercial agreement.
*/

package io.crate.metadata;

import com.google.common.collect.ImmutableSet;
import org.apache.lucene.analysis.Analyzer;
import io.crate.Constants;
import io.crate.exceptions.AnalyzerInvalidException;
import io.crate.exceptions.AnalyzerUnknownException;
import org.elasticsearch.cluster.ClusterService;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.settings.loader.JsonSettingsLoader;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.indices.analysis.IndicesAnalysisService;

import java.io.IOException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

/**
* Service to get builtin and custom analyzers, tokenizers, token_filters, char_filters
*/
public class FulltextAnalyzerResolver {

    private final ClusterService clusterService;
    private final IndicesAnalysisService indicesAnalysisService;

    // redefined list of extended analyzers not available outside of
    // a concrete index (see AnalyzerModule.ExtendedProcessor)
    // stripped Prebuilt<Thingy> (e.g. PreBuiltTokenFilters)
    private static final ImmutableSet<String> EXTENDED_BUILTIN_TOKEN_FILTERS = ImmutableSet.of(
            "limit", "delimited_payload_filter", "synonym",
            "keep", "pattern_capture", "pattern_replace",
            "dictionary_decompounder", "hyphenation_decompounder",
            "keyword_marker", "stemmer_override",
            "hunspell", "cjk_bigram", "cjk_width");
    private static final ImmutableSet<String> EXTENDED_BUILTIN_CHAR_FILTERS = ImmutableSet
            .of("mapping", "pattern_replace");

    // used for saving the creation statement
    public static final String SQL_STATEMENT_KEY = "_sql_stmt";

    private ESLogger logger = Loggers.getLogger(FulltextAnalyzerResolver.class);


    public enum CustomType {
        ANALYZER("analyzer"),
        TOKENIZER("tokenizer"),
        TOKEN_FILTER("filter"),
        CHAR_FILTER("char_filter");

        private String name;

        private CustomType(String name) {
            this.name = name;
        }

        public String getName() {
            return this.name;
        }
    }

    @Inject
    public FulltextAnalyzerResolver(ClusterService clusterService,
                                    IndicesAnalysisService indicesAnalysisService) {
        this.clusterService = clusterService;
        this.indicesAnalysisService = indicesAnalysisService;
    }

    public boolean hasAnalyzer(String name) {
        return hasBuiltInAnalyzer(name) || hasCustomAnalyzer(name);
    }

    public boolean hasBuiltInAnalyzer(String name) {
        return indicesAnalysisService.hasAnalyzer(name);
    }

    public Analyzer getBuiltInAnalyzer(String name) {
        return indicesAnalysisService.analyzer(name);
    }

    /**
     * get all the builtin Analyzers defined in Crate
     * @return an Iterable of Strings
     */
    public Set<String> getBuiltInAnalyzers() {
        return new ImmutableSet.Builder<String>()
                .addAll(indicesAnalysisService.analyzerProviderFactories().keySet()).build();
    }

    /**
     * get the custom analyzer created by the CREATE ANALYZER command.
     * This does not include definitions for custom tokenizers, token-filters or char-filters
     *
     * @param name the name of the analyzer
     * @return Settings defining a custom Analyzer
     */
    public Settings getCustomAnalyzer(String name) {
        return getCustomThingy(name, CustomType.ANALYZER);
    }

    /**
     * get the source of the custom analyzer with name ``name``.
     * This is the statement it was created with.
     *
     * @param name the name of the custom analyzer
     * @return the source as String or null if no source exists)
     */
    public String getCustomAnalyzerSource(String name) {
        return clusterService.state().metaData().persistentSettings().get(
                String.format("%s.%s.%s.%s", Constants.CUSTOM_ANALYSIS_SETTINGS_PREFIX,
                        CustomType.ANALYZER.getName(), name, SQL_STATEMENT_KEY)
        );
    }

    public Map<String, Settings> getCustomAnalyzers() throws IOException {
        Map<String, Settings> result = new HashMap<>();
        for (Map.Entry<String, String> entry : getCustomThingies(CustomType.ANALYZER)
                .getAsMap().entrySet()) {
            if (!entry.getKey().endsWith("." + SQL_STATEMENT_KEY)) {
                result.put(entry.getKey(), decodeSettings(entry.getValue()));
            }
        }
        return result;
    }

    public boolean hasCustomAnalyzer(String name) {
        return hasCustomThingy(name, CustomType.ANALYZER);
    }


    public boolean hasTokenizer(String name) {
        return hasBuiltInTokenizer(name) || hasCustomTokenizer(name);
    }

    public boolean hasBuiltInTokenizer(String name) {
        return indicesAnalysisService.hasTokenizer(name);
    }

    public Set<String> getBuiltInTokenizers() {
        return new ImmutableSet.Builder<String>()
                .addAll(indicesAnalysisService.tokenizerFactories().keySet())
                .build();
    }

    public boolean hasCustomTokenizer(String name) {
        return hasCustomThingy(name, CustomType.TOKENIZER);
    }

    public Map<String, Settings> getCustomTokenizers() throws IOException {
        Map<String, Settings> result = new HashMap<>();
        for (Map.Entry<String, String> entry : getCustomThingies(CustomType.TOKENIZER).getAsMap
                ().entrySet()) {
            result.put(entry.getKey(), decodeSettings(entry.getValue()));
        }
        return result;
    }


    public boolean hasCharFilter(String name) {
        return hasBuiltInCharFilter(name) || hasCustomCharFilter(name);
    }

    public boolean hasBuiltInCharFilter(String name) {
        return EXTENDED_BUILTIN_CHAR_FILTERS.contains(name) || indicesAnalysisService.hasCharFilter(name);
    }

    public boolean hasCustomCharFilter(String name) {
        return hasCustomThingy(name, CustomType.CHAR_FILTER);
    }

    public Set<String> getBuiltInCharFilters() {
        return new ImmutableSet.Builder<String>().addAll(EXTENDED_BUILTIN_CHAR_FILTERS)
                .addAll(indicesAnalysisService.charFilterFactories().keySet())
                .build();
    }

    public Map<String, Settings> getCustomCharFilters() throws IOException {
        Map<String, Settings> result = new HashMap<>();
        for (Map.Entry<String, String> entry : getCustomThingies(CustomType.CHAR_FILTER).getAsMap
                ().entrySet()) {
            result.put(entry.getKey(), decodeSettings(entry.getValue()));
        }
        return result;
    }

    public boolean hasTokenFilter(String name) {
        return hasBuiltInTokenFilter(name) || hasCustomTokenFilter(name);
    }

    public boolean hasBuiltInTokenFilter(String name) {
        return EXTENDED_BUILTIN_TOKEN_FILTERS.contains(name) || indicesAnalysisService.hasTokenFilter(name);
    }

    public Set<String> getBuiltInTokenFilters() {
        return new ImmutableSet.Builder<String>()
                .addAll(EXTENDED_BUILTIN_TOKEN_FILTERS)
                .addAll(indicesAnalysisService.tokenFilterFactories().keySet())
                .build();
    }

    public boolean hasCustomTokenFilter(String name) {
        return hasCustomThingy(name, CustomType.TOKEN_FILTER);
    }

    public Map<String, Settings> getCustomTokenFilters() throws IOException {
        Map<String, Settings> result = new HashMap<>();
        for (Map.Entry<String, String> entry : getCustomThingies(CustomType.TOKEN_FILTER).getAsMap
                ().entrySet()) {
            result.put(entry.getKey(), decodeSettings(entry.getValue()));
        }
        return result;
    }

    public static BytesReference encodeSettings(Settings settings) throws IOException {
        BytesStreamOutput bso = new BytesStreamOutput();
        XContentBuilder builder = XContentFactory.jsonBuilder(bso);
        builder.startObject();
        for (Map.Entry<String, String> entry : settings.getAsMap().entrySet()) {
            builder.field(entry.getKey(), entry.getValue());
        }
        builder.endObject();
        builder.flush();
        return bso.bytes();
    }

    public static Settings decodeSettings(String encodedSettings) throws IOException {
        Map<String, String> loaded = new JsonSettingsLoader().load(encodedSettings);
        return ImmutableSettings.builder().put(loaded).build();


    }

    /**
     * used to get custom analyzers, tokenizers, token-filters or char-filters with name ``name``
     * from crate-cluster-settings
     * @param name
     * @param type
     * @return a full settings instance for the thingy with given name and type or null if it does not exists
     */
    private Settings getCustomThingy(String name, CustomType type) {
        if (name == null) {
            return null;
        }
        String encodedSettings = clusterService.state().metaData().persistentSettings().get(
                String.format("%s.%s.%s", Constants.CUSTOM_ANALYSIS_SETTINGS_PREFIX, type.getName(), name)
        );
        Settings decoded = null;
        if (encodedSettings != null) {
            try {
                decoded = decodeSettings(encodedSettings);
            } catch (IOException e) {
                logger.warn("Could not decode settings for {} '{}'.", e, type.getName(), name);
            }
        }
        return decoded;
    }

    private Settings getCustomThingies(CustomType type) {
        Map<String, Settings> settingsMap = clusterService.state().metaData().persistentSettings
                ().getGroups(Constants.CUSTOM_ANALYSIS_SETTINGS_PREFIX);
        Settings result = settingsMap.get(type.getName());
        return result != null ? result : ImmutableSettings.EMPTY;
    }

    /**
     * used to check if custom analyzer, tokenizer, token-filter or char-filter with name ``name`` exists
     * @param name
     * @param type
     * @return true if exists, false otherwise
     */
    private boolean hasCustomThingy(String name, CustomType type) {
        return clusterService.state().metaData().persistentSettings().getAsMap().containsKey(
                String.format(Locale.ROOT, "%s.%s.%s", Constants.CUSTOM_ANALYSIS_SETTINGS_PREFIX, type.getName(), name));
    }

    /**
     * resolve the full settings necessary for the custom analyzer with name ``name``
     * to be included in index-settings to get applied on an index.
     *
     * Resolves all custom tokenizer, token-filter and char-filter settings and includes them
     *
     * @param name the name of the analyzer to resolve
     * @return Settings ready for inclusion into a CreateIndexRequest
     * @throws AnalyzerInvalidException if no custom analyzer with name ``name`` could be found
     */
    public Settings resolveFullCustomAnalyzerSettings(String name) throws AnalyzerInvalidException {
        ImmutableSettings.Builder builder = ImmutableSettings.builder();
        Settings analyzerSettings = getCustomAnalyzer(name);
        if (analyzerSettings != null) {

            builder.put(analyzerSettings);

            String tokenizerName = analyzerSettings.get(String.format("index.analysis.analyzer.%s.tokenizer", name));
            if (tokenizerName != null) {
                Settings customTokenizerSettings = getCustomTokenizer(tokenizerName);
                if (customTokenizerSettings != null) {
                    builder.put(customTokenizerSettings);
                } else if (!hasBuiltInTokenizer(tokenizerName)) {
                    throw new AnalyzerInvalidException(String.format("Invalid Analyzer: could not resolve tokenizer '%s'", tokenizerName));
                }
            }

            String[] tokenFilterNames = analyzerSettings.getAsArray(String.format("index.analysis.analyzer.%s.filter", name));
            for (int i=0; i<tokenFilterNames.length; i++) {
                Settings customTokenFilterSettings = getCustomTokenFilter(tokenFilterNames[i]);
                if (customTokenFilterSettings != null) {
                    builder.put(customTokenFilterSettings);
                } else if (!hasBuiltInTokenFilter(tokenFilterNames[i])) {
                    throw new AnalyzerInvalidException(String.format("Invalid Analyzer: could not resolve token-filter '%s'", tokenFilterNames[i]));
                }
            }

            String[] charFilterNames = analyzerSettings.getAsArray(String.format("index.analysis.analyzer.%s.char_filter", name));
            for (int i=0; i<charFilterNames.length; i++) {
                Settings customCharFilterSettings = getCustomCharFilter(charFilterNames[i]);
                if (customCharFilterSettings != null) {
                    builder.put(customCharFilterSettings);
                } else if (!hasBuiltInCharFilter(charFilterNames[i])) {
                    throw new AnalyzerInvalidException(String.format("Invalid Analyzer: could not resolve char-filter '%s'", charFilterNames[i]));
                }
            }
        } else {
            throw new AnalyzerUnknownException(name);
        }
        return builder.build();
    }

    public TokenizerFactory getBuiltinTokenizer(String name) {
        return indicesAnalysisService.tokenizerFactoryFactory(name).create(null, null); // arguments do not matter here
    }

    public Settings getCustomTokenizer(String name) {
        return getCustomThingy(name, CustomType.TOKENIZER);
    }

    public Settings getCustomTokenFilter(String name) {
        return getCustomThingy(name, CustomType.TOKEN_FILTER);
    }

    public Settings getCustomCharFilter(String name) {
        return getCustomThingy(name, CustomType.CHAR_FILTER);
    }
}
TOP

Related Classes of io.crate.metadata.FulltextAnalyzerResolver

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.