Package org.elasticsearch.index.analysis

Source Code of org.elasticsearch.index.analysis.PreBuiltAnalyzerTests

/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.indices.analysis.PreBuiltAnalyzers;
import org.elasticsearch.test.ElasticsearchSingleNodeTest;
import org.junit.Test;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import static org.hamcrest.Matchers.*;

/**
*
*/
public class PreBuiltAnalyzerTests extends ElasticsearchSingleNodeTest {

    @Test
    public void testThatDefaultAndStandardAnalyzerAreTheSameInstance() {
        Analyzer currentStandardAnalyzer = PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.CURRENT);
        Analyzer currentDefaultAnalyzer = PreBuiltAnalyzers.DEFAULT.getAnalyzer(Version.CURRENT);

        // special case, these two are the same instance
        assertThat(currentDefaultAnalyzer, is(currentStandardAnalyzer));
    }

    @Test
    public void testThatDefaultAndStandardAnalyzerChangedIn10Beta1() throws IOException {
        Analyzer currentStandardAnalyzer = PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.V_1_0_0_Beta1);
        Analyzer currentDefaultAnalyzer = PreBuiltAnalyzers.DEFAULT.getAnalyzer(Version.V_1_0_0_Beta1);

        // special case, these two are the same instance
        assertThat(currentDefaultAnalyzer, is(currentStandardAnalyzer));
        PreBuiltAnalyzers.DEFAULT.getAnalyzer(Version.V_1_0_0_Beta1);
        final int n = scaledRandomIntBetween(10, 100);
        Version version = Version.CURRENT;
        for(int i = 0; i < n; i++) {
            if (version.equals(Version.V_1_0_0_Beta1)) {
                assertThat(currentDefaultAnalyzer, is(PreBuiltAnalyzers.DEFAULT.getAnalyzer(version)));
            } else {
                assertThat(currentDefaultAnalyzer, not(is(PreBuiltAnalyzers.DEFAULT.getAnalyzer(version))));
            }
            Analyzer analyzer = PreBuiltAnalyzers.DEFAULT.getAnalyzer(version);
            TokenStream ts = analyzer.tokenStream("foo", "This is it Dude");
            ts.reset();
            CharTermAttribute charTermAttribute = ts.addAttribute(CharTermAttribute.class);
            List<String> list = new ArrayList<>();
            while(ts.incrementToken()) {
                list.add(charTermAttribute.toString());
            }
            if (version.onOrAfter(Version.V_1_0_0_Beta1)) {
                assertThat(list.size(), is(4));
                assertThat(list, contains("this", "is", "it", "dude"));

            } else {
                assertThat(list.size(), is(1));
                assertThat(list, contains("dude"));
            }
            ts.close();
            version = randomVersion();
        }
    }

    @Test
    public void testAnalyzerChangedIn10RC1() throws IOException {
        Analyzer pattern = PreBuiltAnalyzers.PATTERN.getAnalyzer(Version.V_1_0_0_RC1);
        Analyzer standardHtml = PreBuiltAnalyzers.STANDARD_HTML_STRIP.getAnalyzer(Version.V_1_0_0_RC1);
        final int n = scaledRandomIntBetween(10, 100);
        Version version = Version.CURRENT;
        for(int i = 0; i < n; i++) {
            if (version.equals(Version.V_1_0_0_RC1)) {
                assertThat(pattern, is(PreBuiltAnalyzers.PATTERN.getAnalyzer(version)));
                assertThat(standardHtml, is(PreBuiltAnalyzers.STANDARD_HTML_STRIP.getAnalyzer(version)));
            } else {
                assertThat(pattern, not(is(PreBuiltAnalyzers.DEFAULT.getAnalyzer(version))));
                assertThat(standardHtml, not(is(PreBuiltAnalyzers.DEFAULT.getAnalyzer(version))));
            }
            Analyzer analyzer = randomBoolean() ? PreBuiltAnalyzers.PATTERN.getAnalyzer(version) :  PreBuiltAnalyzers.STANDARD_HTML_STRIP.getAnalyzer(version);
            TokenStream ts = analyzer.tokenStream("foo", "This is it Dude");
            ts.reset();
            CharTermAttribute charTermAttribute = ts.addAttribute(CharTermAttribute.class);
            List<String> list = new ArrayList<>();
            while(ts.incrementToken()) {
                list.add(charTermAttribute.toString());
            }
            if (version.onOrAfter(Version.V_1_0_0_RC1)) {
                assertThat(list.toString(), list.size(), is(4));
                assertThat(list, contains("this", "is", "it", "dude"));

            } else {
                assertThat(list.size(), is(1));
                assertThat(list, contains("dude"));
            }
            ts.close();
            version = randomVersion();
        }
    }

    @Test
    public void testThatInstancesAreTheSameAlwaysForKeywordAnalyzer() {
        assertThat(PreBuiltAnalyzers.KEYWORD.getAnalyzer(Version.CURRENT),
                is(PreBuiltAnalyzers.KEYWORD.getAnalyzer(Version.V_0_18_0)));
    }

    @Test
    public void testThatInstancesAreCachedAndReused() {
        assertThat(PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.CURRENT),
                is(PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.CURRENT)));
        assertThat(PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.V_0_18_0),
                is(PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.V_0_18_0)));
    }

    @Test
    public void testThatInstancesWithSameLuceneVersionAreReused() {
        // both are lucene 4.4 and should return the same instance
        assertThat(PreBuiltAnalyzers.CATALAN.getAnalyzer(Version.V_0_90_4),
                is(PreBuiltAnalyzers.CATALAN.getAnalyzer(Version.V_0_90_5)));
    }

    @Test
    public void testThatAnalyzersAreUsedInMapping() throws IOException {
        int randomInt = randomInt(PreBuiltAnalyzers.values().length-1);
        PreBuiltAnalyzers randomPreBuiltAnalyzer = PreBuiltAnalyzers.values()[randomInt];
        String analyzerName = randomPreBuiltAnalyzer.name().toLowerCase(Locale.ROOT);

        Version randomVersion = randomVersion();
        Settings indexSettings = ImmutableSettings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, randomVersion).build();

        NamedAnalyzer namedAnalyzer = new PreBuiltAnalyzerProvider(analyzerName, AnalyzerScope.INDEX, randomPreBuiltAnalyzer.getAnalyzer(randomVersion)).get();

        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field").field("type", "string").field("analyzer", analyzerName).endObject().endObject()
                .endObject().endObject().string();
        DocumentMapper docMapper = createIndex("test", indexSettings).mapperService().documentMapperParser().parse(mapping);

        FieldMapper fieldMapper = docMapper.mappers().name("field").mapper();
        assertThat(fieldMapper.searchAnalyzer(), instanceOf(NamedAnalyzer.class));
        NamedAnalyzer fieldMapperNamedAnalyzer = (NamedAnalyzer) fieldMapper.searchAnalyzer();

        assertThat(fieldMapperNamedAnalyzer.analyzer(), is(namedAnalyzer.analyzer()));
    }
}
TOP

Related Classes of org.elasticsearch.index.analysis.PreBuiltAnalyzerTests

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.