From e808530ec1f19b1481e68acc8c3c849b04b125d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Fri, 27 Apr 2018 17:58:09 +0200 Subject: [PATCH 1/3] Deprecate `nGram` and `edgeNGram` names for ngram filters (#27429) The camel case name `nGram` should be removed in favour of `ngram` and similar for `edgeNGram` and `edge_ngram`. Before removal, we need to deprecate the camel case names first. This change adds deprecation warnings for indices with versions 6.4.0 and higher and logs deprecation warnings. --- .../analysis/common/CommonAnalysisPlugin.java | 21 +++- .../common/CommonAnalysisPluginTests.java | 119 ++++++++++++++++++ .../common/NGramTokenizerFactoryTests.java | 21 +--- .../analysis/PreConfiguredTokenFilter.java | 9 ++ 4 files changed, 146 insertions(+), 24 deletions(-) create mode 100644 modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index a01eb52fdd498..6fa8506900796 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -214,9 +214,14 @@ public List getPreConfiguredTokenFilters() { filters.add(PreConfiguredTokenFilter.singleton("dutch_stem", false, input -> new SnowballFilter(input, new DutchStemmer()))); filters.add(PreConfiguredTokenFilter.singleton("edge_ngram", false, input -> new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE))); - // TODO deprecate edgeNGram - filters.add(PreConfiguredTokenFilter.singleton("edgeNGram", false, input -> - new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE))); + filters.add(PreConfiguredTokenFilter.singletonWithVersion("edgeNGram", false, (reader, version) -> { + if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) { + DEPRECATION_LOGGER.deprecatedAndMaybeLog("esgeNGram_deprecation", + "The [edgeNGram] token filter name is deprecated and will be removed in a future version. " + + "Please change the filter name to [edge_ngram] instead."); + } + return new EdgeNGramTokenFilter(reader, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE); + })); filters.add(PreConfiguredTokenFilter.singleton("elision", true, input -> new ElisionFilter(input, FrenchAnalyzer.DEFAULT_ARTICLES))); filters.add(PreConfiguredTokenFilter.singleton("french_stem", false, input -> new SnowballFilter(input, new FrenchStemmer()))); @@ -233,8 +238,14 @@ public List getPreConfiguredTokenFilters() { LimitTokenCountFilterFactory.DEFAULT_MAX_TOKEN_COUNT, LimitTokenCountFilterFactory.DEFAULT_CONSUME_ALL_TOKENS))); filters.add(PreConfiguredTokenFilter.singleton("ngram", false, NGramTokenFilter::new)); - // TODO deprecate nGram - filters.add(PreConfiguredTokenFilter.singleton("nGram", false, NGramTokenFilter::new)); + filters.add(PreConfiguredTokenFilter.singletonWithVersion("nGram", false, (reader, version) -> { + if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) { + DEPRECATION_LOGGER.deprecatedAndMaybeLog("nGram_deprecation", + "The [nGram] token filter name is deprecated and will be removed in a future version. " + + "Please change the filter name to [ngram] instead."); + } + return new NGramTokenFilter(reader); + })); filters.add(PreConfiguredTokenFilter.singleton("persian_normalization", true, PersianNormalizationFilter::new)); filters.add(PreConfiguredTokenFilter.singleton("porter_stem", false, PorterStemFilter::new)); filters.add(PreConfiguredTokenFilter.singleton("reverse", false, ReverseStringFilter::new)); diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java new file mode 100644 index 0000000000000..e3322c48730bd --- /dev/null +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java @@ -0,0 +1,119 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.analysis.common; + +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.Tokenizer; +import org.elasticsearch.Version; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.IndexSettingsModule; +import org.elasticsearch.test.VersionUtils; + +import java.io.IOException; +import java.io.StringReader; +import java.util.Map; + +public class CommonAnalysisPluginTest extends ESTestCase { + + /** + * Check that the deprecated name "nGram" issues a deprecation warning for indices created since 6.3.0 + */ + public void testNGramDeprecationWarning() throws IOException { + Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT)) + .build(); + + IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); + try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { + Map tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter; + TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram"); + Tokenizer tokenizer = new MockTokenizer(); + tokenizer.setReader(new StringReader("foo bar")); + assertNotNull(tokenFilterFactory.create(tokenizer)); + assertWarnings( + "The [nGram] token filter name is deprecated and will be removed in a future version. " + + "Please change the filter name to [ngram] instead."); + } + } + + /** + * Check that the deprecated name "nGram" does NOT issues a deprecation warning for indices created before 6.4.0 + */ + public void testNGramNoDeprecationWarningPre6_4() throws IOException { + Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put(IndexMetaData.SETTING_VERSION_CREATED, + VersionUtils.randomVersionBetween(random(), Version.V_5_0_0, Version.V_6_3_0)) + .build(); + + IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); + try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { + Map tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter; + TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram"); + Tokenizer tokenizer = new MockTokenizer(); + tokenizer.setReader(new StringReader("foo bar")); + assertNotNull(tokenFilterFactory.create(tokenizer)); + } + } + + /** + * Check that the deprecated name "edgeNGram" issues a deprecation warning for indices created since 6.3.0 + */ + public void testEdgeNGramDeprecationWarning() throws IOException { + Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT)) + .build(); + + IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); + try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { + Map tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter; + TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram"); + Tokenizer tokenizer = new MockTokenizer(); + tokenizer.setReader(new StringReader("foo bar")); + assertNotNull(tokenFilterFactory.create(tokenizer)); + assertWarnings( + "The [edgeNGram] token filter name is deprecated and will be removed in a future version. " + + "Please change the filter name to [edge_ngram] instead."); + } + } + + /** + * Check that the deprecated name "edgeNGram" does NOT issues a deprecation warning for indices created before 6.4.0 + */ + public void testEdgeNGramNoDeprecationWarningPre6_4() throws IOException { + Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put(IndexMetaData.SETTING_VERSION_CREATED, + VersionUtils.randomVersionBetween(random(), Version.V_5_0_0, Version.V_6_3_0)) + .build(); + + IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); + try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { + Map tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter; + TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram"); + Tokenizer tokenizer = new MockTokenizer(); + tokenizer.setReader(new StringReader("foo bar")); + assertNotNull(tokenFilterFactory.create(tokenizer)); + } + } +} diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java index 3c6250eacfa66..62e29bae31c81 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java @@ -34,15 +34,11 @@ import org.elasticsearch.index.analysis.NGramTokenizerFactory; import org.elasticsearch.test.ESTokenStreamTestCase; import org.elasticsearch.test.IndexSettingsModule; +import org.elasticsearch.test.VersionUtils; import java.io.IOException; import java.io.StringReader; -import java.lang.reflect.Field; -import java.lang.reflect.Modifier; -import java.util.ArrayList; import java.util.Arrays; -import java.util.List; -import java.util.Random; import static com.carrotsearch.randomizedtesting.RandomizedTest.scaledRandomIntBetween; import static org.hamcrest.Matchers.instanceOf; @@ -131,7 +127,7 @@ public void testBackwardsCompatibilityEdgeNgramTokenFilter() throws Exception { for (int i = 0; i < iters; i++) { final Index index = new Index("test", "_na_"); final String name = "ngr"; - Version v = randomVersion(random()); + Version v = VersionUtils.randomVersion(random()); Builder builder = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3); boolean reverse = random().nextBoolean(); if (reverse) { @@ -152,7 +148,6 @@ public void testBackwardsCompatibilityEdgeNgramTokenFilter() throws Exception { } } - /*` * test that throws an error when trying to get a NGramTokenizer where difference between max_gram and min_gram * is greater than the allowed value of max_ngram_diff @@ -177,16 +172,4 @@ public void testMaxNGramDiffException() throws Exception{ + IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey() + "] index level setting.", ex.getMessage()); } - - private Version randomVersion(Random random) throws IllegalArgumentException, IllegalAccessException { - Field[] declaredFields = Version.class.getFields(); - List versionFields = new ArrayList<>(); - for (Field field : declaredFields) { - if ((field.getModifiers() & Modifier.STATIC) != 0 && field.getName().startsWith("V_") && field.getType() == Version.class) { - versionFields.add(field); - } - } - return (Version) versionFields.get(random.nextInt(versionFields.size())).get(Version.class); - } - } diff --git a/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java b/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java index 777fb589c9db0..12130e856f32a 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java @@ -41,6 +41,15 @@ public static PreConfiguredTokenFilter singleton(String name, boolean useFilterF (tokenStream, version) -> create.apply(tokenStream)); } + /** + * Create a pre-configured token filter that may not vary at all. + */ + public static PreConfiguredTokenFilter singletonWithVersion(String name, boolean useFilterForMultitermQueries, + BiFunction create) { + return new PreConfiguredTokenFilter(name, useFilterForMultitermQueries, CachingStrategy.ONE, + (tokenStream, version) -> create.apply(tokenStream, version)); + } + /** * Create a pre-configured token filter that may vary based on the Lucene version. */ From 0de4aac37cd320be4283d5f65885e7ef72f95110 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Tue, 15 May 2018 16:40:16 +0200 Subject: [PATCH 2/3] Renamed test class --- .../analysis/common/CommonAnalysisPluginTests.java | 2 +- sdfhksldj | 0 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 sdfhksldj diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java index e3322c48730bd..1d2b8a36810eb 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java @@ -35,7 +35,7 @@ import java.io.StringReader; import java.util.Map; -public class CommonAnalysisPluginTest extends ESTestCase { +public class CommonAnalysisPluginTests extends ESTestCase { /** * Check that the deprecated name "nGram" issues a deprecation warning for indices created since 6.3.0 diff --git a/sdfhksldj b/sdfhksldj new file mode 100644 index 0000000000000..e69de29bb2d1d From b8367592ce5a58ed3b4aa160719aed4887d62ef8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Wed, 16 May 2018 15:49:13 +0200 Subject: [PATCH 3/3] corrected typo --- .../org/elasticsearch/analysis/common/CommonAnalysisPlugin.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index 178bf60cf5ed0..624194092a02e 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -239,7 +239,7 @@ public List getPreConfiguredTokenFilters() { new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE))); filters.add(PreConfiguredTokenFilter.singletonWithVersion("edgeNGram", false, (reader, version) -> { if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) { - DEPRECATION_LOGGER.deprecatedAndMaybeLog("esgeNGram_deprecation", + DEPRECATION_LOGGER.deprecatedAndMaybeLog("edgeNGram_deprecation", "The [edgeNGram] token filter name is deprecated and will be removed in a future version. " + "Please change the filter name to [edge_ngram] instead."); }