From 3fce0423fe0783881e40a578a04a666e764cdab1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Wed, 18 Dec 2024 15:25:44 +0100 Subject: [PATCH 1/3] Revert "Remove deprecations and 7.x related code from analysis common (#113009)" This reverts commit ef37511f0a3525274d73f2515d19d8b52f9cc150. --- .../analysis/common/CommonAnalysisPlugin.java | 132 ++++++++++++- .../common/CommonAnalysisPluginTests.java | 186 ++++++++++++++++++ .../common/EdgeNGramTokenizerTests.java | 3 +- 3 files changed, 314 insertions(+), 7 deletions(-) create mode 100644 modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index a97154fd4d1ff..dec2526db8515 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -101,7 +101,12 @@ import org.apache.lucene.analysis.tr.TurkishAnalyzer; import org.apache.lucene.analysis.util.ElisionFilter; import org.apache.lucene.util.SetOnce; +import org.elasticsearch.common.logging.DeprecationCategory; +import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.regex.Regex; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.analysis.AnalyzerProvider; import org.elasticsearch.index.analysis.CharFilterFactory; @@ -134,6 +139,8 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, ScriptPlugin { + private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(CommonAnalysisPlugin.class); + private final SetOnce scriptServiceHolder = new SetOnce<>(); private final SetOnce synonymsManagementServiceHolder = new SetOnce<>(); @@ -224,6 +231,28 @@ public Map> getTokenFilters() { filters.put("dictionary_decompounder", requiresAnalysisSettings(DictionaryCompoundWordTokenFilterFactory::new)); filters.put("dutch_stem", DutchStemTokenFilterFactory::new); filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new); + filters.put("edgeNGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> { + return new EdgeNGramTokenFilterFactory(indexSettings, environment, name, settings) { + @Override + public TokenStream create(TokenStream tokenStream) { + if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) { + throw new IllegalArgumentException( + "The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. " + + "Please change the filter name to [edge_ngram] instead." + ); + } else { + deprecationLogger.warn( + DeprecationCategory.ANALYSIS, + "edgeNGram_deprecation", + "The [edgeNGram] token filter name is deprecated and will be removed in a future version. " + + "Please change the filter name to [edge_ngram] instead." + ); + } + return super.create(tokenStream); + } + + }; + }); filters.put("elision", requiresAnalysisSettings(ElisionTokenFilterFactory::new)); filters.put("fingerprint", FingerprintTokenFilterFactory::new); filters.put("flatten_graph", FlattenGraphTokenFilterFactory::new); @@ -243,6 +272,28 @@ public Map> getTokenFilters() { filters.put("min_hash", MinHashTokenFilterFactory::new); filters.put("multiplexer", MultiplexerTokenFilterFactory::new); filters.put("ngram", NGramTokenFilterFactory::new); + filters.put("nGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> { + return new NGramTokenFilterFactory(indexSettings, environment, name, settings) { + @Override + public TokenStream create(TokenStream tokenStream) { + if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) { + throw new IllegalArgumentException( + "The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. " + + "Please change the filter name to [ngram] instead." + ); + } else { + deprecationLogger.warn( + DeprecationCategory.ANALYSIS, + "nGram_deprecation", + "The [nGram] token filter name is deprecated and will be removed in a future version. " + + "Please change the filter name to [ngram] instead." + ); + } + return super.create(tokenStream); + } + + }; + }); filters.put("pattern_capture", requiresAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new)); filters.put("pattern_replace", requiresAnalysisSettings(PatternReplaceTokenFilterFactory::new)); filters.put("persian_normalization", PersianNormalizationFilterFactory::new); @@ -294,7 +345,39 @@ public Map> getTokenizers() { tokenizers.put("simple_pattern", SimplePatternTokenizerFactory::new); tokenizers.put("simple_pattern_split", SimplePatternSplitTokenizerFactory::new); tokenizers.put("thai", ThaiTokenizerFactory::new); + tokenizers.put("nGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> { + if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) { + throw new IllegalArgumentException( + "The [nGram] tokenizer name was deprecated in 7.6. " + + "Please use the tokenizer name to [ngram] for indices created in versions 8 or higher instead." + ); + } else if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_7_6_0)) { + deprecationLogger.warn( + DeprecationCategory.ANALYSIS, + "nGram_tokenizer_deprecation", + "The [nGram] tokenizer name is deprecated and will be removed in a future version. " + + "Please change the tokenizer name to [ngram] instead." + ); + } + return new NGramTokenizerFactory(indexSettings, environment, name, settings); + }); tokenizers.put("ngram", NGramTokenizerFactory::new); + tokenizers.put("edgeNGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> { + if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) { + throw new IllegalArgumentException( + "The [edgeNGram] tokenizer name was deprecated in 7.6. " + + "Please use the tokenizer name to [edge_nGram] for indices created in versions 8 or higher instead." + ); + } else if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_7_6_0)) { + deprecationLogger.warn( + DeprecationCategory.ANALYSIS, + "edgeNGram_tokenizer_deprecation", + "The [edgeNGram] tokenizer name is deprecated and will be removed in a future version. " + + "Please change the tokenizer name to [edge_ngram] instead." + ); + } + return new EdgeNGramTokenizerFactory(indexSettings, environment, name, settings); + }); tokenizers.put("edge_ngram", EdgeNGramTokenizerFactory::new); tokenizers.put("char_group", CharGroupTokenizerFactory::new); tokenizers.put("classic", ClassicTokenizerFactory::new); @@ -505,17 +588,54 @@ public List getPreConfiguredTokenizers() { tokenizers.add(PreConfiguredTokenizer.singleton("letter", LetterTokenizer::new)); tokenizers.add(PreConfiguredTokenizer.singleton("whitespace", WhitespaceTokenizer::new)); tokenizers.add(PreConfiguredTokenizer.singleton("ngram", NGramTokenizer::new)); - tokenizers.add( - PreConfiguredTokenizer.indexVersion( - "edge_ngram", - (version) -> new EdgeNGramTokenizer(NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE, NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE) - ) - ); + tokenizers.add(PreConfiguredTokenizer.indexVersion("edge_ngram", (version) -> { + if (version.onOrAfter(IndexVersions.V_7_3_0)) { + return new EdgeNGramTokenizer(NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE, NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE); + } + return new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE); + })); tokenizers.add(PreConfiguredTokenizer.singleton("pattern", () -> new PatternTokenizer(Regex.compile("\\W+", null), -1))); tokenizers.add(PreConfiguredTokenizer.singleton("thai", ThaiTokenizer::new)); // TODO deprecate and remove in API // This is already broken with normalization, so backwards compat isn't necessary? tokenizers.add(PreConfiguredTokenizer.singleton("lowercase", XLowerCaseTokenizer::new)); + + // Temporary shim for aliases. TODO deprecate after they are moved + tokenizers.add(PreConfiguredTokenizer.indexVersion("nGram", (version) -> { + if (version.onOrAfter(IndexVersions.V_8_0_0)) { + throw new IllegalArgumentException( + "The [nGram] tokenizer name was deprecated in 7.6. " + + "Please use the tokenizer name to [ngram] for indices created in versions 8 or higher instead." + ); + } else if (version.onOrAfter(IndexVersions.V_7_6_0)) { + deprecationLogger.warn( + DeprecationCategory.ANALYSIS, + "nGram_tokenizer_deprecation", + "The [nGram] tokenizer name is deprecated and will be removed in a future version. " + + "Please change the tokenizer name to [ngram] instead." + ); + } + return new NGramTokenizer(); + })); + tokenizers.add(PreConfiguredTokenizer.indexVersion("edgeNGram", (version) -> { + if (version.onOrAfter(IndexVersions.V_8_0_0)) { + throw new IllegalArgumentException( + "The [edgeNGram] tokenizer name was deprecated in 7.6. " + + "Please use the tokenizer name to [edge_ngram] for indices created in versions 8 or higher instead." + ); + } else if (version.onOrAfter(IndexVersions.V_7_6_0)) { + deprecationLogger.warn( + DeprecationCategory.ANALYSIS, + "edgeNGram_tokenizer_deprecation", + "The [edgeNGram] tokenizer name is deprecated and will be removed in a future version. " + + "Please change the tokenizer name to [edge_ngram] instead." + ); + } + if (version.onOrAfter(IndexVersions.V_7_3_0)) { + return new EdgeNGramTokenizer(NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE, NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE); + } + return new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE); + })); tokenizers.add(PreConfiguredTokenizer.singleton("PathHierarchy", PathHierarchyTokenizer::new)); return tokenizers; diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java new file mode 100644 index 0000000000000..3263704d38e1d --- /dev/null +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java @@ -0,0 +1,186 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.analysis.common; + +import org.apache.lucene.analysis.Tokenizer; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; +import org.elasticsearch.index.analysis.TokenizerFactory; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.IndexSettingsModule; +import org.elasticsearch.test.index.IndexVersionUtils; + +import java.io.IOException; +import java.util.Map; + +public class CommonAnalysisPluginTests extends ESTestCase { + + /** + * Check that the deprecated "nGram" filter throws exception for indices created since 7.0.0 and + * logs a warning for earlier indices when the filter is used as a custom filter + */ + public void testNGramFilterInCustomAnalyzerDeprecationError() throws IOException { + final Settings settings = Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put( + IndexMetadata.SETTING_VERSION_CREATED, + IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_8_0_0, IndexVersion.current()) + ) + .put("index.analysis.analyzer.custom_analyzer.type", "custom") + .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard") + .putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram") + .put("index.analysis.filter.my_ngram.type", "nGram") + .build(); + + try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { + IllegalArgumentException ex = expectThrows( + IllegalArgumentException.class, + () -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin) + ); + assertEquals( + "The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. " + + "Please change the filter name to [ngram] instead.", + ex.getMessage() + ); + } + } + + /** + * Check that the deprecated "edgeNGram" filter throws exception for indices created since 7.0.0 and + * logs a warning for earlier indices when the filter is used as a custom filter + */ + public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOException { + final Settings settings = Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put( + IndexMetadata.SETTING_VERSION_CREATED, + IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_8_0_0, IndexVersion.current()) + ) + .put("index.analysis.analyzer.custom_analyzer.type", "custom") + .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard") + .putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram") + .put("index.analysis.filter.my_ngram.type", "edgeNGram") + .build(); + + try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { + IllegalArgumentException ex = expectThrows( + IllegalArgumentException.class, + () -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin) + ); + assertEquals( + "The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. " + + "Please change the filter name to [edge_ngram] instead.", + ex.getMessage() + ); + } + } + + /** + * Check that we log a deprecation warning for "nGram" and "edgeNGram" tokenizer names with 7.6 and + * disallow usages for indices created after 8.0 + */ + public void testNGramTokenizerDeprecation() throws IOException { + expectThrows( + IllegalArgumentException.class, + () -> doTestPrebuiltTokenizerDeprecation( + "nGram", + "ngram", + IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_8_0_0, IndexVersion.current()), + true + ) + ); + expectThrows( + IllegalArgumentException.class, + () -> doTestPrebuiltTokenizerDeprecation( + "edgeNGram", + "edge_ngram", + IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_8_0_0, IndexVersion.current()), + true + ) + ); + expectThrows( + IllegalArgumentException.class, + () -> doTestCustomTokenizerDeprecation( + "nGram", + "ngram", + IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_8_0_0, IndexVersion.current()), + true + ) + ); + expectThrows( + IllegalArgumentException.class, + () -> doTestCustomTokenizerDeprecation( + "edgeNGram", + "edge_ngram", + IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_8_0_0, IndexVersion.current()), + true + ) + ); + } + + public void doTestPrebuiltTokenizerDeprecation(String deprecatedName, String replacement, IndexVersion version, boolean expectWarning) + throws IOException { + final Settings settings = Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put(IndexMetadata.SETTING_VERSION_CREATED, version) + .build(); + + try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { + Map tokenizers = createTestAnalysis( + IndexSettingsModule.newIndexSettings("index", settings), + settings, + commonAnalysisPlugin + ).tokenizer; + TokenizerFactory tokenizerFactory = tokenizers.get(deprecatedName); + + Tokenizer tokenizer = tokenizerFactory.create(); + assertNotNull(tokenizer); + if (expectWarning) { + assertWarnings( + "The [" + + deprecatedName + + "] tokenizer name is deprecated and will be removed in a future version. " + + "Please change the tokenizer name to [" + + replacement + + "] instead." + ); + } + } + } + + public void doTestCustomTokenizerDeprecation(String deprecatedName, String replacement, IndexVersion version, boolean expectWarning) + throws IOException { + final Settings settings = Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put(IndexMetadata.SETTING_VERSION_CREATED, version) + .put("index.analysis.analyzer.custom_analyzer.type", "custom") + .put("index.analysis.analyzer.custom_analyzer.tokenizer", "my_tokenizer") + .put("index.analysis.tokenizer.my_tokenizer.type", deprecatedName) + .build(); + + try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { + createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin); + + if (expectWarning) { + assertWarnings( + "The [" + + deprecatedName + + "] tokenizer name is deprecated and will be removed in a future version. " + + "Please change the tokenizer name to [" + + replacement + + "] instead." + ); + } + } + } +} diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerTests.java index 11d1653439e59..c998e927e25a8 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerTests.java @@ -34,7 +34,7 @@ public class EdgeNGramTokenizerTests extends ESTokenStreamTestCase { - private static IndexAnalyzers buildAnalyzers(IndexVersion version, String tokenizer) throws IOException { + private IndexAnalyzers buildAnalyzers(IndexVersion version, String tokenizer) throws IOException { Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build(); Settings indexSettings = Settings.builder() .put(IndexMetadata.SETTING_VERSION_CREATED, version) @@ -54,6 +54,7 @@ public void testPreConfiguredTokenizer() throws IOException { assertNotNull(analyzer); assertAnalyzesTo(analyzer, "test", new String[] { "t", "te" }); } + } public void testCustomTokenChars() throws IOException { From 5ed66a35e217a587f9a5ef8563a6215bb0bea821 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Wed, 18 Dec 2024 15:50:14 +0100 Subject: [PATCH 2/3] Revert removal of 7.x related code from analysis common This reverts #113009 and re-adds previous v7 tests since we now support v7 indices as read-only on v9. --- .../common/CommonAnalysisPluginTests.java | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java index 3263704d38e1d..9972d58b2dcc1 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java @@ -53,6 +53,25 @@ public void testNGramFilterInCustomAnalyzerDeprecationError() throws IOException ex.getMessage() ); } + + final Settings settingsPre7 = Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put( + IndexMetadata.SETTING_VERSION_CREATED, + IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_7_0_0, IndexVersions.V_7_6_0) + ) + .put("index.analysis.analyzer.custom_analyzer.type", "custom") + .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard") + .putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram") + .put("index.analysis.filter.my_ngram.type", "nGram") + .build(); + try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { + createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7), settingsPre7, commonAnalysisPlugin); + assertWarnings( + "The [nGram] token filter name is deprecated and will be removed in a future version. " + + "Please change the filter name to [ngram] instead." + ); + } } /** @@ -83,6 +102,26 @@ public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOExcep ex.getMessage() ); } + + final Settings settingsPre7 = Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put( + IndexMetadata.SETTING_VERSION_CREATED, + IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_7_0_0, IndexVersions.V_7_6_0) + ) + .put("index.analysis.analyzer.custom_analyzer.type", "custom") + .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard") + .putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram") + .put("index.analysis.filter.my_ngram.type", "edgeNGram") + .build(); + + try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { + createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7), settingsPre7, commonAnalysisPlugin); + assertWarnings( + "The [edgeNGram] token filter name is deprecated and will be removed in a future version. " + + "Please change the filter name to [edge_ngram] instead." + ); + } } /** @@ -90,6 +129,39 @@ public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOExcep * disallow usages for indices created after 8.0 */ public void testNGramTokenizerDeprecation() throws IOException { + // tests for prebuilt tokenizer + doTestPrebuiltTokenizerDeprecation( + "nGram", + "ngram", + IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_7_0_0, IndexVersions.V_7_5_2), + false + ); + doTestPrebuiltTokenizerDeprecation( + "edgeNGram", + "edge_ngram", + IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_7_0_0, IndexVersions.V_7_5_2), + false + ); + doTestPrebuiltTokenizerDeprecation( + "nGram", + "ngram", + IndexVersionUtils.randomVersionBetween( + random(), + IndexVersions.V_7_6_0, + IndexVersion.max(IndexVersions.V_7_6_0, IndexVersionUtils.getPreviousVersion(IndexVersions.V_8_0_0)) + ), + true + ); + doTestPrebuiltTokenizerDeprecation( + "edgeNGram", + "edge_ngram", + IndexVersionUtils.randomVersionBetween( + random(), + IndexVersions.V_7_6_0, + IndexVersion.max(IndexVersions.V_7_6_0, IndexVersionUtils.getPreviousVersion(IndexVersions.V_8_0_0)) + ), + true + ); expectThrows( IllegalArgumentException.class, () -> doTestPrebuiltTokenizerDeprecation( @@ -108,6 +180,40 @@ public void testNGramTokenizerDeprecation() throws IOException { true ) ); + + // same batch of tests for custom tokenizer definition in the settings + doTestCustomTokenizerDeprecation( + "nGram", + "ngram", + IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_7_0_0, IndexVersions.V_7_5_2), + false + ); + doTestCustomTokenizerDeprecation( + "edgeNGram", + "edge_ngram", + IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_7_0_0, IndexVersions.V_7_5_2), + false + ); + doTestCustomTokenizerDeprecation( + "nGram", + "ngram", + IndexVersionUtils.randomVersionBetween( + random(), + IndexVersions.V_7_6_0, + IndexVersion.max(IndexVersions.V_7_6_0, IndexVersionUtils.getPreviousVersion(IndexVersions.V_8_0_0)) + ), + true + ); + doTestCustomTokenizerDeprecation( + "edgeNGram", + "edge_ngram", + IndexVersionUtils.randomVersionBetween( + random(), + IndexVersions.V_7_6_0, + IndexVersion.max(IndexVersions.V_7_6_0, IndexVersionUtils.getPreviousVersion(IndexVersions.V_8_0_0)) + ), + true + ); expectThrows( IllegalArgumentException.class, () -> doTestCustomTokenizerDeprecation( From c098f82d16fb1d66231b9cca42f76f222e42482f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Wed, 18 Dec 2024 22:10:20 +0100 Subject: [PATCH 3/3] bye bye TODO --- .../org/elasticsearch/analysis/common/CommonAnalysisPlugin.java | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index dec2526db8515..c980aaba71444 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -600,7 +600,6 @@ public List getPreConfiguredTokenizers() { // This is already broken with normalization, so backwards compat isn't necessary? tokenizers.add(PreConfiguredTokenizer.singleton("lowercase", XLowerCaseTokenizer::new)); - // Temporary shim for aliases. TODO deprecate after they are moved tokenizers.add(PreConfiguredTokenizer.indexVersion("nGram", (version) -> { if (version.onOrAfter(IndexVersions.V_8_0_0)) { throw new IllegalArgumentException(