From 3fce0423fe0783881e40a578a04a666e764cdab1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christoph=20B=C3=BCscher?= <christophbuescher@posteo.de>
Date: Wed, 18 Dec 2024 15:25:44 +0100
Subject: [PATCH 1/3] Revert "Remove deprecations and 7.x related code from
 analysis common (#113009)"

This reverts commit ef37511f0a3525274d73f2515d19d8b52f9cc150.
---
 .../analysis/common/CommonAnalysisPlugin.java | 132 ++++++++++++-
 .../common/CommonAnalysisPluginTests.java     | 186 ++++++++++++++++++
 .../common/EdgeNGramTokenizerTests.java       |   3 +-
 3 files changed, 314 insertions(+), 7 deletions(-)
 create mode 100644 modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java
diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
index a97154fd4d1ff..dec2526db8515 100644
--- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
@@ -101,7 +101,12 @@
 import org.apache.lucene.analysis.tr.TurkishAnalyzer;
 import org.apache.lucene.analysis.util.ElisionFilter;
 import org.apache.lucene.util.SetOnce;
+import org.elasticsearch.common.logging.DeprecationCategory;
+import org.elasticsearch.common.logging.DeprecationLogger;
 import org.elasticsearch.common.regex.Regex;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.env.Environment;
+import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.IndexVersions;
 import org.elasticsearch.index.analysis.AnalyzerProvider;
 import org.elasticsearch.index.analysis.CharFilterFactory;
@@ -134,6 +139,8 @@
 
 public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, ScriptPlugin {
 
+    private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(CommonAnalysisPlugin.class);
+
     private final SetOnce<ScriptService> scriptServiceHolder = new SetOnce<>();
     private final SetOnce<SynonymsManagementAPIService> synonymsManagementServiceHolder = new SetOnce<>();
 
@@ -224,6 +231,28 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
         filters.put("dictionary_decompounder", requiresAnalysisSettings(DictionaryCompoundWordTokenFilterFactory::new));
         filters.put("dutch_stem", DutchStemTokenFilterFactory::new);
         filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new);
+        filters.put("edgeNGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
+            return new EdgeNGramTokenFilterFactory(indexSettings, environment, name, settings) {
+                @Override
+                public TokenStream create(TokenStream tokenStream) {
+                    if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) {
+                        throw new IllegalArgumentException(
+                            "The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+                                + "Please change the filter name to [edge_ngram] instead."
+                        );
+                    } else {
+                        deprecationLogger.warn(
+                            DeprecationCategory.ANALYSIS,
+                            "edgeNGram_deprecation",
+                            "The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
+                                + "Please change the filter name to [edge_ngram] instead."
+                        );
+                    }
+                    return super.create(tokenStream);
+                }
+
+            };
+        });
         filters.put("elision", requiresAnalysisSettings(ElisionTokenFilterFactory::new));
         filters.put("fingerprint", FingerprintTokenFilterFactory::new);
         filters.put("flatten_graph", FlattenGraphTokenFilterFactory::new);
@@ -243,6 +272,28 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
         filters.put("min_hash", MinHashTokenFilterFactory::new);
         filters.put("multiplexer", MultiplexerTokenFilterFactory::new);
         filters.put("ngram", NGramTokenFilterFactory::new);
+        filters.put("nGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
+            return new NGramTokenFilterFactory(indexSettings, environment, name, settings) {
+                @Override
+                public TokenStream create(TokenStream tokenStream) {
+                    if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) {
+                        throw new IllegalArgumentException(
+                            "The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+                                + "Please change the filter name to [ngram] instead."
+                        );
+                    } else {
+                        deprecationLogger.warn(
+                            DeprecationCategory.ANALYSIS,
+                            "nGram_deprecation",
+                            "The [nGram] token filter name is deprecated and will be removed in a future version. "
+                                + "Please change the filter name to [ngram] instead."
+                        );
+                    }
+                    return super.create(tokenStream);
+                }
+
+            };
+        });
         filters.put("pattern_capture", requiresAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
         filters.put("pattern_replace", requiresAnalysisSettings(PatternReplaceTokenFilterFactory::new));
         filters.put("persian_normalization", PersianNormalizationFilterFactory::new);
@@ -294,7 +345,39 @@ public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
         tokenizers.put("simple_pattern", SimplePatternTokenizerFactory::new);
         tokenizers.put("simple_pattern_split", SimplePatternSplitTokenizerFactory::new);
         tokenizers.put("thai", ThaiTokenizerFactory::new);
+        tokenizers.put("nGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
+            if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) {
+                throw new IllegalArgumentException(
+                    "The [nGram] tokenizer name was deprecated in 7.6. "
+                        + "Please use the tokenizer name to [ngram] for indices created in versions 8 or higher instead."
+                );
+            } else if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_7_6_0)) {
+                deprecationLogger.warn(
+                    DeprecationCategory.ANALYSIS,
+                    "nGram_tokenizer_deprecation",
+                    "The [nGram] tokenizer name is deprecated and will be removed in a future version. "
+                        + "Please change the tokenizer name to [ngram] instead."
+                );
+            }
+            return new NGramTokenizerFactory(indexSettings, environment, name, settings);
+        });
         tokenizers.put("ngram", NGramTokenizerFactory::new);
+        tokenizers.put("edgeNGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
+            if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) {
+                throw new IllegalArgumentException(
+                    "The [edgeNGram] tokenizer name was deprecated in 7.6. "
+                        + "Please use the tokenizer name to [edge_ngram] for indices created in versions 8 or higher instead."
+                );
+            } else if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_7_6_0)) {
+                deprecationLogger.warn(
+                    DeprecationCategory.ANALYSIS,
+                    "edgeNGram_tokenizer_deprecation",
+                    "The [edgeNGram] tokenizer name is deprecated and will be removed in a future version. "
+                        + "Please change the tokenizer name to [edge_ngram] instead."
+                );
+            }
+            return new EdgeNGramTokenizerFactory(indexSettings, environment, name, settings);
+        });
         tokenizers.put("edge_ngram", EdgeNGramTokenizerFactory::new);
         tokenizers.put("char_group", CharGroupTokenizerFactory::new);
         tokenizers.put("classic", ClassicTokenizerFactory::new);
@@ -505,17 +588,54 @@ public List<PreConfiguredTokenizer> getPreConfiguredTokenizers() {
         tokenizers.add(PreConfiguredTokenizer.singleton("letter", LetterTokenizer::new));
         tokenizers.add(PreConfiguredTokenizer.singleton("whitespace", WhitespaceTokenizer::new));
         tokenizers.add(PreConfiguredTokenizer.singleton("ngram", NGramTokenizer::new));
-        tokenizers.add(
-            PreConfiguredTokenizer.indexVersion(
-                "edge_ngram",
-                (version) -> new EdgeNGramTokenizer(NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE, NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE)
-            )
-        );
+        tokenizers.add(PreConfiguredTokenizer.indexVersion("edge_ngram", (version) -> {
+            if (version.onOrAfter(IndexVersions.V_7_3_0)) {
+                return new EdgeNGramTokenizer(NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE, NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
+            }
+            return new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
+        }));
         tokenizers.add(PreConfiguredTokenizer.singleton("pattern", () -> new PatternTokenizer(Regex.compile("\\W+", null), -1)));
         tokenizers.add(PreConfiguredTokenizer.singleton("thai", ThaiTokenizer::new));
         // TODO deprecate and remove in API
         // This is already broken with normalization, so backwards compat isn't necessary?
         tokenizers.add(PreConfiguredTokenizer.singleton("lowercase", XLowerCaseTokenizer::new));
+
+        // Temporary shim for aliases. TODO deprecate after they are moved
+        tokenizers.add(PreConfiguredTokenizer.indexVersion("nGram", (version) -> {
+            if (version.onOrAfter(IndexVersions.V_8_0_0)) {
+                throw new IllegalArgumentException(
+                    "The [nGram] tokenizer name was deprecated in 7.6. "
+                        + "Please use the tokenizer name to [ngram] for indices created in versions 8 or higher instead."
+                );
+            } else if (version.onOrAfter(IndexVersions.V_7_6_0)) {
+                deprecationLogger.warn(
+                    DeprecationCategory.ANALYSIS,
+                    "nGram_tokenizer_deprecation",
+                    "The [nGram] tokenizer name is deprecated and will be removed in a future version. "
+                        + "Please change the tokenizer name to [ngram] instead."
+                );
+            }
+            return new NGramTokenizer();
+        }));
+        tokenizers.add(PreConfiguredTokenizer.indexVersion("edgeNGram", (version) -> {
+            if (version.onOrAfter(IndexVersions.V_8_0_0)) {
+                throw new IllegalArgumentException(
+                    "The [edgeNGram] tokenizer name was deprecated in 7.6. "
+                        + "Please use the tokenizer name to [edge_ngram] for indices created in versions 8 or higher instead."
+                );
+            } else if (version.onOrAfter(IndexVersions.V_7_6_0)) {
+                deprecationLogger.warn(
+                    DeprecationCategory.ANALYSIS,
+                    "edgeNGram_tokenizer_deprecation",
+                    "The [edgeNGram] tokenizer name is deprecated and will be removed in a future version. "
+                        + "Please change the tokenizer name to [edge_ngram] instead."
+                );
+            }
+            if (version.onOrAfter(IndexVersions.V_7_3_0)) {
+                return new EdgeNGramTokenizer(NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE, NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
+            }
+            return new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
+        }));
         tokenizers.add(PreConfiguredTokenizer.singleton("PathHierarchy", PathHierarchyTokenizer::new));
 
         return tokenizers;
diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java
new file mode 100644
index 0000000000000..3263704d38e1d
--- /dev/null
+++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java
@@ -0,0 +1,186 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.analysis.common;
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.elasticsearch.cluster.metadata.IndexMetadata;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.env.Environment;
+import org.elasticsearch.index.IndexVersion;
+import org.elasticsearch.index.IndexVersions;
+import org.elasticsearch.index.analysis.TokenizerFactory;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.test.IndexSettingsModule;
+import org.elasticsearch.test.index.IndexVersionUtils;
+
+import java.io.IOException;
+import java.util.Map;
+
+public class CommonAnalysisPluginTests extends ESTestCase {
+
+    /**
+     * Check that the deprecated "nGram" filter throws exception for indices created since 8.0.0 and
+     * logs a warning for earlier indices when the filter is used as a custom filter
+     */
+    public void testNGramFilterInCustomAnalyzerDeprecationError() throws IOException {
+        final Settings settings = Settings.builder()
+            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+            .put(
+                IndexMetadata.SETTING_VERSION_CREATED,
+                IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_8_0_0, IndexVersion.current())
+            )
+            .put("index.analysis.analyzer.custom_analyzer.type", "custom")
+            .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
+            .putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
+            .put("index.analysis.filter.my_ngram.type", "nGram")
+            .build();
+
+        try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
+            IllegalArgumentException ex = expectThrows(
+                IllegalArgumentException.class,
+                () -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin)
+            );
+            assertEquals(
+                "The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+                    + "Please change the filter name to [ngram] instead.",
+                ex.getMessage()
+            );
+        }
+    }
+
+    /**
+     * Check that the deprecated "edgeNGram" filter throws exception for indices created since 8.0.0 and
+     * logs a warning for earlier indices when the filter is used as a custom filter
+     */
+    public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOException {
+        final Settings settings = Settings.builder()
+            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+            .put(
+                IndexMetadata.SETTING_VERSION_CREATED,
+                IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_8_0_0, IndexVersion.current())
+            )
+            .put("index.analysis.analyzer.custom_analyzer.type", "custom")
+            .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
+            .putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
+            .put("index.analysis.filter.my_ngram.type", "edgeNGram")
+            .build();
+
+        try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
+            IllegalArgumentException ex = expectThrows(
+                IllegalArgumentException.class,
+                () -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin)
+            );
+            assertEquals(
+                "The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+                    + "Please change the filter name to [edge_ngram] instead.",
+                ex.getMessage()
+            );
+        }
+    }
+
+    /**
+     * Check that we log a deprecation warning for "nGram" and "edgeNGram" tokenizer names with 7.6 and
+     * disallow usages for indices created after 8.0
+     */
+    public void testNGramTokenizerDeprecation() throws IOException {
+        expectThrows(
+            IllegalArgumentException.class,
+            () -> doTestPrebuiltTokenizerDeprecation(
+                "nGram",
+                "ngram",
+                IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_8_0_0, IndexVersion.current()),
+                true
+            )
+        );
+        expectThrows(
+            IllegalArgumentException.class,
+            () -> doTestPrebuiltTokenizerDeprecation(
+                "edgeNGram",
+                "edge_ngram",
+                IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_8_0_0, IndexVersion.current()),
+                true
+            )
+        );
+        expectThrows(
+            IllegalArgumentException.class,
+            () -> doTestCustomTokenizerDeprecation(
+                "nGram",
+                "ngram",
+                IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_8_0_0, IndexVersion.current()),
+                true
+            )
+        );
+        expectThrows(
+            IllegalArgumentException.class,
+            () -> doTestCustomTokenizerDeprecation(
+                "edgeNGram",
+                "edge_ngram",
+                IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_8_0_0, IndexVersion.current()),
+                true
+            )
+        );
+    }
+
+    public void doTestPrebuiltTokenizerDeprecation(String deprecatedName, String replacement, IndexVersion version, boolean expectWarning)
+        throws IOException {
+        final Settings settings = Settings.builder()
+            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+            .put(IndexMetadata.SETTING_VERSION_CREATED, version)
+            .build();
+
+        try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
+            Map<String, TokenizerFactory> tokenizers = createTestAnalysis(
+                IndexSettingsModule.newIndexSettings("index", settings),
+                settings,
+                commonAnalysisPlugin
+            ).tokenizer;
+            TokenizerFactory tokenizerFactory = tokenizers.get(deprecatedName);
+
+            Tokenizer tokenizer = tokenizerFactory.create();
+            assertNotNull(tokenizer);
+            if (expectWarning) {
+                assertWarnings(
+                    "The ["
+                        + deprecatedName
+                        + "] tokenizer name is deprecated and will be removed in a future version. "
+                        + "Please change the tokenizer name to ["
+                        + replacement
+                        + "] instead."
+                );
+            }
+        }
+    }
+
+    public void doTestCustomTokenizerDeprecation(String deprecatedName, String replacement, IndexVersion version, boolean expectWarning)
+        throws IOException {
+        final Settings settings = Settings.builder()
+            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+            .put(IndexMetadata.SETTING_VERSION_CREATED, version)
+            .put("index.analysis.analyzer.custom_analyzer.type", "custom")
+            .put("index.analysis.analyzer.custom_analyzer.tokenizer", "my_tokenizer")
+            .put("index.analysis.tokenizer.my_tokenizer.type", deprecatedName)
+            .build();
+
+        try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
+            createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin);
+
+            if (expectWarning) {
+                assertWarnings(
+                    "The ["
+                        + deprecatedName
+                        + "] tokenizer name is deprecated and will be removed in a future version. "
+                        + "Please change the tokenizer name to ["
+                        + replacement
+                        + "] instead."
+                );
+            }
+        }
+    }
+}
diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerTests.java
index 11d1653439e59..c998e927e25a8 100644
--- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerTests.java
+++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerTests.java
@@ -34,7 +34,7 @@
 
 public class EdgeNGramTokenizerTests extends ESTokenStreamTestCase {
 
-    private static IndexAnalyzers buildAnalyzers(IndexVersion version, String tokenizer) throws IOException {
+    private IndexAnalyzers buildAnalyzers(IndexVersion version, String tokenizer) throws IOException {
         Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
         Settings indexSettings = Settings.builder()
             .put(IndexMetadata.SETTING_VERSION_CREATED, version)
@@ -54,6 +54,7 @@ public void testPreConfiguredTokenizer() throws IOException {
             assertNotNull(analyzer);
             assertAnalyzesTo(analyzer, "test", new String[] { "t", "te" });
         }
+
     }
 
     public void testCustomTokenChars() throws IOException {

From 5ed66a35e217a587f9a5ef8563a6215bb0bea821 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christoph=20B=C3=BCscher?= <christophbuescher@posteo.de>
Date: Wed, 18 Dec 2024 15:50:14 +0100
Subject: [PATCH 2/3] Revert removal of 7.x related code from analysis common

This reverts #113009 and re-adds previous v7 tests since we
now support v7 indices as read-only on v9.
---
 .../common/CommonAnalysisPluginTests.java     | 106 ++++++++++++++++++
 1 file changed, 106 insertions(+)

diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java
index 3263704d38e1d..9972d58b2dcc1 100644
--- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java
+++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java
@@ -53,6 +53,25 @@ public void testNGramFilterInCustomAnalyzerDeprecationError() throws IOException
                 ex.getMessage()
             );
         }
+
+        final Settings settingsPre7 = Settings.builder()
+            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+            .put(
+                IndexMetadata.SETTING_VERSION_CREATED,
+                IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_7_0_0, IndexVersions.V_7_6_0)
+            )
+            .put("index.analysis.analyzer.custom_analyzer.type", "custom")
+            .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
+            .putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
+            .put("index.analysis.filter.my_ngram.type", "nGram")
+            .build();
+        try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
+            createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7), settingsPre7, commonAnalysisPlugin);
+            assertWarnings(
+                "The [nGram] token filter name is deprecated and will be removed in a future version. "
+                    + "Please change the filter name to [ngram] instead."
+            );
+        }
     }
 
     /**
@@ -83,6 +102,26 @@ public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOExcep
                 ex.getMessage()
             );
         }
+
+        final Settings settingsPre7 = Settings.builder()
+            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+            .put(
+                IndexMetadata.SETTING_VERSION_CREATED,
+                IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_7_0_0, IndexVersions.V_7_6_0)
+            )
+            .put("index.analysis.analyzer.custom_analyzer.type", "custom")
+            .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
+            .putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
+            .put("index.analysis.filter.my_ngram.type", "edgeNGram")
+            .build();
+
+        try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
+            createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7), settingsPre7, commonAnalysisPlugin);
+            assertWarnings(
+                "The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
+                    + "Please change the filter name to [edge_ngram] instead."
+            );
+        }
     }
 
     /**
@@ -90,6 +129,39 @@ public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOExcep
      * disallow usages for indices created after 8.0
      */
     public void testNGramTokenizerDeprecation() throws IOException {
+        // tests for prebuilt tokenizer
+        doTestPrebuiltTokenizerDeprecation(
+            "nGram",
+            "ngram",
+            IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_7_0_0, IndexVersions.V_7_5_2),
+            false
+        );
+        doTestPrebuiltTokenizerDeprecation(
+            "edgeNGram",
+            "edge_ngram",
+            IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_7_0_0, IndexVersions.V_7_5_2),
+            false
+        );
+        doTestPrebuiltTokenizerDeprecation(
+            "nGram",
+            "ngram",
+            IndexVersionUtils.randomVersionBetween(
+                random(),
+                IndexVersions.V_7_6_0,
+                IndexVersion.max(IndexVersions.V_7_6_0, IndexVersionUtils.getPreviousVersion(IndexVersions.V_8_0_0))
+            ),
+            true
+        );
+        doTestPrebuiltTokenizerDeprecation(
+            "edgeNGram",
+            "edge_ngram",
+            IndexVersionUtils.randomVersionBetween(
+                random(),
+                IndexVersions.V_7_6_0,
+                IndexVersion.max(IndexVersions.V_7_6_0, IndexVersionUtils.getPreviousVersion(IndexVersions.V_8_0_0))
+            ),
+            true
+        );
         expectThrows(
             IllegalArgumentException.class,
             () -> doTestPrebuiltTokenizerDeprecation(
@@ -108,6 +180,40 @@ public void testNGramTokenizerDeprecation() throws IOException {
                 true
             )
         );
+
+        // same batch of tests for custom tokenizer definition in the settings
+        doTestCustomTokenizerDeprecation(
+            "nGram",
+            "ngram",
+            IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_7_0_0, IndexVersions.V_7_5_2),
+            false
+        );
+        doTestCustomTokenizerDeprecation(
+            "edgeNGram",
+            "edge_ngram",
+            IndexVersionUtils.randomVersionBetween(random(), IndexVersions.V_7_0_0, IndexVersions.V_7_5_2),
+            false
+        );
+        doTestCustomTokenizerDeprecation(
+            "nGram",
+            "ngram",
+            IndexVersionUtils.randomVersionBetween(
+                random(),
+                IndexVersions.V_7_6_0,
+                IndexVersion.max(IndexVersions.V_7_6_0, IndexVersionUtils.getPreviousVersion(IndexVersions.V_8_0_0))
+            ),
+            true
+        );
+        doTestCustomTokenizerDeprecation(
+            "edgeNGram",
+            "edge_ngram",
+            IndexVersionUtils.randomVersionBetween(
+                random(),
+                IndexVersions.V_7_6_0,
+                IndexVersion.max(IndexVersions.V_7_6_0, IndexVersionUtils.getPreviousVersion(IndexVersions.V_8_0_0))
+            ),
+            true
+        );
         expectThrows(
             IllegalArgumentException.class,
             () -> doTestCustomTokenizerDeprecation(

From c098f82d16fb1d66231b9cca42f76f222e42482f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christoph=20B=C3=BCscher?= <christophbuescher@posteo.de>
Date: Wed, 18 Dec 2024 22:10:20 +0100
Subject: [PATCH 3/3] Remove stale TODO on deprecated tokenizer aliases

---
 .../org/elasticsearch/analysis/common/CommonAnalysisPlugin.java  | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
index dec2526db8515..c980aaba71444 100644
--- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
@@ -600,7 +600,6 @@ public List<PreConfiguredTokenizer> getPreConfiguredTokenizers() {
         // This is already broken with normalization, so backwards compat isn't necessary?
         tokenizers.add(PreConfiguredTokenizer.singleton("lowercase", XLowerCaseTokenizer::new));
 
-        // Temporary shim for aliases. TODO deprecate after they are moved
         tokenizers.add(PreConfiguredTokenizer.indexVersion("nGram", (version) -> {
             if (version.onOrAfter(IndexVersions.V_8_0_0)) {
                 throw new IllegalArgumentException(