 
 package org.elasticsearch.analysis.common;
 
+import org.apache.lucene.analysis.Tokenizer;
 import org.elasticsearch.Version;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
+import org.elasticsearch.index.analysis.TokenizerFactory;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.test.IndexSettingsModule;
 import org.elasticsearch.test.VersionUtils;
 
 import java.io.IOException;
+import java.util.Map;
 
 public class CommonAnalysisPluginTests extends ESTestCase {
 
@@ -102,4 +105,82 @@ public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOExcep |
                 + "Please change the filter name to [edge_ngram] instead.");
         }
     }
+
+    /**
+     * Check that we log a deprecation warning for the "nGram" and "edgeNGram" tokenizer names starting with 7.6
+     * and disallow their use for indices created on or after 8.0.
+     */
+    public void testNGramTokenizerDeprecation() throws IOException {
+        // tests for prebuilt tokenizer
+        doTestPrebuiltTokenizerDeprecation("nGram", "ngram",
+            VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, Version.V_7_5_2), false);
+        doTestPrebuiltTokenizerDeprecation("edgeNGram", "edge_ngram",
+            VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, Version.V_7_5_2), false);
+        doTestPrebuiltTokenizerDeprecation("nGram", "ngram",
+            VersionUtils.randomVersionBetween(random(), Version.V_7_6_0,
+                Version.max(Version.V_7_6_0, VersionUtils.getPreviousVersion(Version.V_8_0_0))),
+            true);
+        doTestPrebuiltTokenizerDeprecation("edgeNGram", "edge_ngram",
+            VersionUtils.randomVersionBetween(random(), Version.V_7_6_0,
+                Version.max(Version.V_7_6_0, VersionUtils.getPreviousVersion(Version.V_8_0_0))), true);
+        expectThrows(IllegalArgumentException.class, () -> doTestPrebuiltTokenizerDeprecation("nGram", "ngram",
+            VersionUtils.randomVersionBetween(random(), Version.V_8_0_0, Version.CURRENT), true));
+        expectThrows(IllegalArgumentException.class, () -> doTestPrebuiltTokenizerDeprecation("edgeNGram", "edge_ngram",
+            VersionUtils.randomVersionBetween(random(), Version.V_8_0_0, Version.CURRENT), true));
+
+        // same batch of tests for custom tokenizer definition in the settings
+        doTestCustomTokenizerDeprecation("nGram", "ngram",
+            VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, Version.V_7_5_2), false);
+        doTestCustomTokenizerDeprecation("edgeNGram", "edge_ngram",
+            VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, Version.V_7_5_2), false);
+        doTestCustomTokenizerDeprecation("nGram", "ngram",
+            VersionUtils.randomVersionBetween(random(), Version.V_7_6_0,
+                Version.max(Version.V_7_6_0, VersionUtils.getPreviousVersion(Version.V_8_0_0))),
+            true);
+        doTestCustomTokenizerDeprecation("edgeNGram", "edge_ngram",
+            VersionUtils.randomVersionBetween(random(), Version.V_7_6_0,
+                Version.max(Version.V_7_6_0, VersionUtils.getPreviousVersion(Version.V_8_0_0))), true);
+        expectThrows(IllegalArgumentException.class, () -> doTestCustomTokenizerDeprecation("nGram", "ngram",
+            VersionUtils.randomVersionBetween(random(), Version.V_8_0_0, Version.CURRENT), true));
+        expectThrows(IllegalArgumentException.class, () -> doTestCustomTokenizerDeprecation("edgeNGram", "edge_ngram",
+            VersionUtils.randomVersionBetween(random(), Version.V_8_0_0, Version.CURRENT), true));
+    }
+
+    public void doTestPrebuiltTokenizerDeprecation(String deprecatedName, String replacement, Version version, boolean expectWarning)
+        throws IOException {
+        final Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+            .put(IndexMetaData.SETTING_VERSION_CREATED, version).build();
+
+        try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
+            Map<String, TokenizerFactory> tokenizers = createTestAnalysis(
+                IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin).tokenizer;
+            TokenizerFactory tokenizerFactory = tokenizers.get(deprecatedName);
+
+            Tokenizer tokenizer = tokenizerFactory.create();
+            assertNotNull(tokenizer);
+            if (expectWarning) {
+                assertWarnings("The [" + deprecatedName + "] tokenizer name is deprecated and will be removed in a future version. "
+                    + "Please change the tokenizer name to [" + replacement + "] instead.");
+            }
+        }
+    }
+
+    public void doTestCustomTokenizerDeprecation(String deprecatedName, String replacement, Version version, boolean expectWarning)
+        throws IOException {
+        final Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+            .put(IndexMetaData.SETTING_VERSION_CREATED, version)
+            .put("index.analysis.analyzer.custom_analyzer.type", "custom")
+            .put("index.analysis.analyzer.custom_analyzer.tokenizer", "my_tokenizer")
+            .put("index.analysis.tokenizer.my_tokenizer.type", deprecatedName)
+            .build();
+
+        try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
+            createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin);
+
+            if (expectWarning) {
+                assertWarnings("The [" + deprecatedName + "] tokenizer name is deprecated and will be removed in a future version. "
+                    + "Please change the tokenizer name to [" + replacement + "] instead.");
+            }
+        }
+    }
 }
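
For reference, the version gating these tests exercise follows a three-step pattern: indices created before 7.6 accept the old tokenizer names silently, indices created on 7.6 or later still work but emit a deprecation warning, and indices created on 8.0 or later reject the old names with an IllegalArgumentException. The following is a minimal sketch of that check, not the actual CommonAnalysisPlugin implementation; the helper name and the exact message wording are illustrative, and only Version.onOrAfter plus the Version constants already used in the tests above are assumed.

// Illustrative sketch only; not the plugin's real registration or logging code.
static void checkDeprecatedTokenizerName(String deprecatedName, String replacement, Version indexCreatedVersion) {
    if (indexCreatedVersion.onOrAfter(Version.V_8_0_0)) {
        // 8.0+ indices: the old name is no longer accepted at all
        throw new IllegalArgumentException("The [" + deprecatedName + "] tokenizer name was deprecated. "
            + "Please change the tokenizer name to [" + replacement + "] instead.");
    } else if (indexCreatedVersion.onOrAfter(Version.V_7_6_0)) {
        // 7.6+ indices: still usable, but this is where the deprecation warning the tests
        // assert on would be emitted (the real plugin uses its deprecation logging here)
    }
    // pre-7.6 indices: old name accepted without a warning
}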