Skip to content

Commit e808530

Browse files
author
Christoph Büscher
committed
Deprecate nGram and edgeNGram names for ngram filters (#27429)
The camel-case name `nGram` should be removed in favour of `ngram`, and similarly `edgeNGram` in favour of `edge_ngram`. Before removal, we need to deprecate the camel-case names first. This change logs a deprecation warning when one of the camel-case names is used on an index created with version 6.4.0 or higher.
1 parent 5deda69 commit e808530

File tree

4 files changed

+146
-24
lines changed

4 files changed

+146
-24
lines changed

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -214,9 +214,14 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
214214
filters.add(PreConfiguredTokenFilter.singleton("dutch_stem", false, input -> new SnowballFilter(input, new DutchStemmer())));
215215
filters.add(PreConfiguredTokenFilter.singleton("edge_ngram", false, input ->
216216
new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE)));
217-
// TODO deprecate edgeNGram
218-
filters.add(PreConfiguredTokenFilter.singleton("edgeNGram", false, input ->
219-
new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE)));
217+
filters.add(PreConfiguredTokenFilter.singletonWithVersion("edgeNGram", false, (reader, version) -> { // deprecated camel-case alias of "edge_ngram"
218+
if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) { // only warn from version 6.4.0 onwards
219+
DEPRECATION_LOGGER.deprecatedAndMaybeLog("edgeNGram_deprecation",
220+
"The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
221+
+ "Please change the filter name to [edge_ngram] instead.");
222+
}
223+
return new EdgeNGramTokenFilter(reader, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE); // same filter as "edge_ngram"
224+
}));
220225
filters.add(PreConfiguredTokenFilter.singleton("elision", true,
221226
input -> new ElisionFilter(input, FrenchAnalyzer.DEFAULT_ARTICLES)));
222227
filters.add(PreConfiguredTokenFilter.singleton("french_stem", false, input -> new SnowballFilter(input, new FrenchStemmer())));
@@ -233,8 +238,14 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
233238
LimitTokenCountFilterFactory.DEFAULT_MAX_TOKEN_COUNT,
234239
LimitTokenCountFilterFactory.DEFAULT_CONSUME_ALL_TOKENS)));
235240
filters.add(PreConfiguredTokenFilter.singleton("ngram", false, NGramTokenFilter::new));
236-
// TODO deprecate nGram
237-
filters.add(PreConfiguredTokenFilter.singleton("nGram", false, NGramTokenFilter::new));
241+
filters.add(PreConfiguredTokenFilter.singletonWithVersion("nGram", false, (reader, version) -> { // deprecated camel-case alias of "ngram"
242+
if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) { // only warn from version 6.4.0 onwards
243+
DEPRECATION_LOGGER.deprecatedAndMaybeLog("nGram_deprecation",
244+
"The [nGram] token filter name is deprecated and will be removed in a future version. "
245+
+ "Please change the filter name to [ngram] instead.");
246+
}
247+
return new NGramTokenFilter(reader); // same filter as the "ngram" registration
248+
}));
238249
filters.add(PreConfiguredTokenFilter.singleton("persian_normalization", true, PersianNormalizationFilter::new));
239250
filters.add(PreConfiguredTokenFilter.singleton("porter_stem", false, PorterStemFilter::new));
240251
filters.add(PreConfiguredTokenFilter.singleton("reverse", false, ReverseStringFilter::new));
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.analysis.common;
21+
22+
import org.apache.lucene.analysis.MockTokenizer;
23+
import org.apache.lucene.analysis.Tokenizer;
24+
import org.elasticsearch.Version;
25+
import org.elasticsearch.cluster.metadata.IndexMetaData;
26+
import org.elasticsearch.common.settings.Settings;
27+
import org.elasticsearch.env.Environment;
28+
import org.elasticsearch.index.IndexSettings;
29+
import org.elasticsearch.index.analysis.TokenFilterFactory;
30+
import org.elasticsearch.test.ESTestCase;
31+
import org.elasticsearch.test.IndexSettingsModule;
32+
import org.elasticsearch.test.VersionUtils;
33+
34+
import java.io.IOException;
35+
import java.io.StringReader;
36+
import java.util.Map;
37+
38+
public class CommonAnalysisPluginTest extends ESTestCase { // deprecation tests for the "nGram" / "edgeNGram" token filter names
39+
40+
/**
41+
* Check that the deprecated name "nGram" issues a deprecation warning for indices created on or after 6.4.0
42+
*/
43+
public void testNGramDeprecationWarning() throws IOException {
44+
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
45+
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT))
46+
.build();
47+
48+
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
49+
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
50+
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter;
51+
TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram"); // look the filter up by its deprecated name
52+
Tokenizer tokenizer = new MockTokenizer();
53+
tokenizer.setReader(new StringReader("foo bar"));
54+
assertNotNull(tokenFilterFactory.create(tokenizer)); // the warning is asserted only after the filter has been created
55+
assertWarnings(
56+
"The [nGram] token filter name is deprecated and will be removed in a future version. "
57+
+ "Please change the filter name to [ngram] instead.");
58+
}
59+
}
60+
61+
/**
62+
* Check that the deprecated name "nGram" does NOT issue a deprecation warning for indices created before 6.4.0
63+
*/
64+
public void testNGramNoDeprecationWarningPre6_4() throws IOException {
65+
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
66+
.put(IndexMetaData.SETTING_VERSION_CREATED,
67+
VersionUtils.randomVersionBetween(random(), Version.V_5_0_0, Version.V_6_3_0))
68+
.build();
69+
70+
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
71+
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
72+
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter;
73+
TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram");
74+
Tokenizer tokenizer = new MockTokenizer();
75+
tokenizer.setReader(new StringReader("foo bar"));
76+
assertNotNull(tokenFilterFactory.create(tokenizer)); // no assertWarnings call: presumably the test base class flags unexpected warnings - TODO confirm
77+
}
78+
}
79+
80+
/**
81+
* Check that the deprecated name "edgeNGram" issues a deprecation warning for indices created on or after 6.4.0
82+
*/
83+
public void testEdgeNGramDeprecationWarning() throws IOException {
84+
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
85+
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT))
86+
.build();
87+
88+
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
89+
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
90+
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter;
91+
TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram"); // look the filter up by its deprecated name
92+
Tokenizer tokenizer = new MockTokenizer();
93+
tokenizer.setReader(new StringReader("foo bar"));
94+
assertNotNull(tokenFilterFactory.create(tokenizer)); // the warning is asserted only after the filter has been created
95+
assertWarnings(
96+
"The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
97+
+ "Please change the filter name to [edge_ngram] instead.");
98+
}
99+
}
100+
101+
/**
102+
* Check that the deprecated name "edgeNGram" does NOT issue a deprecation warning for indices created before 6.4.0
103+
*/
104+
public void testEdgeNGramNoDeprecationWarningPre6_4() throws IOException {
105+
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
106+
.put(IndexMetaData.SETTING_VERSION_CREATED,
107+
VersionUtils.randomVersionBetween(random(), Version.V_5_0_0, Version.V_6_3_0))
108+
.build();
109+
110+
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
111+
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
112+
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter;
113+
TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram");
114+
Tokenizer tokenizer = new MockTokenizer();
115+
tokenizer.setReader(new StringReader("foo bar"));
116+
assertNotNull(tokenFilterFactory.create(tokenizer)); // no assertWarnings call: presumably the test base class flags unexpected warnings - TODO confirm
117+
}
118+
}
119+
}

modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,11 @@
3434
import org.elasticsearch.index.analysis.NGramTokenizerFactory;
3535
import org.elasticsearch.test.ESTokenStreamTestCase;
3636
import org.elasticsearch.test.IndexSettingsModule;
37+
import org.elasticsearch.test.VersionUtils;
3738

3839
import java.io.IOException;
3940
import java.io.StringReader;
40-
import java.lang.reflect.Field;
41-
import java.lang.reflect.Modifier;
42-
import java.util.ArrayList;
4341
import java.util.Arrays;
44-
import java.util.List;
45-
import java.util.Random;
4642

4743
import static com.carrotsearch.randomizedtesting.RandomizedTest.scaledRandomIntBetween;
4844
import static org.hamcrest.Matchers.instanceOf;
@@ -131,7 +127,7 @@ public void testBackwardsCompatibilityEdgeNgramTokenFilter() throws Exception {
131127
for (int i = 0; i < iters; i++) {
132128
final Index index = new Index("test", "_na_");
133129
final String name = "ngr";
134-
Version v = randomVersion(random());
130+
Version v = VersionUtils.randomVersion(random());
135131
Builder builder = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3);
136132
boolean reverse = random().nextBoolean();
137133
if (reverse) {
@@ -152,7 +148,6 @@ public void testBackwardsCompatibilityEdgeNgramTokenFilter() throws Exception {
152148
}
153149
}
154150

155-
156151
/*
157152
* test that throws an error when trying to get a NGramTokenizer where difference between max_gram and min_gram
158153
* is greater than the allowed value of max_ngram_diff
@@ -177,16 +172,4 @@ public void testMaxNGramDiffException() throws Exception{
177172
+ IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey() + "] index level setting.",
178173
ex.getMessage());
179174
}
180-
181-
private Version randomVersion(Random random) throws IllegalArgumentException, IllegalAccessException {
182-
Field[] declaredFields = Version.class.getFields();
183-
List<Field> versionFields = new ArrayList<>();
184-
for (Field field : declaredFields) {
185-
if ((field.getModifiers() & Modifier.STATIC) != 0 && field.getName().startsWith("V_") && field.getType() == Version.class) {
186-
versionFields.add(field);
187-
}
188-
}
189-
return (Version) versionFields.get(random.nextInt(versionFields.size())).get(Version.class);
190-
}
191-
192175
}

server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,15 @@ public static PreConfiguredTokenFilter singleton(String name, boolean useFilterF
4141
(tokenStream, version) -> create.apply(tokenStream));
4242
}
4343

44+
/**
45+
* Create a pre-configured token filter that may not vary at all, but whose creation function is also passed the {@link Version}.
46+
*/
47+
public static PreConfiguredTokenFilter singletonWithVersion(String name, boolean useFilterForMultitermQueries,
48+
BiFunction<TokenStream, Version, TokenStream> create) {
49+
return new PreConfiguredTokenFilter(name, useFilterForMultitermQueries, CachingStrategy.ONE,
50+
(tokenStream, version) -> create.apply(tokenStream, version)); // forward both arguments unchanged to the supplied function
51+
}
52+
4453
/**
4554
* Create a pre-configured token filter that may vary based on the Lucene version.
4655
*/

0 commit comments

Comments
 (0)