Skip to content

Commit 7bb2da1

Browse files
author
Christoph Büscher
authored
Remove nGram and edgeNGram token filter names (#38911)
In #30209 we deprecated the camel case `nGram` filter name in favour of `ngram` and did the same for `edgeNGram` and `edge_ngram`. Using these names has been deprecated since 6.4 and is issuing deprecation warnings since then. I think we can remove these filters in 8.0. In a backport of this PR I would change what was a deprecation warning from 6.4 to an error starting with new indices created in 7.0.
1 parent 8ee9657 commit 7bb2da1

File tree

10 files changed

+43
-149
lines changed

10 files changed

+43
-149
lines changed

docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
[[analysis-edgengram-tokenfilter]]
22
=== Edge NGram Token Filter
33

4-
A token filter of type `edgeNGram`.
4+
A token filter of type `edge_ngram`.
55

6-
The following are settings that can be set for a `edgeNGram` token
6+
The following are settings that can be set for a `edge_ngram` token
77
filter type:
88

99
[cols="<,<",options="header",]

docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
[[analysis-ngram-tokenfilter]]
22
=== NGram Token Filter
33

4-
A token filter of type `nGram`.
4+
A token filter of type `ngram`.
55

6-
The following are settings that can be set for a `nGram` token filter
6+
The following are settings that can be set for a `ngram` token filter
77
type:
88

99
[cols="<,<",options="header",]

docs/reference/migration/migrate_8_0.asciidoc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,8 @@ your application to {es} 8.0.
99

1010
See also <<release-highlights>> and <<es-release-notes>>.
1111

12-
coming[8.0.0]
12+
coming[8.0.0]
13+
14+
* <<breaking_80_mappings_changes>>
15+
16+
include::migrate_8_0/mappings.asciidoc[]
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
[float]
2+
[[breaking_80_mappings_changes]]
3+
=== Mapping changes
4+
5+
[float]
6+
==== The `nGram` and `edgeNGram` token filter names have been removed
7+
8+
The `nGram` and `edgeNGram` token filter names that have been deprecated since
9+
version 6.4 have been removed. Both token filters should be used by their
10+
alternative names `ngram` and `edge_ngram` instead.

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -414,14 +414,6 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
414414
filters.add(PreConfiguredTokenFilter.singleton("dutch_stem", false, input -> new SnowballFilter(input, new DutchStemmer())));
415415
filters.add(PreConfiguredTokenFilter.singleton("edge_ngram", false, false, input ->
416416
new EdgeNGramTokenFilter(input, 1)));
417-
filters.add(PreConfiguredTokenFilter.singletonWithVersion("edgeNGram", false, false, (reader, version) -> {
418-
if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) {
419-
deprecationLogger.deprecatedAndMaybeLog("edgeNGram_deprecation",
420-
"The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
421-
+ "Please change the filter name to [edge_ngram] instead.");
422-
}
423-
return new EdgeNGramTokenFilter(reader, 1);
424-
}));
425417
filters.add(PreConfiguredTokenFilter.singleton("elision", true,
426418
input -> new ElisionFilter(input, FrenchAnalyzer.DEFAULT_ARTICLES)));
427419
filters.add(PreConfiguredTokenFilter.singleton("french_stem", false, input -> new SnowballFilter(input, new FrenchStemmer())));
@@ -438,14 +430,6 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
438430
LimitTokenCountFilterFactory.DEFAULT_MAX_TOKEN_COUNT,
439431
LimitTokenCountFilterFactory.DEFAULT_CONSUME_ALL_TOKENS)));
440432
filters.add(PreConfiguredTokenFilter.singleton("ngram", false, false, reader -> new NGramTokenFilter(reader, 1, 2, false)));
441-
filters.add(PreConfiguredTokenFilter.singletonWithVersion("nGram", false, false, (reader, version) -> {
442-
if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) {
443-
deprecationLogger.deprecatedAndMaybeLog("nGram_deprecation",
444-
"The [nGram] token filter name is deprecated and will be removed in a future version. "
445-
+ "Please change the filter name to [ngram] instead.");
446-
}
447-
return new NGramTokenFilter(reader, 1, 2, false);
448-
}));
449433
filters.add(PreConfiguredTokenFilter.singleton("persian_normalization", true, PersianNormalizationFilter::new));
450434
filters.add(PreConfiguredTokenFilter.singleton("porter_stem", false, PorterStemFilter::new));
451435
filters.add(PreConfiguredTokenFilter.singleton("reverse", false, ReverseStringFilter::new));

modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,6 @@ protected Map<String, Class<?>> getPreConfiguredTokenFilters() {
185185
filters.put("delimited_payload", org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilterFactory.class);
186186
filters.put("dutch_stem", SnowballPorterFilterFactory.class);
187187
filters.put("edge_ngram", null);
188-
filters.put("edgeNGram", null);
189188
filters.put("elision", null);
190189
filters.put("french_stem", SnowballPorterFilterFactory.class);
191190
filters.put("german_stem", null);
@@ -197,7 +196,6 @@ protected Map<String, Class<?>> getPreConfiguredTokenFilters() {
197196
filters.put("length", null);
198197
filters.put("limit", LimitTokenCountFilterFactory.class);
199198
filters.put("ngram", null);
200-
filters.put("nGram", null);
201199
filters.put("persian_normalization", null);
202200
filters.put("porter_stem", null);
203201
filters.put("reverse", ReverseStringFilterFactory.class);

modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java

Lines changed: 0 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -20,107 +20,21 @@
2020
package org.elasticsearch.analysis.common;
2121

2222
import org.apache.lucene.analysis.Analyzer;
23-
import org.apache.lucene.analysis.MockTokenizer;
24-
import org.apache.lucene.analysis.Tokenizer;
2523
import org.elasticsearch.Version;
2624
import org.elasticsearch.cluster.metadata.IndexMetaData;
2725
import org.elasticsearch.common.settings.Settings;
2826
import org.elasticsearch.env.Environment;
2927
import org.elasticsearch.index.IndexSettings;
3028
import org.elasticsearch.index.analysis.IndexAnalyzers;
3129
import org.elasticsearch.index.analysis.NamedAnalyzer;
32-
import org.elasticsearch.index.analysis.TokenFilterFactory;
3330
import org.elasticsearch.test.ESTestCase;
3431
import org.elasticsearch.test.IndexSettingsModule;
3532
import org.elasticsearch.test.VersionUtils;
3633

3734
import java.io.IOException;
38-
import java.io.StringReader;
39-
import java.util.Map;
4035

4136
public class CommonAnalysisPluginTests extends ESTestCase {
4237

43-
/**
44-
* Check that the deprecated name "nGram" issues a deprecation warning for indices created since 6.3.0
45-
*/
46-
public void testNGramDeprecationWarning() throws IOException {
47-
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
48-
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT))
49-
.build();
50-
51-
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
52-
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
53-
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter;
54-
TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram");
55-
Tokenizer tokenizer = new MockTokenizer();
56-
tokenizer.setReader(new StringReader("foo bar"));
57-
assertNotNull(tokenFilterFactory.create(tokenizer));
58-
assertWarnings(
59-
"The [nGram] token filter name is deprecated and will be removed in a future version. "
60-
+ "Please change the filter name to [ngram] instead.");
61-
}
62-
}
63-
64-
/**
65-
* Check that the deprecated name "nGram" does NOT issues a deprecation warning for indices created before 6.4.0
66-
*/
67-
public void testNGramNoDeprecationWarningPre6_4() throws IOException {
68-
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
69-
.put(IndexMetaData.SETTING_VERSION_CREATED,
70-
VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, Version.V_6_3_0))
71-
.build();
72-
73-
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
74-
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
75-
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter;
76-
TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram");
77-
Tokenizer tokenizer = new MockTokenizer();
78-
tokenizer.setReader(new StringReader("foo bar"));
79-
assertNotNull(tokenFilterFactory.create(tokenizer));
80-
}
81-
}
82-
83-
/**
84-
* Check that the deprecated name "edgeNGram" issues a deprecation warning for indices created since 6.3.0
85-
*/
86-
public void testEdgeNGramDeprecationWarning() throws IOException {
87-
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
88-
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT))
89-
.build();
90-
91-
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
92-
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
93-
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter;
94-
TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram");
95-
Tokenizer tokenizer = new MockTokenizer();
96-
tokenizer.setReader(new StringReader("foo bar"));
97-
assertNotNull(tokenFilterFactory.create(tokenizer));
98-
assertWarnings(
99-
"The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
100-
+ "Please change the filter name to [edge_ngram] instead.");
101-
}
102-
}
103-
104-
/**
105-
* Check that the deprecated name "edgeNGram" does NOT issues a deprecation warning for indices created before 6.4.0
106-
*/
107-
public void testEdgeNGramNoDeprecationWarningPre6_4() throws IOException {
108-
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
109-
.put(IndexMetaData.SETTING_VERSION_CREATED,
110-
VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, Version.V_6_3_0))
111-
.build();
112-
113-
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
114-
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
115-
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter;
116-
TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram");
117-
Tokenizer tokenizer = new MockTokenizer();
118-
tokenizer.setReader(new StringReader("foo bar"));
119-
assertNotNull(tokenFilterFactory.create(tokenizer));
120-
}
121-
}
122-
123-
12438
/**
12539
* Check that the deprecated analyzer name "standard_html_strip" throws exception for indices created since 7.0.0
12640
*/

modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ public void testNgramHighlightingWithBrokenPositions() throws IOException {
8181
.put("analysis.tokenizer.autocomplete.max_gram", 20)
8282
.put("analysis.tokenizer.autocomplete.min_gram", 1)
8383
.put("analysis.tokenizer.autocomplete.token_chars", "letter,digit")
84-
.put("analysis.tokenizer.autocomplete.type", "nGram")
84+
.put("analysis.tokenizer.autocomplete.type", "ngram")
8585
.put("analysis.filter.wordDelimiter.type", "word_delimiter")
8686
.putList("analysis.filter.wordDelimiter.type_table",
8787
"& => ALPHANUM", "| => ALPHANUM", "! => ALPHANUM",

modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml

Lines changed: 22 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -23,38 +23,6 @@
2323
- match: { detail.tokenizer.tokens.0.token: Foo Bar! }
2424

2525
---
26-
"nGram":
27-
- do:
28-
indices.analyze:
29-
body:
30-
text: good
31-
explain: true
32-
tokenizer:
33-
type: nGram
34-
min_gram: 2
35-
max_gram: 2
36-
- length: { detail.tokenizer.tokens: 3 }
37-
- match: { detail.tokenizer.name: _anonymous_tokenizer }
38-
- match: { detail.tokenizer.tokens.0.token: go }
39-
- match: { detail.tokenizer.tokens.1.token: oo }
40-
- match: { detail.tokenizer.tokens.2.token: od }
41-
42-
---
43-
"nGram_exception":
44-
- skip:
45-
version: " - 6.99.99"
46-
reason: only starting from version 7.x this throws an error
47-
- do:
48-
catch: /The difference between max_gram and min_gram in NGram Tokenizer must be less than or equal to[:] \[1\] but was \[2\]\. This limit can be set by changing the \[index.max_ngram_diff\] index level setting\./
49-
indices.analyze:
50-
body:
51-
text: good
52-
explain: true
53-
tokenizer:
54-
type: nGram
55-
min_gram: 2
56-
max_gram: 4
57-
---
5826
"simple_pattern":
5927
- do:
6028
indices.analyze:
@@ -133,7 +101,7 @@
133101
text: "foobar"
134102
explain: true
135103
tokenizer:
136-
type: nGram
104+
type: ngram
137105
min_gram: 3
138106
max_gram: 3
139107
- length: { detail.tokenizer.tokens: 4 }
@@ -162,15 +130,31 @@
162130
body:
163131
text: "foo"
164132
explain: true
165-
tokenizer: nGram
133+
tokenizer: ngram
166134
- length: { detail.tokenizer.tokens: 5 }
167-
- match: { detail.tokenizer.name: nGram }
135+
- match: { detail.tokenizer.name: ngram }
168136
- match: { detail.tokenizer.tokens.0.token: f }
169137
- match: { detail.tokenizer.tokens.1.token: fo }
170138
- match: { detail.tokenizer.tokens.2.token: o }
171139
- match: { detail.tokenizer.tokens.3.token: oo }
172140
- match: { detail.tokenizer.tokens.4.token: o }
173141

142+
---
143+
"ngram_exception":
144+
- skip:
145+
version: " - 6.99.99"
146+
reason: only starting from version 7.x this throws an error
147+
- do:
148+
catch: /The difference between max_gram and min_gram in NGram Tokenizer must be less than or equal to[:] \[1\] but was \[2\]\. This limit can be set by changing the \[index.max_ngram_diff\] index level setting\./
149+
indices.analyze:
150+
body:
151+
text: good
152+
explain: true
153+
tokenizer:
154+
type: ngram
155+
min_gram: 2
156+
max_gram: 4
157+
174158
---
175159
"edge_ngram":
176160
- do:
@@ -194,7 +178,7 @@
194178
text: "foo"
195179
explain: true
196180
tokenizer:
197-
type: edgeNGram
181+
type: edge_ngram
198182
min_gram: 1
199183
max_gram: 3
200184
- length: { detail.tokenizer.tokens: 3 }
@@ -219,9 +203,9 @@
219203
body:
220204
text: "foo"
221205
explain: true
222-
tokenizer: edgeNGram
206+
tokenizer: edge_ngram
223207
- length: { detail.tokenizer.tokens: 2 }
224-
- match: { detail.tokenizer.name: edgeNGram }
208+
- match: { detail.tokenizer.name: edge_ngram }
225209
- match: { detail.tokenizer.tokens.0.token: f }
226210
- match: { detail.tokenizer.tokens.1.token: fo }
227211

modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@
7676
analysis:
7777
tokenizer:
7878
trigram:
79-
type: nGram
79+
type: ngram
8080
min_gram: 3
8181
max_gram: 3
8282
filter:

0 commit comments

Comments
 (0)