Skip to content

Commit 3f5167f

Browse files
author
Christoph Büscher
committed
Deprecate use of htmlStrip as name for HtmlStripCharFilter (#27429)
The camel case name `htmlStrip` should be removed in favour of `html_strip`, but we need to deprecate it first. This change logs a deprecation warning when the `htmlStrip` name is used with indices created on or after version 6.3.0.
1 parent 9ef3a73 commit 3f5167f

File tree

4 files changed

+148
-2
lines changed

4 files changed

+148
-2
lines changed

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@
6767
import org.apache.lucene.analysis.standard.ClassicFilter;
6868
import org.apache.lucene.analysis.tr.ApostropheFilter;
6969
import org.apache.lucene.analysis.util.ElisionFilter;
70+
import org.elasticsearch.common.logging.DeprecationLogger;
71+
import org.elasticsearch.common.logging.Loggers;
7072
import org.elasticsearch.index.analysis.CharFilterFactory;
7173
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
7274
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
@@ -88,6 +90,9 @@
8890
import static org.elasticsearch.plugins.AnalysisPlugin.requriesAnalysisSettings;
8991

9092
public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
93+
94+
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(CommonAnalysisPlugin.class));
95+
9196
@Override
9297
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
9398
Map<String, AnalysisProvider<TokenFilterFactory>> filters = new TreeMap<>();
@@ -171,8 +176,14 @@ public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
171176
public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
172177
List<PreConfiguredCharFilter> filters = new ArrayList<>();
173178
filters.add(PreConfiguredCharFilter.singleton("html_strip", false, HTMLStripCharFilter::new));
174-
// TODO deprecate htmlStrip
175-
filters.add(PreConfiguredCharFilter.singleton("htmlStrip", false, HTMLStripCharFilter::new));
179+
filters.add(PreConfiguredCharFilter.singletonWithVersion("htmlStrip", false, (reader, version) -> {
180+
if (version.onOrAfter(org.elasticsearch.Version.V_6_3_0)) {
181+
DEPRECATION_LOGGER.deprecatedAndMaybeLog("htmlStrip_deprecation",
182+
"The [htmpStrip] char filter name is deprecated and will be removed in a future version. "
183+
+ "Please change the filter name to [html_strip] instead.");
184+
}
185+
return new HTMLStripCharFilter(reader);
186+
}));
176187
return filters;
177188
}
178189

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.analysis.common;
21+
22+
import org.elasticsearch.Version;
23+
import org.elasticsearch.cluster.metadata.IndexMetaData;
24+
import org.elasticsearch.common.settings.Settings;
25+
import org.elasticsearch.env.Environment;
26+
import org.elasticsearch.index.IndexSettings;
27+
import org.elasticsearch.index.analysis.CharFilterFactory;
28+
import org.elasticsearch.test.ESTestCase;
29+
import org.elasticsearch.test.IndexSettingsModule;
30+
import org.elasticsearch.test.VersionUtils;
31+
32+
import java.io.IOException;
33+
import java.io.StringReader;
34+
import java.util.Map;
35+
36+
37+
public class HtmlStripCharFilterFactoryTests extends ESTestCase {
38+
39+
/**
40+
* Check that the deprecated name "htmlStrip" issues a deprecation warning for indices created since 6.3.0
41+
*/
42+
public void testDeprecationWarning() throws IOException {
43+
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
44+
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_3_0, Version.CURRENT))
45+
.build();
46+
47+
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
48+
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
49+
Map<String, CharFilterFactory> charFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).charFilter;
50+
CharFilterFactory charFilterFactory = charFilters.get("htmlStrip");
51+
assertNotNull(charFilterFactory.create(new StringReader("input")));
52+
assertWarnings("The [htmpStrip] char filter name is deprecated and will be removed in a future version. "
53+
+ "Please change the filter name to [html_strip] instead.");
54+
}
55+
}
56+
57+
/**
58+
* Check that the deprecated name "htmlStrip" does NOT issue a deprecation warning for indices created before 6.3.0
59+
*/
60+
public void testNoDeprecationWarningPre6_3() throws IOException {
61+
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
62+
.put(IndexMetaData.SETTING_VERSION_CREATED,
63+
VersionUtils.randomVersionBetween(random(), Version.V_5_0_0, Version.V_6_2_4))
64+
.build();
65+
66+
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
67+
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
68+
Map<String, CharFilterFactory> charFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).charFilter;
69+
CharFilterFactory charFilterFactory = charFilters.get("htmlStrip");
70+
assertNotNull(charFilterFactory.create(new StringReader("")));
71+
}
72+
}
73+
}

modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,56 @@
1717
- match: { error.type: illegal_argument_exception }
1818
- match: { error.reason: "Custom normalizer may not use filter [word_delimiter]" }
1919

20+
---
21+
"htmlStrip_deprecated":
22+
- skip:
23+
version: " - 6.2.99"
24+
reason: deprecated in 6.3
25+
features: "warnings"
26+
27+
- do:
28+
indices.create:
29+
index: test_deprecated_htmlstrip
30+
body:
31+
settings:
32+
index:
33+
analysis:
34+
analyzer:
35+
my_htmlStripWithCharfilter:
36+
tokenizer: keyword
37+
char_filter: ["htmlStrip"]
38+
mappings:
39+
type:
40+
properties:
41+
name:
42+
type: text
43+
analyzer: my_htmlStripWithCharfilter
44+
45+
- do:
46+
warnings:
47+
- 'The [htmpStrip] char filter name is deprecated and will be removed in a future version. Please change the filter name to [html_strip] instead.'
48+
index:
49+
index: test_deprecated_htmlstrip
50+
type: type
51+
id: 1
52+
body: { "name": "foo bar" }
53+
54+
- do:
55+
warnings:
56+
- 'The [htmpStrip] char filter name is deprecated and will be removed in a future version. Please change the filter name to [html_strip] instead.'
57+
index:
58+
index: test_deprecated_htmlstrip
59+
type: type
60+
id: 2
61+
body: { "name": "foo baz" }
62+
63+
- do:
64+
warnings:
65+
- 'The [htmpStrip] char filter name is deprecated and will be removed in a future version. Please change the filter name to [html_strip] instead.'
66+
indices.analyze:
67+
index: test_deprecated_htmlstrip
68+
body:
69+
analyzer: "my_htmlStripWithCharfilter"
70+
text: "<html>foo</html>"
71+
- length: { tokens: 1 }
72+
- match: { tokens.0.token: "\nfoo\n" }

server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredCharFilter.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,15 @@ public static PreConfiguredCharFilter singleton(String name, boolean useFilterFo
4040
(reader, version) -> create.apply(reader));
4141
}
4242

43+
/**
44+
* Create a pre-configured char filter that may not vary at all, but provides access to the Elasticsearch version
45+
*/
46+
public static PreConfiguredCharFilter singletonWithVersion(String name, boolean useFilterForMultitermQueries,
47+
BiFunction<Reader, org.elasticsearch.Version, Reader> create) {
48+
return new PreConfiguredCharFilter(name, CachingStrategy.ONE, useFilterForMultitermQueries,
49+
(reader, version) -> create.apply(reader, version));
50+
}
51+
4352
/**
4453
* Create a pre-configured token filter that may vary based on the Lucene version.
4554
*/

0 commit comments

Comments
 (0)