Skip to content

Commit 9b51025

Browse files
authored
[Analysis] Deprecate Standard Html Strip Analyzer in 6.x (#37292)
Backport #26719 to 6.x Related #4704 (cherry picked from commit 38b698d)
1 parent 3d8c4a3 commit 9b51025

File tree

7 files changed

+63
-4
lines changed

7 files changed

+63
-4
lines changed

docs/reference/migration/migrate_6_0/analysis.asciidoc

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,12 @@ is not set. A deprecation warning will be issued when an analyzed text exceeds 1
2828
[float]
2929
==== `standard` filter has been deprecated
3030
The `standard` token filter has been deprecated because it doesn't change anything in
31-
the stream. It will be removed in the next major version.
31+
the stream. It will be removed in the next major version.
32+
33+
[float]
34+
==== Deprecated standard_html_strip analyzer
35+
36+
The `standard_html_strip` analyzer has been deprecated, and should be replaced
37+
with a combination of the `standard` tokenizer and `html_strip` char_filter.
38+
Indexes created using this analyzer will still be readable in Elasticsearch 7.0,
39+
but it will not be possible to create new indexes using it.

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ public List<ScriptContext<?>> getContexts() {
174174
public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
175175
Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> analyzers = new TreeMap<>();
176176
analyzers.put("fingerprint", FingerprintAnalyzerProvider::new);
177+
178+
// TODO remove in 8.0
177179
analyzers.put("standard_html_strip", StandardHtmlStripAnalyzerProvider::new);
178180
analyzers.put("pattern", PatternAnalyzerProvider::new);
179181
analyzers.put("snowball", SnowballAnalyzerProvider::new);
@@ -320,6 +322,7 @@ public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
320322
@Override
321323
public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() {
322324
List<PreBuiltAnalyzerProviderFactory> analyzers = new ArrayList<>();
325+
// TODO remove in 8.0
323326
analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.ELASTICSEARCH,
324327
() -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET)));
325328
analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH,

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StandardHtmlStripAnalyzer.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ public StandardHtmlStripAnalyzer() {
3939
super(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
4040
}
4141

42+
/**
43+
* @deprecated in 6.7; new indices cannot use this analyzer from 7.0, and it will be removed in 8.0
44+
*/
45+
@Deprecated
4246
StandardHtmlStripAnalyzer(CharArraySet stopwords) {
4347
super(stopwords);
4448
}

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StandardHtmlStripAnalyzerProvider.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919

2020
package org.elasticsearch.analysis.common;
2121

22+
import org.apache.logging.log4j.LogManager;
2223
import org.apache.lucene.analysis.CharArraySet;
24+
import org.elasticsearch.common.logging.DeprecationLogger;
2325
import org.elasticsearch.common.settings.Settings;
2426
import org.elasticsearch.env.Environment;
2527
import org.elasticsearch.index.IndexSettings;
@@ -28,14 +30,24 @@
2830

2931
public class StandardHtmlStripAnalyzerProvider extends AbstractIndexAnalyzerProvider<StandardHtmlStripAnalyzer> {
3032

33+
private static final DeprecationLogger DEPRECATION_LOGGER =
34+
new DeprecationLogger(LogManager.getLogger(StandardHtmlStripAnalyzerProvider.class));
35+
3136
private final StandardHtmlStripAnalyzer analyzer;
3237

38+
/**
39+
* @deprecated in 6.7; new indices cannot use this analyzer from 7.0, and it will be removed in 8.0
40+
*/
41+
@Deprecated
3342
StandardHtmlStripAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
3443
super(indexSettings, name, settings);
3544
final CharArraySet defaultStopwords = CharArraySet.EMPTY_SET;
3645
CharArraySet stopWords = Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, defaultStopwords);
3746
analyzer = new StandardHtmlStripAnalyzer(stopWords);
3847
analyzer.setVersion(version);
48+
DEPRECATION_LOGGER.deprecatedAndMaybeLog("standard_html_strip_deprecation",
49+
"Deprecated analyzer [standard_html_strip] used, " +
50+
"replace it with a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
3951
}
4052

4153
@Override

modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,16 @@
1919

2020
package org.elasticsearch.analysis.common;
2121

22+
import org.apache.lucene.analysis.Analyzer;
2223
import org.apache.lucene.analysis.MockTokenizer;
2324
import org.apache.lucene.analysis.Tokenizer;
2425
import org.elasticsearch.Version;
2526
import org.elasticsearch.cluster.metadata.IndexMetaData;
2627
import org.elasticsearch.common.settings.Settings;
2728
import org.elasticsearch.env.Environment;
2829
import org.elasticsearch.index.IndexSettings;
30+
import org.elasticsearch.index.analysis.IndexAnalyzers;
31+
import org.elasticsearch.index.analysis.NamedAnalyzer;
2932
import org.elasticsearch.index.analysis.TokenFilterFactory;
3033
import org.elasticsearch.test.ESTestCase;
3134
import org.elasticsearch.test.IndexSettingsModule;
@@ -116,4 +119,27 @@ public void testEdgeNGramNoDeprecationWarningPre6_4() throws IOException {
116119
assertNotNull(tokenFilterFactory.create(tokenizer));
117120
}
118121
}
122+
123+
124+
/**
125+
* Check that using the deprecated analyzer type "standard_html_strip" issues a deprecation warning for indices created on any version from 5.0.0 up to the current version
126+
*/
127+
public void testStandardHtmlStripAnalyzerDeprecationWarning() throws IOException {
128+
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
129+
.put(IndexMetaData.SETTING_VERSION_CREATED,
130+
VersionUtils.randomVersionBetween(random(), Version.V_5_0_0, Version.CURRENT))
131+
.put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip")
132+
.putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b")
133+
.build();
134+
135+
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
136+
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
137+
IndexAnalyzers analyzers = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).indexAnalyzers;
138+
Analyzer analyzer = analyzers.get("custom_analyzer");
139+
assertNotNull(((NamedAnalyzer) analyzer).analyzer());
140+
assertWarnings(
141+
"Deprecated analyzer [standard_html_strip] used, " +
142+
"replace it with a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
143+
}
144+
}
119145
}

modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/20_analyzers.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,14 +69,15 @@
6969

7070
---
7171
"standard_html_strip":
72+
- skip:
73+
version: " - 6.99.99"
74+
reason: only starting from version 7.x this throws an error
7275
- do:
76+
catch: /\[standard_html_strip\] analyzer is not supported for new indices, use a custom analyzer using \[standard\] tokenizer and \[html_strip\] char_filter, plus \[lowercase\] filter/
7377
indices.analyze:
7478
body:
7579
text: <bold/> <italic/>
7680
analyzer: standard_html_strip
77-
- length: { tokens: 2 }
78-
- match: { tokens.0.token: bold }
79-
- match: { tokens.1.token: italic }
8081

8182
---
8283
"pattern":

server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,11 @@ public Analyzer getAnalyzer(String analyzer) throws IOException {
136136
throw new ElasticsearchException("failed to load analyzer for name " + key, ex);
137137
}}
138138
);
139+
} else if ("standard_html_strip".equals(analyzer)) {
140+
Logger logger = LogManager.getLogger(getClass());
141+
DeprecationLogger deprecationLogger = new DeprecationLogger(logger);
142+
deprecationLogger.deprecated("[standard_html_strip] analyzer is deprecated, use a custom analyzer using [standard] tokenizer " +
143+
"and [html_strip] char_filter, plus [lowercase] filter");
139144
}
140145
return analyzerProvider.get(environment, analyzer).get();
141146
}

0 commit comments

Comments
 (0)