1 change: 0 additions & 1 deletion docs/plugins/analysis-phonetic.asciidoc
@@ -38,7 +38,6 @@ PUT phonetic_sample
       "my_analyzer": {
         "tokenizer": "standard",
         "filter": [
-          "standard",
           "lowercase",
           "my_metaphone"
         ]

@@ -292,7 +292,6 @@ PUT /standard_example
       "rebuilt_standard": {
         "tokenizer": "standard",
         "filter": [
-          "standard",
           "lowercase" <1>
         ]
       }

@@ -15,7 +15,7 @@ PUT /asciifold_example
         "analyzer" : {
             "default" : {
                 "tokenizer" : "standard",
-                "filter" : ["standard", "asciifolding"]
+                "filter" : ["asciifolding"]
             }
         }
     }
@@ -37,7 +37,7 @@ PUT /asciifold_example
         "analyzer" : {
             "default" : {
                 "tokenizer" : "standard",
-                "filter" : ["standard", "my_ascii_folding"]
+                "filter" : ["my_ascii_folding"]
             }
         },
         "filter" : {

@@ -16,7 +16,7 @@ PUT /elision_example
         "analyzer" : {
             "default" : {
                 "tokenizer" : "standard",
-                "filter" : ["standard", "elision"]
+                "filter" : ["elision"]
             }
         },
         "filter" : {

@@ -26,7 +26,7 @@ PUT /keep_types_example
         "analyzer" : {
             "my_analyzer" : {
                 "tokenizer" : "standard",
-                "filter" : ["standard", "lowercase", "extract_numbers"]
+                "filter" : ["lowercase", "extract_numbers"]
             }
         },
         "filter" : {
@@ -87,7 +87,7 @@ PUT /keep_types_exclude_example
         "analyzer" : {
             "my_analyzer" : {
                 "tokenizer" : "standard",
-                "filter" : ["standard", "lowercase", "remove_numbers"]
+                "filter" : ["lowercase", "remove_numbers"]
             }
         },
         "filter" : {

@@ -27,11 +27,11 @@ PUT /keep_words_example
         "analyzer" : {
             "example_1" : {
                 "tokenizer" : "standard",
-                "filter" : ["standard", "lowercase", "words_till_three"]
+                "filter" : ["lowercase", "words_till_three"]
             },
             "example_2" : {
                 "tokenizer" : "standard",
-                "filter" : ["standard", "lowercase", "words_in_file"]
+                "filter" : ["lowercase", "words_in_file"]
             }
         },
         "filter" : {

@@ -19,7 +19,7 @@ PUT /my_index
         "analyzer" : {
             "my_analyzer" : {
                 "tokenizer" : "standard",
-                "filter" : ["standard", "lowercase", "my_snow"]
+                "filter" : ["lowercase", "my_snow"]
             }
         },
         "filter" : {

@@ -1,10 +1,11 @@
 [[analysis-standard-tokenfilter]]
 === Standard Token Filter
 
+deprecated[6.5.0, This filter is deprecated and will be removed in the next
+major version.]
+
 A token filter of type `standard` that normalizes tokens extracted with
-the
-<<analysis-standard-tokenizer,Standard
-Tokenizer>>.
+the <<analysis-standard-tokenizer,Standard Tokenizer>>.
 
 [TIP]
 ==================================================
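
As an illustrative aside (not part of this diff; index and analyzer names are made up): an analyzer that still references the deprecated filter keeps working during the deprecation period, but creating it now logs the warning added in AnalysisModule below.

[source,js]
--------------------------------------------------
PUT /standard_filter_example
{
  "settings": {
    "analysis": {
      "analyzer": {
        "legacy_analyzer": {
          "tokenizer": "standard",
          "filter": ["standard", "lowercase"]
        }
      }
    }
  }
}
--------------------------------------------------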

@@ -13,7 +13,7 @@ PUT /my_index
         "analyzer" : {
             "my_analyzer" : {
                 "tokenizer" : "standard",
-                "filter" : ["standard", "lowercase", "my_stemmer"]
+                "filter" : ["lowercase", "my_stemmer"]
             }
         },
         "filter" : {
3 changes: 0 additions & 3 deletions docs/reference/mapping/types/percolator.asciidoc
@@ -446,7 +446,6 @@ PUT my_queries1
             "type": "custom",
             "tokenizer": "standard",
             "filter": [
-              "standard",
               "lowercase",
               "wildcard_edge_ngram"
             ]
@@ -597,7 +596,6 @@ PUT my_queries2
             "type": "custom",
             "tokenizer": "standard",
             "filter": [
-              "standard",
               "lowercase",
               "reverse",
               "wildcard_edge_ngram"
@@ -607,7 +605,6 @@ PUT my_queries2
             "type": "custom",
             "tokenizer": "standard",
             "filter": [
-              "standard",
               "lowercase",
               "reverse"
             ]
6 changes: 5 additions & 1 deletion docs/reference/migration/migrate_6_0/analysis.asciidoc
@@ -20,4 +20,8 @@ To protect against this, the maximum number of characters to be analyzed will be
 limited to 1000000 in the next major Elastic version. For this version, by default the limit
 is not set. A deprecation warning will be issued when an analyzed text exceeds 1000000.
 The limit can be set for a particular index with the index setting
-`index.highlight.max_analyzed_offset`.
+`index.highlight.max_analyzed_offset`.
+
+==== `standard` filter has been deprecated
+The `standard` token filter has been deprecated because it doesn't change anything in
+the stream. It will be removed in the next major version.
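
Since the filter is a no-op, migrating is just a matter of deleting `"standard"` from any custom analyzer's filter chain; token output is unchanged. A minimal sketch (index and analyzer names are illustrative):

[source,js]
--------------------------------------------------
PUT /migrated_example
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "tokenizer": "standard",
          "filter": ["lowercase"]
        }
      }
    }
  }
}
--------------------------------------------------

Before this change the filter line would have read `"filter": ["standard", "lowercase"]`; both versions produce identical tokens.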
4 changes: 2 additions & 2 deletions docs/reference/search/suggesters/phrase-suggest.asciidoc
@@ -33,12 +33,12 @@ PUT test
             "trigram": {
               "type": "custom",
               "tokenizer": "standard",
-              "filter": ["standard", "shingle"]
+              "filter": ["shingle"]
             },
             "reverse": {
               "type": "custom",
               "tokenizer": "standard",
-              "filter": ["standard", "reverse"]
+              "filter": ["reverse"]
             }
           },
           "filter": {

@@ -12,7 +12,7 @@
             analyzer:
               my_analyzer:
                 tokenizer: standard
-                filter: ["standard", "lowercase", "my_collator"]
+                filter: ["lowercase", "my_collator"]
             filter:
               my_collator:
                 type: icu_collation

@@ -13,7 +13,7 @@
             analyzer:
               my_analyzer:
                 tokenizer: standard
-                filter: ["standard", "lowercase", "my_metaphone"]
+                filter: ["lowercase", "my_metaphone"]
             filter:
               my_metaphone:
                 type: phonetic

@@ -13,7 +13,7 @@
             analyzer:
               my_analyzer:
                 tokenizer: standard
-                filter: ["standard", "lowercase", "my_metaphone"]
+                filter: ["lowercase", "my_metaphone"]
             filter:
               my_metaphone:
                 type: phonetic

@@ -13,7 +13,7 @@
             analyzer:
               my_analyzer:
                 tokenizer: standard
-                filter: ["standard", "lowercase", "beider_morse"]
+                filter: ["lowercase", "beider_morse"]
             filter:
               beider_morse:
                 type: phonetic

@@ -12,7 +12,7 @@
             analyzer:
               my_analyzer:
                 tokenizer: standard
-                filter: ["standard", "lowercase", "my_metaphone"]
+                filter: ["lowercase", "my_metaphone"]
             filter:
               my_metaphone:
                 type: phonetic

@@ -13,7 +13,7 @@
             analyzer:
               my_analyzer:
                 tokenizer: standard
-                filter: ["standard", "lowercase", "daitch_mokotoff"]
+                filter: ["lowercase", "daitch_mokotoff"]
             filter:
               daitch_mokotoff:
                 type: phonetic

This file was deleted.


@@ -19,14 +19,17 @@
 
 package org.elasticsearch.indices.analysis;
 
+import org.apache.logging.log4j.LogManager;
 import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.TokenStream;
 import org.elasticsearch.Version;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.common.NamedRegistry;
+import org.elasticsearch.common.logging.DeprecationLogger;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
 import org.elasticsearch.index.analysis.AnalysisRegistry;
 import org.elasticsearch.index.analysis.AnalyzerProvider;
 import org.elasticsearch.index.analysis.CharFilterFactory;
@@ -39,7 +42,6 @@
 import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
 import org.elasticsearch.index.analysis.SimpleAnalyzerProvider;
 import org.elasticsearch.index.analysis.StandardAnalyzerProvider;
-import org.elasticsearch.index.analysis.StandardTokenFilterFactory;
 import org.elasticsearch.index.analysis.StandardTokenizerFactory;
 import org.elasticsearch.index.analysis.StopAnalyzerProvider;
 import org.elasticsearch.index.analysis.StopTokenFilterFactory;
@@ -69,6 +71,8 @@ public final class AnalysisModule {
 
     private static final IndexSettings NA_INDEX_SETTINGS;
 
+    private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(LogManager.getLogger(AnalysisModule.class));
+
     private final HunspellService hunspellService;
     private final AnalysisRegistry analysisRegistry;
 
@@ -116,7 +120,16 @@ private NamedRegistry<AnalysisProvider<TokenFilterFactory>> setupTokenFilters(List<AnalysisPlugin> plugins, HunspellService
                                                                                    hunspellService) {
         NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilters = new NamedRegistry<>("token_filter");
         tokenFilters.register("stop", StopTokenFilterFactory::new);
-        tokenFilters.register("standard", StandardTokenFilterFactory::new);
+        tokenFilters.register("standard", (indexSettings, environment, name, settings) -> {
+            DEPRECATION_LOGGER.deprecatedAndMaybeLog("standard_deprecation",
+                "The [standard] token filter name is deprecated and will be removed in a future version.");
+            return new AbstractTokenFilterFactory(indexSettings, name, settings) {
+                @Override
+                public TokenStream create(TokenStream tokenStream) {
+                    return tokenStream;
+                }
+            };
+        });
         tokenFilters.register("shingle", ShingleTokenFilterFactory::new);
         tokenFilters.register("hunspell", requiresAnalysisSettings((indexSettings, env, name, settings) -> new HunspellTokenFilterFactory
             (indexSettings, name, settings, hunspellService)));
@@ -153,7 +166,12 @@ static Map<String, PreConfiguredTokenFilter> setupPreConfiguredTokenFilters(List
 
         // Add filters available in lucene-core
         preConfiguredTokenFilters.register("lowercase", PreConfiguredTokenFilter.singleton("lowercase", true, LowerCaseFilter::new));
-        preConfiguredTokenFilters.register("standard", PreConfiguredTokenFilter.singleton("standard", false, StandardFilter::new));
+        preConfiguredTokenFilters.register("standard",
+            PreConfiguredTokenFilter.singletonWithVersion("standard", false, (reader, version) -> {
+                DEPRECATION_LOGGER.deprecatedAndMaybeLog("standard_deprecation",
+                    "The [standard] token filter is deprecated and will be removed in a future version.");
+                return reader;
+            }));
         /* Note that "stop" is available in lucene-core but it's pre-built
          * version uses a set of English stop words that are in
          * lucene-analyzers-common so "stop" is defined in the analysis-common
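
To see why the filter can be dropped (an illustrative sketch, not part of the change): an `_analyze` request with and without the `standard` filter returns the same tokens, and the variant that names the filter now also emits the `standard_deprecation` warning registered above.

[source,js]
--------------------------------------------------
GET /_analyze
{
  "tokenizer": "standard",
  "filter": ["standard"],
  "text": "The QUICK brown fox"
}
--------------------------------------------------

The tokens come back as `The`, `QUICK`, `brown`, `fox`, exactly as they would with an empty filter list.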

@@ -435,6 +435,19 @@ public Map<String, Dictionary> getHunspellDictionaries() {
         assertSame(dictionary, module.getHunspellService().getDictionary("foo"));
     }
 
+    public void testStandardFilterDeprecation() throws IOException {
+        Version version = VersionUtils.randomVersionBetween(random(), Version.V_5_0_0, Version.CURRENT);
+        Settings settings = Settings.builder()
+            .put("index.analysis.analyzer.my_standard.tokenizer", "standard")
+            .put("index.analysis.analyzer.my_standard.filter", "standard")
+            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+            .put(IndexMetaData.SETTING_VERSION_CREATED, version)
+            .build();
+        IndexAnalyzers analyzers = getIndexAnalyzers(settings);
+        assertTokenStreamContents(analyzers.get("my_standard").tokenStream("", "test"), new String[]{"test"});
+        assertWarnings("The [standard] token filter is deprecated and will be removed in a future version.");
+    }
+
     // Simple char filter that appends text to the term
     public static class AppendCharFilter extends CharFilter {
 

@@ -29,7 +29,6 @@
 import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
 import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
 import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
-import org.elasticsearch.index.analysis.StandardTokenFilterFactory;
 import org.elasticsearch.index.analysis.StandardTokenizerFactory;
 import org.elasticsearch.index.analysis.StopTokenFilterFactory;
 import org.elasticsearch.index.analysis.SynonymGraphTokenFilterFactory;
@@ -167,7 +166,7 @@ private static String toCamelCase(String s) {
                 .put("soraninormalization", MovedToAnalysisCommon.class)
                 .put("soranistem", MovedToAnalysisCommon.class)
                 .put("spanishlightstem", MovedToAnalysisCommon.class)
-                .put("standard", StandardTokenFilterFactory.class)
+                .put("standard", Deprecated.class)
                 .put("stemmeroverride", MovedToAnalysisCommon.class)
                 .put("stop", StopTokenFilterFactory.class)
                 .put("swedishlightstem", MovedToAnalysisCommon.class)

@@ -42,7 +42,7 @@
         },
         "czechAnalyzerWithStemmer":{
             "tokenizer":"standard",
-            "filter":["standard", "lowercase", "stop", "czech_stem"]
+            "filter":["lowercase", "stop", "czech_stem"]
         },
         "decompoundingAnalyzer":{
             "tokenizer":"standard",