1 change: 0 additions & 1 deletion docs/plugins/analysis-phonetic.asciidoc
@@ -38,7 +38,6 @@ PUT phonetic_sample
       "my_analyzer": {
         "tokenizer": "standard",
         "filter": [
-          "standard",
           "lowercase",
           "my_metaphone"
         ]

@@ -292,7 +292,6 @@ PUT /standard_example
       "rebuilt_standard": {
         "tokenizer": "standard",
         "filter": [
-          "standard",
           "lowercase" <1>
         ]
       }

@@ -15,7 +15,7 @@ PUT /asciifold_example
         "analyzer" : {
             "default" : {
                 "tokenizer" : "standard",
-                "filter" : ["standard", "asciifolding"]
+                "filter" : ["asciifolding"]
             }
         }
     }
@@ -37,7 +37,7 @@ PUT /asciifold_example
         "analyzer" : {
             "default" : {
                 "tokenizer" : "standard",
-                "filter" : ["standard", "my_ascii_folding"]
+                "filter" : ["my_ascii_folding"]
             }
         },
         "filter" : {

@@ -16,7 +16,7 @@ PUT /elision_example
         "analyzer" : {
             "default" : {
                 "tokenizer" : "standard",
-                "filter" : ["standard", "elision"]
+                "filter" : ["elision"]
             }
         },
         "filter" : {

@@ -26,7 +26,7 @@ PUT /keep_types_example
         "analyzer" : {
             "my_analyzer" : {
                 "tokenizer" : "standard",
-                "filter" : ["standard", "lowercase", "extract_numbers"]
+                "filter" : ["lowercase", "extract_numbers"]
             }
         },
         "filter" : {
@@ -87,7 +87,7 @@ PUT /keep_types_exclude_example
         "analyzer" : {
             "my_analyzer" : {
                 "tokenizer" : "standard",
-                "filter" : ["standard", "lowercase", "remove_numbers"]
+                "filter" : ["lowercase", "remove_numbers"]
             }
         },
         "filter" : {

@@ -27,11 +27,11 @@ PUT /keep_words_example
         "analyzer" : {
             "example_1" : {
                 "tokenizer" : "standard",
-                "filter" : ["standard", "lowercase", "words_till_three"]
+                "filter" : ["lowercase", "words_till_three"]
             },
             "example_2" : {
                 "tokenizer" : "standard",
-                "filter" : ["standard", "lowercase", "words_in_file"]
+                "filter" : ["lowercase", "words_in_file"]
             }
         },
         "filter" : {

@@ -19,7 +19,7 @@ PUT /my_index
         "analyzer" : {
             "my_analyzer" : {
                 "tokenizer" : "standard",
-                "filter" : ["standard", "lowercase", "my_snow"]
+                "filter" : ["lowercase", "my_snow"]
             }
         },
         "filter" : {

@@ -1,10 +1,11 @@
 [[analysis-standard-tokenfilter]]
 === Standard Token Filter
 
+deprecated[6.5.0, This filter is deprecated and will be removed in the next
+major version.]
+
 A token filter of type `standard` that normalizes tokens extracted with
-the
-<<analysis-standard-tokenizer,Standard
-Tokenizer>>.
+the <<analysis-standard-tokenizer,Standard Tokenizer>>.
 
 [TIP]
 ==================================================
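
As an illustrative aside (not part of this diff; index and analyzer names are made up): an analyzer that still references the deprecated filter keeps working during the deprecation period, but creating it now logs the warning added in AnalysisModule below.

[source,js]
--------------------------------------------------
PUT /standard_filter_example
{
  "settings": {
    "analysis": {
      "analyzer": {
        "legacy_analyzer": {
          "tokenizer": "standard",
          "filter": ["standard", "lowercase"]
        }
      }
    }
  }
}
--------------------------------------------------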

@@ -13,7 +13,7 @@ PUT /my_index
         "analyzer" : {
             "my_analyzer" : {
                 "tokenizer" : "standard",
-                "filter" : ["standard", "lowercase", "my_stemmer"]
+                "filter" : ["lowercase", "my_stemmer"]
             }
         },
         "filter" : {
3 changes: 0 additions & 3 deletions docs/reference/mapping/types/percolator.asciidoc
@@ -446,7 +446,6 @@ PUT my_queries1
             "type": "custom",
             "tokenizer": "standard",
             "filter": [
-              "standard",
               "lowercase",
               "wildcard_edge_ngram"
             ]
@@ -597,7 +596,6 @@ PUT my_queries2
             "type": "custom",
             "tokenizer": "standard",
             "filter": [
-              "standard",
               "lowercase",
               "reverse",
               "wildcard_edge_ngram"
@@ -607,7 +605,6 @@ PUT my_queries2
             "type": "custom",
             "tokenizer": "standard",
             "filter": [
-              "standard",
               "lowercase",
               "reverse"
             ]
6 changes: 5 additions & 1 deletion docs/reference/migration/migrate_6_0/analysis.asciidoc
@@ -20,4 +20,8 @@ To protect against this, the maximum number of characters to be analyzed will be
 limited to 1000000 in the next major Elastic version. For this version, by default the limit
 is not set. A deprecation warning will be issued when an analyzed text exceeds 1000000.
 The limit can be set for a particular index with the index setting
-`index.highlight.max_analyzed_offset`.
+`index.highlight.max_analyzed_offset`.
+
+==== `standard` filter has been deprecated
+The `standard` token filter has been deprecated because it doesn't change anything in
+the stream. It will be removed in the next major version.
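
Since the filter is a no-op, migrating is just a matter of deleting `"standard"` from any custom analyzer's filter chain; token output is unchanged. A minimal sketch (index and analyzer names are illustrative):

[source,js]
--------------------------------------------------
PUT /migrated_example
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "tokenizer": "standard",
          "filter": ["lowercase"]
        }
      }
    }
  }
}
--------------------------------------------------

Before this change the filter line would have read `"filter": ["standard", "lowercase"]`; both versions produce identical tokens.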
4 changes: 2 additions & 2 deletions docs/reference/search/suggesters/phrase-suggest.asciidoc
@@ -33,12 +33,12 @@ PUT test
             "trigram": {
               "type": "custom",
               "tokenizer": "standard",
-              "filter": ["standard", "shingle"]
+              "filter": ["shingle"]
             },
             "reverse": {
               "type": "custom",
               "tokenizer": "standard",
-              "filter": ["standard", "reverse"]
+              "filter": ["reverse"]
             }
           },
           "filter": {

@@ -12,7 +12,7 @@
             analyzer:
               my_analyzer:
                 tokenizer: standard
-                filter: ["standard", "lowercase", "my_collator"]
+                filter: ["lowercase", "my_collator"]
             filter:
               my_collator:
                 type: icu_collation

@@ -13,7 +13,7 @@
             analyzer:
               my_analyzer:
                 tokenizer: standard
-                filter: ["standard", "lowercase", "my_metaphone"]
+                filter: ["lowercase", "my_metaphone"]
             filter:
               my_metaphone:
                 type: phonetic

@@ -13,7 +13,7 @@
             analyzer:
               my_analyzer:
                 tokenizer: standard
-                filter: ["standard", "lowercase", "my_metaphone"]
+                filter: ["lowercase", "my_metaphone"]
             filter:
               my_metaphone:
                 type: phonetic

@@ -13,7 +13,7 @@
             analyzer:
               my_analyzer:
                 tokenizer: standard
-                filter: ["standard", "lowercase", "beider_morse"]
+                filter: ["lowercase", "beider_morse"]
             filter:
               beider_morse:
                 type: phonetic

@@ -12,7 +12,7 @@
             analyzer:
               my_analyzer:
                 tokenizer: standard
-                filter: ["standard", "lowercase", "my_metaphone"]
+                filter: ["lowercase", "my_metaphone"]
             filter:
               my_metaphone:
                 type: phonetic

@@ -13,7 +13,7 @@
             analyzer:
               my_analyzer:
                 tokenizer: standard
-                filter: ["standard", "lowercase", "daitch_mokotoff"]
+                filter: ["lowercase", "daitch_mokotoff"]
             filter:
               daitch_mokotoff:
                 type: phonetic

This file was deleted.


@@ -19,14 +19,17 @@
 
 package org.elasticsearch.indices.analysis;
 
+import org.apache.logging.log4j.LogManager;
 import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.TokenStream;
 import org.elasticsearch.Version;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.common.NamedRegistry;
+import org.elasticsearch.common.logging.DeprecationLogger;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
 import org.elasticsearch.index.analysis.AnalysisRegistry;
 import org.elasticsearch.index.analysis.AnalyzerProvider;
 import org.elasticsearch.index.analysis.CharFilterFactory;
@@ -39,7 +42,6 @@
 import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
 import org.elasticsearch.index.analysis.SimpleAnalyzerProvider;
 import org.elasticsearch.index.analysis.StandardAnalyzerProvider;
-import org.elasticsearch.index.analysis.StandardTokenFilterFactory;
 import org.elasticsearch.index.analysis.StandardTokenizerFactory;
 import org.elasticsearch.index.analysis.StopAnalyzerProvider;
 import org.elasticsearch.index.analysis.StopTokenFilterFactory;
@@ -69,6 +71,8 @@ public final class AnalysisModule {
 
     private static final IndexSettings NA_INDEX_SETTINGS;
 
+    private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(LogManager.getLogger(AnalysisModule.class));
+
     private final HunspellService hunspellService;
     private final AnalysisRegistry analysisRegistry;
 
@@ -116,7 +120,16 @@ private NamedRegistry<AnalysisProvider<TokenFilterFactory>> setupTokenFilters(List<AnalysisPlugin> plugins, HunspellService
                                                                                    hunspellService) {
         NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilters = new NamedRegistry<>("token_filter");
         tokenFilters.register("stop", StopTokenFilterFactory::new);
-        tokenFilters.register("standard", StandardTokenFilterFactory::new);
+        tokenFilters.register("standard", (indexSettings, environment, name, settings) -> {
+            DEPRECATION_LOGGER.deprecatedAndMaybeLog("standard_deprecation",
+                "The [standard] token filter name is deprecated and will be removed in a future version.");
+            return new AbstractTokenFilterFactory(indexSettings, name, settings) {
+                @Override
+                public TokenStream create(TokenStream tokenStream) {
+                    return tokenStream;
+                }
+            };
+        });
         tokenFilters.register("shingle", ShingleTokenFilterFactory::new);
         tokenFilters.register("hunspell", requiresAnalysisSettings((indexSettings, env, name, settings) -> new HunspellTokenFilterFactory
             (indexSettings, name, settings, hunspellService)));
@@ -153,7 +166,12 @@ static Map<String, PreConfiguredTokenFilter> setupPreConfiguredTokenFilters(List
 
         // Add filters available in lucene-core
         preConfiguredTokenFilters.register("lowercase", PreConfiguredTokenFilter.singleton("lowercase", true, LowerCaseFilter::new));
-        preConfiguredTokenFilters.register("standard", PreConfiguredTokenFilter.singleton("standard", false, StandardFilter::new));
+        preConfiguredTokenFilters.register("standard",
+            PreConfiguredTokenFilter.singletonWithVersion("standard", false, (reader, version) -> {
+                DEPRECATION_LOGGER.deprecatedAndMaybeLog("standard_deprecation",
+                    "The [standard] token filter is deprecated and will be removed in a future version.");
+                return reader;
+            }));
         /* Note that "stop" is available in lucene-core but it's pre-built
          * version uses a set of English stop words that are in
          * lucene-analyzers-common so "stop" is defined in the analysis-common
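
To see why the filter can be dropped (an illustrative sketch, not part of the change): an `_analyze` request with and without the `standard` filter returns the same tokens, and the variant that names the filter now also emits the `standard_deprecation` warning registered above.

[source,js]
--------------------------------------------------
GET /_analyze
{
  "tokenizer": "standard",
  "filter": ["standard"],
  "text": "The QUICK brown fox"
}
--------------------------------------------------

The tokens come back as `The`, `QUICK`, `brown`, `fox`, exactly as they would with an empty filter list.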

@@ -435,6 +435,19 @@ public Map<String, Dictionary> getHunspellDictionaries() {
         assertSame(dictionary, module.getHunspellService().getDictionary("foo"));
     }
 
+    public void testStandardFilterDeprecation() throws IOException {
+        Version version = VersionUtils.randomVersionBetween(random(), Version.V_5_0_0, Version.CURRENT);
+        Settings settings = Settings.builder()
+            .put("index.analysis.analyzer.my_standard.tokenizer", "standard")
+            .put("index.analysis.analyzer.my_standard.filter", "standard")
+            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+            .put(IndexMetaData.SETTING_VERSION_CREATED, version)
+            .build();
+        IndexAnalyzers analyzers = getIndexAnalyzers(settings);
+        assertTokenStreamContents(analyzers.get("my_standard").tokenStream("", "test"), new String[]{"test"});
+        assertWarnings("The [standard] token filter is deprecated and will be removed in a future version.");
+    }
+
     // Simple char filter that appends text to the term
     public static class AppendCharFilter extends CharFilter {
 

@@ -29,7 +29,6 @@
 import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
 import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
 import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
-import org.elasticsearch.index.analysis.StandardTokenFilterFactory;
 import org.elasticsearch.index.analysis.StandardTokenizerFactory;
 import org.elasticsearch.index.analysis.StopTokenFilterFactory;
 import org.elasticsearch.index.analysis.SynonymGraphTokenFilterFactory;
@@ -167,7 +166,7 @@ private static String toCamelCase(String s) {
                 .put("soraninormalization", MovedToAnalysisCommon.class)
                 .put("soranistem", MovedToAnalysisCommon.class)
                 .put("spanishlightstem", MovedToAnalysisCommon.class)
-                .put("standard", StandardTokenFilterFactory.class)
+                .put("standard", Deprecated.class)
                 .put("stemmeroverride", MovedToAnalysisCommon.class)
                 .put("stop", StopTokenFilterFactory.class)
                 .put("swedishlightstem", MovedToAnalysisCommon.class)

@@ -42,7 +42,7 @@
         },
         "czechAnalyzerWithStemmer":{
             "tokenizer":"standard",
-            "filter":["standard", "lowercase", "stop", "czech_stem"]
+            "filter":["lowercase", "stop", "czech_stem"]
         },
         "decompoundingAnalyzer":{
             "tokenizer":"standard",