Commit 4c5bd57

Rename simple pattern tokenizers (#25300)
Changed the names to snake case for consistency. Related to #25159; original issue #23363.
1 parent: 0d6c47f · commit: 4c5bd57

File tree

5 files changed: +19 additions, -19 deletions


docs/reference/analysis/tokenizers.asciidoc

Lines changed: 3 additions & 3 deletions
@@ -99,14 +99,14 @@ terms.
 
 <<analysis-simplepattern-tokenizer,Simple Pattern Tokenizer>>::
 
-The `simplepattern` tokenizer uses a regular expression to capture matching
+The `simple_pattern` tokenizer uses a regular expression to capture matching
 text as terms. It uses a restricted subset of regular expression features
 and is generally faster than the `pattern` tokenizer.
 
 <<analysis-simplepatternsplit-tokenizer,Simple Pattern Split Tokenizer>>::
 
-The `simplepatternsplit` tokenizer uses the same restricted regular expression
-subset as the `simplepattern` tokenizer, but splits the input at matches rather
+The `simple_pattern_split` tokenizer uses the same restricted regular expression
+subset as the `simple_pattern` tokenizer, but splits the input at matches rather
 than returning the matches as terms.
 
 <<analysis-pathhierarchy-tokenizer,Path Tokenizer>>::

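The rename does not change the behavioral contrast described above, but it is worth keeping in mind while updating configs: one tokenizer captures matches, the other splits at them. A minimal sketch against the `_analyze` API (the endpoint behind the `indices.analyze` calls in the test file further down; the input string is illustrative):

POST _analyze
{
  "text": "foo==bar",
  "tokenizer": {
    "type": "simple_pattern",
    "pattern": "=="
  }
}

With `simple_pattern` the only term produced is the match itself, `==`; swapping the type to `simple_pattern_split` with the same pattern would instead yield `foo` and `bar`.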
docs/reference/analysis/tokenizers/simplepattern-tokenizer.asciidoc

Lines changed: 5 additions & 5 deletions
@@ -3,15 +3,15 @@
 
 experimental[]
 
-The `simplepattern` tokenizer uses a regular expression to capture matching
+The `simple_pattern` tokenizer uses a regular expression to capture matching
 text as terms. The set of regular expression features it supports is more
 limited than the <<analysis-pattern-tokenizer,`pattern`>> tokenizer, but the
 tokenization is generally faster.
 
 This tokenizer does not support splitting the input on a pattern match, unlike
 the <<analysis-pattern-tokenizer,`pattern`>> tokenizer. To split on pattern
 matches using the same restricted regular expression subset, see the
-<<analysis-simplepatternsplit-tokenizer,`simplepatternsplit`>> tokenizer.
+<<analysis-simplepatternsplit-tokenizer,`simple_pattern_split`>> tokenizer.
 
 This tokenizer uses {lucene-core-javadoc}/org/apache/lucene/util/automaton/RegExp.html[Lucene regular expressions].
 For an explanation of the supported features and syntax, see <<regexp-syntax,Regular Expression Syntax>>.
@@ -22,7 +22,7 @@ tokenizer should always be configured with a non-default pattern.
 [float]
 === Configuration
 
-The `simplepattern` tokenizer accepts the following parameters:
+The `simple_pattern` tokenizer accepts the following parameters:
 
 [horizontal]
 `pattern`::
@@ -31,7 +31,7 @@ The `simplepattern` tokenizer accepts the following parameters:
 [float]
 === Example configuration
 
-This example configures the `simplepattern` tokenizer to produce terms that are
+This example configures the `simple_pattern` tokenizer to produce terms that are
 three-digit numbers
 
 [source,js]
@@ -47,7 +47,7 @@ PUT my_index
       },
       "tokenizer": {
         "my_tokenizer": {
-          "type": "simplepattern",
+          "type": "simple_pattern",
           "pattern": "[0123456789]{3}"
         }
       }

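The renamed type drops straight into an ad-hoc analysis request as well. A hedged sketch using the three-digit pattern from the example configuration above (the sample text is hypothetical):

POST _analyze
{
  "text": "fd-786-335-514-x",
  "tokenizer": {
    "type": "simple_pattern",
    "pattern": "[0123456789]{3}"
  }
}

This should emit the terms 786, 335, and 514, since only runs of exactly three digits match the pattern.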
docs/reference/analysis/tokenizers/simplepatternsplit-tokenizer.asciidoc

Lines changed: 5 additions & 5 deletions
@@ -3,14 +3,14 @@
 
 experimental[]
 
-The `simplepatternsplit` tokenizer uses a regular expression to split the
+The `simple_pattern_split` tokenizer uses a regular expression to split the
 input into terms at pattern matches. The set of regular expression features it
 supports is more limited than the <<analysis-pattern-tokenizer,`pattern`>>
 tokenizer, but the tokenization is generally faster.
 
 This tokenizer does not produce terms from the matches themselves. To produce
 terms from matches using patterns in the same restricted regular expression
-subset, see the <<analysis-simplepattern-tokenizer,`simplepattern`>>
+subset, see the <<analysis-simplepattern-tokenizer,`simple_pattern`>>
 tokenizer.
 
 This tokenizer uses {lucene-core-javadoc}/org/apache/lucene/util/automaton/RegExp.html[Lucene regular expressions].
@@ -23,7 +23,7 @@ pattern.
 [float]
 === Configuration
 
-The `simplepatternsplit` tokenizer accepts the following parameters:
+The `simple_pattern_split` tokenizer accepts the following parameters:
 
 [horizontal]
 `pattern`::
@@ -32,7 +32,7 @@ The `simplepatternsplit` tokenizer accepts the following parameters:
 [float]
 === Example configuration
 
-This example configures the `simplepatternsplit` tokenizer to split the input
+This example configures the `simple_pattern_split` tokenizer to split the input
 text on underscores.
 
 [source,js]
@@ -48,7 +48,7 @@ PUT my_index
       },
       "tokenizer": {
         "my_tokenizer": {
-          "type": "simplepatternsplit",
+          "type": "simple_pattern_split",
           "pattern": "_"
         }
       }

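As with its sibling, the renamed split tokenizer can be exercised directly. A sketch mirroring the underscore example above (the input phrase is hypothetical):

POST _analyze
{
  "text": "an_underscored_phrase",
  "tokenizer": {
    "type": "simple_pattern_split",
    "pattern": "_"
  }
}

Splitting at every underscore should yield an, underscored, and phrase; the matched underscores themselves are discarded rather than returned as terms.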
modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

Lines changed: 2 additions & 2 deletions
@@ -122,8 +122,8 @@ public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
     @Override
     public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
         Map<String, AnalysisProvider<TokenizerFactory>> tokenizers = new TreeMap<>();
-        tokenizers.put("simplepattern", SimplePatternTokenizerFactory::new);
-        tokenizers.put("simplepatternsplit", SimplePatternSplitTokenizerFactory::new);
+        tokenizers.put("simple_pattern", SimplePatternTokenizerFactory::new);
+        tokenizers.put("simple_pattern_split", SimplePatternSplitTokenizerFactory::new);
         return tokenizers;
     }

modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml

Lines changed: 4 additions & 4 deletions
@@ -27,29 +27,29 @@
     - match: { detail.tokenizer.tokens.2.token: od }
 
 ---
-"simplepattern":
+"simple_pattern":
     - do:
         indices.analyze:
           body:
             text: "a6bf fooo ff61"
            explain: true
            tokenizer:
-              type: simplepattern
+              type: simple_pattern
              pattern: "[abcdef0123456789]{4}"
    - length: { detail.tokenizer.tokens: 2 }
    - match: { detail.tokenizer.name: _anonymous_tokenizer }
    - match: { detail.tokenizer.tokens.0.token: a6bf }
    - match: { detail.tokenizer.tokens.1.token: ff61 }
 
 ---
-"simplepatternsplit":
+"simple_pattern_split":
    - do:
        indices.analyze:
          body:
            text: "foo==bar"
            explain: true
            tokenizer:
-              type: simplepatternsplit
+              type: simple_pattern_split
              pattern: ==
    - length: { detail.tokenizer.tokens: 2 }
    - match: { detail.tokenizer.name: _anonymous_tokenizer }

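The first test case above can be replayed by hand against a running node; a sketch of the equivalent REST request (same body as the YAML `indices.analyze` call):

POST _analyze
{
  "text": "a6bf fooo ff61",
  "explain": true,
  "tokenizer": {
    "type": "simple_pattern",
    "pattern": "[abcdef0123456789]{4}"
  }
}

Per the test assertions, this should report exactly two tokens, a6bf and ff61: fooo never matches because only its leading f falls inside the character class, short of the required four characters.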