diff --git a/buildSrc/src/main/resources/checkstyle_suppressions.xml b/buildSrc/src/main/resources/checkstyle_suppressions.xml
index 79e4e744445d4..4c62693a34a67 100644
--- a/buildSrc/src/main/resources/checkstyle_suppressions.xml
+++ b/buildSrc/src/main/resources/checkstyle_suppressions.xml
@@ -267,7 +267,6 @@
-
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/compound/AbstractCompoundWordTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/analysis/common/AbstractCompoundWordTokenFilterFactory.java
similarity index 93%
rename from core/src/main/java/org/elasticsearch/index/analysis/compound/AbstractCompoundWordTokenFilterFactory.java
rename to core/src/main/java/org/elasticsearch/analysis/common/AbstractCompoundWordTokenFilterFactory.java
index 91c984c7a6b70..b59cc166f09a5 100644
--- a/core/src/main/java/org/elasticsearch/index/analysis/compound/AbstractCompoundWordTokenFilterFactory.java
+++ b/core/src/main/java/org/elasticsearch/analysis/common/AbstractCompoundWordTokenFilterFactory.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.elasticsearch.index.analysis.compound;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.compound.CompoundWordTokenFilterBase;
@@ -38,7 +38,7 @@ public abstract class AbstractCompoundWordTokenFilterFactory extends AbstractTok
protected final boolean onlyLongestMatch;
protected final CharArraySet wordList;
- public AbstractCompoundWordTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+ protected AbstractCompoundWordTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
minWordSize = settings.getAsInt("min_word_size", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
diff --git a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
index 9220c063715ad..2657c9f7981ec 100644
--- a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
+++ b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
@@ -55,7 +55,6 @@
import org.elasticsearch.index.analysis.DutchAnalyzerProvider;
import org.elasticsearch.index.analysis.DutchStemTokenFilterFactory;
import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
-import org.elasticsearch.index.analysis.ElisionTokenFilterFactory;
import org.elasticsearch.index.analysis.EnglishAnalyzerProvider;
import org.elasticsearch.index.analysis.FingerprintAnalyzerProvider;
import org.elasticsearch.index.analysis.FingerprintTokenFilterFactory;
@@ -75,7 +74,6 @@
import org.elasticsearch.index.analysis.IndonesianAnalyzerProvider;
import org.elasticsearch.index.analysis.IrishAnalyzerProvider;
import org.elasticsearch.index.analysis.ItalianAnalyzerProvider;
-import org.elasticsearch.index.analysis.KStemTokenFilterFactory;
import org.elasticsearch.index.analysis.KeepTypesFilterFactory;
import org.elasticsearch.index.analysis.KeepWordFilterFactory;
import org.elasticsearch.index.analysis.KeywordAnalyzerProvider;
@@ -99,7 +97,6 @@
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
-import org.elasticsearch.index.analysis.ReverseTokenFilterFactory;
import org.elasticsearch.index.analysis.RomanianAnalyzerProvider;
import org.elasticsearch.index.analysis.RussianAnalyzerProvider;
import org.elasticsearch.index.analysis.RussianStemTokenFilterFactory;
@@ -116,8 +113,6 @@
import org.elasticsearch.index.analysis.StandardHtmlStripAnalyzerProvider;
import org.elasticsearch.index.analysis.StandardTokenFilterFactory;
import org.elasticsearch.index.analysis.StandardTokenizerFactory;
-import org.elasticsearch.index.analysis.StemmerOverrideTokenFilterFactory;
-import org.elasticsearch.index.analysis.StemmerTokenFilterFactory;
import org.elasticsearch.index.analysis.StopAnalyzerProvider;
import org.elasticsearch.index.analysis.StopTokenFilterFactory;
import org.elasticsearch.index.analysis.SwedishAnalyzerProvider;
@@ -125,13 +120,10 @@
import org.elasticsearch.index.analysis.ThaiTokenizerFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
-import org.elasticsearch.index.analysis.TruncateTokenFilterFactory;
import org.elasticsearch.index.analysis.TurkishAnalyzerProvider;
import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory;
import org.elasticsearch.index.analysis.WhitespaceAnalyzerProvider;
import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory;
-import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
-import org.elasticsearch.index.analysis.compound.HyphenationCompoundWordTokenFilterFactory;
import org.elasticsearch.plugins.AnalysisPlugin;
import java.io.IOException;
@@ -201,23 +193,16 @@ private NamedRegistry> setupTokenFilters(Li
hunspellService) {
NamedRegistry> tokenFilters = new NamedRegistry<>("token_filter");
tokenFilters.register("stop", StopTokenFilterFactory::new);
- tokenFilters.register("reverse", ReverseTokenFilterFactory::new);
- tokenFilters.register("kstem", KStemTokenFilterFactory::new);
tokenFilters.register("standard", StandardTokenFilterFactory::new);
tokenFilters.register("shingle", ShingleTokenFilterFactory::new);
tokenFilters.register("min_hash", MinHashTokenFilterFactory::new);
- tokenFilters.register("truncate", requriesAnalysisSettings(TruncateTokenFilterFactory::new));
tokenFilters.register("limit", LimitTokenCountFilterFactory::new);
tokenFilters.register("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new));
- tokenFilters.register("stemmer", StemmerTokenFilterFactory::new);
tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
- tokenFilters.register("elision", ElisionTokenFilterFactory::new);
tokenFilters.register("keep", requriesAnalysisSettings(KeepWordFilterFactory::new));
tokenFilters.register("keep_types", requriesAnalysisSettings(KeepTypesFilterFactory::new));
tokenFilters.register("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
tokenFilters.register("pattern_replace", requriesAnalysisSettings(PatternReplaceTokenFilterFactory::new));
- tokenFilters.register("dictionary_decompounder", requriesAnalysisSettings(DictionaryCompoundWordTokenFilterFactory::new));
- tokenFilters.register("hyphenation_decompounder", requriesAnalysisSettings(HyphenationCompoundWordTokenFilterFactory::new));
tokenFilters.register("arabic_stem", ArabicStemTokenFilterFactory::new);
tokenFilters.register("brazilian_stem", BrazilianStemTokenFilterFactory::new);
tokenFilters.register("czech_stem", CzechStemTokenFilterFactory::new);
@@ -225,7 +210,6 @@ private NamedRegistry> setupTokenFilters(Li
tokenFilters.register("french_stem", FrenchStemTokenFilterFactory::new);
tokenFilters.register("german_stem", GermanStemTokenFilterFactory::new);
tokenFilters.register("russian_stem", RussianStemTokenFilterFactory::new);
- tokenFilters.register("stemmer_override", requriesAnalysisSettings(StemmerOverrideTokenFilterFactory::new));
tokenFilters.register("arabic_normalization", ArabicNormalizationFilterFactory::new);
tokenFilters.register("german_normalization", GermanNormalizationFilterFactory::new);
tokenFilters.register("hindi_normalization", HindiNormalizationFilterFactory::new);
diff --git a/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java b/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java
index b3394d4f4fade..a740f96cdd83f 100644
--- a/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java
+++ b/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java
@@ -47,7 +47,7 @@
import org.elasticsearch.index.analysis.StandardTokenizerFactory;
import org.elasticsearch.index.analysis.StopTokenFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
-import org.elasticsearch.index.analysis.filter1.MyFilterTokenFilterFactory;
+import org.elasticsearch.index.analysis.MyFilterTokenFilterFactory;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.test.ESTestCase;
@@ -196,18 +196,6 @@ private void testSimpleConfiguration(Settings settings) throws IOException {
// assertThat(czechstemmeranalyzer.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
// assertThat(czechstemmeranalyzer.tokenFilters().length, equalTo(4));
// assertThat(czechstemmeranalyzer.tokenFilters()[3], instanceOf(CzechStemTokenFilterFactory.class));
-//
-// // check dictionary decompounder
-// analyzer = analysisService.analyzer("decompoundingAnalyzer").analyzer();
-// assertThat(analyzer, instanceOf(CustomAnalyzer.class));
-// CustomAnalyzer dictionaryDecompounderAnalyze = (CustomAnalyzer) analyzer;
-// assertThat(dictionaryDecompounderAnalyze.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
-// assertThat(dictionaryDecompounderAnalyze.tokenFilters().length, equalTo(1));
-// assertThat(dictionaryDecompounderAnalyze.tokenFilters()[0], instanceOf(DictionaryCompoundWordTokenFilterFactory.class));
-
- Set> wordList = Analysis.getWordSet(null, Version.CURRENT, settings, "index.analysis.filter.dict_dec.word_list");
- MatcherAssert.assertThat(wordList.size(), equalTo(6));
-// MatcherAssert.assertThat(wordList, hasItems("donau", "dampf", "schiff", "spargel", "creme", "suppe"));
}
public void testWordListPath() throws Exception {
diff --git a/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java b/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java
index dd556c56e308d..6e0c61c1544dd 100644
--- a/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java
+++ b/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java
@@ -93,16 +93,16 @@ public void testAnalyzeWithNoIndex() throws Exception {
assertThat(analyzeResponse.getTokens().size(), equalTo(1));
assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("this is a test"));
- analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("standard").addTokenFilter("lowercase").addTokenFilter("reverse").get();
+ analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("standard").addTokenFilter("lowercase").get();
assertThat(analyzeResponse.getTokens().size(), equalTo(4));
AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(0);
- assertThat(token.getTerm(), equalTo("siht"));
+ assertThat(token.getTerm(), equalTo("this"));
token = analyzeResponse.getTokens().get(1);
- assertThat(token.getTerm(), equalTo("si"));
+ assertThat(token.getTerm(), equalTo("is"));
token = analyzeResponse.getTokens().get(2);
assertThat(token.getTerm(), equalTo("a"));
token = analyzeResponse.getTokens().get(3);
- assertThat(token.getTerm(), equalTo("tset"));
+ assertThat(token.getTerm(), equalTo("test"));
analyzeResponse = client().admin().indices().prepareAnalyze("of course").setTokenizer("standard").addTokenFilter("stop").get();
assertThat(analyzeResponse.getTokens().size(), equalTo(1));
diff --git a/core/src/test/java/org/elasticsearch/search/suggest/SuggestSearchIT.java b/core/src/test/java/org/elasticsearch/search/suggest/SuggestSearchIT.java
index 035fd847ad243..5142c25229d58 100644
--- a/core/src/test/java/org/elasticsearch/search/suggest/SuggestSearchIT.java
+++ b/core/src/test/java/org/elasticsearch/search/suggest/SuggestSearchIT.java
@@ -445,8 +445,6 @@ public void testStopwordsOnlyPhraseSuggest() throws IOException {
public void testPrefixLength() throws IOException {
CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(Settings.builder()
.put(SETTING_NUMBER_OF_SHARDS, 1)
- .put("index.analysis.analyzer.reverse.tokenizer", "standard")
- .putArray("index.analysis.analyzer.reverse.filter", "lowercase", "reverse")
.put("index.analysis.analyzer.body.tokenizer", "standard")
.putArray("index.analysis.analyzer.body.filter", "lowercase")
.put("index.analysis.analyzer.bigram.tokenizer", "standard")
@@ -458,7 +456,6 @@ public void testPrefixLength() throws IOException {
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("properties")
.startObject("body").field("type", "text").field("analyzer", "body").endObject()
- .startObject("body_reverse").field("type", "text").field("analyzer", "reverse").endObject()
.startObject("bigram").field("type", "text").field("analyzer", "bigram").endObject()
.endObject()
.endObject().endObject();
@@ -486,8 +483,6 @@ public void testPrefixLength() throws IOException {
public void testBasicPhraseSuggest() throws IOException, URISyntaxException {
CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(Settings.builder()
.put(indexSettings())
- .put("index.analysis.analyzer.reverse.tokenizer", "standard")
- .putArray("index.analysis.analyzer.reverse.filter", "lowercase", "reverse")
.put("index.analysis.analyzer.body.tokenizer", "standard")
.putArray("index.analysis.analyzer.body.filter", "lowercase")
.put("index.analysis.analyzer.bigram.tokenizer", "standard")
@@ -503,10 +498,6 @@ public void testBasicPhraseSuggest() throws IOException, URISyntaxException {
field("type", "text").
field("analyzer", "body")
.endObject()
- .startObject("body_reverse").
- field("type", "text").
- field("analyzer", "reverse")
- .endObject()
.startObject("bigram").
field("type", "text").
field("analyzer", "bigram")
@@ -536,7 +527,7 @@ public void testBasicPhraseSuggest() throws IOException, URISyntaxException {
"Police sergeant who stops the film",
};
for (String line : strings) {
- index("test", "type1", line, "body", line, "body_reverse", line, "bigram", line);
+ index("test", "type1", line, "body", line, "bigram", line);
}
refresh();
@@ -576,14 +567,6 @@ public void testBasicPhraseSuggest() throws IOException, URISyntaxException {
searchSuggest = searchSuggest( "Arthur, King of the Britons", "simple_phrase", phraseSuggest);
assertSuggestion(searchSuggest, 0, "simple_phrase", "arthur king of the britons");
- //test reverse suggestions with pre & post filter
- phraseSuggest
- .addCandidateGenerator(candidateGenerator("body").minWordLength(1).suggestMode("always"))
- .addCandidateGenerator(candidateGenerator("body_reverse").minWordLength(1).suggestMode("always").preFilter("reverse")
- .postFilter("reverse"));
- searchSuggest = searchSuggest( "Artur, Ging of the Britons", "simple_phrase", phraseSuggest);
- assertSuggestion(searchSuggest, 0, "simple_phrase", "arthur king of the britons");
-
// set all mass to trigrams (not indexed)
phraseSuggest.clearCandidateGenerators()
.addCandidateGenerator(candidateGenerator("body").minWordLength(1).suggestMode("always"))
@@ -633,8 +616,6 @@ public void testBasicPhraseSuggest() throws IOException, URISyntaxException {
public void testSizeParam() throws IOException {
CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(Settings.builder()
.put(SETTING_NUMBER_OF_SHARDS, 1)
- .put("index.analysis.analyzer.reverse.tokenizer", "standard")
- .putArray("index.analysis.analyzer.reverse.filter", "lowercase", "reverse")
.put("index.analysis.analyzer.body.tokenizer", "standard")
.putArray("index.analysis.analyzer.body.filter", "lowercase")
.put("index.analysis.analyzer.bigram.tokenizer", "standard")
@@ -652,10 +633,6 @@ public void testSizeParam() throws IOException {
.field("type", "text")
.field("analyzer", "body")
.endObject()
- .startObject("body_reverse")
- .field("type", "text")
- .field("analyzer", "reverse")
- .endObject()
.startObject("bigram")
.field("type", "text")
.field("analyzer", "bigram")
@@ -667,9 +644,9 @@ public void testSizeParam() throws IOException {
ensureGreen();
String line = "xorr the god jewel";
- index("test", "type1", "1", "body", line, "body_reverse", line, "bigram", line);
+ index("test", "type1", "1", "body", line, "bigram", line);
line = "I got it this time";
- index("test", "type1", "2", "body", line, "body_reverse", line, "bigram", line);
+ index("test", "type1", "2", "body", line, "bigram", line);
refresh();
PhraseSuggestionBuilder phraseSuggestion = phraseSuggestion("bigram")
diff --git a/core/src/test/resources/org/elasticsearch/index/analysis/test1.json b/core/src/test/resources/org/elasticsearch/index/analysis/test1.json
index 38937a9b5af93..f2b600177212c 100644
--- a/core/src/test/resources/org/elasticsearch/index/analysis/test1.json
+++ b/core/src/test/resources/org/elasticsearch/index/analysis/test1.json
@@ -17,10 +17,6 @@
},
"my":{
"type":"myfilter"
- },
- "dict_dec":{
- "type":"dictionary_decompounder",
- "word_list":["donau", "dampf", "schiff", "spargel", "creme", "suppe"]
}
},
"analyzer":{
@@ -43,10 +39,6 @@
"czechAnalyzerWithStemmer":{
"tokenizer":"standard",
"filter":["standard", "lowercase", "stop", "czech_stem"]
- },
- "decompoundingAnalyzer":{
- "tokenizer":"standard",
- "filter":["dict_dec"]
}
}
}
diff --git a/core/src/test/resources/org/elasticsearch/index/analysis/test1.yml b/core/src/test/resources/org/elasticsearch/index/analysis/test1.yml
index f7a57d14dbe3d..e9965467251e2 100644
--- a/core/src/test/resources/org/elasticsearch/index/analysis/test1.yml
+++ b/core/src/test/resources/org/elasticsearch/index/analysis/test1.yml
@@ -12,9 +12,6 @@ index :
stopwords : [stop2-1, stop2-2]
my :
type : myfilter
- dict_dec :
- type : dictionary_decompounder
- word_list : [donau, dampf, schiff, spargel, creme, suppe]
analyzer :
standard :
type : standard
@@ -34,6 +31,3 @@ index :
czechAnalyzerWithStemmer :
tokenizer : standard
filter : [standard, lowercase, stop, czech_stem]
- decompoundingAnalyzer :
- tokenizer : standard
- filter : [dict_dec]
diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
index 0299e37affc09..18e34d381a1f5 100644
--- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
@@ -107,6 +107,14 @@ public Map> getTokenFilters() {
filters.put("ngram", NGramTokenFilterFactory::new);
filters.put("edgeNGram", EdgeNGramTokenFilterFactory::new);
filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new);
+ filters.put("stemmer", StemmerTokenFilterFactory::new);
+ filters.put("stemmer_override", requriesAnalysisSettings(StemmerOverrideTokenFilterFactory::new));
+ filters.put("kstem", KStemTokenFilterFactory::new);
+ filters.put("dictionary_decompounder", requriesAnalysisSettings(DictionaryCompoundWordTokenFilterFactory::new));
+ filters.put("hyphenation_decompounder", requriesAnalysisSettings(HyphenationCompoundWordTokenFilterFactory::new));
+ filters.put("reverse", ReverseTokenFilterFactory::new);
+ filters.put("elision", ElisionTokenFilterFactory::new);
+ filters.put("truncate", requriesAnalysisSettings(TruncateTokenFilterFactory::new));
return filters;
}
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/DictionaryCompoundWordTokenFilterFactory.java
similarity index 90%
rename from core/src/main/java/org/elasticsearch/index/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java
rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/DictionaryCompoundWordTokenFilterFactory.java
index fc9719d36b128..e9e690e0b0197 100644
--- a/core/src/main/java/org/elasticsearch/index/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/DictionaryCompoundWordTokenFilterFactory.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.elasticsearch.index.analysis.compound;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.compound.DictionaryCompoundWordTokenFilter;
@@ -33,7 +33,7 @@
*/
public class DictionaryCompoundWordTokenFilterFactory extends AbstractCompoundWordTokenFilterFactory {
- public DictionaryCompoundWordTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+ DictionaryCompoundWordTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, env, name, settings);
}
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/ElisionTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ElisionTokenFilterFactory.java
similarity index 82%
rename from core/src/main/java/org/elasticsearch/index/analysis/ElisionTokenFilterFactory.java
rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ElisionTokenFilterFactory.java
index 401f2caf03fdb..94fc52165dd23 100644
--- a/core/src/main/java/org/elasticsearch/index/analysis/ElisionTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ElisionTokenFilterFactory.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
@@ -25,12 +25,15 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
+import org.elasticsearch.index.analysis.Analysis;
+import org.elasticsearch.index.analysis.MultiTermAwareComponent;
public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
private final CharArraySet articles;
- public ElisionTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+ ElisionTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
this.articles = Analysis.parseArticles(env, indexSettings.getIndexVersionCreated(), settings);
}
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/compound/HyphenationCompoundWordTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/HyphenationCompoundWordTokenFilterFactory.java
similarity index 88%
rename from core/src/main/java/org/elasticsearch/index/analysis/compound/HyphenationCompoundWordTokenFilterFactory.java
rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/HyphenationCompoundWordTokenFilterFactory.java
index 152d4395ef3d7..b24eb2c4fbc4d 100644
--- a/core/src/main/java/org/elasticsearch/index/analysis/compound/HyphenationCompoundWordTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/HyphenationCompoundWordTokenFilterFactory.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.elasticsearch.index.analysis.compound;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.compound.HyphenationCompoundWordTokenFilter;
@@ -27,6 +27,7 @@
import org.elasticsearch.index.IndexSettings;
import org.xml.sax.InputSource;
+import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
@@ -39,7 +40,7 @@ public class HyphenationCompoundWordTokenFilterFactory extends AbstractCompoundW
private final HyphenationTree hyphenationTree;
- public HyphenationCompoundWordTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+ HyphenationCompoundWordTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, env, name, settings);
String hyphenationPatternsPath = settings.get("hyphenation_patterns_path", null);
@@ -50,7 +51,8 @@ public HyphenationCompoundWordTokenFilterFactory(IndexSettings indexSettings, En
Path hyphenationPatternsFile = env.configFile().resolve(hyphenationPatternsPath);
try {
- hyphenationTree = HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(Files.newInputStream(hyphenationPatternsFile)));
+ InputStream in = Files.newInputStream(hyphenationPatternsFile);
+ hyphenationTree = HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(in));
} catch (Exception e) {
throw new IllegalArgumentException("Exception while reading hyphenation_patterns_path.", e);
}
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/KStemTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/KStemTokenFilterFactory.java
similarity index 84%
rename from core/src/main/java/org/elasticsearch/index/analysis/KStemTokenFilterFactory.java
rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/KStemTokenFilterFactory.java
index 24f92ece1016b..2100e02fb61ad 100644
--- a/core/src/main/java/org/elasticsearch/index/analysis/KStemTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/KStemTokenFilterFactory.java
@@ -17,17 +17,18 @@
* under the License.
*/
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.KStemFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
public class KStemTokenFilterFactory extends AbstractTokenFilterFactory {
- public KStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+ KStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
}
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/ReverseTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ReverseTokenFilterFactory.java
similarity index 85%
rename from core/src/main/java/org/elasticsearch/index/analysis/ReverseTokenFilterFactory.java
rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ReverseTokenFilterFactory.java
index 1719841098d37..125e1e496b99e 100644
--- a/core/src/main/java/org/elasticsearch/index/analysis/ReverseTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ReverseTokenFilterFactory.java
@@ -17,17 +17,18 @@
* under the License.
*/
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
public class ReverseTokenFilterFactory extends AbstractTokenFilterFactory {
- public ReverseTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+ ReverseTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
}
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/StemmerOverrideTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerOverrideTokenFilterFactory.java
similarity index 90%
rename from core/src/main/java/org/elasticsearch/index/analysis/StemmerOverrideTokenFilterFactory.java
rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerOverrideTokenFilterFactory.java
index 66643cc2396e9..f95b4ed76e713 100644
--- a/core/src/main/java/org/elasticsearch/index/analysis/StemmerOverrideTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerOverrideTokenFilterFactory.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
@@ -26,6 +26,8 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
+import org.elasticsearch.index.analysis.Analysis;
import java.io.IOException;
import java.util.List;
@@ -34,7 +36,7 @@ public class StemmerOverrideTokenFilterFactory extends AbstractTokenFilterFactor
private final StemmerOverrideMap overrideMap;
- public StemmerOverrideTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) throws IOException {
+ StemmerOverrideTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) throws IOException {
super(indexSettings, name, settings);
List rules = Analysis.getWordList(env, settings, "rules");
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java
similarity index 98%
rename from core/src/main/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactory.java
rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java
index bf83876259bad..c94a449afd2c1 100644
--- a/core/src/main/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicStemFilter;
@@ -57,6 +57,7 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.tartarus.snowball.ext.ArmenianStemmer;
import org.tartarus.snowball.ext.BasqueStemmer;
import org.tartarus.snowball.ext.CatalanStemmer;
@@ -86,7 +87,7 @@ public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {
private String language;
- public StemmerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+ StemmerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
this.language = Strings.capitalize(settings.get("language", settings.get("name", "porter")));
}
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/TruncateTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/TruncateTokenFilterFactory.java
similarity index 86%
rename from core/src/main/java/org/elasticsearch/index/analysis/TruncateTokenFilterFactory.java
rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/TruncateTokenFilterFactory.java
index 49ea7d6940d67..823119646642f 100644
--- a/core/src/main/java/org/elasticsearch/index/analysis/TruncateTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/TruncateTokenFilterFactory.java
@@ -17,19 +17,20 @@
* under the License.
*/
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.TruncateTokenFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
public class TruncateTokenFilterFactory extends AbstractTokenFilterFactory {
private final int length;
- public TruncateTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+ TruncateTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
this.length = settings.getAsInt("length", -1);
if (length <= 0) {
diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java
index a7dd261445294..37bf407df03ae 100644
--- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java
+++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java
@@ -26,6 +26,7 @@
import org.apache.lucene.analysis.reverse.ReverseStringFilterFactory;
import org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory;
import org.elasticsearch.index.analysis.HtmlStripCharFilterFactory;
+import org.elasticsearch.index.analysis.SynonymTokenFilterFactory;
import org.elasticsearch.indices.analysis.AnalysisFactoryTestCase;
import java.util.List;
@@ -67,6 +68,39 @@ protected Map<String, Class<?>> getTokenFilters() {
filters.put("uppercase", UpperCaseTokenFilterFactory.class);
filters.put("ngram", NGramTokenFilterFactory.class);
filters.put("edgengram", EdgeNGramTokenFilterFactory.class);
+ filters.put("bulgarianstem", StemmerTokenFilterFactory.class);
+ filters.put("englishminimalstem", StemmerTokenFilterFactory.class);
+ filters.put("englishpossessive", StemmerTokenFilterFactory.class);
+ filters.put("finnishlightstem", StemmerTokenFilterFactory.class);
+ filters.put("frenchlightstem", StemmerTokenFilterFactory.class);
+ filters.put("frenchminimalstem", StemmerTokenFilterFactory.class);
+ filters.put("galicianminimalstem", StemmerTokenFilterFactory.class);
+ filters.put("galicianstem", StemmerTokenFilterFactory.class);
+ filters.put("germanlightstem", StemmerTokenFilterFactory.class);
+ filters.put("germanminimalstem", StemmerTokenFilterFactory.class);
+ filters.put("greekstem", StemmerTokenFilterFactory.class);
+ filters.put("hindistem", StemmerTokenFilterFactory.class);
+ filters.put("hungarianlightstem", StemmerTokenFilterFactory.class);
+ filters.put("indonesianstem", StemmerTokenFilterFactory.class);
+ filters.put("italianlightstem", StemmerTokenFilterFactory.class);
+ filters.put("latvianstem", StemmerTokenFilterFactory.class);
+ filters.put("norwegianlightstem", StemmerTokenFilterFactory.class);
+ filters.put("norwegianminimalstem", StemmerTokenFilterFactory.class);
+ filters.put("portuguesestem", StemmerTokenFilterFactory.class);
+ filters.put("portugueselightstem", StemmerTokenFilterFactory.class);
+ filters.put("portugueseminimalstem", StemmerTokenFilterFactory.class);
+ filters.put("russianlightstem", StemmerTokenFilterFactory.class);
+ filters.put("soranistem", StemmerTokenFilterFactory.class);
+ filters.put("spanishlightstem", StemmerTokenFilterFactory.class);
+ filters.put("swedishlightstem", StemmerTokenFilterFactory.class);
+ filters.put("stemmeroverride", StemmerOverrideTokenFilterFactory.class);
+ filters.put("kstem", KStemTokenFilterFactory.class);
+ filters.put("synonym", SynonymTokenFilterFactory.class);
+ filters.put("dictionarycompoundword", DictionaryCompoundWordTokenFilterFactory.class);
+ filters.put("hyphenationcompoundword", HyphenationCompoundWordTokenFilterFactory.class);
+ filters.put("reversestring", ReverseTokenFilterFactory.class);
+ filters.put("elision", ElisionTokenFilterFactory.class);
+ filters.put("truncate", TruncateTokenFilterFactory.class);
return filters;
}
diff --git a/core/src/test/java/org/elasticsearch/index/analysis/CompoundAnalysisTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CompoundAnalysisTests.java
similarity index 84%
rename from core/src/test/java/org/elasticsearch/index/analysis/CompoundAnalysisTests.java
rename to modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CompoundAnalysisTests.java
index e8734331167d5..13b512f86e0a5 100644
--- a/core/src/test/java/org/elasticsearch/index/analysis/CompoundAnalysisTests.java
+++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CompoundAnalysisTests.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@@ -29,8 +29,9 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
-import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
-import org.elasticsearch.index.analysis.filter1.MyFilterTokenFilterFactory;
+import org.elasticsearch.index.analysis.IndexAnalyzers;
+import org.elasticsearch.index.analysis.MyFilterTokenFilterFactory;
+import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.elasticsearch.plugins.AnalysisPlugin;
@@ -40,10 +41,10 @@
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
import java.util.Map;
-import static java.util.Collections.singletonList;
import static java.util.Collections.singletonMap;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasItems;
@@ -53,12 +54,7 @@ public class CompoundAnalysisTests extends ESTestCase {
public void testDefaultsCompoundAnalysis() throws Exception {
Settings settings = getJsonSettings();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
- AnalysisModule analysisModule = new AnalysisModule(new Environment(settings), singletonList(new AnalysisPlugin() {
- @Override
- public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
- return singletonMap("myfilter", MyFilterTokenFilterFactory::new);
- }
- }));
+ AnalysisModule analysisModule = createAnalysisModule(settings);
TokenFilterFactory filterFactory = analysisModule.getAnalysisRegistry().buildTokenFilterFactories(idxSettings).get("dict_dec");
MatcherAssert.assertThat(filterFactory, instanceOf(DictionaryCompoundWordTokenFilterFactory.class));
}
@@ -75,12 +71,7 @@ public void testDictionaryDecompounder() throws Exception {
private List<String> analyze(Settings settings, String analyzerName, String text) throws IOException {
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
- AnalysisModule analysisModule = new AnalysisModule(new Environment(settings), singletonList(new AnalysisPlugin() {
- @Override
- public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
- return singletonMap("myfilter", MyFilterTokenFilterFactory::new);
- }
- }));
+ AnalysisModule analysisModule = createAnalysisModule(settings);
IndexAnalyzers indexAnalyzers = analysisModule.getAnalysisRegistry().build(idxSettings);
Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();
@@ -99,8 +90,18 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
return terms;
}
+ private AnalysisModule createAnalysisModule(Settings settings) throws IOException {
+ CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin();
+ return new AnalysisModule(new Environment(settings), Arrays.asList(commonAnalysisPlugin, new AnalysisPlugin() {
+ @Override
+ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
+ return singletonMap("myfilter", MyFilterTokenFilterFactory::new);
+ }
+ }));
+ }
+
private Settings getJsonSettings() throws IOException {
- String json = "/org/elasticsearch/index/analysis/test1.json";
+ String json = "/org/elasticsearch/analysis/common/test1.json";
return Settings.builder()
.loadFromStream(json, getClass().getResourceAsStream(json))
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
@@ -109,7 +110,7 @@ private Settings getJsonSettings() throws IOException {
}
private Settings getYamlSettings() throws IOException {
- String yaml = "/org/elasticsearch/index/analysis/test1.yml";
+ String yaml = "/org/elasticsearch/analysis/common/test1.yml";
return Settings.builder()
.loadFromStream(yaml, getClass().getResourceAsStream(yaml))
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
diff --git a/core/src/test/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactoryTests.java
similarity index 90%
rename from core/src/test/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactoryTests.java
rename to modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactoryTests.java
index c4632e5749012..10f7653c52c47 100644
--- a/core/src/test/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactoryTests.java
+++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactoryTests.java
@@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@@ -26,6 +26,10 @@
import org.elasticsearch.Version;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
+import org.elasticsearch.index.analysis.AnalysisTestsHelper;
+import org.elasticsearch.index.analysis.IndexAnalyzers;
+import org.elasticsearch.index.analysis.NamedAnalyzer;
+import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.ESTokenStreamTestCase;
import org.elasticsearch.test.VersionUtils;
@@ -38,6 +42,9 @@
import static org.hamcrest.Matchers.instanceOf;
public class StemmerTokenFilterFactoryTests extends ESTokenStreamTestCase {
+
+ private static final CommonAnalysisPlugin PLUGIN = new CommonAnalysisPlugin();
+
public void testEnglishFilterFactory() throws IOException {
int iters = scaledRandomIntBetween(20, 100);
for (int i = 0; i < iters; i++) {
@@ -51,7 +58,7 @@ public void testEnglishFilterFactory() throws IOException {
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
- ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
+ ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_english");
assertThat(tokenFilter, instanceOf(StemmerTokenFilterFactory.class));
Tokenizer tokenizer = new WhitespaceTokenizer();
@@ -79,7 +86,7 @@ public void testPorter2FilterFactory() throws IOException {
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
- ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
+ ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_porter2");
assertThat(tokenFilter, instanceOf(StemmerTokenFilterFactory.class));
Tokenizer tokenizer = new WhitespaceTokenizer();
diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml
index 1d3075e28f84a..2283634a80a7e 100644
--- a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml
+++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml
@@ -392,3 +392,179 @@
- match: { tokens.1.token: foob }
- match: { tokens.2.token: fooba }
- match: { tokens.3.token: foobar }
+
+---
+"kstem":
+ - do:
+ indices.create:
+ index: test
+ body:
+ settings:
+ analysis:
+ filter:
+ my_kstem:
+ type: kstem
+ - do:
+ indices.analyze:
+ index: test
+ body:
+ text: bricks
+ tokenizer: keyword
+ filter: [my_kstem]
+ - length: { tokens: 1 }
+ - match: { tokens.0.token: brick }
+
+ # use preconfigured token filter:
+ - do:
+ indices.analyze:
+ body:
+ text: bricks
+ tokenizer: keyword
+ filter: [kstem]
+ - length: { tokens: 1 }
+ - match: { tokens.0.token: brick }
+
+---
+"reverse":
+ - do:
+ indices.create:
+ index: test
+ body:
+ settings:
+ analysis:
+ filter:
+ my_reverse:
+ type: reverse
+ - do:
+ indices.analyze:
+ index: test
+ body:
+ text: foobar
+ tokenizer: keyword
+ filter: [my_reverse]
+ - length: { tokens: 1 }
+ - match: { tokens.0.token: raboof }
+
+ # use preconfigured token filter:
+ - do:
+ indices.analyze:
+ body:
+ text: foobar
+ tokenizer: keyword
+ filter: [reverse]
+ - length: { tokens: 1 }
+ - match: { tokens.0.token: raboof }
+
+---
+"elision":
+ - do:
+ indices.create:
+ index: test
+ body:
+ settings:
+ analysis:
+ filter:
+ my_elision:
+ type: elision
+ articles: ["l", "m", "t", "qu", "n", "s", "j"]
+ - do:
+ indices.analyze:
+ index: test
+ body:
+ text: "l'avion"
+ tokenizer: keyword
+ filter: [my_elision]
+ - length: { tokens: 1 }
+ - match: { tokens.0.token: avion }
+
+---
+"stemmer":
+ - do:
+ indices.create:
+ index: test
+ body:
+ settings:
+ analysis:
+ filter:
+ my_stemmer:
+ type: stemmer
+ language: dutch
+ - do:
+ indices.analyze:
+ index: test
+ body:
+ text: zoeken
+ tokenizer: keyword
+ filter: [my_stemmer]
+ - length: { tokens: 1 }
+ - match: { tokens.0.token: zoek }
+---
+"stemmer_override":
+ - do:
+ indices.create:
+ index: test
+ body:
+ settings:
+ analysis:
+ filter:
+ my_stemmer:
+ type: stemmer
+ language: dutch
+ my_stemmer_override:
+ type: stemmer_override
+ rules: ["zoeken => override"]
+ - do:
+ indices.analyze:
+ index: test
+ body:
+ text: zoeken
+ tokenizer: keyword
+ filter: [my_stemmer_override, my_stemmer]
+ - length: { tokens: 1 }
+ - match: { tokens.0.token: override }
+
+---
+"decompounder":
+ - do:
+ indices.create:
+ index: test
+ body:
+ settings:
+ analysis:
+ filter:
+ my_decompounder:
+ type: dictionary_decompounder
+ word_list: [foo, bar]
+ - do:
+ indices.analyze:
+ index: test
+ body:
+ text: foobar
+ tokenizer: keyword
+ filter: [my_decompounder]
+ - length: { tokens: 3 }
+ - match: { tokens.0.token: foobar }
+ - match: { tokens.1.token: foo }
+ - match: { tokens.2.token: bar }
+
+---
+"truncate":
+ - do:
+ indices.create:
+ index: test
+ body:
+ settings:
+ analysis:
+ filter:
+ my_truncate:
+ type: truncate
+ length: 3
+ - do:
+ indices.analyze:
+ index: test
+ body:
+ text: foobar
+ tokenizer: keyword
+ filter: [my_truncate]
+ - length: { tokens: 1 }
+ - match: { tokens.0.token: foo }
diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/search.suggest/20_phrase.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/search.suggest/20_phrase.yml
index cf5ebcea42edb..18c3c8146255f 100644
--- a/modules/analysis-common/src/test/resources/rest-api-spec/test/search.suggest/20_phrase.yml
+++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/search.suggest/20_phrase.yml
@@ -19,6 +19,9 @@ setup:
ngram:
tokenizer: standard
filter: [lowercase, ngram]
+ reverse:
+ tokenizer: standard
+ filter: [lowercase, reverse]
filter:
bigram:
type: shingle
@@ -43,6 +46,9 @@ setup:
ngram:
type: text
analyzer: ngram
+ reverse:
+ type: text
+ analyzer: reverse
- do:
bulk:
@@ -54,6 +60,40 @@ setup:
{ "body": "Xorr the God-Jewel" }
{ "index": {} }
{ "body": "Xorn" }
+ { "index": {} }
+ { "body": "Arthur, King of the Britons" }
+ { "index": {} }
+ { "body": "Sir Lancelot the Brave" }
+ { "index": {} }
+ { "body": "Patsy, Arthur's Servant" }
+ { "index": {} }
+ { "body": "Sir Robin the Not-Quite-So-Brave-as-Sir-Lancelot" }
+ { "index": {} }
+ { "body": "Sir Bedevere the Wise" }
+ { "index": {} }
+ { "body": "Sir Galahad the Pure" }
+ { "index": {} }
+ { "body": "Miss Islington, the Witch" }
+ { "index": {} }
+ { "body": "Zoot" }
+ { "index": {} }
+ { "body": "Leader of Robin's Minstrels" }
+ { "index": {} }
+ { "body": "Old Crone" }
+ { "index": {} }
+ { "body": "Frank, the Historian" }
+ { "index": {} }
+ { "body": "Frank's Wife" }
+ { "index": {} }
+ { "body": "Dr. Piglet" }
+ { "index": {} }
+ { "body": "Dr. Winston" }
+ { "index": {} }
+ { "body": "Sir Robin (Stand-in)" }
+ { "index": {} }
+ { "body": "Knight Who Says Ni" }
+ { "index": {} }
+ { "body": "Police sergeant who stops the film" }
---
"sorts by score":
@@ -156,3 +196,27 @@ setup:
field: body.bigram
analyzer: bigram
force_unigrams: false
+
+---
+"reverse suggestions":
+ - do:
+ search:
+ size: 0
+ index: test
+ body:
+ suggest:
+ text: Artur, Ging of the Britons
+ test:
+ phrase:
+ field: body.ngram
+ force_unigrams: true
+ max_errors: 0.5
+ direct_generator:
+ - field: body.reverse
+ min_word_length: 1
+ suggest_mode: always
+ pre_filter: reverse
+ post_filter: reverse
+
+ - match: {suggest.test.0.options.0.text: arthur king of the britons}
+
diff --git a/core/src/test/java/org/elasticsearch/index/analysis/filter1/MyFilterTokenFilterFactory.java b/test/framework/src/main/java/org/elasticsearch/index/analysis/MyFilterTokenFilterFactory.java
similarity index 96%
rename from core/src/test/java/org/elasticsearch/index/analysis/filter1/MyFilterTokenFilterFactory.java
rename to test/framework/src/main/java/org/elasticsearch/index/analysis/MyFilterTokenFilterFactory.java
index 1c9a4798139ce..921a09e98e691 100644
--- a/core/src/test/java/org/elasticsearch/index/analysis/filter1/MyFilterTokenFilterFactory.java
+++ b/test/framework/src/main/java/org/elasticsearch/index/analysis/MyFilterTokenFilterFactory.java
@@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.elasticsearch.index.analysis.filter1;
+package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
diff --git a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java
index 76d170f7c2c78..97035623a6ce0 100644
--- a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java
+++ b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java
@@ -36,13 +36,11 @@
import org.elasticsearch.index.analysis.DecimalDigitFilterFactory;
import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory;
import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
-import org.elasticsearch.index.analysis.ElisionTokenFilterFactory;
import org.elasticsearch.index.analysis.GermanNormalizationFilterFactory;
import org.elasticsearch.index.analysis.GermanStemTokenFilterFactory;
import org.elasticsearch.index.analysis.HindiNormalizationFilterFactory;
import org.elasticsearch.index.analysis.HunspellTokenFilterFactory;
import org.elasticsearch.index.analysis.IndicNormalizationFilterFactory;
-import org.elasticsearch.index.analysis.KStemTokenFilterFactory;
import org.elasticsearch.index.analysis.KeepTypesFilterFactory;
import org.elasticsearch.index.analysis.KeepWordFilterFactory;
import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
@@ -60,7 +58,6 @@
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
-import org.elasticsearch.index.analysis.ReverseTokenFilterFactory;
import org.elasticsearch.index.analysis.ScandinavianFoldingFilterFactory;
import org.elasticsearch.index.analysis.ScandinavianNormalizationFilterFactory;
import org.elasticsearch.index.analysis.SerbianNormalizationFilterFactory;
@@ -68,17 +65,12 @@
import org.elasticsearch.index.analysis.SoraniNormalizationFilterFactory;
import org.elasticsearch.index.analysis.StandardTokenFilterFactory;
import org.elasticsearch.index.analysis.StandardTokenizerFactory;
-import org.elasticsearch.index.analysis.StemmerOverrideTokenFilterFactory;
-import org.elasticsearch.index.analysis.StemmerTokenFilterFactory;
import org.elasticsearch.index.analysis.StopTokenFilterFactory;
import org.elasticsearch.index.analysis.SynonymGraphTokenFilterFactory;
import org.elasticsearch.index.analysis.SynonymTokenFilterFactory;
import org.elasticsearch.index.analysis.ThaiTokenizerFactory;
-import org.elasticsearch.index.analysis.TruncateTokenFilterFactory;
import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory;
import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory;
-import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
-import org.elasticsearch.index.analysis.compound.HyphenationCompoundWordTokenFilterFactory;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.test.ESTestCase;
@@ -147,7 +139,7 @@ private static String toCamelCase(String s) {
.put("arabicstem", ArabicStemTokenFilterFactory.class)
.put("asciifolding", MovedToAnalysisCommon.class)
.put("brazilianstem", BrazilianStemTokenFilterFactory.class)
- .put("bulgarianstem", StemmerTokenFilterFactory.class)
+ .put("bulgarianstem", MovedToAnalysisCommon.class)
.put("cjkbigram", CJKBigramFilterFactory.class)
.put("cjkwidth", CJKWidthFilterFactory.class)
.put("classic", ClassicFilterFactory.class)
@@ -156,50 +148,50 @@ private static String toCamelCase(String s) {
.put("czechstem", CzechStemTokenFilterFactory.class)
.put("decimaldigit", DecimalDigitFilterFactory.class)
.put("delimitedpayload", DelimitedPayloadTokenFilterFactory.class)
- .put("dictionarycompoundword", DictionaryCompoundWordTokenFilterFactory.class)
+ .put("dictionarycompoundword", MovedToAnalysisCommon.class)
.put("edgengram", MovedToAnalysisCommon.class)
- .put("elision", ElisionTokenFilterFactory.class)
- .put("englishminimalstem", StemmerTokenFilterFactory.class)
- .put("englishpossessive", StemmerTokenFilterFactory.class)
- .put("finnishlightstem", StemmerTokenFilterFactory.class)
- .put("frenchlightstem", StemmerTokenFilterFactory.class)
- .put("frenchminimalstem", StemmerTokenFilterFactory.class)
- .put("galicianminimalstem", StemmerTokenFilterFactory.class)
- .put("galicianstem", StemmerTokenFilterFactory.class)
+ .put("elision", MovedToAnalysisCommon.class)
+ .put("englishminimalstem", MovedToAnalysisCommon.class)
+ .put("englishpossessive", MovedToAnalysisCommon.class)
+ .put("finnishlightstem", MovedToAnalysisCommon.class)
+ .put("frenchlightstem", MovedToAnalysisCommon.class)
+ .put("frenchminimalstem", MovedToAnalysisCommon.class)
+ .put("galicianminimalstem", MovedToAnalysisCommon.class)
+ .put("galicianstem", MovedToAnalysisCommon.class)
.put("germanstem", GermanStemTokenFilterFactory.class)
- .put("germanlightstem", StemmerTokenFilterFactory.class)
- .put("germanminimalstem", StemmerTokenFilterFactory.class)
+ .put("germanlightstem", MovedToAnalysisCommon.class)
+ .put("germanminimalstem", MovedToAnalysisCommon.class)
.put("germannormalization", GermanNormalizationFilterFactory.class)
.put("greeklowercase", MovedToAnalysisCommon.class)
- .put("greekstem", StemmerTokenFilterFactory.class)
+ .put("greekstem", MovedToAnalysisCommon.class)
.put("hindinormalization", HindiNormalizationFilterFactory.class)
- .put("hindistem", StemmerTokenFilterFactory.class)
- .put("hungarianlightstem", StemmerTokenFilterFactory.class)
+ .put("hindistem", MovedToAnalysisCommon.class)
+ .put("hungarianlightstem", MovedToAnalysisCommon.class)
.put("hunspellstem", HunspellTokenFilterFactory.class)
- .put("hyphenationcompoundword", HyphenationCompoundWordTokenFilterFactory.class)
+ .put("hyphenationcompoundword", MovedToAnalysisCommon.class)
.put("indicnormalization", IndicNormalizationFilterFactory.class)
.put("irishlowercase", MovedToAnalysisCommon.class)
- .put("indonesianstem", StemmerTokenFilterFactory.class)
- .put("italianlightstem", StemmerTokenFilterFactory.class)
+ .put("indonesianstem", MovedToAnalysisCommon.class)
+ .put("italianlightstem", MovedToAnalysisCommon.class)
.put("keepword", KeepWordFilterFactory.class)
.put("keywordmarker", MovedToAnalysisCommon.class)
- .put("kstem", KStemTokenFilterFactory.class)
- .put("latvianstem", StemmerTokenFilterFactory.class)
+ .put("kstem", MovedToAnalysisCommon.class)
+ .put("latvianstem", MovedToAnalysisCommon.class)
.put("length", MovedToAnalysisCommon.class)
.put("limittokencount", LimitTokenCountFilterFactory.class)
.put("lowercase", MovedToAnalysisCommon.class)
.put("ngram", MovedToAnalysisCommon.class)
- .put("norwegianlightstem", StemmerTokenFilterFactory.class)
- .put("norwegianminimalstem", StemmerTokenFilterFactory.class)
+ .put("norwegianlightstem", MovedToAnalysisCommon.class)
+ .put("norwegianminimalstem", MovedToAnalysisCommon.class)
.put("patterncapturegroup", PatternCaptureGroupTokenFilterFactory.class)
.put("patternreplace", PatternReplaceTokenFilterFactory.class)
.put("persiannormalization", PersianNormalizationFilterFactory.class)
.put("porterstem", MovedToAnalysisCommon.class)
- .put("portuguesestem", StemmerTokenFilterFactory.class)
- .put("portugueselightstem", StemmerTokenFilterFactory.class)
- .put("portugueseminimalstem", StemmerTokenFilterFactory.class)
- .put("reversestring", ReverseTokenFilterFactory.class)
- .put("russianlightstem", StemmerTokenFilterFactory.class)
+ .put("portuguesestem", MovedToAnalysisCommon.class)
+ .put("portugueselightstem", MovedToAnalysisCommon.class)
+ .put("portugueseminimalstem", MovedToAnalysisCommon.class)
+ .put("reversestring", MovedToAnalysisCommon.class)
+ .put("russianlightstem", MovedToAnalysisCommon.class)
.put("scandinavianfolding", ScandinavianFoldingFilterFactory.class)
.put("scandinaviannormalization", ScandinavianNormalizationFilterFactory.class)
.put("serbiannormalization", SerbianNormalizationFilterFactory.class)
@@ -207,16 +199,16 @@ private static String toCamelCase(String s) {
.put("minhash", MinHashTokenFilterFactory.class)
.put("snowballporter", MovedToAnalysisCommon.class)
.put("soraninormalization", SoraniNormalizationFilterFactory.class)
- .put("soranistem", StemmerTokenFilterFactory.class)
- .put("spanishlightstem", StemmerTokenFilterFactory.class)
+ .put("soranistem", MovedToAnalysisCommon.class)
+ .put("spanishlightstem", MovedToAnalysisCommon.class)
.put("standard", StandardTokenFilterFactory.class)
- .put("stemmeroverride", StemmerOverrideTokenFilterFactory.class)
+ .put("stemmeroverride", MovedToAnalysisCommon.class)
.put("stop", StopTokenFilterFactory.class)
- .put("swedishlightstem", StemmerTokenFilterFactory.class)
+ .put("swedishlightstem", MovedToAnalysisCommon.class)
.put("synonym", SynonymTokenFilterFactory.class)
.put("synonymgraph", SynonymGraphTokenFilterFactory.class)
.put("trim", MovedToAnalysisCommon.class)
- .put("truncate", TruncateTokenFilterFactory.class)
+ .put("truncate", MovedToAnalysisCommon.class)
.put("turkishlowercase", MovedToAnalysisCommon.class)
.put("type", KeepTypesFilterFactory.class)
.put("uppercase", MovedToAnalysisCommon.class)
diff --git a/test/framework/src/main/resources/org/elasticsearch/analysis/common/test1.json b/test/framework/src/main/resources/org/elasticsearch/analysis/common/test1.json
new file mode 100644
index 0000000000000..38937a9b5af93
--- /dev/null
+++ b/test/framework/src/main/resources/org/elasticsearch/analysis/common/test1.json
@@ -0,0 +1,54 @@
+{
+ "index":{
+ "analysis":{
+ "tokenizer":{
+ "standard":{
+ "type":"standard"
+ }
+ },
+ "filter":{
+ "stop":{
+ "type":"stop",
+ "stopwords":["test-stop"]
+ },
+ "stop2":{
+ "type":"stop",
+ "stopwords":["stop2-1", "stop2-2"]
+ },
+ "my":{
+ "type":"myfilter"
+ },
+ "dict_dec":{
+ "type":"dictionary_decompounder",
+ "word_list":["donau", "dampf", "schiff", "spargel", "creme", "suppe"]
+ }
+ },
+ "analyzer":{
+ "standard":{
+ "type":"standard",
+ "stopwords":["test1", "test2", "test3"]
+ },
+ "custom1":{
+ "tokenizer":"standard",
+ "filter":["stop", "stop2"]
+ },
+ "custom4":{
+ "tokenizer":"standard",
+ "filter":["my"]
+ },
+ "custom6":{
+ "tokenizer":"standard",
+ "position_increment_gap": 256
+ },
+ "czechAnalyzerWithStemmer":{
+ "tokenizer":"standard",
+ "filter":["standard", "lowercase", "stop", "czech_stem"]
+ },
+ "decompoundingAnalyzer":{
+ "tokenizer":"standard",
+ "filter":["dict_dec"]
+ }
+ }
+ }
+ }
+}
diff --git a/test/framework/src/main/resources/org/elasticsearch/analysis/common/test1.yml b/test/framework/src/main/resources/org/elasticsearch/analysis/common/test1.yml
new file mode 100644
index 0000000000000..f7a57d14dbe3d
--- /dev/null
+++ b/test/framework/src/main/resources/org/elasticsearch/analysis/common/test1.yml
@@ -0,0 +1,39 @@
+index :
+ analysis :
+ tokenizer :
+ standard :
+ type : standard
+ filter :
+ stop :
+ type : stop
+ stopwords : [test-stop]
+ stop2 :
+ type : stop
+ stopwords : [stop2-1, stop2-2]
+ my :
+ type : myfilter
+ dict_dec :
+ type : dictionary_decompounder
+ word_list : [donau, dampf, schiff, spargel, creme, suppe]
+ analyzer :
+ standard :
+ type : standard
+ stopwords : [test1, test2, test3]
+ custom1 :
+ tokenizer : standard
+ filter : [stop, stop2]
+ custom4 :
+ tokenizer : standard
+ filter : [my]
+ custom6 :
+ tokenizer : standard
+ position_increment_gap: 256
+ custom7 :
+ type : standard
+ version: 3.6
+ czechAnalyzerWithStemmer :
+ tokenizer : standard
+ filter : [standard, lowercase, stop, czech_stem]
+ decompoundingAnalyzer :
+ tokenizer : standard
+ filter : [dict_dec]