diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index 175935258ad6e..bf99eacf4f33f 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -276,6 +276,8 @@ public Map> getTokenFilters() { filters.put("sorani_normalization", SoraniNormalizationFilterFactory::new); filters.put("stemmer_override", requiresAnalysisSettings(StemmerOverrideTokenFilterFactory::new)); filters.put("stemmer", StemmerTokenFilterFactory::new); + filters.put("synonym", requiresAnalysisSettings(SynonymTokenFilterFactory::new)); + filters.put("synonym_graph", requiresAnalysisSettings(SynonymGraphTokenFilterFactory::new)); filters.put("trim", TrimTokenFilterFactory::new); filters.put("truncate", requiresAnalysisSettings(TruncateTokenFilterFactory::new)); filters.put("unique", UniqueTokenFilterFactory::new); diff --git a/server/src/main/java/org/elasticsearch/index/analysis/ESSolrSynonymParser.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ESSolrSynonymParser.java similarity index 98% rename from server/src/main/java/org/elasticsearch/index/analysis/ESSolrSynonymParser.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ESSolrSynonymParser.java index 006973dd9b6bc..256e05982c67d 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/ESSolrSynonymParser.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ESSolrSynonymParser.java @@ -17,7 +17,7 @@ * under the License. */ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.LogManager; diff --git a/server/src/main/java/org/elasticsearch/index/analysis/ESWordnetSynonymParser.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ESWordnetSynonymParser.java similarity index 98% rename from server/src/main/java/org/elasticsearch/index/analysis/ESWordnetSynonymParser.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ESWordnetSynonymParser.java index ebcd84e39d7a2..1e09011af675c 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/ESWordnetSynonymParser.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ESWordnetSynonymParser.java @@ -17,7 +17,7 @@ * under the License. */ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.LogManager; diff --git a/server/src/main/java/org/elasticsearch/index/analysis/SynonymGraphTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SynonymGraphTokenFilterFactory.java similarity index 89% rename from server/src/main/java/org/elasticsearch/index/analysis/SynonymGraphTokenFilterFactory.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SynonymGraphTokenFilterFactory.java index 200e426fbd492..832dbc1fae9bc 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/SynonymGraphTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SynonymGraphTokenFilterFactory.java @@ -17,7 +17,7 @@ * under the License. */ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; @@ -26,16 +26,18 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.CharFilterFactory; +import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.index.analysis.TokenizerFactory; -import java.io.IOException; import java.util.List; import java.util.function.Function; public class SynonymGraphTokenFilterFactory extends SynonymTokenFilterFactory { - public SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry, - String name, Settings settings) throws IOException { - super(indexSettings, env, analysisRegistry, name, settings); + public SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, + String name, Settings settings) { + super(indexSettings, env, name, settings); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/analysis/SynonymTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SynonymTokenFilterFactory.java similarity index 92% rename from server/src/main/java/org/elasticsearch/index/analysis/SynonymTokenFilterFactory.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SynonymTokenFilterFactory.java index c18e8c94310f2..794a19b57f36e 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/SynonymTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SynonymTokenFilterFactory.java @@ -17,7 +17,7 @@ * under the License. */ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; @@ -26,8 +26,13 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; +import org.elasticsearch.index.analysis.Analysis; +import org.elasticsearch.index.analysis.CharFilterFactory; +import org.elasticsearch.index.analysis.CustomAnalyzer; +import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.index.analysis.TokenizerFactory; -import java.io.IOException; import java.io.Reader; import java.io.StringReader; import java.util.List; @@ -41,8 +46,8 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory { protected final Settings settings; protected final Environment environment; - public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry, - String name, Settings settings) throws IOException { + public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, + String name, Settings settings) { super(indexSettings, name, settings); this.settings = settings; diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java index 5084306587847..ae6852a2f3e2c 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java @@ -25,7 +25,6 @@ import org.apache.lucene.analysis.reverse.ReverseStringFilterFactory; import org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory; import org.elasticsearch.index.analysis.SoraniNormalizationFilterFactory; -import org.elasticsearch.index.analysis.SynonymTokenFilterFactory; import org.elasticsearch.indices.analysis.AnalysisFactoryTestCase; import java.util.List; diff --git a/server/src/test/java/org/elasticsearch/index/analysis/ESSolrSynonymParserTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESSolrSynonymParserTests.java similarity index 98% rename from server/src/test/java/org/elasticsearch/index/analysis/ESSolrSynonymParserTests.java rename to modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESSolrSynonymParserTests.java index 31aa1a9be2512..e6ed9b0385505 100644 --- a/server/src/test/java/org/elasticsearch/index/analysis/ESSolrSynonymParserTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESSolrSynonymParserTests.java @@ -17,7 +17,7 @@ * under the License. */ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.StopFilter; diff --git a/server/src/test/java/org/elasticsearch/index/analysis/ESWordnetSynonymParserTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESWordnetSynonymParserTests.java similarity index 98% rename from server/src/test/java/org/elasticsearch/index/analysis/ESWordnetSynonymParserTests.java rename to modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESWordnetSynonymParserTests.java index 6d0fd8944d4c4..18eaaedb5d071 100644 --- a/server/src/test/java/org/elasticsearch/index/analysis/ESWordnetSynonymParserTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESWordnetSynonymParserTests.java @@ -17,7 +17,7 @@ * under the License. */ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.StopFilter; diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml index 150fa39dcb956..4106237f2cca6 100644 --- a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml @@ -223,6 +223,68 @@ - match: { tokens.0.token: Foo } - match: { tokens.1.token: Bar! } +--- +"synonym": + - do: + indices.create: + index: test + body: + settings: + analysis: + filter: + my_synonym: + type: synonym + synonyms: ["car,auto"] + + - do: + indices.analyze: + index: test + body: + text: what car magazine + tokenizer: whitespace + filter: [ my_synonym ] + - length: { tokens: 4 } + - match: { tokens.0.token: what } + - match: { tokens.0.position: 0 } + - match: { tokens.1.token: car } + - match: { tokens.1.position: 1 } + - match: { tokens.2.token: auto } + - match: { tokens.2.position: 1 } + - match: { tokens.3.token: magazine } + - match: { tokens.3.position: 2 } + +--- +"synonym_graph": + - do: + indices.create: + index: test + body: + settings: + analysis: + filter: + my_graph_synonym: + type: synonym_graph + synonyms: [ "guinea pig,cavy" ] + + - do: + indices.analyze: + index: test + body: + text: my guinea pig snores + tokenizer: whitespace + filter: [ my_graph_synonym ] + - length: { tokens: 5 } + - match: { tokens.0.token: my } + - match: { tokens.1.token: cavy } + - match: { tokens.1.position: 1 } + - match: { tokens.1.positionLength: 2 } + - match: { tokens.2.token: guinea } + - match: { tokens.2.position: 1 } + - match: { tokens.3.token: pig } + - match: { tokens.3.position: 2 } + - match: { tokens.4.token: snores } + - match: { tokens.4.position: 3 } + --- "synonym_graph and flatten_graph": - do: diff --git a/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java b/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java index d7a283f315840..86b9c5a65c503 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java @@ -158,16 +158,8 @@ public IndexAnalyzers build(IndexSettings indexSettings) throws IOException { public Map buildTokenFilterFactories(IndexSettings indexSettings) throws IOException { final Map tokenFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_FILTER); - Map> tokenFilters = new HashMap<>(this.tokenFilters); - /* - * synonym and synonym_graph are different than everything else since they need access to the tokenizer factories for the index. - * instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and - * hide internal data-structures as much as possible. - */ - tokenFilters.put("synonym", requiresAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings))); - tokenFilters.put("synonym_graph", requiresAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings))); - - return buildMapping(Component.FILTER, indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.preConfiguredTokenFilters); + return buildMapping(Component.FILTER, indexSettings, tokenFiltersSettings, + Collections.unmodifiableMap(this.tokenFilters), prebuiltAnalysis.preConfiguredTokenFilters); } public Map buildTokenizerFactories(IndexSettings indexSettings) throws IOException { @@ -222,18 +214,7 @@ public AnalysisProvider getTokenFilterProvider(String tokenF if (tokenFilterSettings.containsKey(tokenFilter)) { Settings currentSettings = tokenFilterSettings.get(tokenFilter); String typeName = currentSettings.get("type"); - /* - * synonym and synonym_graph are different than everything else since they need access to the tokenizer factories for the index. - * instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and - * hide internal data-structures as much as possible. - */ - if ("synonym".equals(typeName)) { - return requiresAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)); - } else if ("synonym_graph".equals(typeName)) { - return requiresAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings)); - } else { - return getAnalysisProvider(Component.FILTER, tokenFilters, tokenFilter, typeName); - } + return getAnalysisProvider(Component.FILTER, tokenFilters, tokenFilter, typeName); } else { return getTokenFilterProvider(tokenFilter); } @@ -257,19 +238,6 @@ public AnalysisProvider getCharFilterProvider(String charFilt } } - private static AnalysisModule.AnalysisProvider requiresAnalysisSettings(AnalysisModule.AnalysisProvider provider) { - return new AnalysisModule.AnalysisProvider() { - @Override - public T get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException { - return provider.get(indexSettings, environment, name, settings); - } - @Override - public boolean requiresAnalysisSettings() { - return true; - } - }; - } - enum Component { ANALYZER { @Override diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index 4736cbe471289..e3c32bba27ed7 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.mapper; +import org.apache.lucene.analysis.MockSynonymAnalyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.FieldType; @@ -55,6 +56,7 @@ import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType; import org.elasticsearch.index.query.MatchPhraseQueryBuilder; import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.index.search.MatchQuery; import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESSingleNodeTestCase; @@ -82,10 +84,6 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase { @Before public void setup() { Settings settings = Settings.builder() - .put("index.analysis.filter.mySynonyms.type", "synonym") - .putList("index.analysis.filter.mySynonyms.synonyms", Collections.singletonList("car, auto")) - .put("index.analysis.analyzer.synonym.tokenizer", "standard") - .put("index.analysis.analyzer.synonym.filter", "mySynonyms") // Stop filter remains in server as it is part of lucene-core .put("index.analysis.analyzer.my_stop_analyzer.tokenizer", "standard") .put("index.analysis.analyzer.my_stop_analyzer.filter", "stop") @@ -739,7 +737,7 @@ public void testFastPhraseMapping() throws IOException { .endObject() .startObject("synfield") .field("type", "text") - .field("analyzer", "synonym") + .field("analyzer", "standard") // will be replaced with MockSynonymAnalyzer .field("index_phrases", true) .endObject() .endObject() @@ -766,11 +764,13 @@ public void testFastPhraseMapping() throws IOException { assertThat(q5, is(new PhraseQuery.Builder().add(new Term("field", "sparkle")).add(new Term("field", "stopword"), 2).build())); - Query q6 = new MatchPhraseQueryBuilder("synfield", "motor car").toQuery(queryShardContext); + MatchQuery matchQuery = new MatchQuery(queryShardContext); + matchQuery.setAnalyzer(new MockSynonymAnalyzer()); + Query q6 = matchQuery.parse(MatchQuery.Type.PHRASE, "synfield", "motor dogs"); assertThat(q6, is(new MultiPhraseQuery.Builder() .add(new Term[]{ - new Term("synfield._index_phrase", "motor car"), - new Term("synfield._index_phrase", "motor auto")}) + new Term("synfield._index_phrase", "motor dogs"), + new Term("synfield._index_phrase", "motor dog")}) .build())); ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", BytesReference diff --git a/server/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java b/server/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java index 184d54f43b845..1087bbbf9fd8f 100644 --- a/server/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java +++ b/server/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java @@ -73,11 +73,7 @@ protected Collection> getPlugins() { @Before public void setup() throws IOException { - Settings settings = Settings.builder() - .put("index.analysis.filter.syns.type","synonym") - .putList("index.analysis.filter.syns.synonyms","quick,fast") - .put("index.analysis.analyzer.syns.tokenizer","standard") - .put("index.analysis.analyzer.syns.filter","syns").build(); + Settings settings = Settings.builder().build(); IndexService indexService = createIndex("test", settings); MapperService mapperService = indexService.mapperService(); String mapping = "{\n" + @@ -87,11 +83,11 @@ public void setup() throws IOException { " \"properties\":{\n" + " \"first\": {\n" + " \"type\":\"text\",\n" + - " \"analyzer\":\"syns\"\n" + + " \"analyzer\":\"standard\"\n" + " }," + " \"last\": {\n" + " \"type\":\"text\",\n" + - " \"analyzer\":\"syns\"\n" + + " \"analyzer\":\"standard\"\n" + " }" + " }" + " }\n" + @@ -221,25 +217,27 @@ public void testMultiMatchCrossFieldsWithSynonyms() throws IOException { QueryShardContext queryShardContext = indexService.newQueryShardContext( randomInt(20), null, () -> { throw new UnsupportedOperationException(); }, null); + MultiMatchQuery parser = new MultiMatchQuery(queryShardContext); + parser.setAnalyzer(new MockSynonymAnalyzer()); + Map fieldNames = new HashMap<>(); + fieldNames.put("name.first", 1.0f); + // check that synonym query is used for a single field - Query parsedQuery = - multiMatchQuery("quick").field("name.first") - .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).toQuery(queryShardContext); + Query parsedQuery = parser.parse(MultiMatchQueryBuilder.Type.CROSS_FIELDS, fieldNames, "dogs", null); Term[] terms = new Term[2]; - terms[0] = new Term("name.first", "quick"); - terms[1] = new Term("name.first", "fast"); + terms[0] = new Term("name.first", "dog"); + terms[1] = new Term("name.first", "dogs"); Query expectedQuery = new SynonymQuery(terms); assertThat(parsedQuery, equalTo(expectedQuery)); // check that blended term query is used for multiple fields - parsedQuery = - multiMatchQuery("quick").field("name.first").field("name.last") - .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).toQuery(queryShardContext); + fieldNames.put("name.last", 1.0f); + parsedQuery = parser.parse(MultiMatchQueryBuilder.Type.CROSS_FIELDS, fieldNames, "dogs", null); terms = new Term[4]; - terms[0] = new Term("name.first", "quick"); - terms[1] = new Term("name.first", "fast"); - terms[2] = new Term("name.last", "quick"); - terms[3] = new Term("name.last", "fast"); + terms[0] = new Term("name.first", "dog"); + terms[1] = new Term("name.first", "dogs"); + terms[2] = new Term("name.last", "dog"); + terms[3] = new Term("name.last", "dogs"); float[] boosts = new float[4]; Arrays.fill(boosts, 1.0f); expectedQuery = BlendedTermQuery.dismaxBlendedQuery(terms, boosts, 1.0f); diff --git a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java index 2164fe32a3945..70a42032ea469 100644 --- a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java @@ -31,8 +31,6 @@ import org.elasticsearch.index.analysis.ShingleTokenFilterFactory; import org.elasticsearch.index.analysis.StandardTokenizerFactory; import org.elasticsearch.index.analysis.StopTokenFilterFactory; -import org.elasticsearch.index.analysis.SynonymGraphTokenFilterFactory; -import org.elasticsearch.index.analysis.SynonymTokenFilterFactory; import org.elasticsearch.plugins.AnalysisPlugin; import org.elasticsearch.test.ESTestCase; @@ -169,8 +167,8 @@ private static String toCamelCase(String s) { .put("stemmeroverride", MovedToAnalysisCommon.class) .put("stop", StopTokenFilterFactory.class) .put("swedishlightstem", MovedToAnalysisCommon.class) - .put("synonym", SynonymTokenFilterFactory.class) - .put("synonymgraph", SynonymGraphTokenFilterFactory.class) + .put("synonym", MovedToAnalysisCommon.class) + .put("synonymgraph", MovedToAnalysisCommon.class) .put("trim", MovedToAnalysisCommon.class) .put("truncate", MovedToAnalysisCommon.class) .put("turkishlowercase", MovedToAnalysisCommon.class)