Skip to content

Commit f243d75

Browse files
authored
Remove special-casing of Synonym filters in AnalysisRegistry (#34034)
The synonym filters no longer need access to the AnalysisRegistry in their constructors, so we can remove the special-case code and move them to the common analysis module. As a result, synonym filters are no longer available to `server` integration tests, so several of those tests are either rewritten or migrated to the common analysis module as rest-spec-api tests.
1 parent 9129948 commit f243d75

File tree

29 files changed

+1023
-876
lines changed

29 files changed

+1023
-876
lines changed

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,8 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
275275
filters.put("sorani_normalization", SoraniNormalizationFilterFactory::new);
276276
filters.put("stemmer_override", requiresAnalysisSettings(StemmerOverrideTokenFilterFactory::new));
277277
filters.put("stemmer", StemmerTokenFilterFactory::new);
278+
filters.put("synonym", requiresAnalysisSettings(SynonymTokenFilterFactory::new));
279+
filters.put("synonym_graph", requiresAnalysisSettings(SynonymGraphTokenFilterFactory::new));
278280
filters.put("trim", TrimTokenFilterFactory::new);
279281
filters.put("truncate", requiresAnalysisSettings(TruncateTokenFilterFactory::new));
280282
filters.put("unique", UniqueTokenFilterFactory::new);

server/src/main/java/org/elasticsearch/index/analysis/ESSolrSynonymParser.java renamed to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ESSolrSynonymParser.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
* under the License.
1818
*/
1919

20-
package org.elasticsearch.index.analysis;
20+
package org.elasticsearch.analysis.common;
2121

2222
import org.apache.logging.log4j.Logger;
2323
import org.apache.logging.log4j.LogManager;

server/src/main/java/org/elasticsearch/index/analysis/ESWordnetSynonymParser.java renamed to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ESWordnetSynonymParser.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
* under the License.
1818
*/
1919

20-
package org.elasticsearch.index.analysis;
20+
package org.elasticsearch.analysis.common;
2121

2222
import org.apache.logging.log4j.Logger;
2323
import org.apache.logging.log4j.LogManager;

server/src/main/java/org/elasticsearch/index/analysis/SynonymGraphTokenFilterFactory.java renamed to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SynonymGraphTokenFilterFactory.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
* under the License.
1818
*/
1919

20-
package org.elasticsearch.index.analysis;
20+
package org.elasticsearch.analysis.common;
2121

2222
import org.apache.lucene.analysis.Analyzer;
2323
import org.apache.lucene.analysis.TokenStream;
@@ -26,16 +26,18 @@
2626
import org.elasticsearch.common.settings.Settings;
2727
import org.elasticsearch.env.Environment;
2828
import org.elasticsearch.index.IndexSettings;
29+
import org.elasticsearch.index.analysis.CharFilterFactory;
30+
import org.elasticsearch.index.analysis.TokenFilterFactory;
31+
import org.elasticsearch.index.analysis.TokenizerFactory;
2932

30-
import java.io.IOException;
3133
import java.util.List;
3234
import java.util.function.Function;
3335

3436
public class SynonymGraphTokenFilterFactory extends SynonymTokenFilterFactory {
3537

36-
public SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry,
37-
String name, Settings settings) throws IOException {
38-
super(indexSettings, env, analysisRegistry, name, settings);
38+
SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env,
39+
String name, Settings settings) {
40+
super(indexSettings, env, name, settings);
3941
}
4042

4143
@Override

server/src/main/java/org/elasticsearch/index/analysis/SynonymTokenFilterFactory.java renamed to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SynonymTokenFilterFactory.java

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
* under the License.
1818
*/
1919

20-
package org.elasticsearch.index.analysis;
20+
package org.elasticsearch.analysis.common;
2121

2222
import org.apache.lucene.analysis.Analyzer;
2323
import org.apache.lucene.analysis.TokenStream;
@@ -26,23 +26,28 @@
2626
import org.elasticsearch.common.settings.Settings;
2727
import org.elasticsearch.env.Environment;
2828
import org.elasticsearch.index.IndexSettings;
29+
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
30+
import org.elasticsearch.index.analysis.Analysis;
31+
import org.elasticsearch.index.analysis.CharFilterFactory;
32+
import org.elasticsearch.index.analysis.CustomAnalyzer;
33+
import org.elasticsearch.index.analysis.TokenFilterFactory;
34+
import org.elasticsearch.index.analysis.TokenizerFactory;
2935

30-
import java.io.IOException;
3136
import java.io.Reader;
3237
import java.io.StringReader;
3338
import java.util.List;
3439
import java.util.function.Function;
3540

3641
public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
3742

38-
protected final String format;
39-
protected final boolean expand;
40-
protected final boolean lenient;
43+
private final String format;
44+
private final boolean expand;
45+
private final boolean lenient;
4146
protected final Settings settings;
4247
protected final Environment environment;
4348

44-
public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry,
45-
String name, Settings settings) throws IOException {
49+
SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env,
50+
String name, Settings settings) {
4651
super(indexSettings, name, settings);
4752
this.settings = settings;
4853

@@ -83,15 +88,15 @@ public TokenStream create(TokenStream tokenStream) {
8388
};
8489
}
8590

86-
protected Analyzer buildSynonymAnalyzer(TokenizerFactory tokenizer, List<CharFilterFactory> charFilters,
87-
List<TokenFilterFactory> tokenFilters) {
91+
Analyzer buildSynonymAnalyzer(TokenizerFactory tokenizer, List<CharFilterFactory> charFilters,
92+
List<TokenFilterFactory> tokenFilters) {
8893
return new CustomAnalyzer("synonyms", tokenizer, charFilters.toArray(new CharFilterFactory[0]),
8994
tokenFilters.stream()
9095
.map(TokenFilterFactory::getSynonymFilter)
9196
.toArray(TokenFilterFactory[]::new));
9297
}
9398

94-
protected SynonymMap buildSynonyms(Analyzer analyzer, Reader rules) {
99+
SynonymMap buildSynonyms(Analyzer analyzer, Reader rules) {
95100
try {
96101
SynonymMap.Builder parser;
97102
if ("wordnet".equalsIgnoreCase(format)) {
@@ -107,7 +112,7 @@ protected SynonymMap buildSynonyms(Analyzer analyzer, Reader rules) {
107112
}
108113
}
109114

110-
protected Reader getRulesFromSettings(Environment env) {
115+
Reader getRulesFromSettings(Environment env) {
111116
Reader rulesReader;
112117
if (settings.getAsList("synonyms", null) != null) {
113118
List<String> rulesList = Analysis.getWordList(env, settings, "synonyms");

modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilterFactory;
2525
import org.apache.lucene.analysis.reverse.ReverseStringFilterFactory;
2626
import org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory;
27-
import org.elasticsearch.index.analysis.SynonymTokenFilterFactory;
2827
import org.elasticsearch.indices.analysis.AnalysisFactoryTestCase;
2928

3029
import java.util.List;
@@ -106,6 +105,7 @@ protected Map<String, Class<?>> getTokenFilters() {
106105
filters.put("stemmeroverride", StemmerOverrideTokenFilterFactory.class);
107106
filters.put("kstem", KStemTokenFilterFactory.class);
108107
filters.put("synonym", SynonymTokenFilterFactory.class);
108+
filters.put("synonymgraph", SynonymGraphTokenFilterFactory.class);
109109
filters.put("dictionarycompoundword", DictionaryCompoundWordTokenFilterFactory.class);
110110
filters.put("hyphenationcompoundword", HyphenationCompoundWordTokenFilterFactory.class);
111111
filters.put("reversestring", ReverseTokenFilterFactory.class);

server/src/test/java/org/elasticsearch/index/analysis/ESSolrSynonymParserTests.java renamed to modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESSolrSynonymParserTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
* under the License.
1818
*/
1919

20-
package org.elasticsearch.index.analysis;
20+
package org.elasticsearch.analysis.common;
2121

2222
import org.apache.lucene.analysis.CharArraySet;
2323
import org.apache.lucene.analysis.StopFilter;

server/src/test/java/org/elasticsearch/index/analysis/ESWordnetSynonymParserTests.java renamed to modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESWordnetSynonymParserTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
* under the License.
1818
*/
1919

20-
package org.elasticsearch.index.analysis;
20+
package org.elasticsearch.analysis.common;
2121

2222
import org.apache.lucene.analysis.CharArraySet;
2323
import org.apache.lucene.analysis.StopFilter;

modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,21 +21,31 @@
2121

2222
import org.elasticsearch.action.search.SearchResponse;
2323
import org.elasticsearch.common.settings.Settings;
24+
import org.elasticsearch.common.xcontent.XContentBuilder;
25+
import org.elasticsearch.common.xcontent.XContentFactory;
2426
import org.elasticsearch.index.IndexSettings;
2527
import org.elasticsearch.index.query.Operator;
2628
import org.elasticsearch.plugins.Plugin;
29+
import org.elasticsearch.search.builder.SearchSourceBuilder;
2730
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
2831
import org.elasticsearch.test.ESIntegTestCase;
2932

3033
import java.io.IOException;
3134
import java.util.Arrays;
3235
import java.util.Collection;
3336

37+
import static org.elasticsearch.client.Requests.searchRequest;
3438
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
39+
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
40+
import static org.elasticsearch.index.query.QueryBuilders.matchPhrasePrefixQuery;
3541
import static org.elasticsearch.index.query.QueryBuilders.matchPhraseQuery;
3642
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
43+
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
44+
import static org.elasticsearch.search.builder.SearchSourceBuilder.highlight;
45+
import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource;
3746
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
3847
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHighlight;
48+
import static org.hamcrest.Matchers.anyOf;
3949
import static org.hamcrest.Matchers.equalTo;
4050
import static org.hamcrest.Matchers.startsWith;
4151

@@ -153,4 +163,165 @@ public void testMultiPhraseCutoff() throws IOException {
153163
+ "<em>http://www.facebook.com</em> <em>http://elasticsearch.org</em> "
154164
+ "<em>http://xing.com</em> <em>http://cnn.com</em> http://quora.com"));
155165
}
166+
167+
/**
 * Checks that highlighting works when the query term is a synonym of the indexed term.
 * The index defines a "synonym" analyzer (rule "fast,quick") that is used only as the
 * search analyzer of field1; the test then runs the same pair of searches once per
 * highlighter type ("plain", "fvh", "unified").
 *
 * @throws IOException declared by the test signature
 */
public void testSynonyms() throws IOException {
168+
// Analyzer "synonym": standard tokenizer followed by the "synonym" and "lowercase" filters.
Settings.Builder builder = Settings.builder()
169+
.put(indexSettings())
170+
.put("index.analysis.analyzer.synonym.tokenizer", "standard")
171+
.putList("index.analysis.analyzer.synonym.filter", "synonym", "lowercase")
172+
.put("index.analysis.filter.synonym.type", "synonym")
173+
.putList("index.analysis.filter.synonym.synonyms", "fast,quick");
174+
175+
// field1 is indexed with the standard analyzer; the synonym analyzer is only the search_analyzer.
assertAcked(prepareCreate("test").setSettings(builder.build())
176+
.addMapping("type1", "field1",
177+
"type=text,term_vector=with_positions_offsets,search_analyzer=synonym," +
178+
"analyzer=standard,index_options=offsets"));
179+
ensureGreen();
180+
181+
client().prepareIndex("test", "type1", "0").setSource(
182+
"field1", "The quick brown fox jumps over the lazy dog").get();
183+
refresh();
184+
for (String highlighterType : new String[] {"plain", "fvh", "unified"}) {
185+
logger.info("--> highlighting (type=" + highlighterType + ") and searching on field1");
186+
// First search uses the literal indexed terms and the highlighter type under test.
SearchSourceBuilder source = searchSource()
187+
.query(matchQuery("field1", "quick brown fox").operator(Operator.AND))
188+
.highlighter(
189+
highlight()
190+
.field("field1")
191+
.order("score")
192+
.preTags("<x>")
193+
.postTags("</x>")
194+
.highlighterType(highlighterType));
195+
SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet();
196+
assertHighlight(searchResponse, 0, "field1", 0, 1,
197+
equalTo("The <x>quick</x> <x>brown</x> <x>fox</x> jumps over the lazy dog"));
198+
199+
// Second search queries with "fast" and expects the same fragment, i.e. "quick" highlighted.
// NOTE(review): unlike the first search, this highlighter does not call
// highlighterType(highlighterType), so it presumably runs with the default highlighter
// on every iteration - confirm whether that is intended.
source = searchSource()
200+
.query(matchQuery("field1", "fast brown fox").operator(Operator.AND))
201+
.highlighter(highlight().field("field1").order("score").preTags("<x>").postTags("</x>"));
202+
searchResponse = client().search(searchRequest("test").source(source)).actionGet();
203+
assertHighlight(searchResponse, 0, "field1", 0, 1,
204+
equalTo("The <x>quick</x> <x>brown</x> <x>fox</x> jumps over the lazy dog"));
205+
}
206+
}
207+
208+
/**
 * Checks highlighting of match_phrase_prefix queries on two indices that share analysis
 * settings defining a "synonym" analyzer with the rule "quick => fast".
 * "first_test_index" is created with the mapping from type1TermVectorMapping();
 * "second_test_index" maps field3 and field4 with analyzer=synonym.
 *
 * @throws IOException declared by the test signature
 */
public void testPhrasePrefix() throws IOException {
209+
Settings.Builder builder = Settings.builder()
210+
.put(indexSettings())
211+
.put("index.analysis.analyzer.synonym.tokenizer", "standard")
212+
.putList("index.analysis.analyzer.synonym.filter", "synonym", "lowercase")
213+
.put("index.analysis.filter.synonym.type", "synonym")
214+
.putList("index.analysis.filter.synonym.synonyms", "quick => fast");
215+
216+
// NOTE(review): type1TermVectorMapping() does not reference the "synonym" analyzer, so the
// synonym rule presumably has no effect on this first index - confirm.
assertAcked(prepareCreate("first_test_index").setSettings(builder.build()).addMapping("type1", type1TermVectorMapping()));
217+
218+
ensureGreen();
219+
220+
// Doc 0 fills field0 and field1; doc 1 fills only field1.
// NOTE(review): field0 is not declared in type1TermVectorMapping(); presumably it is
// dynamically mapped - confirm.
client().prepareIndex("first_test_index", "type1", "0").setSource(
221+
"field0", "The quick brown fox jumps over the lazy dog",
222+
"field1", "The quick brown fox jumps over the lazy dog").get();
223+
client().prepareIndex("first_test_index", "type1", "1").setSource("field1",
224+
"The quick browse button is a fancy thing, right bro?").get();
225+
refresh();
226+
logger.info("--> highlighting and searching on field0");
227+
228+
// Single-term prefix: "bro" is expected to highlight "brown".
SearchSourceBuilder source = searchSource()
229+
.query(matchPhrasePrefixQuery("field0", "bro"))
230+
.highlighter(highlight().field("field0").order("score").preTags("<x>").postTags("</x>"));
231+
SearchResponse searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
232+
233+
assertHighlight(searchResponse, 0, "field0", 0, 1, equalTo("The quick <x>brown</x> fox jumps over the lazy dog"));
234+
235+
// Two-term phrase prefix: both "quick" and the expanded "brown" are expected to be tagged.
source = searchSource()
236+
.query(matchPhrasePrefixQuery("field0", "quick bro"))
237+
.highlighter(highlight().field("field0").order("score").preTags("<x>").postTags("</x>"));
238+
239+
searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
240+
assertHighlight(searchResponse, 0, "field0", 0, 1,
241+
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));
242+
243+
logger.info("--> highlighting and searching on field1");
// Neither indexed sentence contains the word "test"; the assertion below expects both
// documents to be returned.
244+
source = searchSource()
245+
.query(boolQuery()
246+
.should(matchPhrasePrefixQuery("field1", "test"))
247+
.should(matchPhrasePrefixQuery("field1", "bro"))
248+
)
249+
.highlighter(highlight().field("field1").order("score").preTags("<x>").postTags("</x>"));
250+
251+
searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
252+
assertThat(searchResponse.getHits().totalHits, equalTo(2L));
253+
// Each hit is allowed to carry either document's fragment.
for (int i = 0; i < 2; i++) {
254+
assertHighlight(searchResponse, i, "field1", 0, 1, anyOf(
255+
equalTo("The quick <x>browse</x> button is a fancy thing, right <x>bro</x>?"),
256+
equalTo("The quick <x>brown</x> fox jumps over the lazy dog")));
257+
}
258+
259+
source = searchSource()
260+
.query(matchPhrasePrefixQuery("field1", "quick bro"))
261+
.highlighter(highlight().field("field1").order("score").preTags("<x>").postTags("</x>"));
262+
263+
searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
264+
265+
// Hits 0 and 1 are each checked against both acceptable fragments.
assertHighlight(searchResponse, 0, "field1", 0, 1, anyOf(
266+
equalTo("The <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
267+
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog")));
268+
assertHighlight(searchResponse, 1, "field1", 0, 1, anyOf(
269+
equalTo("The <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
270+
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog")));
271+
272+
// Second index: field4 has term vectors with positions/offsets, field3 does not; both use
// the "synonym" analyzer.
assertAcked(prepareCreate("second_test_index").setSettings(builder.build()).addMapping("doc",
273+
"field4", "type=text,term_vector=with_positions_offsets,analyzer=synonym",
274+
"field3", "type=text,analyzer=synonym"));
275+
// with synonyms
276+
// Every document also stores "type" = "type2", which the post filters below match on.
client().prepareIndex("second_test_index", "doc", "0").setSource(
277+
"type", "type2",
278+
"field4", "The quick brown fox jumps over the lazy dog",
279+
"field3", "The quick brown fox jumps over the lazy dog").get();
280+
client().prepareIndex("second_test_index", "doc", "1").setSource(
281+
"type", "type2",
282+
"field4", "The quick browse button is a fancy thing, right bro?").get();
283+
client().prepareIndex("second_test_index", "doc", "2").setSource(
284+
"type", "type2",
285+
"field4", "a quick fast blue car").get();
286+
refresh();
287+
288+
// The query says "fast" while the expected fragment tags "quick".
// NOTE(review): presumably the "quick => fast" rule rewrote "quick" at index time - confirm.
source = searchSource().postFilter(termQuery("type", "type2")).query(matchPhrasePrefixQuery("field3", "fast bro"))
289+
.highlighter(highlight().field("field3").order("score").preTags("<x>").postTags("</x>"));
290+
291+
searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet();
292+
293+
assertHighlight(searchResponse, 0, "field3", 0, 1,
294+
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));
295+
296+
// NOTE(review): the field3 search above has no log line of its own, and this message is
// repeated verbatim before the next field4 search.
logger.info("--> highlighting and searching on field4");
297+
source = searchSource().postFilter(termQuery("type", "type2")).query(matchPhrasePrefixQuery("field4", "the fast bro"))
298+
.highlighter(highlight().field("field4").order("score").preTags("<x>").postTags("</x>"));
299+
searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet();
300+
301+
// Hits 0 and 1 may each carry either of the two matching fragments.
assertHighlight(searchResponse, 0, "field4", 0, 1, anyOf(
302+
equalTo("<x>The</x> <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
303+
equalTo("<x>The</x> <x>quick</x> <x>brown</x> fox jumps over the lazy dog")));
304+
assertHighlight(searchResponse, 1, "field4", 0, 1, anyOf(
305+
equalTo("<x>The</x> <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
306+
equalTo("<x>The</x> <x>quick</x> <x>brown</x> fox jumps over the lazy dog")));
307+
308+
logger.info("--> highlighting and searching on field4");
309+
source = searchSource().postFilter(termQuery("type", "type2"))
310+
.query(matchPhrasePrefixQuery("field4", "a fast quick blue ca"))
311+
.highlighter(highlight().field("field4").order("score").preTags("<x>").postTags("</x>"));
312+
searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet();
313+
314+
// Accepts either one tag around the whole phrase or one tag per term.
assertHighlight(searchResponse, 0, "field4", 0, 1,
315+
anyOf(equalTo("<x>a quick fast blue car</x>"),
316+
equalTo("<x>a</x> <x>quick</x> <x>fast</x> <x>blue</x> <x>car</x>")));
317+
}
318+
319+
/**
 * Builds the JSON mapping for type "type1": two text properties, field1 and field2,
 * each with term_vector set to "with_positions_offsets".
 *
 * @return the builder holding the completed mapping object
 * @throws IOException declared by the method signature
 */
public static XContentBuilder type1TermVectorMapping() throws IOException {
320+
return XContentFactory.jsonBuilder().startObject().startObject("type1")
321+
.startObject("properties")
322+
.startObject("field1").field("type", "text").field("term_vector", "with_positions_offsets").endObject()
323+
.startObject("field2").field("type", "text").field("term_vector", "with_positions_offsets").endObject()
324+
.endObject()
325+
.endObject().endObject();
326+
}
156327
}

0 commit comments

Comments
 (0)