|
20 | 20 | package org.elasticsearch.indices.analysis; |
21 | 21 |
|
22 | 22 | import org.apache.lucene.analysis.Analyzer; |
| 23 | +import org.apache.lucene.analysis.TokenFilter; |
23 | 24 | import org.apache.lucene.analysis.TokenStream; |
24 | 25 | import org.apache.lucene.analysis.Tokenizer; |
25 | 26 | import org.apache.lucene.analysis.ar.ArabicNormalizationFilter; |
|
28 | 29 | import org.apache.lucene.analysis.hunspell.Dictionary; |
29 | 30 | import org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilter; |
30 | 31 | import org.apache.lucene.analysis.standard.StandardAnalyzer; |
| 32 | +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; |
31 | 33 | import org.apache.lucene.store.Directory; |
32 | 34 | import org.apache.lucene.store.SimpleFSDirectory; |
33 | 35 | import org.elasticsearch.Version; |
|
43 | 45 | import org.elasticsearch.index.analysis.CustomAnalyzer; |
44 | 46 | import org.elasticsearch.index.analysis.IndexAnalyzers; |
45 | 47 | import org.elasticsearch.index.analysis.NamedAnalyzer; |
| 48 | +import org.elasticsearch.index.analysis.PreConfiguredTokenFilter; |
46 | 49 | import org.elasticsearch.index.analysis.StandardTokenizerFactory; |
47 | 50 | import org.elasticsearch.index.analysis.StopTokenFilterFactory; |
48 | 51 | import org.elasticsearch.index.analysis.TokenFilterFactory; |
|
61 | 64 | import java.nio.charset.StandardCharsets; |
62 | 65 | import java.nio.file.Files; |
63 | 66 | import java.nio.file.Path; |
| 67 | +import java.util.Arrays; |
| 68 | +import java.util.List; |
64 | 69 | import java.util.Map; |
65 | 70 | import java.util.Set; |
66 | 71 |
|
67 | 72 | import static java.util.Collections.singletonList; |
68 | 73 | import static java.util.Collections.singletonMap; |
| 74 | +import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; |
69 | 75 | import static org.hamcrest.Matchers.either; |
70 | 76 | import static org.hamcrest.Matchers.equalTo; |
71 | 77 | import static org.hamcrest.Matchers.instanceOf; |
72 | 78 | import static org.hamcrest.Matchers.is; |
73 | 79 |
|
74 | 80 | public class AnalysisModuleTests extends ESTestCase { |
    // Minimal node-level settings: only the mandatory path.home is set (pointing at a
    // per-test temp dir). Shared by tests that need an Environment but nothing else.
    private final Settings emptyNodeSettings = Settings.builder()
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .build();
75 | 84 |
|
76 | 85 | public IndexAnalyzers getIndexAnalyzers(Settings settings) throws IOException { |
77 | 86 | return getIndexAnalyzers(getNewRegistry(settings), settings); |
@@ -264,6 +273,71 @@ public void testUnderscoreInAnalyzerName() throws IOException { |
264 | 273 | } |
265 | 274 | } |
266 | 275 |
|
| 276 | + /** |
| 277 | + * Tests that plugins can register pre-configured token filters that vary in behavior based on Elasticsearch version, Lucene version, |
| 278 | + * and that do not vary based on version at all. |
| 279 | + */ |
| 280 | + public void testPluginPreConfiguredTokenFilters() throws IOException { |
| 281 | + // Simple token filter that appends text to the term |
| 282 | + final class AppendTokenFilter extends TokenFilter { |
| 283 | + private final CharTermAttribute term = addAttribute(CharTermAttribute.class); |
| 284 | + private final char[] appendMe; |
| 285 | + |
| 286 | + protected AppendTokenFilter(TokenStream input, String appendMe) { |
| 287 | + super(input); |
| 288 | + this.appendMe = appendMe.toCharArray(); |
| 289 | + } |
| 290 | + |
| 291 | + @Override |
| 292 | + public boolean incrementToken() throws IOException { |
| 293 | + if (false == input.incrementToken()) { |
| 294 | + return false; |
| 295 | + } |
| 296 | + term.resizeBuffer(term.length() + appendMe.length); |
| 297 | + System.arraycopy(appendMe, 0, term.buffer(), term.length(), appendMe.length); |
| 298 | + term.setLength(term.length() + appendMe.length); |
| 299 | + return true; |
| 300 | + } |
| 301 | + } |
| 302 | + boolean noVersionSupportsMultiTerm = randomBoolean(); |
| 303 | + boolean luceneVersionSupportsMultiTerm = randomBoolean(); |
| 304 | + boolean elasticsearchVersionSupportsMultiTerm = randomBoolean(); |
| 305 | + AnalysisRegistry registry = new AnalysisModule(new Environment(emptyNodeSettings), singletonList(new AnalysisPlugin() { |
| 306 | + @Override |
| 307 | + public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() { |
| 308 | + return Arrays.asList( |
| 309 | + PreConfiguredTokenFilter.singleton("no_version", noVersionSupportsMultiTerm, |
| 310 | + tokenStream -> new AppendTokenFilter(tokenStream, "no_version")), |
| 311 | + PreConfiguredTokenFilter.luceneVersion("lucene_version", luceneVersionSupportsMultiTerm, |
| 312 | + (tokenStream, luceneVersion) -> new AppendTokenFilter(tokenStream, luceneVersion.toString())), |
| 313 | + PreConfiguredTokenFilter.elasticsearchVersion("elasticsearch_version", elasticsearchVersionSupportsMultiTerm, |
| 314 | + (tokenStream, esVersion) -> new AppendTokenFilter(tokenStream, esVersion.toString())) |
| 315 | + ); |
| 316 | + } |
| 317 | + })).getAnalysisRegistry(); |
| 318 | + |
| 319 | + Version version = VersionUtils.randomVersion(random()); |
| 320 | + IndexAnalyzers analyzers = getIndexAnalyzers(registry, Settings.builder() |
| 321 | + .put("index.analysis.analyzer.no_version.tokenizer", "keyword") |
| 322 | + .put("index.analysis.analyzer.no_version.filter", "no_version") |
| 323 | + .put("index.analysis.analyzer.lucene_version.tokenizer", "keyword") |
| 324 | + .put("index.analysis.analyzer.lucene_version.filter", "lucene_version") |
| 325 | + .put("index.analysis.analyzer.elasticsearch_version.tokenizer", "keyword") |
| 326 | + .put("index.analysis.analyzer.elasticsearch_version.filter", "elasticsearch_version") |
| 327 | + .put(IndexMetaData.SETTING_VERSION_CREATED, version) |
| 328 | + .build()); |
| 329 | + assertTokenStreamContents(analyzers.get("no_version").tokenStream("", "test"), new String[] {"testno_version"}); |
| 330 | + assertTokenStreamContents(analyzers.get("lucene_version").tokenStream("", "test"), new String[] {"test" + version.luceneVersion}); |
| 331 | + assertTokenStreamContents(analyzers.get("elasticsearch_version").tokenStream("", "test"), new String[] {"test" + version}); |
| 332 | + |
| 333 | + assertEquals("test" + (noVersionSupportsMultiTerm ? "no_version" : ""), |
| 334 | + analyzers.get("no_version").normalize("", "test").utf8ToString()); |
| 335 | + assertEquals("test" + (luceneVersionSupportsMultiTerm ? version.luceneVersion.toString() : ""), |
| 336 | + analyzers.get("lucene_version").normalize("", "test").utf8ToString()); |
| 337 | + assertEquals("test" + (elasticsearchVersionSupportsMultiTerm ? version.toString() : ""), |
| 338 | + analyzers.get("elasticsearch_version").normalize("", "test").utf8ToString()); |
| 339 | + } |
| 340 | + |
267 | 341 | public void testRegisterHunspellDictionary() throws Exception { |
268 | 342 | Settings settings = Settings.builder() |
269 | 343 | .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) |
|
0 commit comments