From 7569ffefaff9abe82cbe86e2a1a99b58b10bbf6e Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 18 Jul 2018 09:15:10 +0100 Subject: [PATCH 1/2] Call setReferences() on custom referring tokenfilters in _analyze When building custom tokenfilters without an index in the _analyze endpoint, we need to ensure that referring filters are correctly built by calling their #setReferences() method. Fixes #32154 --- .../test/analysis-common/40_token_filters.yml | 15 +++++++++++ .../analyze/TransportAnalyzeAction.java | 26 ++++++++++++++++++- .../indices/TransportAnalyzeActionTests.java | 1 + 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml index 3dca3bfd7770c..150fa39dcb956 100644 --- a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml @@ -1557,3 +1557,18 @@ filter: [my_bengali_stem] - length: { tokens: 1 } - match: { tokens.0.token: কর } + +--- +"multiplexer": + - do: + indices.analyze: + body: + text: "The quick fox" + tokenizer: "standard" + filter: + - type: multiplexer + filters: [ lowercase, uppercase ] + preserve_original: false + - length: { tokens: 6 } + - match: { tokens.0.token: the } + - match: { tokens.1.token: THE } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java index 35f1f725b65ad..5c5da62571f66 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java @@ -52,6 +52,7 @@ import 
org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.MultiTermAwareComponent; import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.analysis.ReferringFilterFactory; import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.index.mapper.KeywordFieldMapper; @@ -574,6 +575,7 @@ private static List parseTokenFilterFactories(AnalyzeRequest Environment environment, Tuple tokenizerFactory, List charFilterFactoryList, boolean normalizer) throws IOException { List tokenFilterFactoryList = new ArrayList<>(); + List referringFilters = new ArrayList<>(); if (request.tokenFilters() != null && request.tokenFilters().size() > 0) { List tokenFilters = request.tokenFilters(); for (AnalyzeRequest.NameOrDefinition tokenFilter : tokenFilters) { @@ -594,7 +596,9 @@ private static List parseTokenFilterFactories(AnalyzeRequest tokenFilterFactory = tokenFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenfilter", settings); tokenFilterFactory = CustomAnalyzerProvider.checkAndApplySynonymFilter(tokenFilterFactory, tokenizerFactory.v1(), tokenizerFactory.v2(), tokenFilterFactoryList, charFilterFactoryList, environment); - + if (tokenFilterFactory instanceof ReferringFilterFactory) { + referringFilters.add((ReferringFilterFactory)tokenFilterFactory); + } } else { AnalysisModule.AnalysisProvider tokenFilterFactoryFactory; @@ -629,6 +633,26 @@ private static List parseTokenFilterFactories(AnalyzeRequest tokenFilterFactoryList.add(tokenFilterFactory); } } + if (referringFilters.isEmpty() == false) { + // The request included at least one custom referring tokenfilter that has not already been built by the + // analysis registry, so we need to set its references. 
Note that this will only apply pre-built + // tokenfilters + if (indexSettings == null) { + Settings settings = Settings.builder() + .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()) + .build(); + IndexMetaData metaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(settings).build(); + indexSettings = new IndexSettings(metaData, Settings.EMPTY); + } + Map prebuiltFilters = analysisRegistry.buildTokenFilterFactories(indexSettings); + for (ReferringFilterFactory rff : referringFilters) { + rff.setReferences(prebuiltFilters); + } + + } return tokenFilterFactoryList; } diff --git a/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java b/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java index c0404a47ab237..70b370dd4994f 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java @@ -181,6 +181,7 @@ public void testNoIndexAnalyzers() throws IOException { assertEquals("qu1ck", tokens.get(1).getTerm()); assertEquals("brown", tokens.get(2).getTerm()); assertEquals("foxbar", tokens.get(3).getTerm()); + } public void testFillsAttributes() throws IOException { From 84b60b3de2ff0459beedbc4903328881a3b47265 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 18 Jul 2018 09:19:19 +0100 Subject: [PATCH 2/2] stray whitespace --- .../action/admin/indices/TransportAnalyzeActionTests.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java b/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java index 70b370dd4994f..c0404a47ab237 
100644 --- a/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java @@ -181,7 +181,6 @@ public void testNoIndexAnalyzers() throws IOException { assertEquals("qu1ck", tokens.get(1).getTerm()); assertEquals("brown", tokens.get(2).getTerm()); assertEquals("foxbar", tokens.get(3).getTerm()); - } public void testFillsAttributes() throws IOException {