Skip to content

Commit 2646e3a

Browse files
author
Christoph Büscher
authored
Fix case sensitivity rules for wildcard queries on text fields (#71751) (#72216)
Wildcard queries on text fields should not apply the field's analyzer to the search query. However, we accidentally enabled this in #53127 by moving the query normalization to the StringFieldType super type. This change fixes the problem by separating the notions of normalization and case insensitivity (the latter as implemented by the `case_insensitive` flag). The separation is needed because we still have to normalize the query string when the wildcard query method on the field type is requested from the `query_string` query parser. Wildcard queries on keyword fields should also continue to apply the field's normalizer, regardless of whether the `case_insensitive` flag is set, because normalization can involve something other than lowercasing (e.g. substituting umlauts, as in the GermanNormalizationFilter). Closes #71403
1 parent 5fbfe9b commit 2646e3a

File tree

9 files changed

+96
-5
lines changed

9 files changed

+96
-5
lines changed

plugins/analysis-icu/src/main/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapper.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import com.ibm.icu.text.RawCollationKey;
1313
import com.ibm.icu.text.RuleBasedCollator;
1414
import com.ibm.icu.util.ULocale;
15+
1516
import org.apache.lucene.document.Field;
1617
import org.apache.lucene.document.FieldType;
1718
import org.apache.lucene.document.SortedSetDocValuesField;

server/src/internalClusterTest/java/org/elasticsearch/search/query/SearchQueryIT.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1924,7 +1924,7 @@ public void testWildcardQueryNormalizationOnKeywordField() {
19241924
}
19251925

19261926
/**
1927-
* Test that wildcard queries on text fields get normalized
1927+
* Test that wildcard queries on text fields don't get normalized
19281928
*/
19291929
public void testWildcardQueryNormalizationOnTextField() {
19301930
assertAcked(prepareCreate("test")
@@ -1940,6 +1940,11 @@ public void testWildcardQueryNormalizationOnTextField() {
19401940
{
19411941
WildcardQueryBuilder wildCardQuery = wildcardQuery("field1", "Bb*");
19421942
SearchResponse searchResponse = client().prepareSearch().setQuery(wildCardQuery).get();
1943+
assertHitCount(searchResponse, 0L);
1944+
1945+
// the following works not because of normalization but because of the `case_insensitive` parameter
1946+
wildCardQuery = wildcardQuery("field1", "Bb*").caseInsensitive(true);
1947+
searchResponse = client().prepareSearch().setQuery(wildCardQuery).get();
19431948
assertHitCount(searchResponse, 1L);
19441949

19451950
wildCardQuery = wildcardQuery("field1", "bb*");

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
import org.apache.lucene.document.FieldType;
1515
import org.apache.lucene.document.SortedSetDocValuesField;
1616
import org.apache.lucene.index.IndexOptions;
17+
import org.apache.lucene.search.MultiTermQuery;
18+
import org.apache.lucene.search.Query;
1719
import org.apache.lucene.util.BytesRef;
1820
import org.elasticsearch.common.lucene.Lucene;
1921
import org.elasticsearch.common.xcontent.XContentParser;
@@ -288,6 +290,19 @@ protected BytesRef indexedValueForSearch(Object value) {
288290
return getTextSearchInfo().getSearchAnalyzer().normalize(name(), value.toString());
289291
}
290292

293+
/**
294+
* Wildcard queries on keyword fields use the normalizer of the underlying field, regardless of their case sensitivity option
295+
*/
296+
@Override
297+
public Query wildcardQuery(
298+
String value,
299+
MultiTermQuery.RewriteMethod method,
300+
boolean caseInsensitive,
301+
SearchExecutionContext context
302+
) {
303+
return super.wildcardQuery(value, method, caseInsensitive, true, context);
304+
}
305+
291306
@Override
292307
public CollapseType collapseType() {
293308
return CollapseType.KEYWORD;
@@ -298,6 +313,7 @@ public CollapseType collapseType() {
298313
public int ignoreAbove() {
299314
return ignoreAbove;
300315
}
316+
301317
}
302318

303319
private final boolean indexed;

server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@
3636
import org.elasticsearch.index.fielddata.IndexFieldData;
3737
import org.elasticsearch.index.query.DistanceFeatureQueryBuilder;
3838
import org.elasticsearch.index.query.QueryRewriteContext;
39-
import org.elasticsearch.index.query.SearchExecutionContext;
4039
import org.elasticsearch.index.query.QueryShardException;
40+
import org.elasticsearch.index.query.SearchExecutionContext;
4141
import org.elasticsearch.search.DocValueFormat;
4242
import org.elasticsearch.search.fetch.subphase.FetchFieldsPhase;
4343
import org.elasticsearch.search.lookup.SearchLookup;
@@ -250,6 +250,11 @@ public Query wildcardQuery(String value,
250250
+ "] which is of type [" + typeName() + "]");
251251
}
252252

253+
public Query normalizedWildcardQuery(String value, @Nullable MultiTermQuery.RewriteMethod method, SearchExecutionContext context) {
254+
throw new QueryShardException(context, "Can only use wildcard queries on keyword, text and wildcard fields - not on [" + name
255+
+ "] which is of type [" + typeName() + "]");
256+
}
257+
253258
public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxDeterminizedStates,
254259
@Nullable MultiTermQuery.RewriteMethod method, SearchExecutionContext context) {
255260
throw new QueryShardException(context, "Can only use regexp queries on keyword and text fields - not on [" + name

server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,14 +113,30 @@ public static final String normalizeWildcardPattern(String fieldname, String val
113113

114114
@Override
115115
public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, boolean caseInsensitive, SearchExecutionContext context) {
116+
return wildcardQuery(value, method, caseInsensitive, false, context);
117+
}
118+
119+
120+
@Override
121+
public Query normalizedWildcardQuery(String value, MultiTermQuery.RewriteMethod method, SearchExecutionContext context) {
122+
return wildcardQuery(value, method, false, true, context);
123+
}
124+
125+
protected Query wildcardQuery(
126+
String value,
127+
MultiTermQuery.RewriteMethod method,
128+
boolean caseInsensitive,
129+
boolean shouldNormalize,
130+
SearchExecutionContext context
131+
) {
116132
failIfNotIndexed();
117133
if (context.allowExpensiveQueries() == false) {
118134
throw new ElasticsearchException("[wildcard] queries cannot be executed when '" +
119135
ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to false.");
120136
}
121137

122138
Term term;
123-
if (getTextSearchInfo().getSearchAnalyzer() != null) {
139+
if (getTextSearchInfo().getSearchAnalyzer() != null && shouldNormalize) {
124140
value = normalizeWildcardPattern(name(), value, getTextSearchInfo().getSearchAnalyzer());
125141
term = new Term(name(), value);
126142
} else {

server/src/main/java/org/elasticsearch/index/search/QueryStringQueryParser.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -681,7 +681,7 @@ private Query getWildcardQuerySingle(String field, String termStr) throws ParseE
681681
if (getAllowLeadingWildcard() == false && (termStr.startsWith("*") || termStr.startsWith("?"))) {
682682
throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery");
683683
}
684-
return currentFieldType.wildcardQuery(termStr, getMultiTermRewriteMethod(), context);
684+
return currentFieldType.normalizedWildcardQuery(termStr, getMultiTermRewriteMethod(), context);
685685
} catch (RuntimeException e) {
686686
if (lenient) {
687687
return newLenientFieldQuery(field, e);

server/src/test/java/org/elasticsearch/index/mapper/IndexFieldTypeTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
import org.elasticsearch.common.regex.Regex;
1515
import org.elasticsearch.common.settings.Settings;
1616
import org.elasticsearch.index.IndexSettings;
17-
import org.elasticsearch.index.query.SearchExecutionContext;
1817
import org.elasticsearch.index.query.QueryShardException;
18+
import org.elasticsearch.index.query.SearchExecutionContext;
1919
import org.elasticsearch.test.ESTestCase;
2020

2121
import java.util.function.Predicate;

server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,11 @@
1919
import org.apache.lucene.search.TermInSetQuery;
2020
import org.apache.lucene.search.TermQuery;
2121
import org.apache.lucene.search.TermRangeQuery;
22+
import org.apache.lucene.search.WildcardQuery;
2223
import org.apache.lucene.util.BytesRef;
2324
import org.apache.lucene.util.automaton.Automata;
2425
import org.apache.lucene.util.automaton.Automaton;
26+
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
2527
import org.apache.lucene.util.automaton.Operations;
2628
import org.elasticsearch.ElasticsearchException;
2729
import org.elasticsearch.common.lucene.BytesRefs;
@@ -153,8 +155,49 @@ public void testIndexPrefixes() {
153155

154156
public void testFetchSourceValue() throws IOException {
155157
TextFieldType fieldType = createFieldType();
158+
156159
assertEquals(Collections.singletonList("value"), fetchSourceValue(fieldType, "value"));
157160
assertEquals(Collections.singletonList("42"), fetchSourceValue(fieldType, 42L));
158161
assertEquals(Collections.singletonList("true"), fetchSourceValue(fieldType, true));
159162
}
163+
164+
public void testWildcardQuery() {
165+
TextFieldType ft = createFieldType();
166+
167+
// case sensitive
168+
AutomatonQuery actual = (AutomatonQuery) ft.wildcardQuery("*Butterflies*", null, false, MOCK_CONTEXT);
169+
AutomatonQuery expected = new WildcardQuery(new Term("field", new BytesRef("*Butterflies*")));
170+
assertEquals(expected, actual);
171+
assertFalse(new CharacterRunAutomaton(actual.getAutomaton()).run("some butterflies somewhere"));
172+
173+
// case insensitive
174+
actual = (AutomatonQuery) ft.wildcardQuery("*Butterflies*", null, true, MOCK_CONTEXT);
175+
expected = AutomatonQueries.caseInsensitiveWildcardQuery(new Term("field", new BytesRef("*Butterflies*")));
176+
assertEquals(expected, actual);
177+
assertTrue(new CharacterRunAutomaton(actual.getAutomaton()).run("some butterflies somewhere"));
178+
assertTrue(new CharacterRunAutomaton(actual.getAutomaton()).run("some Butterflies somewhere"));
179+
180+
ElasticsearchException ee = expectThrows(ElasticsearchException.class,
181+
() -> ft.wildcardQuery("valu*", null, MOCK_CONTEXT_DISALLOW_EXPENSIVE));
182+
assertEquals("[wildcard] queries cannot be executed when 'search.allow_expensive_queries' is set to false.",
183+
ee.getMessage());
184+
}
185+
186+
/**
187+
* we use this e.g. in query string query parser to normalize terms on text fields
188+
*/
189+
public void testNormalizedWildcardQuery() {
190+
TextFieldType ft = createFieldType();
191+
192+
AutomatonQuery actual = (AutomatonQuery) ft.normalizedWildcardQuery("*Butterflies*", null, MOCK_CONTEXT);
193+
AutomatonQuery expected = new WildcardQuery(new Term("field", new BytesRef("*butterflies*")));
194+
assertEquals(expected, actual);
195+
assertTrue(new CharacterRunAutomaton(actual.getAutomaton()).run("some butterflies somewhere"));
196+
assertFalse(new CharacterRunAutomaton(actual.getAutomaton()).run("some Butterflies somewhere"));
197+
198+
ElasticsearchException ee = expectThrows(ElasticsearchException.class,
199+
() -> ft.wildcardQuery("valu*", null, MOCK_CONTEXT_DISALLOW_EXPENSIVE));
200+
assertEquals("[wildcard] queries cannot be executed when 'search.allow_expensive_queries' is set to false.",
201+
ee.getMessage());
202+
}
160203
}

x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,11 @@ private WildcardFieldType(String name, String nullValue, int ignoreAbove,
265265
this.ignoreAbove = ignoreAbove;
266266
}
267267

268+
@Override
269+
public Query normalizedWildcardQuery(String value, MultiTermQuery.RewriteMethod method, SearchExecutionContext context) {
270+
return wildcardQuery(value, method, false, context);
271+
}
272+
268273
@Override
269274
public Query wildcardQuery(String wildcardPattern, RewriteMethod method, boolean caseInsensitive, SearchExecutionContext context) {
270275

0 commit comments

Comments
 (0)