Skip to content

Commit 0519e37

Browse files
author
Christoph Büscher
authored
Fix case sensitivity rules for wildcard queries on text fields (#71751)
Wildcard queries on text fields should not apply the field's analyzer to the search query. However, we accidentally enabled this in #53127 by moving the query normalization to the StringFieldType super type. This change fixes this by separating the notions of normalization and case insensitivity (as implemented in the `case_insensitive` flag). This is done because we still need to maintain normalization of the query string when the wildcard query method on the field type is requested from the `query_string` query parser. Wildcard queries on keyword fields should also continue to apply the field's normalizer, regardless of whether the `case_insensitive` flag is set, because normalization could involve something other than lowercasing (e.g. substituting umlauts as in the GermanNormalizationFilter). Closes #71403
1 parent b4f1851 commit 0519e37

File tree

9 files changed

+95
-5
lines changed

9 files changed

+95
-5
lines changed

plugins/analysis-icu/src/main/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapper.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import com.ibm.icu.text.RawCollationKey;
1313
import com.ibm.icu.text.RuleBasedCollator;
1414
import com.ibm.icu.util.ULocale;
15+
1516
import org.apache.lucene.document.Field;
1617
import org.apache.lucene.document.FieldType;
1718
import org.apache.lucene.document.SortedSetDocValuesField;

server/src/internalClusterTest/java/org/elasticsearch/search/query/SearchQueryIT.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1790,7 +1790,7 @@ public void testWildcardQueryNormalizationOnKeywordField() {
17901790
}
17911791

17921792
/**
1793-
* Test that wildcard queries on text fields get normalized
1793+
* Test that wildcard queries on text fields don't get normalized
17941794
*/
17951795
public void testWildcardQueryNormalizationOnTextField() {
17961796
assertAcked(prepareCreate("test")
@@ -1806,6 +1806,11 @@ public void testWildcardQueryNormalizationOnTextField() {
18061806
{
18071807
WildcardQueryBuilder wildCardQuery = wildcardQuery("field1", "Bb*");
18081808
SearchResponse searchResponse = client().prepareSearch().setQuery(wildCardQuery).get();
1809+
assertHitCount(searchResponse, 0L);
1810+
1811+
// the following works not because of normalization but because of the `case_insensitive` parameter
1812+
wildCardQuery = wildcardQuery("field1", "Bb*").caseInsensitive(true);
1813+
searchResponse = client().prepareSearch().setQuery(wildCardQuery).get();
18091814
assertHitCount(searchResponse, 1L);
18101815

18111816
wildCardQuery = wildcardQuery("field1", "bb*");

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
import org.apache.lucene.document.SortedSetDocValuesField;
1616
import org.apache.lucene.index.IndexOptions;
1717
import org.apache.lucene.index.LeafReaderContext;
18+
import org.apache.lucene.search.MultiTermQuery;
19+
import org.apache.lucene.search.Query;
1820
import org.apache.lucene.util.BytesRef;
1921
import org.elasticsearch.common.lucene.Lucene;
2022
import org.elasticsearch.common.xcontent.XContentParser;
@@ -322,6 +324,19 @@ protected BytesRef indexedValueForSearch(Object value) {
322324
return getTextSearchInfo().getSearchAnalyzer().normalize(name(), value.toString());
323325
}
324326

327+
/**
328+
* Wildcard queries on keyword fields use the normalizer of the underlying field, regardless of their case sensitivity option
329+
*/
330+
@Override
331+
public Query wildcardQuery(
332+
String value,
333+
MultiTermQuery.RewriteMethod method,
334+
boolean caseInsensitive,
335+
SearchExecutionContext context
336+
) {
337+
return super.wildcardQuery(value, method, caseInsensitive, true, context);
338+
}
339+
325340
@Override
326341
public CollapseType collapseType() {
327342
return CollapseType.KEYWORD;
@@ -332,6 +347,7 @@ public CollapseType collapseType() {
332347
public int ignoreAbove() {
333348
return ignoreAbove;
334349
}
350+
335351
}
336352

337353
private final boolean indexed;

server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@
3535
import org.elasticsearch.index.fielddata.IndexFieldData;
3636
import org.elasticsearch.index.query.DistanceFeatureQueryBuilder;
3737
import org.elasticsearch.index.query.QueryRewriteContext;
38-
import org.elasticsearch.index.query.SearchExecutionContext;
3938
import org.elasticsearch.index.query.QueryShardException;
39+
import org.elasticsearch.index.query.SearchExecutionContext;
4040
import org.elasticsearch.search.DocValueFormat;
4141
import org.elasticsearch.search.fetch.subphase.FetchFieldsPhase;
4242
import org.elasticsearch.search.lookup.SearchLookup;
@@ -236,6 +236,11 @@ public Query wildcardQuery(String value,
236236
+ "] which is of type [" + typeName() + "]");
237237
}
238238

239+
public Query normalizedWildcardQuery(String value, @Nullable MultiTermQuery.RewriteMethod method, SearchExecutionContext context) {
240+
throw new QueryShardException(context, "Can only use wildcard queries on keyword, text and wildcard fields - not on [" + name
241+
+ "] which is of type [" + typeName() + "]");
242+
}
243+
239244
public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxDeterminizedStates,
240245
@Nullable MultiTermQuery.RewriteMethod method, SearchExecutionContext context) {
241246
throw new QueryShardException(context, "Can only use regexp queries on keyword and text fields - not on [" + name

server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,14 +113,30 @@ public static final String normalizeWildcardPattern(String fieldname, String val
113113

114114
@Override
115115
public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, boolean caseInsensitive, SearchExecutionContext context) {
116+
return wildcardQuery(value, method, caseInsensitive, false, context);
117+
}
118+
119+
120+
@Override
121+
public Query normalizedWildcardQuery(String value, MultiTermQuery.RewriteMethod method, SearchExecutionContext context) {
122+
return wildcardQuery(value, method, false, true, context);
123+
}
124+
125+
protected Query wildcardQuery(
126+
String value,
127+
MultiTermQuery.RewriteMethod method,
128+
boolean caseInsensitive,
129+
boolean shouldNormalize,
130+
SearchExecutionContext context
131+
) {
116132
failIfNotIndexed();
117133
if (context.allowExpensiveQueries() == false) {
118134
throw new ElasticsearchException("[wildcard] queries cannot be executed when '" +
119135
ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to false.");
120136
}
121137

122138
Term term;
123-
if (getTextSearchInfo().getSearchAnalyzer() != null) {
139+
if (getTextSearchInfo().getSearchAnalyzer() != null && shouldNormalize) {
124140
value = normalizeWildcardPattern(name(), value, getTextSearchInfo().getSearchAnalyzer());
125141
term = new Term(name(), value);
126142
} else {

server/src/main/java/org/elasticsearch/index/search/QueryStringQueryParser.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -682,7 +682,7 @@ private Query getWildcardQuerySingle(String field, String termStr) throws ParseE
682682
if (getAllowLeadingWildcard() == false && (termStr.startsWith("*") || termStr.startsWith("?"))) {
683683
throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery");
684684
}
685-
return currentFieldType.wildcardQuery(termStr, getMultiTermRewriteMethod(), context);
685+
return currentFieldType.normalizedWildcardQuery(termStr, getMultiTermRewriteMethod(), context);
686686
} catch (RuntimeException e) {
687687
if (lenient) {
688688
return newLenientFieldQuery(field, e);

server/src/test/java/org/elasticsearch/index/mapper/IndexFieldTypeTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
import org.elasticsearch.common.regex.Regex;
1515
import org.elasticsearch.common.settings.Settings;
1616
import org.elasticsearch.index.IndexSettings;
17-
import org.elasticsearch.index.query.SearchExecutionContext;
1817
import org.elasticsearch.index.query.QueryShardException;
18+
import org.elasticsearch.index.query.SearchExecutionContext;
1919
import org.elasticsearch.test.ESTestCase;
2020

2121
import java.util.function.Predicate;

server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,11 @@
2121
import org.apache.lucene.search.TermInSetQuery;
2222
import org.apache.lucene.search.TermQuery;
2323
import org.apache.lucene.search.TermRangeQuery;
24+
import org.apache.lucene.search.WildcardQuery;
2425
import org.apache.lucene.util.BytesRef;
2526
import org.apache.lucene.util.automaton.Automata;
2627
import org.apache.lucene.util.automaton.Automaton;
28+
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
2729
import org.apache.lucene.util.automaton.Operations;
2830
import org.elasticsearch.ElasticsearchException;
2931
import org.elasticsearch.common.lucene.BytesRefs;
@@ -161,6 +163,46 @@ public void testFetchSourceValue() throws IOException {
161163
assertEquals(List.of("true"), fetchSourceValue(fieldType, true));
162164
}
163165

166+
public void testWildcardQuery() {
167+
TextFieldType ft = createFieldType();
168+
169+
// case sensitive
170+
AutomatonQuery actual = (AutomatonQuery) ft.wildcardQuery("*Butterflies*", null, false, MOCK_CONTEXT);
171+
AutomatonQuery expected = new WildcardQuery(new Term("field", new BytesRef("*Butterflies*")));
172+
assertEquals(expected, actual);
173+
assertFalse(new CharacterRunAutomaton(actual.getAutomaton()).run("some butterflies somewhere"));
174+
175+
// case insensitive
176+
actual = (AutomatonQuery) ft.wildcardQuery("*Butterflies*", null, true, MOCK_CONTEXT);
177+
expected = AutomatonQueries.caseInsensitiveWildcardQuery(new Term("field", new BytesRef("*Butterflies*")));
178+
assertEquals(expected, actual);
179+
assertTrue(new CharacterRunAutomaton(actual.getAutomaton()).run("some butterflies somewhere"));
180+
assertTrue(new CharacterRunAutomaton(actual.getAutomaton()).run("some Butterflies somewhere"));
181+
182+
ElasticsearchException ee = expectThrows(ElasticsearchException.class,
183+
() -> ft.wildcardQuery("valu*", null, MOCK_CONTEXT_DISALLOW_EXPENSIVE));
184+
assertEquals("[wildcard] queries cannot be executed when 'search.allow_expensive_queries' is set to false.",
185+
ee.getMessage());
186+
}
187+
188+
/**
189+
* we use this e.g. in query string query parser to normalize terms on text fields
190+
*/
191+
public void testNormalizedWildcardQuery() {
192+
TextFieldType ft = createFieldType();
193+
194+
AutomatonQuery actual = (AutomatonQuery) ft.normalizedWildcardQuery("*Butterflies*", null, MOCK_CONTEXT);
195+
AutomatonQuery expected = new WildcardQuery(new Term("field", new BytesRef("*butterflies*")));
196+
assertEquals(expected, actual);
197+
assertTrue(new CharacterRunAutomaton(actual.getAutomaton()).run("some butterflies somewhere"));
198+
assertFalse(new CharacterRunAutomaton(actual.getAutomaton()).run("some Butterflies somewhere"));
199+
200+
ElasticsearchException ee = expectThrows(ElasticsearchException.class,
201+
() -> ft.wildcardQuery("valu*", null, MOCK_CONTEXT_DISALLOW_EXPENSIVE));
202+
assertEquals("[wildcard] queries cannot be executed when 'search.allow_expensive_queries' is set to false.",
203+
ee.getMessage());
204+
}
205+
164206
public void testTermIntervals() throws IOException {
165207
MappedFieldType ft = createFieldType();
166208
IntervalsSource termIntervals = ft.termIntervals(new BytesRef("foo"), MOCK_CONTEXT);

x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,11 @@ private WildcardFieldType(String name, String nullValue, int ignoreAbove,
265265
this.ignoreAbove = ignoreAbove;
266266
}
267267

268+
@Override
269+
public Query normalizedWildcardQuery(String value, MultiTermQuery.RewriteMethod method, SearchExecutionContext context) {
270+
return wildcardQuery(value, method, false, context);
271+
}
272+
268273
@Override
269274
public Query wildcardQuery(String wildcardPattern, RewriteMethod method, boolean caseInsensitive, SearchExecutionContext context) {
270275

0 commit comments

Comments
 (0)