Skip to content

Commit 2f9c731

Browse files
authored
Search - make wildcard field use constant scoring queries for wildcard queries and caching fix (#70452)
* Make wildcard field use constant scoring queries for wildcard queries. Add a note about ignoring rewrite parameters on wildcard queries. Also fixes a caching issue where case-sensitive and case-insensitive results were cached as the same entry. Closes #69604
1 parent 1db2b85 commit 2f9c731

File tree

4 files changed

+62
-43
lines changed

4 files changed

+62
-43
lines changed

docs/reference/mapping/types/wildcard.asciidoc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
=== Wildcard field type
66

77
The `wildcard` field type is a specialized keyword field for unstructured
8-
machine-generated content you plan to search using grep-like
8+
machine-generated content you plan to search using grep-like
99
<<query-dsl-wildcard-query,`wildcard`>> and <<query-dsl-regexp-query,`regexp`>>
1010
queries. The `wildcard` type is optimized for fields with large values or high
1111
cardinality.
@@ -130,4 +130,5 @@ The following parameters are accepted by `wildcard` fields:
130130
==== Limitations
131131

132132
* `wildcard` fields are untokenized like keyword fields, so do not support queries that rely on word positions such as phrase queries.
133+
* When running `wildcard` queries, any `rewrite` parameter is ignored. The scoring is always a constant score.
133134

x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/AutomatonQueryOnBinaryDv.java

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525

2626
import java.io.IOException;
2727
import java.util.Objects;
28-
import java.util.function.Supplier;
2928

3029
/**
3130
* Query that runs an Automaton across all binary doc values.
@@ -35,20 +34,16 @@ public class AutomatonQueryOnBinaryDv extends Query {
3534

3635
private final String field;
3736
private final String matchPattern;
38-
private final Supplier<Automaton> automatonSupplier;
37+
private final ByteRunAutomaton bytesMatcher;
3938

40-
public AutomatonQueryOnBinaryDv(String field, String matchPattern, Supplier<Automaton> automatonSupplier) {
39+
public AutomatonQueryOnBinaryDv(String field, String matchPattern, Automaton automaton) {
4140
this.field = field;
4241
this.matchPattern = matchPattern;
43-
this.automatonSupplier = automatonSupplier;
42+
bytesMatcher = new ByteRunAutomaton(automaton);
4443
}
4544

4645
@Override
4746
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
48-
49-
50-
ByteRunAutomaton bytesMatcher = new ByteRunAutomaton(automatonSupplier.get());
51-
5247
return new ConstantScoreWeight(this, boost) {
5348

5449
@Override
@@ -99,12 +94,13 @@ public boolean equals(Object obj) {
9994
return false;
10095
}
10196
AutomatonQueryOnBinaryDv other = (AutomatonQueryOnBinaryDv) obj;
102-
return Objects.equals(field, other.field) && Objects.equals(matchPattern, other.matchPattern);
97+
return Objects.equals(field, other.field) && Objects.equals(matchPattern, other.matchPattern)
98+
&& Objects.equals(bytesMatcher, other.bytesMatcher);
10399
}
104100

105101
@Override
106102
public int hashCode() {
107-
return Objects.hash(field, matchPattern);
103+
return Objects.hash(field, matchPattern, bytesMatcher);
108104
}
109105

110106
}

x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java

Lines changed: 19 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -322,21 +322,17 @@ public Query wildcardQuery(String wildcardPattern, RewriteMethod method, boolean
322322
addClause(string, rewritten, Occur.MUST);
323323
clauseCount++;
324324
}
325-
Supplier<Automaton> deferredAutomatonSupplier = () -> {
326-
if(caseInsensitive) {
327-
return AutomatonQueries.toCaseInsensitiveWildcardAutomaton(new Term(name(), wildcardPattern), Integer.MAX_VALUE);
328-
} else {
329-
return WildcardQuery.toAutomaton(new Term(name(), wildcardPattern));
330-
}
331-
};
332-
AutomatonQueryOnBinaryDv verifyingQuery = new AutomatonQueryOnBinaryDv(name(), wildcardPattern, deferredAutomatonSupplier);
325+
Automaton automaton = caseInsensitive
326+
? AutomatonQueries.toCaseInsensitiveWildcardAutomaton(new Term(name(), wildcardPattern), Integer.MAX_VALUE)
327+
: WildcardQuery.toAutomaton(new Term(name(), wildcardPattern));
328+
AutomatonQueryOnBinaryDv verifyingQuery = new AutomatonQueryOnBinaryDv(name(), wildcardPattern, automaton);
333329
if (clauseCount > 0) {
334330
// We can accelerate execution with the ngram query
335331
BooleanQuery approxQuery = rewritten.build();
336332
BooleanQuery.Builder verifyingBuilder = new BooleanQuery.Builder();
337333
verifyingBuilder.add(new BooleanClause(approxQuery, Occur.MUST));
338334
verifyingBuilder.add(new BooleanClause(verifyingQuery, Occur.MUST));
339-
return verifyingBuilder.build();
335+
return new ConstantScoreQuery(verifyingBuilder.build());
340336
} else if (numWildcardChars == 0 || numWildcardStrings > 0) {
341337
// We have no concrete characters and we're not a pure length query e.g. ???
342338
return new DocValuesFieldExistsQuery(name());
@@ -362,12 +358,9 @@ public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxD
362358
if (approxNgramQuery instanceof MatchAllDocsQuery) {
363359
return existsQuery(context);
364360
}
365-
Supplier<Automaton> deferredAutomatonSupplier = ()-> {
366-
RegExp regex = new RegExp(value, syntaxFlags, matchFlags);
367-
return regex.toAutomaton(maxDeterminizedStates);
368-
};
369-
370-
AutomatonQueryOnBinaryDv verifyingQuery = new AutomatonQueryOnBinaryDv(name(), value, deferredAutomatonSupplier);
361+
RegExp regex = new RegExp(value, syntaxFlags, matchFlags);
362+
Automaton automaton = regex.toAutomaton(maxDeterminizedStates);
363+
AutomatonQueryOnBinaryDv verifyingQuery = new AutomatonQueryOnBinaryDv(name(), value, automaton);
371364

372365
// MatchAllButRequireVerificationQuery is a special case meaning the regex is reduced to a single
373366
// clause which we can't accelerate at all and needs verification. Example would be ".."
@@ -746,9 +739,8 @@ public Query rangeQuery(
746739
}
747740
}
748741
}
749-
Supplier <Automaton> deferredAutomatonSupplier
750-
= () -> TermRangeQuery.toAutomaton(lower, upper, includeLower, includeUpper);
751-
AutomatonQueryOnBinaryDv slowQuery = new AutomatonQueryOnBinaryDv(name(), lower + "-" + upper, deferredAutomatonSupplier);
742+
Automaton automaton = TermRangeQuery.toAutomaton(lower, upper, includeLower, includeUpper);
743+
AutomatonQueryOnBinaryDv slowQuery = new AutomatonQueryOnBinaryDv(name(), lower + "-" + upper, automaton);
752744

753745
if (accelerationQuery == null) {
754746
return slowQuery;
@@ -831,18 +823,15 @@ public Query fuzzyQuery(
831823
bqBuilder.add(ngramQ, Occur.MUST);
832824
}
833825

834-
Supplier <Automaton> deferredAutomatonSupplier = ()->{
835-
// Verification query
836-
FuzzyQuery fq = new FuzzyQuery(
837-
new Term(name(), searchTerm),
838-
fuzziness.asDistance(searchTerm),
839-
prefixLength,
840-
maxExpansions,
841-
transpositions
842-
);
843-
return fq.getAutomata().automaton;
844-
};
845-
bqBuilder.add(new AutomatonQueryOnBinaryDv(name(), searchTerm, deferredAutomatonSupplier), Occur.MUST);
826+
// Verification query
827+
FuzzyQuery fq = new FuzzyQuery(
828+
new Term(name(), searchTerm),
829+
fuzziness.asDistance(searchTerm),
830+
prefixLength,
831+
maxExpansions,
832+
transpositions
833+
);
834+
bqBuilder.add(new AutomatonQueryOnBinaryDv(name(), searchTerm, fq.getAutomata().automaton), Occur.MUST);
846835

847836
return bqBuilder.build();
848837
} catch (IOException ioe) {

x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.lucene.search.BooleanClause;
2323
import org.apache.lucene.search.BooleanClause.Occur;
2424
import org.apache.lucene.search.BooleanQuery;
25+
import org.apache.lucene.search.ConstantScoreQuery;
2526
import org.apache.lucene.search.DocValuesFieldExistsQuery;
2627
import org.apache.lucene.search.IndexSearcher;
2728
import org.apache.lucene.search.MatchAllDocsQuery;
@@ -42,6 +43,7 @@
4243
import org.elasticsearch.Version;
4344
import org.elasticsearch.cluster.metadata.IndexMetadata;
4445
import org.elasticsearch.common.TriFunction;
46+
import org.elasticsearch.common.lucene.search.AutomatonQueries;
4547
import org.elasticsearch.common.settings.Settings;
4648
import org.elasticsearch.common.unit.Fuzziness;
4749
import org.elasticsearch.common.xcontent.XContentBuilder;
@@ -548,20 +550,41 @@ public void testWildcardAcceleration() throws IOException, ParseException {
548550
String expectedAccelerationQueryString = test[1].replaceAll("_", "" + WildcardFieldMapper.TOKEN_START_OR_END_CHAR);
549551
Query wildcardFieldQuery = wildcardFieldType.fieldType().wildcardQuery(pattern, null, MOCK_CONTEXT);
550552
testExpectedAccelerationQuery(pattern, wildcardFieldQuery, expectedAccelerationQueryString);
551-
assertTrue(wildcardFieldQuery instanceof BooleanQuery);
553+
assertTrue(unwrapAnyConstantScore(wildcardFieldQuery) instanceof BooleanQuery);
552554
}
553555

554556
// TODO All these expressions have no acceleration at all and could be improved
555557
String slowPatterns[] = { "??" };
556558
for (String pattern : slowPatterns) {
557559
Query wildcardFieldQuery = wildcardFieldType.fieldType().wildcardQuery(pattern, null, MOCK_CONTEXT);
560+
wildcardFieldQuery = unwrapAnyConstantScore(wildcardFieldQuery);
558561
assertTrue(
559562
pattern + " was not as slow as we assumed " + formatQuery(wildcardFieldQuery),
560563
wildcardFieldQuery instanceof AutomatonQueryOnBinaryDv
561564
);
562565
}
563566

564567
}
568+
569+
public void testQueryCachingEquality() throws IOException, ParseException {
570+
String pattern = "A*b*B?a";
571+
// Case sensitivity matters when it comes to caching
572+
Automaton caseSensitiveAutomaton = WildcardQuery.toAutomaton(new Term("field", pattern));
573+
Automaton caseInSensitiveAutomaton = AutomatonQueries.toCaseInsensitiveWildcardAutomaton(
574+
new Term("field", pattern),
575+
Integer.MAX_VALUE
576+
);
577+
AutomatonQueryOnBinaryDv csQ = new AutomatonQueryOnBinaryDv("field", pattern, caseSensitiveAutomaton);
578+
AutomatonQueryOnBinaryDv ciQ = new AutomatonQueryOnBinaryDv("field", pattern, caseInSensitiveAutomaton);
579+
assertNotEquals(csQ, ciQ);
580+
assertNotEquals(csQ.hashCode(), ciQ.hashCode());
581+
582+
// Same query should be equal
583+
Automaton caseSensitiveAutomaton2 = WildcardQuery.toAutomaton(new Term("field", pattern));
584+
AutomatonQueryOnBinaryDv csQ2 = new AutomatonQueryOnBinaryDv("field", pattern, caseSensitiveAutomaton2);
585+
assertEquals(csQ, csQ2);
586+
assertEquals(csQ.hashCode(), csQ2.hashCode());
587+
}
565588

566589
@Override
567590
protected void minimalMapping(XContentBuilder b) throws IOException {
@@ -719,8 +742,18 @@ void testExpectedAccelerationQuery(String regex, Query combinedQuery, String exp
719742
Query expectedAccelerationQuery = qsp.parse(expectedAccelerationQueryString);
720743
testExpectedAccelerationQuery(regex, combinedQuery, expectedAccelerationQuery);
721744
}
745+
746+
private Query unwrapAnyConstantScore(Query q) {
747+
if (q instanceof ConstantScoreQuery) {
748+
ConstantScoreQuery csq = (ConstantScoreQuery) q;
749+
return csq.getQuery();
750+
} else {
751+
return q;
752+
}
753+
}
754+
722755
void testExpectedAccelerationQuery(String regex, Query combinedQuery, Query expectedAccelerationQuery) throws ParseException {
723-
BooleanQuery cq = (BooleanQuery) combinedQuery;
756+
BooleanQuery cq = (BooleanQuery) unwrapAnyConstantScore(combinedQuery);
724757
assert cq.clauses().size() == 2;
725758
Query approximationQuery = null;
726759
boolean verifyQueryFound = false;

0 commit comments

Comments
 (0)