Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/reference/mapping/types/wildcard.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
=== Wildcard field type

The `wildcard` field type is a specialized keyword field for unstructured
machine-generated content you plan to search using grep-like
machine-generated content you plan to search using grep-like
<<query-dsl-wildcard-query,`wildcard`>> and <<query-dsl-regexp-query,`regexp`>>
queries. The `wildcard` type is optimized for fields with large values or high
cardinality.
Expand Down Expand Up @@ -130,4 +130,5 @@ The following parameters are accepted by `wildcard` fields:
==== Limitations

* `wildcard` fields are untokenized like keyword fields, so they do not support queries that rely on word positions, such as phrase queries.
* When running `wildcard` queries, any `rewrite` parameter is ignored. The scoring is always a constant score.

Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@

import java.io.IOException;
import java.util.Objects;
import java.util.function.Supplier;

/**
* Query that runs an Automaton across all binary doc values.
Expand All @@ -35,20 +34,16 @@ public class AutomatonQueryOnBinaryDv extends Query {

private final String field;
private final String matchPattern;
private final Supplier<Automaton> automatonSupplier;
private final ByteRunAutomaton bytesMatcher;

public AutomatonQueryOnBinaryDv(String field, String matchPattern, Supplier<Automaton> automatonSupplier) {
public AutomatonQueryOnBinaryDv(String field, String matchPattern, Automaton automaton) {
this.field = field;
this.matchPattern = matchPattern;
this.automatonSupplier = automatonSupplier;
bytesMatcher = new ByteRunAutomaton(automaton);
}

@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {


ByteRunAutomaton bytesMatcher = new ByteRunAutomaton(automatonSupplier.get());

return new ConstantScoreWeight(this, boost) {

@Override
Expand Down Expand Up @@ -99,12 +94,13 @@ public boolean equals(Object obj) {
return false;
}
AutomatonQueryOnBinaryDv other = (AutomatonQueryOnBinaryDv) obj;
return Objects.equals(field, other.field) && Objects.equals(matchPattern, other.matchPattern);
return Objects.equals(field, other.field) && Objects.equals(matchPattern, other.matchPattern)
&& Objects.equals(bytesMatcher, other.bytesMatcher);
}

@Override
public int hashCode() {
return Objects.hash(field, matchPattern);
return Objects.hash(field, matchPattern, bytesMatcher);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -322,21 +322,17 @@ public Query wildcardQuery(String wildcardPattern, RewriteMethod method, boolean
addClause(string, rewritten, Occur.MUST);
clauseCount++;
}
Supplier<Automaton> deferredAutomatonSupplier = () -> {
if(caseInsensitive) {
return AutomatonQueries.toCaseInsensitiveWildcardAutomaton(new Term(name(), wildcardPattern), Integer.MAX_VALUE);
} else {
return WildcardQuery.toAutomaton(new Term(name(), wildcardPattern));
}
};
AutomatonQueryOnBinaryDv verifyingQuery = new AutomatonQueryOnBinaryDv(name(), wildcardPattern, deferredAutomatonSupplier);
Automaton automaton = caseInsensitive
? AutomatonQueries.toCaseInsensitiveWildcardAutomaton(new Term(name(), wildcardPattern), Integer.MAX_VALUE)
: WildcardQuery.toAutomaton(new Term(name(), wildcardPattern));
AutomatonQueryOnBinaryDv verifyingQuery = new AutomatonQueryOnBinaryDv(name(), wildcardPattern, automaton);
if (clauseCount > 0) {
// We can accelerate execution with the ngram query
BooleanQuery approxQuery = rewritten.build();
BooleanQuery.Builder verifyingBuilder = new BooleanQuery.Builder();
verifyingBuilder.add(new BooleanClause(approxQuery, Occur.MUST));
verifyingBuilder.add(new BooleanClause(verifyingQuery, Occur.MUST));
return verifyingBuilder.build();
return new ConstantScoreQuery(verifyingBuilder.build());
} else if (numWildcardChars == 0 || numWildcardStrings > 0) {
// We have no concrete characters and we're not a pure length query e.g. ???
return new DocValuesFieldExistsQuery(name());
Expand All @@ -362,12 +358,9 @@ public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxD
if (approxNgramQuery instanceof MatchAllDocsQuery) {
return existsQuery(context);
}
Supplier<Automaton> deferredAutomatonSupplier = ()-> {
RegExp regex = new RegExp(value, syntaxFlags, matchFlags);
return regex.toAutomaton(maxDeterminizedStates);
};

AutomatonQueryOnBinaryDv verifyingQuery = new AutomatonQueryOnBinaryDv(name(), value, deferredAutomatonSupplier);
RegExp regex = new RegExp(value, syntaxFlags, matchFlags);
Automaton automaton = regex.toAutomaton(maxDeterminizedStates);
AutomatonQueryOnBinaryDv verifyingQuery = new AutomatonQueryOnBinaryDv(name(), value, automaton);

// MatchAllButRequireVerificationQuery is a special case meaning the regex is reduced to a single
// clause which we can't accelerate at all and needs verification. Example would be ".."
Expand Down Expand Up @@ -746,9 +739,8 @@ public Query rangeQuery(
}
}
}
Supplier <Automaton> deferredAutomatonSupplier
= () -> TermRangeQuery.toAutomaton(lower, upper, includeLower, includeUpper);
AutomatonQueryOnBinaryDv slowQuery = new AutomatonQueryOnBinaryDv(name(), lower + "-" + upper, deferredAutomatonSupplier);
Automaton automaton = TermRangeQuery.toAutomaton(lower, upper, includeLower, includeUpper);
AutomatonQueryOnBinaryDv slowQuery = new AutomatonQueryOnBinaryDv(name(), lower + "-" + upper, automaton);

if (accelerationQuery == null) {
return slowQuery;
Expand Down Expand Up @@ -831,18 +823,15 @@ public Query fuzzyQuery(
bqBuilder.add(ngramQ, Occur.MUST);
}

Supplier <Automaton> deferredAutomatonSupplier = ()->{
// Verification query
FuzzyQuery fq = new FuzzyQuery(
new Term(name(), searchTerm),
fuzziness.asDistance(searchTerm),
prefixLength,
maxExpansions,
transpositions
);
return fq.getAutomata().automaton;
};
bqBuilder.add(new AutomatonQueryOnBinaryDv(name(), searchTerm, deferredAutomatonSupplier), Occur.MUST);
// Verification query
FuzzyQuery fq = new FuzzyQuery(
new Term(name(), searchTerm),
fuzziness.asDistance(searchTerm),
prefixLength,
maxExpansions,
transpositions
);
bqBuilder.add(new AutomatonQueryOnBinaryDv(name(), searchTerm, fq.getAutomata().automaton), Occur.MUST);

return bqBuilder.build();
} catch (IOException ioe) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
Expand All @@ -42,6 +43,7 @@
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.TriFunction;
import org.elasticsearch.common.lucene.search.AutomatonQueries;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.common.xcontent.XContentBuilder;
Expand Down Expand Up @@ -548,20 +550,41 @@ public void testWildcardAcceleration() throws IOException, ParseException {
String expectedAccelerationQueryString = test[1].replaceAll("_", "" + WildcardFieldMapper.TOKEN_START_OR_END_CHAR);
Query wildcardFieldQuery = wildcardFieldType.fieldType().wildcardQuery(pattern, null, MOCK_CONTEXT);
testExpectedAccelerationQuery(pattern, wildcardFieldQuery, expectedAccelerationQueryString);
assertTrue(wildcardFieldQuery instanceof BooleanQuery);
assertTrue(unwrapAnyConstantScore(wildcardFieldQuery) instanceof BooleanQuery);
}

// TODO All these expressions have no acceleration at all and could be improved
String slowPatterns[] = { "??" };
for (String pattern : slowPatterns) {
Query wildcardFieldQuery = wildcardFieldType.fieldType().wildcardQuery(pattern, null, MOCK_CONTEXT);
wildcardFieldQuery = unwrapAnyConstantScore(wildcardFieldQuery);
assertTrue(
pattern + " was not as slow as we assumed " + formatQuery(wildcardFieldQuery),
wildcardFieldQuery instanceof AutomatonQueryOnBinaryDv
);
}

}

/**
 * Checks the equals/hashCode contract of {@link AutomatonQueryOnBinaryDv}: two queries on the
 * same field and pattern must differ when their automata differ, and must match when the
 * automata are built the same way.
 */
public void testQueryCachingEquality() throws IOException, ParseException {
    String pattern = "A*b*B?a";

    // Same field and pattern text, but compiled with and without case sensitivity.
    // Case sensitivity matters when it comes to caching, so these must not be interchangeable.
    Automaton sensitive = WildcardQuery.toAutomaton(new Term("field", pattern));
    AutomatonQueryOnBinaryDv sensitiveQuery = new AutomatonQueryOnBinaryDv("field", pattern, sensitive);

    Automaton insensitive = AutomatonQueries.toCaseInsensitiveWildcardAutomaton(new Term("field", pattern), Integer.MAX_VALUE);
    AutomatonQueryOnBinaryDv insensitiveQuery = new AutomatonQueryOnBinaryDv("field", pattern, insensitive);

    assertNotEquals(sensitiveQuery, insensitiveQuery);
    assertNotEquals(sensitiveQuery.hashCode(), insensitiveQuery.hashCode());

    // Rebuilding the case-sensitive automaton from scratch must give an equal query.
    Automaton sensitiveAgain = WildcardQuery.toAutomaton(new Term("field", pattern));
    AutomatonQueryOnBinaryDv sensitiveQueryAgain = new AutomatonQueryOnBinaryDv("field", pattern, sensitiveAgain);

    assertEquals(sensitiveQuery, sensitiveQueryAgain);
    assertEquals(sensitiveQuery.hashCode(), sensitiveQueryAgain.hashCode());
}

@Override
protected void minimalMapping(XContentBuilder b) throws IOException {
Expand Down Expand Up @@ -719,8 +742,18 @@ void testExpectedAccelerationQuery(String regex, Query combinedQuery, String exp
Query expectedAccelerationQuery = qsp.parse(expectedAccelerationQueryString);
testExpectedAccelerationQuery(regex, combinedQuery, expectedAccelerationQuery);
}

/**
 * Removes a single outer {@link ConstantScoreQuery} wrapper when there is one.
 *
 * @param q the query to inspect
 * @return the wrapped query if {@code q} is a {@link ConstantScoreQuery}, otherwise {@code q} itself
 */
private Query unwrapAnyConstantScore(Query q) {
    if (q instanceof ConstantScoreQuery == false) {
        // Nothing to strip; hand the query back untouched.
        return q;
    }
    return ((ConstantScoreQuery) q).getQuery();
}

void testExpectedAccelerationQuery(String regex, Query combinedQuery, Query expectedAccelerationQuery) throws ParseException {
BooleanQuery cq = (BooleanQuery) combinedQuery;
BooleanQuery cq = (BooleanQuery) unwrapAnyConstantScore(combinedQuery);
assert cq.clauses().size() == 2;
Query approximationQuery = null;
boolean verifyQueryFound = false;
Expand Down