Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/96265.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 96265
summary: Reduce nesting of same bool queries
area: Query Languages
type: enhancement
issues:
- 96236
Original file line number Diff line number Diff line change
Expand Up @@ -208,14 +208,14 @@ twoFunctionsEqualsBooleanLiterals-caseSensitive
process where endsWith(process_path, "x") == true and endsWith(process_path, "yx") != true
;
{"bool":{"must":[{"wildcard":{"process_path":{"wildcard":"*x","boost":1.0}}},
{"bool":{"must_not":[{"wildcard":{"process_path":{"wildcard":"*yx","boost":1.0}}}],"boost":1.0}}],"boost":1.0}}
{"bool":{"must_not":[{"wildcard":{"process_path":{"wildcard":"*yx","boost":1.0}}}],"boost":1.0}}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering if this change is not affecting the score of a query that uses bools and returns the SCORE() as well.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Confirmed with the search team that the two types of queries are equivalent as Lucene normalizes them to the same query (the one created above) when minimum_should_query is the default (1 - which it is).

;

twoFunctionsEqualsBooleanLiterals-insensitive
process where endsWith~(process_path, "x") == true and endsWith~(process_path, "yx") != true
;
{"bool":{"must":[{"wildcard":{"process_path":{"wildcard":"*x","case_insensitive":true,"boost":1.0}}},
{"bool":{"must_not":[{"wildcard":{"process_path":{"wildcard":"*yx","case_insensitive":true,"boost":1.0}}}],"boost":1.0}}],"boost":1.0}}
{"bool":{"must_not":[{"wildcard":{"process_path":{"wildcard":"*yx","case_insensitive":true,"boost":1.0}}}],"boost":1.0}}
;

endsWithKeywordFieldFunction-caseSensitive
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,14 @@
import org.elasticsearch.xpack.ql.tree.Source;
import org.elasticsearch.xpack.ql.type.DataTypes;
import org.elasticsearch.xpack.ql.util.Check;
import org.elasticsearch.xpack.ql.util.CollectionUtils;

import java.time.OffsetTime;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.temporal.TemporalAccessor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
Expand Down Expand Up @@ -464,6 +466,15 @@ private static Query boolQuery(Source source, Query left, Query right, boolean i
if (right == null) {
return left;
}
return new BoolQuery(source, isAnd, left, right);
List<Query> queries;
// check if either side is already a bool query to an extra bool query
if (left instanceof BoolQuery bool && bool.isAnd() == isAnd) {
queries = CollectionUtils.combine(bool.queries(), right);
} else if (right instanceof BoolQuery bool && bool.isAnd() == isAnd) {
queries = CollectionUtils.combine(bool.queries(), left);
} else {
queries = Arrays.asList(left, right);
}
return new BoolQuery(source, isAnd, queries);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,15 @@
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.search.sort.NestedSortBuilder;
import org.elasticsearch.xpack.ql.QlIllegalArgumentException;
import org.elasticsearch.xpack.ql.tree.Source;
import org.elasticsearch.xpack.ql.util.CollectionUtils;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.StringJoiner;

import static org.elasticsearch.index.query.QueryBuilders.boolQuery;

Expand All @@ -23,52 +29,59 @@ public class BoolQuery extends Query {
* {@code true} for boolean {@code AND}, {@code false} for boolean {@code OR}.
*/
private final boolean isAnd;
private final Query left;
private final Query right;
private final List<Query> queries;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Imo, this change is a bit forced, conceptually speaking. A boolean query by definition is a boolean operation between two expressions, just like a math operation is. Having multiple expressions ALL linked together by either OR or AND seems like a specialized boolean query, MultiBoolQuery if you want. The BoolQueryBuilder in ES is built similarly, but is generalized: a list of must, a list of should, a list of filters etc.

I would have expected a solution around "flattening" the bool statements, but not touching the BoolQuery in this way.

Copy link
Member Author

@costin costin May 23, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Balancing the tree would have been another alternative but I opted for the above because it simplifies the Lucene query which is what triggers the exception in the first place.
Based on napkin math a balanced tree has n-2 intermediate nodes (3 ORs - 1 intermediate node, 4 ORs - 2, 5 ORs - 3, etc..). This implementation truly flattens the OR to just 1 node regardless of how many clauses there are.


public BoolQuery(Source source, boolean isAnd, Query left, Query right) {
this(source, isAnd, Arrays.asList(left, right));
}

public BoolQuery(Source source, boolean isAnd, List<Query> queries) {
super(source);
if (left == null) {
throw new IllegalArgumentException("left is required");
}
if (right == null) {
throw new IllegalArgumentException("right is required");
if (CollectionUtils.isEmpty(queries) || queries.size() < 2) {
throw new QlIllegalArgumentException("At least two queries required by bool query");
}
this.isAnd = isAnd;
this.left = left;
this.right = right;
this.queries = queries;
}

@Override
public boolean containsNestedField(String path, String field) {
return left.containsNestedField(path, field) || right.containsNestedField(path, field);
for (Query query : queries) {
if (query.containsNestedField(path, field)) {
return true;
}
}
return false;
}

@Override
public Query addNestedField(String path, String field, String format, boolean hasDocValues) {
Query rewrittenLeft = left.addNestedField(path, field, format, hasDocValues);
Query rewrittenRight = right.addNestedField(path, field, format, hasDocValues);
if (rewrittenLeft == left && rewrittenRight == right) {
return this;
boolean unchanged = true;
List<Query> rewritten = new ArrayList<>(queries.size());
for (Query query : queries) {
var rewrittenQuery = query.addNestedField(path, field, format, hasDocValues);
unchanged &= rewrittenQuery == query;
rewritten.add(rewrittenQuery);
}
return new BoolQuery(source(), isAnd, rewrittenLeft, rewrittenRight);
return unchanged ? this : new BoolQuery(source(), isAnd, rewritten);
}

@Override
public void enrichNestedSort(NestedSortBuilder sort) {
left.enrichNestedSort(sort);
right.enrichNestedSort(sort);
for (Query query : queries) {
query.enrichNestedSort(sort);
}
}

@Override
public QueryBuilder asBuilder() {
BoolQueryBuilder boolQuery = boolQuery();
if (isAnd) {
boolQuery.must(left.asBuilder());
boolQuery.must(right.asBuilder());
} else {
boolQuery.should(left.asBuilder());
boolQuery.should(right.asBuilder());
for (Query query : queries) {
if (isAnd) {
boolQuery.must(query.asBuilder());
} else {
boolQuery.should(query.asBuilder());
}
}
return boolQuery;
}
Expand All @@ -77,17 +90,13 @@ public boolean isAnd() {
return isAnd;
}

public Query left() {
return left;
}

public Query right() {
return right;
public List<Query> queries() {
return queries;
}

@Override
public int hashCode() {
return Objects.hash(super.hashCode(), isAnd, left, right);
return Objects.hash(super.hashCode(), isAnd, queries);
}

@Override
Expand All @@ -96,11 +105,15 @@ public boolean equals(Object obj) {
return false;
}
BoolQuery other = (BoolQuery) obj;
return isAnd == other.isAnd && left.equals(other.left) && right.equals(other.right);
return isAnd == other.isAnd && queries.equals(other.queries);
}

@Override
protected String innerToString() {
return left + (isAnd ? " AND " : " OR ") + right;
StringJoiner sb = new StringJoiner(isAnd ? " AND " : " OR ");
for (Query query : queries) {
sb.add(query.toString());
}
return sb.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

import static java.util.Collections.singletonMap;
import static org.elasticsearch.test.EqualsHashCodeTestUtils.checkEqualsAndHashCode;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.hasSize;

public class BoolQueryTests extends ESTestCase {
static BoolQuery randomBoolQuery(int depth) {
Expand All @@ -35,15 +37,15 @@ public void testEqualsAndHashCode() {
}

private static BoolQuery copy(BoolQuery query) {
return new BoolQuery(query.source(), query.isAnd(), query.left(), query.right());
return new BoolQuery(query.source(), query.isAnd(), query.queries());
}

private static BoolQuery mutate(BoolQuery query) {
List<Function<BoolQuery, BoolQuery>> options = Arrays.asList(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test doesn't fully reflect the changes in this PR, but I'm ok with it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed it nevertheless.

q -> new BoolQuery(SourceTests.mutate(q.source()), q.isAnd(), q.left(), q.right()),
q -> new BoolQuery(q.source(), false == q.isAnd(), q.left(), q.right()),
q -> new BoolQuery(q.source(), q.isAnd(), randomValueOtherThan(q.left(), () -> NestedQueryTests.randomQuery(5)), q.right()),
q -> new BoolQuery(q.source(), q.isAnd(), q.left(), randomValueOtherThan(q.right(), () -> NestedQueryTests.randomQuery(5)))
q -> new BoolQuery(SourceTests.mutate(q.source()), q.isAnd(), left(q), right(q)),
q -> new BoolQuery(q.source(), false == q.isAnd(), left(q), right(q)),
q -> new BoolQuery(q.source(), q.isAnd(), randomValueOtherThan(left(q), () -> NestedQueryTests.randomQuery(5)), right(q)),
q -> new BoolQuery(q.source(), q.isAnd(), left(q), randomValueOtherThan(right(q), () -> NestedQueryTests.randomQuery(5)))
);
return randomFrom(options).apply(query);
}
Expand Down Expand Up @@ -124,4 +126,19 @@ public void testToString() {
).toString()
);
}

public static Query left(BoolQuery bool) {
return indexOf(bool, 0);
}

public static Query right(BoolQuery bool) {
return indexOf(bool, 1);
}

private static Query indexOf(BoolQuery bool, int index) {
List<Query> queries = bool.queries();
assertThat(queries, hasSize(greaterThanOrEqualTo(2)));
return queries.get(index);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@

import static java.util.Arrays.asList;
import static org.elasticsearch.xpack.ql.expression.Literal.TRUE;
import static org.elasticsearch.xpack.ql.querydsl.query.BoolQueryTests.left;
import static org.elasticsearch.xpack.ql.querydsl.query.BoolQueryTests.right;
import static org.elasticsearch.xpack.ql.type.DataTypes.DATETIME;
import static org.elasticsearch.xpack.ql.type.DataTypes.DOUBLE;
import static org.elasticsearch.xpack.ql.type.DataTypes.INTEGER;
Expand All @@ -111,6 +113,7 @@
import static org.hamcrest.Matchers.endsWith;
import static org.hamcrest.Matchers.everyItem;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.startsWith;

Expand Down Expand Up @@ -740,11 +743,11 @@ public void testInExpressionWhereClauseDatetime() {
assertTrue(query instanceof BoolQuery);
BoolQuery bq = (BoolQuery) query;
assertFalse(bq.isAnd());
assertTrue(bq.left() instanceof RangeQuery);
assertTrue(bq.right() instanceof RangeQuery);
assertTrue(left(bq) instanceof RangeQuery);
assertTrue(right(bq) instanceof RangeQuery);
List<Tuple<String, RangeQuery>> tuples = asList(
new Tuple<>(dates[0], (RangeQuery) bq.left()),
new Tuple<>(dates[1], (RangeQuery) bq.right())
new Tuple<>(dates[0], (RangeQuery) left(bq)),
new Tuple<>(dates[1], (RangeQuery) right(bq))
);

for (Tuple<String, RangeQuery> t : tuples) {
Expand Down Expand Up @@ -834,12 +837,12 @@ public void testDifferentLikeAndNotLikePatterns() {
assertEquals(BoolQuery.class, qt.query.getClass());
BoolQuery bq = ((BoolQuery) qt.query);
assertTrue(bq.isAnd());
assertTrue(bq.left() instanceof WildcardQuery);
assertTrue(bq.right() instanceof NotQuery);
assertTrue(left(bq) instanceof WildcardQuery);
assertTrue(right(bq) instanceof NotQuery);

NotQuery nq = (NotQuery) bq.right();
NotQuery nq = (NotQuery) right(bq);
assertTrue(nq.child() instanceof WildcardQuery);
WildcardQuery lqsq = (WildcardQuery) bq.left();
WildcardQuery lqsq = (WildcardQuery) left(bq);
WildcardQuery rqsq = (WildcardQuery) nq.child();

assertEquals("X*", lqsq.query());
Expand Down Expand Up @@ -879,12 +882,12 @@ private void assertDifferentRLikeAndNotRLikePatterns(String firstPattern, String
assertEquals(BoolQuery.class, qt.query.getClass());
BoolQuery bq = ((BoolQuery) qt.query);
assertTrue(bq.isAnd());
assertTrue(bq.left() instanceof RegexQuery);
assertTrue(bq.right() instanceof NotQuery);
assertTrue(left(bq) instanceof RegexQuery);
assertTrue(right(bq) instanceof NotQuery);

NotQuery nq = (NotQuery) bq.right();
NotQuery nq = (NotQuery) right(bq);
assertTrue(nq.child() instanceof RegexQuery);
RegexQuery lqsq = (RegexQuery) bq.left();
RegexQuery lqsq = (RegexQuery) left(bq);
RegexQuery rqsq = (RegexQuery) nq.child();

assertEquals(firstPattern, lqsq.regex());
Expand All @@ -906,14 +909,14 @@ public void testStartsWithUsesPrefixQuery() {
BoolQuery bq = (BoolQuery) translation.query;

assertFalse(bq.isAnd());
assertTrue(bq.left() instanceof PrefixQuery);
assertTrue(bq.right() instanceof PrefixQuery);
assertTrue(left(bq) instanceof PrefixQuery);
assertTrue(right(bq) instanceof PrefixQuery);

PrefixQuery pqr = (PrefixQuery) bq.right();
PrefixQuery pqr = (PrefixQuery) right(bq);
assertEquals("keyword", pqr.field());
assertEquals("y", pqr.query());

PrefixQuery pql = (PrefixQuery) bq.left();
PrefixQuery pql = (PrefixQuery) left(bq);
assertEquals("keyword", pql.field());
assertEquals("x", pql.query());
}
Expand All @@ -934,20 +937,21 @@ public void testStartsWithUsesPrefixQueryAndScript() {
BoolQuery bq = (BoolQuery) translation.query;

assertTrue(bq.isAnd());
assertTrue(bq.left() instanceof BoolQuery);
assertTrue(bq.right() instanceof ScriptQuery);
List<Query> queries = bq.queries();
assertThat(queries, hasSize(3));
assertTrue(queries.get(0) instanceof PrefixQuery);
assertTrue(queries.get(1) instanceof PrefixQuery);
assertTrue(queries.get(2) instanceof ScriptQuery);

BoolQuery bbq = (BoolQuery) bq.left();
assertTrue(bbq.isAnd());
PrefixQuery pqr = (PrefixQuery) bbq.right();
PrefixQuery pqr = (PrefixQuery) queries.get(0);
assertEquals("keyword", pqr.field());
assertEquals("xy", pqr.query());
assertEquals("x", pqr.query());

PrefixQuery pql = (PrefixQuery) bbq.left();
PrefixQuery pql = (PrefixQuery) queries.get(1);
assertEquals("keyword", pql.field());
assertEquals("x", pql.query());
assertEquals("xy", pql.query());

ScriptQuery sq = (ScriptQuery) bq.right();
ScriptQuery sq = (ScriptQuery) queries.get(2);
assertEquals(
"InternalQlScriptUtils.nullSafeFilter(InternalQlScriptUtils.startsWith("
+ "InternalSqlScriptUtils.lcase(InternalQlScriptUtils.docValue(doc,params.v0)), "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,21 @@ SELECT * FROM test WHERE some.string LIKE '%a%';
"query":{"wildcard":{"some.string.typical":{"wildcard":"*a*",
;

LikeOnInexactWithOR
SELECT * FROM test WHERE some.string LIKE '%a%' OR some.string LIKE '%b%' OR some.string LIKE '%c%';
{"should":[{"wildcard":{"some.string.typical":{"wildcard":"*a*","boost":1.0}}},{"wildcard":{"some.string.typical":{"wildcard":"*b*","boost":1.0}}},{"wildcard":{"some.string.typical":{"wildcard":"*c*","boost":1.0}}}],"boost":1.0}}
;

LikeOnInexactWithORandAND
SELECT * FROM test WHERE some.string LIKE '%a%' OR some.string LIKE '%b%' AND some.string LIKE '%c%' OR some.string LIKE '%d%' OR some.string LIKE '%e%';
{"bool":{"should":[{"wildcard":{"some.string.typical":{"wildcard":"*a*","boost":1.0}}},{"bool":{"must":[{"wildcard":{"some.string.typical":{"wildcard":"*b*","boost":1.0}}},{"wildcard":{"some.string.typical":{"wildcard":"*c*","boost":1.0}}}],"boost":1.0}},{"wildcard":{"some.string.typical":{"wildcard":"*d*","boost":1.0}}},{"wildcard":{"some.string.typical":{"wildcard":"*e*","boost":1.0}}}],"boost":1.0}}
;

LikeOnInexactWithANDandGroupedOR
SELECT * FROM test WHERE (some.string LIKE '%a%' OR some.string LIKE '%b%') AND (some.string LIKE '%c%' OR some.string LIKE '%d%' OR some.string LIKE '%e%');
{"bool":{"must":[{"bool":{"should":[{"wildcard":{"some.string.typical":{"wildcard":"*a*","boost":1.0}}},{"wildcard":{"some.string.typical":{"wildcard":"*b*","boost":1.0}}}],"boost":1.0}},{"bool":{"should":[{"wildcard":{"some.string.typical":{"wildcard":"*c*","boost":1.0}}},{"wildcard":{"some.string.typical":{"wildcard":"*d*","boost":1.0}}},{"wildcard":{"some.string.typical":{"wildcard":"*e*","boost":1.0}}}],"boost":1.0}}],"boost":1.0}}
;

RLikeOnInexact
SELECT * FROM test WHERE some.string RLIKE '.*a.*';
"query":{"regexp":{"some.string.typical":{"value":".*a.*",
Expand Down