Skip to content

Commit 3f18092

Browse files
committed
SQL: Fix bug regarding histograms usage in scripting (#36866)
Allow scripts to correctly reference grouping functions Fix bug in translation of date/time functions mixed with histograms. Enhance Verifier to prevent histograms being nested inside other functions inside GROUP BY (as it implies double grouping) Extend Histogram docs (cherry picked from commit ac032a0) (cherry picked from commit 3a0fd4c)
1 parent b8d6c85 commit 3f18092

File tree

15 files changed

+234
-47
lines changed

15 files changed

+234
-47
lines changed

docs/reference/sql/functions/grouping.asciidoc

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ The histogram function takes all matching values and divides them into buckets w
3636
bucket_key = Math.floor(value / interval) * interval
3737
----
3838

39+
NOTE:: The histogram in SQL does *NOT* return empty buckets for missing intervals as the traditional <<search-aggregations-bucket-histogram-aggregation, histogram>> and <<search-aggregations-bucket-datehistogram-aggregation, date histogram>>. Such behavior does not fit conceptually in SQL which treats all missing values as `NULL`; as such the histogram places all missing values in the `NULL` group.
40+
3941
`Histogram` can be applied on either numeric fields:
4042

4143

@@ -51,4 +53,26 @@ or date/time fields:
5153
include-tagged::{sql-specs}/docs.csv-spec[histogramDate]
5254
----
5355

56+
Expressions inside the histogram are also supported as long as the
57+
return type is numeric:
58+
59+
["source","sql",subs="attributes,callouts,macros"]
60+
----
61+
include-tagged::{sql-specs}/docs.csv-spec[histogramNumericExpression]
62+
----
63+
64+
Do note that histograms (and grouping functions in general) allow custom expressions but cannot have any functions applied to them in the `GROUP BY`. In other words, the following statement is *NOT* allowed:
5465

66+
["source","sql",subs="attributes,callouts,macros"]
67+
----
68+
include-tagged::{sql-specs}/docs.csv-spec[expressionOnHistogramNotAllowed]
69+
----
70+
71+
as it requires two groupings (one for histogram followed by a second for applying the function on top of the histogram groups).
72+
73+
Instead one can rewrite the query to move the expression on the histogram _inside_ of it:
74+
75+
["source","sql",subs="attributes,callouts,macros"]
76+
----
77+
include-tagged::{sql-specs}/docs.csv-spec[histogramDateExpression]
78+
----

x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -262,9 +262,51 @@ SELECT HISTOGRAM(birth_date, INTERVAL 1 YEAR) AS h, COUNT(*) as c FROM test_emp
262262
null |10
263263
;
264264

265-
histogramDateWithDateFunction-Ignore
266-
SELECT YEAR(HISTOGRAM(birth_date, INTERVAL 1 YEAR)) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC;
265+
histogramDateWithMonthOnTop
266+
schema::h:i|c:l
267+
SELECT HISTOGRAM(MONTH(birth_date), 2) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC;
268+
269+
h | c
270+
---------------+---------------
271+
12 |7
272+
10 |17
273+
8 |16
274+
6 |16
275+
4 |18
276+
2 |10
277+
0 |6
278+
null |10
279+
;
280+
281+
histogramDateWithYearOnTop
282+
schema::h:i|c:l
283+
SELECT HISTOGRAM(YEAR(birth_date), 2) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC;
284+
h | c
285+
---------------+---------------
286+
1964 |5
287+
1962 |13
288+
1960 |16
289+
1958 |16
290+
1956 |9
291+
1954 |12
292+
1952 |19
293+
null |10
294+
;
267295

268-
269-
296+
histogramNumericWithExpression
297+
schema::h:i|c:l
298+
SELECT HISTOGRAM(emp_no % 100, 10) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC;
299+
300+
h | c
301+
---------------+---------------
302+
90 |10
303+
80 |10
304+
70 |10
305+
60 |10
306+
50 |10
307+
40 |10
308+
30 |10
309+
20 |10
310+
10 |10
311+
0 |10
270312
;

x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -725,6 +725,27 @@ SELECT HISTOGRAM(salary, 5000) AS h FROM emp GROUP BY h;
725725
// end::histogramNumeric
726726
;
727727

728+
histogramNumericExpression
729+
schema::h:i|c:l
730+
// tag::histogramNumericExpression
731+
SELECT HISTOGRAM(salary % 100, 10) AS h, COUNT(*) AS c FROM emp GROUP BY h;
732+
733+
h | c
734+
---------------+---------------
735+
0 |10
736+
10 |15
737+
20 |10
738+
30 |14
739+
40 |9
740+
50 |9
741+
60 |8
742+
70 |13
743+
80 |3
744+
90 |9
745+
746+
// end::histogramNumericExpression
747+
;
748+
728749
histogramDate
729750
schema::h:ts|c:l
730751
// tag::histogramDate
@@ -752,6 +773,30 @@ null |10
752773
// end::histogramDate
753774
;
754775

776+
expressionOnHistogramNotAllowed-Ignore
777+
// tag::expressionOnHistogramNotAllowed
778+
SELECT MONTH(HISTOGRAM(birth_date), 2)) AS h, COUNT(*) as c FROM emp GROUP BY h ORDER BY h DESC;
779+
// end::expressionOnHistogramNotAllowed
780+
781+
histogramDateExpression
782+
schema::h:i|c:l
783+
// tag::histogramDateExpression
784+
SELECT HISTOGRAM(MONTH(birth_date), 2) AS h, COUNT(*) as c FROM emp GROUP BY h ORDER BY h DESC;
785+
786+
h | c
787+
---------------+---------------
788+
12 |7
789+
10 |17
790+
8 |16
791+
6 |16
792+
4 |18
793+
2 |10
794+
0 |6
795+
null |10
796+
797+
// end::histogramDateExpression
798+
;
799+
755800
///////////////////////////////
756801
//
757802
// Date/Time

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
import org.elasticsearch.xpack.sql.expression.function.FunctionAttribute;
1919
import org.elasticsearch.xpack.sql.expression.function.Functions;
2020
import org.elasticsearch.xpack.sql.expression.function.Score;
21+
import org.elasticsearch.xpack.sql.expression.function.aggregate.AggregateFunctionAttribute;
22+
import org.elasticsearch.xpack.sql.expression.function.grouping.GroupingFunctionAttribute;
2123
import org.elasticsearch.xpack.sql.expression.function.scalar.ScalarFunction;
2224
import org.elasticsearch.xpack.sql.expression.predicate.conditional.ConditionalFunction;
2325
import org.elasticsearch.xpack.sql.expression.predicate.operator.comparison.In;
@@ -224,6 +226,7 @@ Collection<Failure> verify(LogicalPlan plan) {
224226
validateConditional(p, localFailures);
225227

226228
checkFilterOnAggs(p, localFailures);
229+
checkFilterOnGrouping(p, localFailures);
227230

228231
if (!groupingFailures.contains(p)) {
229232
checkGroupBy(p, localFailures, resolvedFunctions, groupingFailures);
@@ -419,7 +422,7 @@ private static boolean checkGroupByHavingHasOnlyAggs(Expression e, Node<?> sourc
419422
return true;
420423
}
421424
// skip aggs (allowed to refer to non-group columns)
422-
if (Functions.isAggregate(e)) {
425+
if (Functions.isAggregate(e) || Functions.isGrouping(e)) {
423426
return true;
424427
}
425428

@@ -448,6 +451,21 @@ private static boolean checkGroupByAgg(LogicalPlan p, Set<Failure> localFailures
448451
}
449452
}));
450453

454+
a.groupings().forEach(e -> {
455+
if (Functions.isGrouping(e) == false) {
456+
e.collectFirstChildren(c -> {
457+
if (Functions.isGrouping(c)) {
458+
localFailures.add(fail(c,
459+
"Cannot combine [%s] grouping function inside GROUP BY, found [%s];"
460+
+ " consider moving the expression inside the histogram",
461+
Expressions.name(c), Expressions.name(e)));
462+
return true;
463+
}
464+
return false;
465+
});
466+
}
467+
});
468+
451469
if (!localFailures.isEmpty()) {
452470
return false;
453471
}
@@ -547,19 +565,30 @@ private static void checkFilterOnAggs(LogicalPlan p, Set<Failure> localFailures)
547565
if (p instanceof Filter) {
548566
Filter filter = (Filter) p;
549567
if ((filter.child() instanceof Aggregate) == false) {
550-
filter.condition().forEachDown(f -> {
551-
if (Functions.isAggregate(f) || Functions.isGrouping(f)) {
552-
String type = Functions.isAggregate(f) ? "aggregate" : "grouping";
553-
localFailures.add(fail(f,
554-
"Cannot use WHERE filtering on %s function [%s], use HAVING instead", type, Expressions.name(f)));
568+
filter.condition().forEachDown(e -> {
569+
if (Functions.isAggregate(e) || e instanceof AggregateFunctionAttribute) {
570+
localFailures.add(
571+
fail(e, "Cannot use WHERE filtering on aggregate function [%s], use HAVING instead", Expressions.name(e)));
555572
}
556-
557-
}, Function.class);
573+
}, Expression.class);
558574
}
559575
}
560576
}
561577

562578

579+
private static void checkFilterOnGrouping(LogicalPlan p, Set<Failure> localFailures) {
580+
if (p instanceof Filter) {
581+
Filter filter = (Filter) p;
582+
filter.condition().forEachDown(e -> {
583+
if (Functions.isGrouping(e) || e instanceof GroupingFunctionAttribute) {
584+
localFailures
585+
.add(fail(e, "Cannot filter on grouping function [%s], use its argument instead", Expressions.name(e)));
586+
}
587+
}, Expression.class);
588+
}
589+
}
590+
591+
563592
private static void checkForScoreInsideFunctions(LogicalPlan p, Set<Failure> localFailures) {
564593
// Make sure that SCORE is only used in "top level" functions
565594
p.forEachExpressions(e ->
@@ -647,4 +676,4 @@ private static boolean areTypesCompatible(DataType left, DataType right) {
647676
(left.isNumeric() && right.isNumeric());
648677
}
649678
}
650-
}
679+
}

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/scalar/whitelist/InternalSqlScriptUtils.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,9 @@ public static Integer weekOfYear(Object dateTime, String tzId) {
346346
}
347347

348348
public static ZonedDateTime asDateTime(Object dateTime) {
349+
if (dateTime == null) {
350+
return null;
351+
}
349352
if (dateTime instanceof JodaCompatibleZonedDateTime) {
350353
return ((JodaCompatibleZonedDateTime) dateTime).getZonedDateTime();
351354
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
package org.elasticsearch.xpack.sql.expression.gen.script;
7+
8+
import org.elasticsearch.xpack.sql.expression.function.grouping.GroupingFunctionAttribute;
9+
10+
class Grouping extends Param<GroupingFunctionAttribute> {
11+
12+
Grouping(GroupingFunctionAttribute groupRef) {
13+
super(groupRef);
14+
}
15+
16+
String groupName() {
17+
return value().functionId();
18+
}
19+
20+
@Override
21+
public String prefix() {
22+
return "g";
23+
}
24+
}

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/gen/script/Params.java

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -85,25 +85,15 @@ Map<String, String> asAggPaths() {
8585
String s = a.aggProperty() != null ? a.aggProperty() : a.aggName();
8686
map.put(p.prefix() + aggs++, s);
8787
}
88-
}
89-
90-
return map;
91-
}
92-
93-
// return the agg refs
94-
List<String> asAggRefs() {
95-
List<String> refs = new ArrayList<>();
96-
97-
for (Param<?> p : params) {
98-
if (p instanceof Agg) {
99-
refs.add(((Agg) p).aggName());
88+
if (p instanceof Grouping) {
89+
Grouping g = (Grouping) p;
90+
map.put(p.prefix() + aggs++, g.groupName());
10091
}
10192
}
10293

103-
return refs;
94+
return map;
10495
}
10596

106-
10797
private static List<Param<?>> flatten(List<Param<?>> params) {
10898
List<Param<?>> flatten = emptyList();
10999

@@ -116,6 +106,9 @@ private static List<Param<?>> flatten(List<Param<?>> params) {
116106
else if (p instanceof Agg) {
117107
flatten.add(p);
118108
}
109+
else if (p instanceof Grouping) {
110+
flatten.add(p);
111+
}
119112
else if (p instanceof Var) {
120113
flatten.add(p);
121114
}
@@ -131,4 +124,4 @@ else if (p instanceof Var) {
131124
public String toString() {
132125
return params.toString();
133126
}
134-
}
127+
}

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/gen/script/ParamsBuilder.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
package org.elasticsearch.xpack.sql.expression.gen.script;
77

88
import org.elasticsearch.xpack.sql.expression.function.aggregate.AggregateFunctionAttribute;
9+
import org.elasticsearch.xpack.sql.expression.function.grouping.GroupingFunctionAttribute;
910

1011
import java.util.ArrayList;
1112
import java.util.List;
@@ -28,6 +29,11 @@ public ParamsBuilder agg(AggregateFunctionAttribute agg) {
2829
return this;
2930
}
3031

32+
public ParamsBuilder grouping(GroupingFunctionAttribute grouping) {
33+
params.add(new Grouping(grouping));
34+
return this;
35+
}
36+
3137
public ParamsBuilder script(Params ps) {
3238
params.add(new Script(ps));
3339
return this;

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/gen/script/ScriptTemplate.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,6 @@ public Params params() {
4444
return params;
4545
}
4646

47-
public List<String> aggRefs() {
48-
return params.asAggRefs();
49-
}
50-
5147
public Map<String, String> aggPaths() {
5248
return params.asAggPaths();
5349
}

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/gen/script/ScriptWeaver.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.elasticsearch.xpack.sql.expression.Expressions;
1313
import org.elasticsearch.xpack.sql.expression.FieldAttribute;
1414
import org.elasticsearch.xpack.sql.expression.function.aggregate.AggregateFunctionAttribute;
15+
import org.elasticsearch.xpack.sql.expression.function.grouping.GroupingFunctionAttribute;
1516
import org.elasticsearch.xpack.sql.expression.function.scalar.ScalarFunctionAttribute;
1617
import org.elasticsearch.xpack.sql.expression.literal.IntervalDayTime;
1718
import org.elasticsearch.xpack.sql.expression.literal.IntervalYearMonth;
@@ -37,6 +38,9 @@ default ScriptTemplate asScript(Expression exp) {
3738
if (attr instanceof AggregateFunctionAttribute) {
3839
return scriptWithAggregate((AggregateFunctionAttribute) attr);
3940
}
41+
if (attr instanceof GroupingFunctionAttribute) {
42+
return scriptWithGrouping((GroupingFunctionAttribute) attr);
43+
}
4044
if (attr instanceof FieldAttribute) {
4145
return scriptWithField((FieldAttribute) attr);
4246
}
@@ -83,6 +87,16 @@ default ScriptTemplate scriptWithAggregate(AggregateFunctionAttribute aggregate)
8387
dataType());
8488
}
8589

90+
default ScriptTemplate scriptWithGrouping(GroupingFunctionAttribute grouping) {
91+
String template = "{}";
92+
if (grouping.dataType() == DataType.DATE) {
93+
template = "{sql}.asDateTime({})";
94+
}
95+
return new ScriptTemplate(processScript(template),
96+
paramsBuilder().grouping(grouping).build(),
97+
dataType());
98+
}
99+
86100
default ScriptTemplate scriptWithField(FieldAttribute field) {
87101
return new ScriptTemplate(processScript("doc[{}].value"),
88102
paramsBuilder().variable(field.name()).build(),

0 commit comments

Comments
 (0)