Skip to content

Commit 00803cd

Browse files
Stan Zhai authored and hvanhovell committed
[SPARK-19509][SQL] Grouping Sets do not respect nullable grouping columns
## What changes were proposed in this pull request? The analyzer currently does not check if a column used in grouping sets is actually nullable itself. This can cause the nullability of the column to be incorrect, which can cause null pointer exceptions down the line. This PR fixes that by also considering the nullability of the column. This is only a problem for Spark 2.1 and below. The latest master uses a different approach. Closes #16874 ## How was this patch tested? Added a regression test to `SQLQueryTestSuite.grouping_set`. Author: Herman van Hovell <[email protected]> Closes #16873 from hvanhovell/SPARK-19509. (cherry picked from commit a3d5300) Signed-off-by: Herman van Hovell <[email protected]>
1 parent 8bf6422 commit 00803cd

File tree

3 files changed

+56
-12
lines changed

3 files changed

+56
-12
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -313,7 +313,8 @@ class Analyzer(
313313

314314
val attrLength = groupByAliases.length
315315
val expandedAttributes = groupByAliases.zipWithIndex.map { case (a, idx) =>
316-
a.toAttribute.withNullability(((nullBitmask >> (attrLength - idx - 1)) & 1) == 1)
316+
val canBeNull = ((nullBitmask >> (attrLength - idx - 1)) & 1) == 1
317+
a.toAttribute.withNullability(a.nullable || canBeNull)
317318
}
318319

319320
val expand = Expand(x.bitmasks, groupByAliases, expandedAttributes, gid, x.child)

sql/core/src/test/resources/sql-tests/inputs/grouping_set.sql

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,12 @@ CREATE TEMPORARY VIEW grouping AS SELECT * FROM VALUES
22
("1", "2", "3", 1),
33
("4", "5", "6", 1),
44
("7", "8", "9", 1)
5-
as grouping(a, b, c, d);
5+
AS grouping(a, b, c, d);
6+
7+
CREATE TEMPORARY VIEW grouping_null AS SELECT * FROM VALUES
8+
CAST(NULL AS STRING),
9+
CAST(NULL AS STRING)
10+
AS T(e);
611

712
-- SPARK-17849: grouping set throws NPE #1
813
SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS (());
@@ -13,5 +18,8 @@ SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((a));
1318
-- SPARK-17849: grouping set throws NPE #3
1419
SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((c));
1520

21+
-- SPARK-19509: grouping set should honor input nullability
22+
SELECT COUNT(1) FROM grouping_null GROUP BY e GROUPING SETS (e);
1623

17-
24+
DROP VIEW IF EXISTS grouping;
25+
DROP VIEW IF EXISTS grouping_null;
Lines changed: 44 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,42 +1,77 @@
11
-- Automatically generated by SQLQueryTestSuite
2-
-- Number of queries: 4
2+
-- Number of queries: 8
33

44

55
-- !query 0
66
CREATE TEMPORARY VIEW grouping AS SELECT * FROM VALUES
77
("1", "2", "3", 1),
88
("4", "5", "6", 1),
99
("7", "8", "9", 1)
10-
as grouping(a, b, c, d)
10+
AS grouping(a, b, c, d)
1111
-- !query 0 schema
1212
struct<>
1313
-- !query 0 output
1414

1515

1616

1717
-- !query 1
18-
SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS (())
18+
CREATE TEMPORARY VIEW grouping_null AS SELECT * FROM VALUES
19+
CAST(NULL AS STRING),
20+
CAST(NULL AS STRING)
21+
AS T(e)
1922
-- !query 1 schema
20-
struct<a:string,b:string,c:string,count(d):bigint>
23+
struct<>
2124
-- !query 1 output
22-
NULL NULL NULL 3
25+
2326

2427

2528
-- !query 2
26-
SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((a))
29+
SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS (())
2730
-- !query 2 schema
2831
struct<a:string,b:string,c:string,count(d):bigint>
2932
-- !query 2 output
33+
NULL NULL NULL 3
34+
35+
36+
-- !query 3
37+
SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((a))
38+
-- !query 3 schema
39+
struct<a:string,b:string,c:string,count(d):bigint>
40+
-- !query 3 output
3041
1 NULL NULL 1
3142
4 NULL NULL 1
3243
7 NULL NULL 1
3344

3445

35-
-- !query 3
46+
-- !query 4
3647
SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((c))
37-
-- !query 3 schema
48+
-- !query 4 schema
3849
struct<a:string,b:string,c:string,count(d):bigint>
39-
-- !query 3 output
50+
-- !query 4 output
4051
NULL NULL 3 1
4152
NULL NULL 6 1
4253
NULL NULL 9 1
54+
55+
56+
-- !query 5
57+
SELECT COUNT(1) FROM grouping_null GROUP BY e GROUPING SETS (e)
58+
-- !query 5 schema
59+
struct<count(1):bigint>
60+
-- !query 5 output
61+
2
62+
63+
64+
-- !query 6
65+
DROP VIEW IF EXISTS grouping
66+
-- !query 6 schema
67+
struct<>
68+
-- !query 6 output
69+
70+
71+
72+
-- !query 7
73+
DROP VIEW IF EXISTS grouping_null
74+
-- !query 7 schema
75+
struct<>
76+
-- !query 7 output
77+

0 commit comments

Comments
 (0)