From a6d4651d63c5d31dcd7b1ca6d48be9707316e33b Mon Sep 17 00:00:00 2001 From: wangyang Date: Mon, 10 Oct 2016 12:19:31 +0800 Subject: [PATCH 1/8] fix grouping id bug --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index ae8869ff25f2d..03e5a22d1c1bc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -298,10 +298,11 @@ class Analyzer( case other => Alias(other, other.toString)() } - val nonNullBitmask = x.bitmasks.reduce(_ & _) + val nonNullBitmask = ~ x.bitmasks.reduce(_ | _) + val attrLength = groupByAliases.length val expandedAttributes = groupByAliases.zipWithIndex.map { case (a, idx) => - a.toAttribute.withNullability((nonNullBitmask & 1 << idx) == 0) + a.toAttribute.withNullability((nonNullBitmask & 1 << (attrLength - idx - 1)) == 0) } val expand = Expand(x.bitmasks, groupByAliases, expandedAttributes, gid, x.child) From ab82b211afc9c9d2030c25b4f953db6f4b1bb62e Mon Sep 17 00:00:00 2001 From: wangyang Date: Mon, 10 Oct 2016 15:03:35 +0800 Subject: [PATCH 2/8] add test --- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 0ee8c959eeb4d..7e0730e921784 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2189,6 +2189,19 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } } + test("SPARK-17849: grouping set throws NPE") { + val data = Seq( + ("1", "2", "3", 1), + ("4", "5", "6", 1), + ("7", "8", "9", 1) + ) + data.toDF("a", "b", "c", "d").createOrReplaceTempView("point_table") + checkAnswer( + sql("select a, b, c, count(d) from point_table group by a, b, c GROUPING SETS (())"), + Row(null, null, null, 3) :: Nil) + } + + test("hash function") { val df = Seq(1 -> "a", 2 -> "b").toDF("i", "j") withTempView("tbl") { From c849b6a02ee65e3a818abc03f301d82b36ec7f43 Mon Sep 17 00:00:00 2001 From: wangyang Date: Mon, 10 Oct 2016 15:31:51 +0800 Subject: [PATCH 3/8] add test --- .../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 7e0730e921784..e816bf23800ef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2199,6 +2199,11 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { checkAnswer( sql("select a, b, c, count(d) from point_table group by a, b, c GROUPING SETS (())"), Row(null, null, null, 3) :: Nil) + checkAnswer( + sql("select a, b, c, count(d) from point_table group by a, b, c GROUPING SETS ((a))"), + Row("1", null, null, 1) :: + Row("4", null, null, 1) :: + Row("7", null, null, 1) :: Nil) } From 42f7a63e0f43695e5ce8d85b5bf7425aa02a8b8b Mon Sep 17 00:00:00 2001 From: wangyang Date: Mon, 10 Oct 2016 16:31:48 +0800 Subject: [PATCH 4/8] mv test to SQLQueryTestSuit --- .../sql-tests/inputs/grouping_set.sql | 17 ++++++++ .../sql-tests/results/grouping_set.sql.out | 42 +++++++++++++++++++ .../org/apache/spark/sql/SQLQuerySuite.scala | 18 -------- 3 files changed, 59 insertions(+), 18 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/grouping_set.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out diff --git a/sql/core/src/test/resources/sql-tests/inputs/grouping_set.sql b/sql/core/src/test/resources/sql-tests/inputs/grouping_set.sql new file mode 100644 index 0000000000000..3594283505280 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/grouping_set.sql @@ -0,0 +1,17 @@ +CREATE TEMPORARY VIEW grouping AS SELECT * FROM VALUES + ("1", "2", "3", 1), + ("4", "5", "6", 1), + ("7", "8", "9", 1) + as grouping(a, b, c, d); + +-- SPARK-17849: grouping set throws NPE #1 +SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS (()); + +-- SPARK-17849: grouping set throws NPE #2 +SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((a)); + +-- SPARK-17849: grouping set throws NPE #3 +SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((c)); + + + diff --git a/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out b/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out new file mode 100644 index 0000000000000..edb38a52b7514 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out @@ -0,0 +1,42 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 4 + + +-- !query 0 +CREATE TEMPORARY VIEW grouping AS SELECT * FROM VALUES + ("1", "2", "3", 1), + ("4", "5", "6", 1), + ("7", "8", "9", 1) + as grouping(a, b, c, d) +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS (()) +-- !query 1 schema +struct +-- !query 1 output +NULL NULL NULL 3 + + +-- !query 2 +SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((a)) +-- !query 2 schema +struct +-- !query 2 output +1 NULL NULL 1 +4 NULL NULL 1 +7 NULL NULL 1 + + +-- !query 3 +SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((c)) +-- !query 3 schema +struct +-- !query 3 output +NULL NULL 3 1 +NULL NULL 6 1 +NULL NULL 9 1 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index e816bf23800ef..0ee8c959eeb4d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2189,24 +2189,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } } - test("SPARK-17849: grouping set throws NPE") { - val data = Seq( - ("1", "2", "3", 1), - ("4", "5", "6", 1), - ("7", "8", "9", 1) - ) - data.toDF("a", "b", "c", "d").createOrReplaceTempView("point_table") - checkAnswer( - sql("select a, b, c, count(d) from point_table group by a, b, c GROUPING SETS (())"), - Row(null, null, null, 3) :: Nil) - checkAnswer( - sql("select a, b, c, count(d) from point_table group by a, b, c GROUPING SETS ((a))"), - Row("1", null, null, 1) :: - Row("4", null, null, 1) :: - Row("7", null, null, 1) :: Nil) - } - - test("hash function") { val df = Seq(1 -> "a", 2 -> "b").toDF("i", "j") withTempView("tbl") { From 9d7f33af203e0eb57b19ad42fbcad2bd9f6dfe4f Mon Sep 17 00:00:00 2001 From: wangyang Date: Tue, 11 Oct 2016 10:27:53 +0800 Subject: [PATCH 5/8] add comment --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 03e5a22d1c1bc..624d05cecf763 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -298,6 +298,9 @@ class Analyzer( case other => Alias(other, other.toString)() } + // The left most bit in the bitmasks corresponds to the last expression in groupByAliases + // with 0 indicating this expression is in the grouping set. The following line of code + // calculates the bit mask representing the expressions that exist in all the grouping sets. val nonNullBitmask = ~ x.bitmasks.reduce(_ | _) val attrLength = groupByAliases.length From 9893c03982b93378e167f1a03c6d38915fa7758e Mon Sep 17 00:00:00 2001 From: wangyang Date: Tue, 11 Oct 2016 12:30:40 +0800 Subject: [PATCH 6/8] address davies' comments --- .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 624d05cecf763..19e38d9e9fae8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -298,14 +298,14 @@ class Analyzer( case other => Alias(other, other.toString)() } - // The left most bit in the bitmasks corresponds to the last expression in groupByAliases - // with 0 indicating this expression is in the grouping set. The following line of code - // calculates the bit mask representing the expressions that exist in all the grouping sets. - val nonNullBitmask = ~ x.bitmasks.reduce(_ | _) + // The rightmost bit in the bitmasks corresponds to the last expression in groupByAliases with 0 + // indicating this expression is in the grouping set. The following line of code calculates the + // bitmask representing the expressions that exist in all the grouping sets (also indicated by 0). + val nonNullBitmask = x.bitmasks.reduce(_ | _) val attrLength = groupByAliases.length val expandedAttributes = groupByAliases.zipWithIndex.map { case (a, idx) => - a.toAttribute.withNullability((nonNullBitmask & 1 << (attrLength - idx - 1)) == 0) + a.toAttribute.withNullability(((nonNullBitmask >> (attrLength - idx - 1)) & 1) == 1) } val expand = Expand(x.bitmasks, groupByAliases, expandedAttributes, gid, x.child) From 69f6e4f1bc37afd6b3ca529c8b0f0afec891459a Mon Sep 17 00:00:00 2001 From: wangyang Date: Tue, 11 Oct 2016 13:29:13 +0800 Subject: [PATCH 7/8] refine name --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 19e38d9e9fae8..ccdaf17ea2b62 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -300,12 +300,12 @@ class Analyzer( // The rightmost bit in the bitmasks corresponds to the last expression in groupByAliases with 0 // indicating this expression is in the grouping set. The following line of code calculates the - // bitmask representing the expressions that exist in all the grouping sets (also indicated by 0). - val nonNullBitmask = x.bitmasks.reduce(_ | _) + // bitmask representing the expressions that absent in at least one grouping set (indicated by 1). + val nullBitmask = x.bitmasks.reduce(_ | _) val attrLength = groupByAliases.length val expandedAttributes = groupByAliases.zipWithIndex.map { case (a, idx) => - a.toAttribute.withNullability(((nonNullBitmask >> (attrLength - idx - 1)) & 1) == 1) + a.toAttribute.withNullability(((nullBitmask >> (attrLength - idx - 1)) & 1) == 1) } val expand = Expand(x.bitmasks, groupByAliases, expandedAttributes, gid, x.child) From 0ad7aba8dfb997f5d8e65a874123a92466c47e1a Mon Sep 17 00:00:00 2001 From: wangyang Date: Fri, 14 Oct 2016 16:18:42 +0800 Subject: [PATCH 8/8] fix scala style --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index ccdaf17ea2b62..05e516136a36f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -298,9 +298,10 @@ class Analyzer( case other => Alias(other, other.toString)() } - // The rightmost bit in the bitmasks corresponds to the last expression in groupByAliases with 0 - // indicating this expression is in the grouping set. The following line of code calculates the - // bitmask representing the expressions that absent in at least one grouping set (indicated by 1). + // The rightmost bit in the bitmasks corresponds to the last expression in groupByAliases + // with 0 indicating this expression is in the grouping set. The following line of code + // calculates the bitmask representing the expressions that absent in at least one grouping + // set (indicated by 1). val nullBitmask = x.bitmasks.reduce(_ | _) val attrLength = groupByAliases.length