From a6d4651d63c5d31dcd7b1ca6d48be9707316e33b Mon Sep 17 00:00:00 2001
From: wangyang <wangyang@haizhi.com>
Date: Mon, 10 Oct 2016 12:19:31 +0800
Subject: [PATCH 1/8] fix grouping id bug

---
 .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala    | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index ae8869ff25f2d..03e5a22d1c1bc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -298,10 +298,11 @@ class Analyzer(
           case other => Alias(other, other.toString)()
         }
 
-        val nonNullBitmask = x.bitmasks.reduce(_ & _)
+        val nonNullBitmask = ~ x.bitmasks.reduce(_ | _)
 
+        val attrLength = groupByAliases.length
         val expandedAttributes = groupByAliases.zipWithIndex.map { case (a, idx) =>
-          a.toAttribute.withNullability((nonNullBitmask & 1 << idx) == 0)
+          a.toAttribute.withNullability((nonNullBitmask & 1 << (attrLength - idx - 1)) == 0)
         }
 
         val expand = Expand(x.bitmasks, groupByAliases, expandedAttributes, gid, x.child)

From ab82b211afc9c9d2030c25b4f953db6f4b1bb62e Mon Sep 17 00:00:00 2001
From: wangyang <wangyang@haizhi.com>
Date: Mon, 10 Oct 2016 15:03:35 +0800
Subject: [PATCH 2/8] add test

---
 .../scala/org/apache/spark/sql/SQLQuerySuite.scala  | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 0ee8c959eeb4d..7e0730e921784 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -2189,6 +2189,19 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
     }
   }
 
+  test("SPARK-17849: grouping set throws NPE") {
+    val data = Seq(
+      ("1", "2", "3", 1),
+      ("4", "5", "6", 1),
+      ("7", "8", "9", 1)
+    )
+    data.toDF("a", "b", "c", "d").createOrReplaceTempView("point_table")
+    checkAnswer(
+      sql("select a, b, c, count(d) from point_table group by a, b, c GROUPING SETS (())"),
+      Row(null, null, null, 3) :: Nil)
+  }
+
+
   test("hash function") {
     val df = Seq(1 -> "a", 2 -> "b").toDF("i", "j")
     withTempView("tbl") {

From c849b6a02ee65e3a818abc03f301d82b36ec7f43 Mon Sep 17 00:00:00 2001
From: wangyang <wangyang@haizhi.com>
Date: Mon, 10 Oct 2016 15:31:51 +0800
Subject: [PATCH 3/8] add test

---
 .../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala  | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 7e0730e921784..e816bf23800ef 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -2199,6 +2199,11 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
     checkAnswer(
       sql("select a, b, c, count(d) from point_table group by a, b, c GROUPING SETS (())"),
       Row(null, null, null, 3) :: Nil)
+    checkAnswer(
+      sql("select a, b, c, count(d) from point_table group by a, b, c GROUPING SETS ((a))"),
+      Row("1", null, null, 1) ::
+        Row("4", null, null, 1) ::
+        Row("7", null, null, 1) :: Nil)
   }
 
 

From 42f7a63e0f43695e5ce8d85b5bf7425aa02a8b8b Mon Sep 17 00:00:00 2001
From: wangyang <wangyang@haizhi.com>
Date: Mon, 10 Oct 2016 16:31:48 +0800
Subject: [PATCH 4/8] mv test to SQLQueryTestSuit

---
 .../sql-tests/inputs/grouping_set.sql         | 17 ++++++++
 .../sql-tests/results/grouping_set.sql.out    | 42 +++++++++++++++++++
 .../org/apache/spark/sql/SQLQuerySuite.scala  | 18 --------
 3 files changed, 59 insertions(+), 18 deletions(-)
 create mode 100644 sql/core/src/test/resources/sql-tests/inputs/grouping_set.sql
 create mode 100644 sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out

diff --git a/sql/core/src/test/resources/sql-tests/inputs/grouping_set.sql b/sql/core/src/test/resources/sql-tests/inputs/grouping_set.sql
new file mode 100644
index 0000000000000..3594283505280
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/grouping_set.sql
@@ -0,0 +1,17 @@
+CREATE TEMPORARY VIEW grouping AS SELECT * FROM VALUES
+  ("1", "2", "3", 1),
+  ("4", "5", "6", 1),
+  ("7", "8", "9", 1)
+  as grouping(a, b, c, d);
+
+-- SPARK-17849: grouping set throws NPE #1
+SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS (());
+
+-- SPARK-17849: grouping set throws NPE #2
+SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((a));
+
+-- SPARK-17849: grouping set throws NPE #3
+SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((c));
+
+
+
diff --git a/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out b/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out
new file mode 100644
index 0000000000000..edb38a52b7514
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out
@@ -0,0 +1,42 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 4
+
+
+-- !query 0
+CREATE TEMPORARY VIEW grouping AS SELECT * FROM VALUES
+  ("1", "2", "3", 1),
+  ("4", "5", "6", 1),
+  ("7", "8", "9", 1)
+  as grouping(a, b, c, d)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS (())
+-- !query 1 schema
+struct<a:string,b:string,c:string,count(d):bigint>
+-- !query 1 output
+NULL	NULL	NULL	3
+
+
+-- !query 2
+SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((a))
+-- !query 2 schema
+struct<a:string,b:string,c:string,count(d):bigint>
+-- !query 2 output
+1	NULL	NULL	1
+4	NULL	NULL	1
+7	NULL	NULL	1
+
+
+-- !query 3
+SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((c))
+-- !query 3 schema
+struct<a:string,b:string,c:string,count(d):bigint>
+-- !query 3 output
+NULL	NULL	3	1
+NULL	NULL	6	1
+NULL	NULL	9	1
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index e816bf23800ef..0ee8c959eeb4d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -2189,24 +2189,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
     }
   }
 
-  test("SPARK-17849: grouping set throws NPE") {
-    val data = Seq(
-      ("1", "2", "3", 1),
-      ("4", "5", "6", 1),
-      ("7", "8", "9", 1)
-    )
-    data.toDF("a", "b", "c", "d").createOrReplaceTempView("point_table")
-    checkAnswer(
-      sql("select a, b, c, count(d) from point_table group by a, b, c GROUPING SETS (())"),
-      Row(null, null, null, 3) :: Nil)
-    checkAnswer(
-      sql("select a, b, c, count(d) from point_table group by a, b, c GROUPING SETS ((a))"),
-      Row("1", null, null, 1) ::
-        Row("4", null, null, 1) ::
-        Row("7", null, null, 1) :: Nil)
-  }
-
-
   test("hash function") {
     val df = Seq(1 -> "a", 2 -> "b").toDF("i", "j")
     withTempView("tbl") {

From 9d7f33af203e0eb57b19ad42fbcad2bd9f6dfe4f Mon Sep 17 00:00:00 2001
From: wangyang <wangyang@haizhi.com>
Date: Tue, 11 Oct 2016 10:27:53 +0800
Subject: [PATCH 5/8] add comment

---
 .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala      | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 03e5a22d1c1bc..624d05cecf763 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -298,6 +298,9 @@ class Analyzer(
           case other => Alias(other, other.toString)()
         }
 
+        // The left most bit in the bitmasks corresponds to the last expression in groupByAliases
+        // with 0 indicating this expression is in the grouping set. The following line of code
+        // calculates the bit mask representing the expressions that exist in all the grouping sets.
         val nonNullBitmask = ~ x.bitmasks.reduce(_ | _)
 
         val attrLength = groupByAliases.length

From 9893c03982b93378e167f1a03c6d38915fa7758e Mon Sep 17 00:00:00 2001
From: wangyang <wangyang@haizhi.com>
Date: Tue, 11 Oct 2016 12:30:40 +0800
Subject: [PATCH 6/8] address davies' comments

---
 .../apache/spark/sql/catalyst/analysis/Analyzer.scala  | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 624d05cecf763..19e38d9e9fae8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -298,14 +298,14 @@ class Analyzer(
           case other => Alias(other, other.toString)()
         }
 
-        // The left most bit in the bitmasks corresponds to the last expression in groupByAliases
-        // with 0 indicating this expression is in the grouping set. The following line of code
-        // calculates the bit mask representing the expressions that exist in all the grouping sets.
-        val nonNullBitmask = ~ x.bitmasks.reduce(_ | _)
+        // The rightmost bit in the bitmasks corresponds to the last expression in groupByAliases with 0
+        // indicating this expression is in the grouping set. The following line of code calculates the
+        // bitmask representing the expressions that exist in all the grouping sets (also indicated by 0).
+        val nonNullBitmask = x.bitmasks.reduce(_ | _)
 
         val attrLength = groupByAliases.length
         val expandedAttributes = groupByAliases.zipWithIndex.map { case (a, idx) =>
-          a.toAttribute.withNullability((nonNullBitmask & 1 << (attrLength - idx - 1)) == 0)
+          a.toAttribute.withNullability(((nonNullBitmask >> (attrLength - idx - 1)) & 1) == 1)
         }
 
         val expand = Expand(x.bitmasks, groupByAliases, expandedAttributes, gid, x.child)

From 69f6e4f1bc37afd6b3ca529c8b0f0afec891459a Mon Sep 17 00:00:00 2001
From: wangyang <wangyang@haizhi.com>
Date: Tue, 11 Oct 2016 13:29:13 +0800
Subject: [PATCH 7/8] refine name

---
 .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala   | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 19e38d9e9fae8..ccdaf17ea2b62 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -300,12 +300,12 @@ class Analyzer(
 
         // The rightmost bit in the bitmasks corresponds to the last expression in groupByAliases with 0
         // indicating this expression is in the grouping set. The following line of code calculates the
-        // bitmask representing the expressions that exist in all the grouping sets (also indicated by 0).
-        val nonNullBitmask = x.bitmasks.reduce(_ | _)
+        // bitmask representing the expressions that absent in at least one grouping set (indicated by 1).
+        val nullBitmask = x.bitmasks.reduce(_ | _)
 
         val attrLength = groupByAliases.length
         val expandedAttributes = groupByAliases.zipWithIndex.map { case (a, idx) =>
-          a.toAttribute.withNullability(((nonNullBitmask >> (attrLength - idx - 1)) & 1) == 1)
+          a.toAttribute.withNullability(((nullBitmask >> (attrLength - idx - 1)) & 1) == 1)
         }
 
         val expand = Expand(x.bitmasks, groupByAliases, expandedAttributes, gid, x.child)

From 0ad7aba8dfb997f5d8e65a874123a92466c47e1a Mon Sep 17 00:00:00 2001
From: wangyang <wangyang@haizhi.com>
Date: Fri, 14 Oct 2016 16:18:42 +0800
Subject: [PATCH 8/8] fix scala style

---
 .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala  | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index ccdaf17ea2b62..05e516136a36f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -298,9 +298,10 @@ class Analyzer(
           case other => Alias(other, other.toString)()
         }
 
-        // The rightmost bit in the bitmasks corresponds to the last expression in groupByAliases with 0
-        // indicating this expression is in the grouping set. The following line of code calculates the
-        // bitmask representing the expressions that absent in at least one grouping set (indicated by 1).
+        // The rightmost bit in the bitmasks corresponds to the last expression in groupByAliases
+        // with 0 indicating this expression is in the grouping set. The following line of code
+        // calculates the bitmask representing the expressions that absent in at least one grouping
+        // set (indicated by 1).
         val nullBitmask = x.bitmasks.reduce(_ | _)
 
         val attrLength = groupByAliases.length