From 9c26e2e765ce40fc1ded22e69155b8d85f4e1598 Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Fri, 6 Oct 2017 18:54:03 +0100
Subject: [PATCH 1/5] Initial commit

---
 .../expressions/codegen/CodeGenerator.scala     |  3 +--
 .../org/apache/spark/sql/internal/SQLConf.scala | 17 +++++++++++++++++
 .../sql/execution/debug/DebuggingSuite.scala    | 16 ++++++++++++++++
 3 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 838c045d5bcce..19905a25dbdfa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -1177,8 +1177,7 @@ class CodegenContext {
     // be extremely expensive in certain cases, such as deeply-nested expressions which operate over
     // inputs with wide schemas. For more details on the performance issues that motivated this
     // flat, see SPARK-15680.
-    if (force ||
-      SparkEnv.get != null && SparkEnv.get.conf.getBoolean("spark.sql.codegen.comments", false)) {
+    if (force || SQLConf.get.codegenComments) {
       val name = if (placeholderId != "") {
         assert(!placeHolderToComments.contains(placeholderId))
         placeholderId
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 07d33fa7d52ae..b784944f4e5d1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -751,6 +751,19 @@ object SQLConf {
     .booleanConf
     .createWithDefault(true)
 
+  val MAX_CASES_BRANCHES = buildConf("spark.sql.codegen.maxCaseBranches")
+    .internal()
+    .doc("The maximum number of switches supported with codegen.")
+    .intConf
+    .createWithDefault(20)
+
+  val CODEGEN_COMMENTS = buildConf("spark.sql.codegen.comments")
+    .internal()
+    .doc("When true, put comment in the generated code. Since computing huge comments " +
+      "can be extremely expensive in certain cases, default is false.")
+    .booleanConf
+    .createWithDefault(false)
+
   val CODEGEN_LOGGING_MAX_LINES = buildConf("spark.sql.codegen.logging.maxLines")
     .internal()
     .doc("The maximum number of codegen lines to log when errors occur. Use -1 for unlimited.")
@@ -1539,6 +1552,10 @@ class SQLConf extends Serializable with Logging {
 
   def codegenFallback: Boolean = getConf(CODEGEN_FALLBACK)
 
+  def maxCaseBranchesForCodegen: Int = getConf(MAX_CASES_BRANCHES)
+
+  def codegenComments: Boolean = getConf(CODEGEN_COMMENTS)
+
   def loggingMaxLinesForCodegen: Int = getConf(CODEGEN_LOGGING_MAX_LINES)
 
   def hugeMethodLimit: Int = getConf(WHOLESTAGE_HUGE_METHOD_LIMIT)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
index 8251ff159e05f..be4a79e3d429f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
@@ -48,4 +48,20 @@ class DebuggingSuite extends SparkFunSuite with SharedSQLContext {
     assert(res.forall{ case (subtree, code) =>
       subtree.contains("Range") && code.contains("Object[]")})
   }
+
+  test("check to generate comment") {
+    withSQLConf("spark.sql.codegen.comments" -> "false") {
+      val res = codegenStringSeq(spark.range(10).groupBy("id").count().queryExecution.executedPlan)
+      assert(res.length == 2)
+      assert(res.forall{ case (_, code) =>
+        !code.contains("* Codegend pipeline") && !code.contains("// input[")})
+    }
+
+    withSQLConf("spark.sql.codegen.comments" -> "true") {
+      val res = codegenStringSeq(spark.range(10).groupBy("id").count().queryExecution.executedPlan)
+      assert(res.length == 2)
+      assert(res.forall{ case (_, code) =>
+        code.contains("* Codegend pipeline") && code.contains("// input[")})
+    }
+  }
 }

From a198901d2a1d2b164107fb415ae72abcf0b7075d Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Mon, 16 Jul 2018 17:26:41 +0100
Subject: [PATCH 2/5] use StaticSQLConf

---
 .../apache/spark/sql/internal/SQLConf.scala   |  9 +--------
 .../spark/sql/internal/StaticSQLConf.scala    |  7 +++++++
 .../sql/execution/debug/DebuggingSuite.scala  | 16 ---------------
 .../internal/ExecutorSideSQLConfSuite.scala   | 20 +++++++++++++++++++
 4 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index b784944f4e5d1..58ed78cac7122 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -757,13 +757,6 @@ object SQLConf {
     .intConf
     .createWithDefault(20)
 
-  val CODEGEN_COMMENTS = buildConf("spark.sql.codegen.comments")
-    .internal()
-    .doc("When true, put comment in the generated code. Since computing huge comments " +
-      "can be extremely expensive in certain cases, default is false.")
-    .booleanConf
-    .createWithDefault(false)
-
   val CODEGEN_LOGGING_MAX_LINES = buildConf("spark.sql.codegen.logging.maxLines")
     .internal()
     .doc("The maximum number of codegen lines to log when errors occur. Use -1 for unlimited.")
@@ -1554,7 +1547,7 @@ class SQLConf extends Serializable with Logging {
 
   def maxCaseBranchesForCodegen: Int = getConf(MAX_CASES_BRANCHES)
 
-  def codegenComments: Boolean = getConf(CODEGEN_COMMENTS)
+  def codegenComments: Boolean = getConf(StaticSQLConf.CODEGEN_COMMENTS)
 
   def loggingMaxLinesForCodegen: Int = getConf(CODEGEN_LOGGING_MAX_LINES)
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala
index 384b1917a1f79..0aee436c4a88e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala
@@ -74,6 +74,13 @@ object StaticSQLConf {
       .checkValue(maxEntries => maxEntries >= 0, "The maximum must not be negative")
       .createWithDefault(100)
 
+  val CODEGEN_COMMENTS = buildStaticConf("spark.sql.codegen.comments")
+    .internal()
+    .doc("When true, put comment in the generated code. Since computing huge comments " +
+      "can be extremely expensive in certain cases, default is false.")
+    .booleanConf
+    .createWithDefault(false)
+
   // When enabling the debug, Spark SQL internal table properties are not filtered out; however,
   // some related DDL commands (e.g., ANALYZE TABLE and CREATE TABLE LIKE) might not work properly.
   val DEBUG_MODE = buildStaticConf("spark.sql.debug")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
index be4a79e3d429f..8251ff159e05f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
@@ -48,20 +48,4 @@ class DebuggingSuite extends SparkFunSuite with SharedSQLContext {
     assert(res.forall{ case (subtree, code) =>
       subtree.contains("Range") && code.contains("Object[]")})
   }
-
-  test("check to generate comment") {
-    withSQLConf("spark.sql.codegen.comments" -> "false") {
-      val res = codegenStringSeq(spark.range(10).groupBy("id").count().queryExecution.executedPlan)
-      assert(res.length == 2)
-      assert(res.forall{ case (_, code) =>
-        !code.contains("* Codegend pipeline") && !code.contains("// input[")})
-    }
-
-    withSQLConf("spark.sql.codegen.comments" -> "true") {
-      val res = codegenStringSeq(spark.range(10).groupBy("id").count().queryExecution.executedPlan)
-      assert(res.length == 2)
-      assert(res.forall{ case (_, code) =>
-        code.contains("* Codegend pipeline") && code.contains("// input[")})
-    }
-  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala
index 855fe4f4523f2..a8b47edd54745 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala
@@ -19,6 +19,8 @@ package org.apache.spark.sql.internal
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.{AnalysisException, SparkSession}
+import org.apache.spark.sql.execution.debug.codegenStringSeq
+import org.apache.spark.sql.functions.col
 import org.apache.spark.sql.test.SQLTestUtils
 
 class ExecutorSideSQLConfSuite extends SparkFunSuite with SQLTestUtils {
@@ -82,4 +84,22 @@ class ExecutorSideSQLConfSuite extends SparkFunSuite with SQLTestUtils {
       assert(checks.forall(_ == true))
     }
   }
+
+  test("SPARK-22219: refactor to control to generate comment") {
+    withSQLConf(StaticSQLConf.CODEGEN_COMMENTS.key -> "false") {
+      val res = codegenStringSeq(spark.range(10).groupBy(col("id") * 2).count()
+        .queryExecution.executedPlan)
+      assert(res.length == 2)
+      assert(res.forall{ case (_, code) =>
+        !code.contains("* Codegend pipeline") && !code.contains("// input[")})
+    }
+
+    withSQLConf(StaticSQLConf.CODEGEN_COMMENTS.key -> "true") {
+      val res = codegenStringSeq(spark.range(10).groupBy(col("id") * 2).count()
+        .queryExecution.executedPlan)
+      assert(res.length == 2)
+      assert(res.forall{ case (_, code) =>
+        code.contains("* Codegend pipeline") && code.contains("// input[")})
+    }
+  }
 }

From 86800261afa8c451f9d0bf43903026a14ee971ae Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Tue, 17 Jul 2018 02:13:05 +0100
Subject: [PATCH 3/5] address review comment

---
 .../scala/org/apache/spark/sql/internal/SQLConf.scala     | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 58ed78cac7122..a0d3fa184cac6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -751,12 +751,6 @@ object SQLConf {
     .booleanConf
     .createWithDefault(true)
 
-  val MAX_CASES_BRANCHES = buildConf("spark.sql.codegen.maxCaseBranches")
-    .internal()
-    .doc("The maximum number of switches supported with codegen.")
-    .intConf
-    .createWithDefault(20)
-
   val CODEGEN_LOGGING_MAX_LINES = buildConf("spark.sql.codegen.logging.maxLines")
     .internal()
     .doc("The maximum number of codegen lines to log when errors occur. Use -1 for unlimited.")
@@ -1545,8 +1539,6 @@ class SQLConf extends Serializable with Logging {
 
   def codegenFallback: Boolean = getConf(CODEGEN_FALLBACK)
 
-  def maxCaseBranchesForCodegen: Int = getConf(MAX_CASES_BRANCHES)
-
   def codegenComments: Boolean = getConf(StaticSQLConf.CODEGEN_COMMENTS)
 
   def loggingMaxLinesForCodegen: Int = getConf(CODEGEN_LOGGING_MAX_LINES)

From afe889d7cd05f7a293f76103616cd62106b91305 Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Tue, 31 Jul 2018 20:21:20 +0100
Subject: [PATCH 4/5] address review comments

---
 .../sql/catalyst/expressions/codegen/CodeGenerator.scala      | 4 ----
 .../scala/org/apache/spark/sql/internal/StaticSQLConf.scala   | 3 ++-
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 19905a25dbdfa..8414c1f522836 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -1173,10 +1173,6 @@ class CodegenContext {
        text: => String,
        placeholderId: String = "",
        force: Boolean = false): Block = {
-    // By default, disable comments in generated code because computing the comments themselves can
-    // be extremely expensive in certain cases, such as deeply-nested expressions which operate over
-    // inputs with wide schemas. For more details on the performance issues that motivated this
-    // flat, see SPARK-15680.
     if (force || SQLConf.get.codegenComments) {
       val name = if (placeholderId != "") {
         assert(!placeHolderToComments.contains(placeholderId))
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala
index 0aee436c4a88e..d9c354b165e52 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala
@@ -77,7 +77,8 @@ object StaticSQLConf {
   val CODEGEN_COMMENTS = buildStaticConf("spark.sql.codegen.comments")
     .internal()
     .doc("When true, put comment in the generated code. Since computing huge comments " +
-      "can be extremely expensive in certain cases, default is false.")
+      "can be extremely expensive in certain cases, such as deeply-nested expressions which " +
+      "operate over inputs with wide schemas, default is false.")
     .booleanConf
     .createWithDefault(false)
 

From 253bc19af270185e6d419a9ed0261917f84688c1 Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Wed, 1 Aug 2018 19:49:10 +0100
Subject: [PATCH 5/5] address review comment

---
 .../internal/ExecutorSideSQLConfSuite.scala   | 24 ++++++++-----------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala
index a8b47edd54745..5b4736ef4f7f3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala
@@ -86,20 +86,16 @@ class ExecutorSideSQLConfSuite extends SparkFunSuite with SQLTestUtils {
   }
 
   test("SPARK-22219: refactor to control to generate comment") {
-    withSQLConf(StaticSQLConf.CODEGEN_COMMENTS.key -> "false") {
-      val res = codegenStringSeq(spark.range(10).groupBy(col("id") * 2).count()
-        .queryExecution.executedPlan)
-      assert(res.length == 2)
-      assert(res.forall{ case (_, code) =>
-        !code.contains("* Codegend pipeline") && !code.contains("// input[")})
-    }
-
-    withSQLConf(StaticSQLConf.CODEGEN_COMMENTS.key -> "true") {
-      val res = codegenStringSeq(spark.range(10).groupBy(col("id") * 2).count()
-        .queryExecution.executedPlan)
-      assert(res.length == 2)
-      assert(res.forall{ case (_, code) =>
-        code.contains("* Codegend pipeline") && code.contains("// input[")})
+    Seq(true, false).foreach { flag =>
+      withSQLConf(StaticSQLConf.CODEGEN_COMMENTS.key -> flag.toString) {
+        val res = codegenStringSeq(spark.range(10).groupBy(col("id") * 2).count()
+          .queryExecution.executedPlan)
+        assert(res.length == 2)
+        assert(res.forall { case (_, code) =>
+          (code.contains("* Codegend pipeline") == flag) &&
+            (code.contains("// input[") == flag)
+        })
+      }
     }
   }
 }