diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 838c045d5bcce..8414c1f522836 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -1173,12 +1173,7 @@ class CodegenContext {
       text: => String,
       placeholderId: String = "",
       force: Boolean = false): Block = {
-    // By default, disable comments in generated code because computing the comments themselves can
-    // be extremely expensive in certain cases, such as deeply-nested expressions which operate over
-    // inputs with wide schemas. For more details on the performance issues that motivated this
-    // flat, see SPARK-15680.
-    if (force ||
-      SparkEnv.get != null && SparkEnv.get.conf.getBoolean("spark.sql.codegen.comments", false)) {
+    if (force || SQLConf.get.codegenComments) {
       val name = if (placeholderId != "") {
         assert(!placeHolderToComments.contains(placeholderId))
         placeholderId
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 07d33fa7d52ae..a0d3fa184cac6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -1539,6 +1539,8 @@ class SQLConf extends Serializable with Logging {
 
   def codegenFallback: Boolean = getConf(CODEGEN_FALLBACK)
 
+  def codegenComments: Boolean = getConf(StaticSQLConf.CODEGEN_COMMENTS)
+
   def loggingMaxLinesForCodegen: Int = getConf(CODEGEN_LOGGING_MAX_LINES)
 
   def hugeMethodLimit: Int = getConf(WHOLESTAGE_HUGE_METHOD_LIMIT)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala
index 384b1917a1f79..d9c354b165e52 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala
@@ -74,6 +74,14 @@ object StaticSQLConf {
     .checkValue(maxEntries => maxEntries >= 0, "The maximum must not be negative")
     .createWithDefault(100)
 
+  val CODEGEN_COMMENTS = buildStaticConf("spark.sql.codegen.comments")
+    .internal()
+    .doc("When true, include comments in the generated code. Since computing huge comments " +
+      "can be extremely expensive in certain cases, such as deeply-nested expressions which " +
+      "operate over inputs with wide schemas, the default is false.")
+    .booleanConf
+    .createWithDefault(false)
+
   // When enabling the debug, Spark SQL internal table properties are not filtered out; however,
   // some related DDL commands (e.g., ANALYZE TABLE and CREATE TABLE LIKE) might not work properly.
   val DEBUG_MODE = buildStaticConf("spark.sql.debug")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala
index 855fe4f4523f2..5b4736ef4f7f3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala
@@ -19,6 +19,8 @@ package org.apache.spark.sql.internal
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.{AnalysisException, SparkSession}
+import org.apache.spark.sql.execution.debug.codegenStringSeq
+import org.apache.spark.sql.functions.col
 import org.apache.spark.sql.test.SQLTestUtils
 
 class ExecutorSideSQLConfSuite extends SparkFunSuite with SQLTestUtils {
@@ -82,4 +84,18 @@ class ExecutorSideSQLConfSuite extends SparkFunSuite with SQLTestUtils {
       assert(checks.forall(_ == true))
     }
   }
+
+  test("SPARK-22219: refactor to control whether to generate comments") {
+    Seq(true, false).foreach { flag =>
+      withSQLConf(StaticSQLConf.CODEGEN_COMMENTS.key -> flag.toString) {
+        val res = codegenStringSeq(spark.range(10).groupBy(col("id") * 2).count()
+          .queryExecution.executedPlan)
+        assert(res.length == 2)
+        assert(res.forall { case (_, code) =>
+          (code.contains("* Codegend pipeline") == flag) &&
+            (code.contains("// input[") == flag)
+        })
+      }
+    }
+  }
 }
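
For reviewers, a minimal end-to-end sketch of how the new flag is exercised. Because spark.sql.codegen.comments becomes a static conf in this patch, it has to be supplied when the SparkSession is built; the local-mode builder and the toy query below are illustrative only, while debugCodegen() comes from Spark's existing org.apache.spark.sql.execution.debug package:

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.execution.debug._

    // Static conf: it must be set at session construction time and cannot be
    // changed afterwards with spark.conf.set(...).
    val spark = SparkSession.builder()
      .master("local[*]")
      .config("spark.sql.codegen.comments", "true")
      .getOrCreate()

    // With the flag on, the dumped code carries the "Codegend pipeline" header
    // and per-expression "// input[...]" comments; with the default (false),
    // both are omitted and codegen skips the cost of computing them.
    spark.range(10).selectExpr("id * 2 AS doubled").debugCodegen()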