5 files changed: +47 -6 lines changed
main/scala/org/apache/spark/sql
catalyst/expressions/codegen
test/scala/org/apache/spark/sql/catalyst/expressions/codegen
core/src/test/scala/org/apache/spark/sql/execution/benchmark Expand file tree Collapse file tree 5 files changed +47
-6
lines changed Original file line number Diff line number Diff line change @@ -89,6 +89,14 @@ object CodeFormatter {
8989 }
9090 new CodeAndComment (code.result().trim(), map)
9191 }
92+
/**
 * Removes comments and collapses blank lines in a piece of generated source text.
 *
 * Block comments are removed together with the spaces/tabs in front of them. A line that
 * holds nothing but a line comment is removed including its newline. A line comment that
 * follows code is removed up to, but not including, the newline, so the next line is never
 * glued onto the current one. Finally every run of blank (whitespace-only) lines is
 * collapsed into a single newline.
 *
 * The patterns do not parse string literals: a comment marker inside a string literal is
 * treated as the start of a comment.
 *
 * @param input source text to clean up
 * @return `input` with comments and blank lines removed
 */
def stripExtraNewLinesAndComments(input: String): String = {
  val commentReg =
    ("""[ \t]*/\*[\s\S]*?\*/|""" +        // strip /*comment*/ and the indentation before it
      """(?m:^[ \t]*//[^\n]*\n)|""" +     // strip a line that holds only //comment
      """[ \t]*//[^\n]*""").r             // strip //comment after code, keeping the newline
  val codeWithoutComment = commentReg.replaceAllIn(input, "")
  codeWithoutComment.replaceAll("""\n\s*\n""", "\n") // strip ExtraNewLines
}
92100}
93101
94102private class CodeFormatter {
Original file line number Diff line number Diff line change @@ -356,14 +356,15 @@ class CodegenContext {
356356 private val placeHolderToComments = new mutable.HashMap [String , String ]
357357
/**
 * Returns true if at least one generated function has more lines than
 * spark.sql.codegen.maxLinesPerFunction.
 *
 * Each function body held in `classFunctions` is passed through
 * `CodeFormatter.stripExtraNewLinesAndComments` before its newline characters are counted,
 * so comments and blank lines do not contribute to the length.
 */
def existTooLongFunction(): Boolean = {
  // Only the function bodies matter here; class and function names are ignored.
  classFunctions.exists { case (_, functions) =>
    functions.exists { case (_, code) =>
      val codeWithoutComments = CodeFormatter.stripExtraNewLinesAndComments(code)
      codeWithoutComments.count(_ == '\n') > SQLConf.get.maxLinesPerFunction
    }
  }
}
Original file line number Diff line number Diff line change @@ -572,7 +572,7 @@ object SQLConf {
572572 " disable logging or -1 to apply no limit." )
573573 .createWithDefault(1000 )
574574
575- val WHOLESTAGE_MAX_FUNCTION_LEN = buildConf(" spark.sql.codegen.MaxFunctionLength " )
575+ val WHOLESTAGE_MAX_LINES_PER_FUNCTION = buildConf(" spark.sql.codegen.maxLinesPerFunction " )
576576 .internal()
577577 .doc(" The maximum lines of a function that will be supported before" +
578578 " deactivating whole-stage codegen." )
@@ -1021,7 +1021,7 @@ class SQLConf extends Serializable with Logging {
10211021
/** Returns the value currently configured for CODEGEN_LOGGING_MAX_LINES. */
def loggingMaxLinesForCodegen: Int = {
  getConf(CODEGEN_LOGGING_MAX_LINES)
}
10231023
/**
 * Returns the value currently configured for WHOLESTAGE_MAX_LINES_PER_FUNCTION
 * (spark.sql.codegen.maxLinesPerFunction).
 */
def maxLinesPerFunction: Int = getConf(WHOLESTAGE_MAX_LINES_PER_FUNCTION)
10251025
/** Returns the value currently configured for StaticSQLConf.FILESOURCE_TABLE_RELATION_CACHE_SIZE. */
def tableRelationCacheSize: Int = {
  getConf(StaticSQLConf.FILESOURCE_TABLE_RELATION_CACHE_SIZE)
}
Original file line number Diff line number Diff line change @@ -53,6 +53,38 @@ class CodeFormatterSuite extends SparkFunSuite {
5353 assert(reducedCode.body === " /*project_c4*/" )
5454 }
5555
test("removing extra new lines and comments") {
  // Fixture mixing a multi-line block comment, inline block comments (with and without
  // indentation), line comments (with and without indentation) and blank lines.
  val source =
    """
      |/*
      | * multi
      | * line
      | * comments
      | */
      |
      |public function() {
      |/*comment*/
      | /*comment_with_space*/
      |code_body
      |//comment
      |code_body
      | //comment_with_space
      |
      |code_body
      |}
    """.stripMargin

  // Only the code lines should survive. The closing quotes are indented exactly like the
  // fixture's, because the whitespace after the last newline passes through unchanged.
  val expected =
    """
      |public function() {
      |code_body
      |code_body
      |code_body
      |}
    """.stripMargin

  val stripped = CodeFormatter.stripExtraNewLinesAndComments(source)
  assert(stripped === expected)
}
87+
5688 testCase(" basic example" ) {
5789 """
5890 |class A {
Original file line number Diff line number Diff line change @@ -340,7 +340,7 @@ class AggregateBenchmark extends BenchmarkBase {
340340
341341 benchmark.addCase(s " codegen = T " ) { iter =>
342342 sparkSession.conf.set(" spark.sql.codegen.wholeStage" , " true" )
343- sparkSession.conf.set(" spark.sql.codegen.MaxFunctionLength " , " 10000" )
343+ sparkSession.conf.set(" spark.sql.codegen.maxLinesPerFunction " , " 10000" )
344344 f()
345345 }
346346
You can’t perform that action at this time.
0 commit comments