Skip to content

Commit d0c753a

Browse files
committed
count lines of function without comments
1 parent 52da6b2 commit d0c753a

File tree

5 files changed

+47
-6
lines changed

5 files changed

+47
-6
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,14 @@ object CodeFormatter {
8989
}
9090
new CodeAndComment(code.result().trim(), map)
9191
}
92+
93+
/**
 * Removes comments and redundant blank lines from generated code, so that counting the
 * remaining newlines approximates the number of lines of actual code.
 *
 * Block comments and line comments are removed together with the spaces/tabs directly in
 * front of them. The line break that ends a line comment is kept, so code that follows a
 * trailing comment stays on its own line; a line comment at the very end of the input
 * (without a final line break) is removed as well.
 *
 * Note: matching is purely textual, so comment markers that appear inside string literals
 * are treated as comments too.
 *
 * @param input source text to clean up
 * @return the input without comments and with runs of blank/whitespace-only lines collapsed
 */
def stripExtraNewLinesAndComments(input: String): String = {
  // Only spaces and tabs may be swallowed in front of a comment. A '|' inside the character
  // class would be a literal pipe and would eat operators such as "||" next to a comment.
  val commentReg =
    ("""[ \t]*/\*[\s\S]*?\*/|""" + // strip /*comment*/
      """[ \t]*//[^\n]*""").r // strip //comment, leaving its line break in place
  val codeWithoutComment = commentReg.replaceAllIn(input, "")
  codeWithoutComment.replaceAll("""\n\s*\n""", "\n") // strip ExtraNewLines
}
92100
}
93101

94102
private class CodeFormatter {

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -356,14 +356,15 @@ class CodegenContext {
356356
private val placeHolderToComments = new mutable.HashMap[String, String]
357357

358358
/**
359-
* Returns if the length of codegen function is too long or not
359+
* Returns true if any codegen function has more lines than maxLinesPerFunction.
360360
* It will count the lines of every codegen function, if there is a function of length
361-
* greater than spark.sql.codegen.MaxFunctionLength, it will return true.
361+
* greater than spark.sql.codegen.maxLinesPerFunction, it will return true.
362362
*/
363363
def existTooLongFunction(): Boolean = {
364364
classFunctions.exists { case (className, functions) =>
365365
functions.exists{ case (name, code) =>
366-
CodeFormatter.stripExtraNewLines(code).count(_ == '\n') > SQLConf.get.maxFunctionLength
366+
// Comments and blank lines are stripped first, so only the remaining lines count toward the limit.
val codeWithoutComments = CodeFormatter.stripExtraNewLinesAndComments(code)
367+
codeWithoutComments.count(_ == '\n') > SQLConf.get.maxLinesPerFunction
367368
}
368369
}
369370
}

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -572,7 +572,7 @@ object SQLConf {
572572
"disable logging or -1 to apply no limit.")
573573
.createWithDefault(1000)
574574

575-
val WHOLESTAGE_MAX_FUNCTION_LEN = buildConf("spark.sql.codegen.MaxFunctionLength")
575+
val WHOLESTAGE_MAX_LINES_PER_FUNCTION = buildConf("spark.sql.codegen.maxLinesPerFunction")
576576
.internal()
577577
.doc("The maximum lines of a function that will be supported before" +
578578
" deactivating whole-stage codegen.")
@@ -1021,7 +1021,7 @@ class SQLConf extends Serializable with Logging {
10211021

10221022
def loggingMaxLinesForCodegen: Int = getConf(CODEGEN_LOGGING_MAX_LINES)
10231023

1024-
def maxFunctionLength: Int = getConf(WHOLESTAGE_MAX_FUNCTION_LEN)
1024+
def maxLinesPerFunction: Int = getConf(WHOLESTAGE_MAX_LINES_PER_FUNCTION)
10251025

10261026
def tableRelationCacheSize: Int =
10271027
getConf(StaticSQLConf.FILESOURCE_TABLE_RELATION_CACHE_SIZE)

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,38 @@ class CodeFormatterSuite extends SparkFunSuite {
5353
assert(reducedCode.body === "/*project_c4*/")
5454
}
5555

56+
test("removing extra new lines and comments") {
// The fixture mixes a multi-line block comment, single-line block comments and line comments
// (each with and without leading whitespace), plus blank lines.
57+
val code =
58+
"""
59+
|/*
60+
| * multi
61+
| * line
62+
| * comments
63+
| */
64+
|
65+
|public function() {
66+
|/*comment*/
67+
| /*comment_with_space*/
68+
|code_body
69+
|//comment
70+
|code_body
71+
| //comment_with_space
72+
|
73+
|code_body
74+
|}
75+
""".stripMargin
76+
77+
// Only the function signature, the three code_body lines and the closing brace should remain.
val reducedCode = CodeFormatter.stripExtraNewLinesAndComments(code)
78+
assert(reducedCode ===
79+
"""
80+
|public function() {
81+
|code_body
82+
|code_body
83+
|code_body
84+
|}
85+
""".stripMargin)
86+
}
87+
5688
testCase("basic example") {
5789
"""
5890
|class A {

sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/AggregateBenchmark.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ class AggregateBenchmark extends BenchmarkBase {
340340

341341
benchmark.addCase(s"codegen = T") { iter =>
342342
sparkSession.conf.set("spark.sql.codegen.wholeStage", "true")
343-
sparkSession.conf.set("spark.sql.codegen.MaxFunctionLength", "10000")
343+
sparkSession.conf.set("spark.sql.codegen.maxLinesPerFunction", "10000")
344344
f()
345345
}
346346

0 commit comments

Comments
 (0)