Skip to content

Commit 2a53fbf

Browse files
pj.fanning authored and gatorsmile committed
[SPARK-20871][SQL] limit logging of Janino code
## What changes were proposed in this pull request? When the generated code is larger than 64k, Janino compilation fails and CodeGenerator.scala logs the entire code at Error level. SPARK-20871 suggests only logging the code at Debug level. Since the code is already logged at Debug level, this pull request proposes not including the formatted code in the Error logging and exception message at all. When an exception occurs, the code will be logged at Info level but truncated if it is more than 1000 lines long (the default of the new spark.sql.codegen.logging.maxLines setting). ## How was this patch tested? Existing tests were run. An extra test case was added to CodeFormatterSuite to test the new maxLines parameter. Author: pj.fanning <[email protected]> Closes #18658 from pjfanning/SPARK-20871.
1 parent cecd285 commit 2a53fbf

File tree

4 files changed

+61
-12
lines changed

4 files changed

+61
-12
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,14 +28,20 @@ import java.util.regex.Matcher
2828
object CodeFormatter {
2929
val commentHolder = """\/\*(.+?)\*\/""".r
3030

31-
def format(code: CodeAndComment): String = {
31+
def format(code: CodeAndComment, maxLines: Int = -1): String = {
3232
val formatter = new CodeFormatter
33-
code.body.split("\n").foreach { line =>
33+
val lines = code.body.split("\n")
34+
val needToTruncate = maxLines >= 0 && lines.length > maxLines
35+
val filteredLines = if (needToTruncate) lines.take(maxLines) else lines
36+
filteredLines.foreach { line =>
3437
val commentReplaced = commentHolder.replaceAllIn(
3538
line.trim,
3639
m => code.comment.get(m.group(1)).map(Matcher.quoteReplacement).getOrElse(m.group(0)))
3740
formatter.addLine(commentReplaced)
3841
}
42+
if (needToTruncate) {
43+
formatter.addLine(s"[truncated to $maxLines lines (total lines is ${lines.length})]")
44+
}
3945
formatter.result()
4046
}
4147

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,7 @@ import org.apache.spark.metrics.source.CodegenMetrics
3939
import org.apache.spark.sql.catalyst.InternalRow
4040
import org.apache.spark.sql.catalyst.expressions._
4141
import org.apache.spark.sql.catalyst.util.{ArrayData, MapData}
42+
import org.apache.spark.sql.internal.SQLConf
4243
import org.apache.spark.sql.types._
4344
import org.apache.spark.unsafe.Platform
4445
import org.apache.spark.unsafe.types._
@@ -1037,25 +1038,27 @@ object CodeGenerator extends Logging {
10371038
))
10381039
evaluator.setExtendedClass(classOf[GeneratedClass])
10391040

1040-
lazy val formatted = CodeFormatter.format(code)
1041-
10421041
logDebug({
10431042
// Only add extra debugging info to byte code when we are going to print the source code.
10441043
evaluator.setDebuggingInformation(true, true, false)
1045-
s"\n$formatted"
1044+
s"\n${CodeFormatter.format(code)}"
10461045
})
10471046

10481047
try {
10491048
evaluator.cook("generated.java", code.body)
10501049
recordCompilationStats(evaluator)
10511050
} catch {
10521051
case e: JaninoRuntimeException =>
1053-
val msg = s"failed to compile: $e\n$formatted"
1052+
val msg = s"failed to compile: $e"
10541053
logError(msg, e)
1054+
val maxLines = SQLConf.get.loggingMaxLinesForCodegen
1055+
logInfo(s"\n${CodeFormatter.format(code, maxLines)}")
10551056
throw new JaninoRuntimeException(msg, e)
10561057
case e: CompileException =>
1057-
val msg = s"failed to compile: $e\n$formatted"
1058+
val msg = s"failed to compile: $e"
10581059
logError(msg, e)
1060+
val maxLines = SQLConf.get.loggingMaxLinesForCodegen
1061+
logInfo(s"\n${CodeFormatter.format(code, maxLines)}")
10591062
throw new CompileException(msg, e.getLocation)
10601063
}
10611064
evaluator.getClazz().newInstance().asInstanceOf[GeneratedClass]

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -564,6 +564,14 @@ object SQLConf {
564564
.intConf
565565
.createWithDefault(20)
566566

567+
val CODEGEN_LOGGING_MAX_LINES = buildConf("spark.sql.codegen.logging.maxLines")
568+
.internal()
569+
.doc("The maximum number of codegen lines to log when errors occur. Use -1 for unlimited.")
570+
.intConf
571+
.checkValue(maxLines => maxLines >= -1, "The maximum must be a positive integer, 0 to " +
572+
"disable logging or -1 to apply no limit.")
573+
.createWithDefault(1000)
574+
567575
val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes")
568576
.doc("The maximum number of bytes to pack into a single partition when reading files.")
569577
.longConf
@@ -1004,6 +1012,8 @@ class SQLConf extends Serializable with Logging {
10041012

10051013
def maxCaseBranchesForCodegen: Int = getConf(MAX_CASES_BRANCHES)
10061014

1015+
def loggingMaxLinesForCodegen: Int = getConf(CODEGEN_LOGGING_MAX_LINES)
1016+
10071017
def tableRelationCacheSize: Int =
10081018
getConf(StaticSQLConf.FILESOURCE_TABLE_RELATION_CACHE_SIZE)
10091019

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala

Lines changed: 35 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -20,18 +20,18 @@ package org.apache.spark.sql.catalyst.expressions.codegen
2020
import org.apache.spark.SparkFunSuite
2121
import org.apache.spark.sql.catalyst.util._
2222

23-
2423
class CodeFormatterSuite extends SparkFunSuite {
2524

26-
def testCase(name: String)(
27-
input: String, comment: Map[String, String] = Map.empty)(expected: String): Unit = {
25+
def testCase(name: String)(input: String,
26+
comment: Map[String, String] = Map.empty, maxLines: Int = -1)(expected: String): Unit = {
2827
test(name) {
2928
val sourceCode = new CodeAndComment(input.trim, comment)
30-
if (CodeFormatter.format(sourceCode).trim !== expected.trim) {
29+
if (CodeFormatter.format(sourceCode, maxLines).trim !== expected.trim) {
3130
fail(
3231
s"""
3332
|== FAIL: Formatted code doesn't match ===
34-
|${sideBySide(CodeFormatter.format(sourceCode).trim, expected.trim).mkString("\n")}
33+
|${sideBySide(CodeFormatter.format(sourceCode, maxLines).trim,
34+
expected.trim).mkString("\n")}
3535
""".stripMargin)
3636
}
3737
}
@@ -129,6 +129,36 @@ class CodeFormatterSuite extends SparkFunSuite {
129129
""".stripMargin
130130
}
131131

132+
testCase("function calls with maxLines=0") (
133+
"""
134+
|foo(
135+
|a,
136+
|b,
137+
|c)
138+
""".stripMargin,
139+
maxLines = 0
140+
) {
141+
"""
142+
|/* 001 */ [truncated to 0 lines (total lines is 4)]
143+
""".stripMargin
144+
}
145+
146+
testCase("function calls with maxLines=2") (
147+
"""
148+
|foo(
149+
|a,
150+
|b,
151+
|c)
152+
""".stripMargin,
153+
maxLines = 2
154+
) {
155+
"""
156+
|/* 001 */ foo(
157+
|/* 002 */ a,
158+
|/* 003 */ [truncated to 2 lines (total lines is 4)]
159+
""".stripMargin
160+
}
161+
132162
testCase("single line comments") {
133163
"""
134164
|// This is a comment about class A { { { ( (

0 commit comments

Comments
 (0)