diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala index 05b7c96e44c0..60e600d8dbd8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala @@ -28,14 +28,20 @@ import java.util.regex.Matcher object CodeFormatter { val commentHolder = """\/\*(.+?)\*\/""".r - def format(code: CodeAndComment): String = { + def format(code: CodeAndComment, maxLines: Int = -1): String = { val formatter = new CodeFormatter - code.body.split("\n").foreach { line => + val lines = code.body.split("\n") + val needToTruncate = maxLines >= 0 && lines.length > maxLines + val filteredLines = if (needToTruncate) lines.take(maxLines) else lines + filteredLines.foreach { line => val commentReplaced = commentHolder.replaceAllIn( line.trim, m => code.comment.get(m.group(1)).map(Matcher.quoteReplacement).getOrElse(m.group(0))) formatter.addLine(commentReplaced) } + if (needToTruncate) { + formatter.addLine(s"[truncated to $maxLines lines (total lines is ${lines.length})]") + } formatter.result() } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 7cf9daf62860..a014e2aa3482 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -39,6 +39,7 @@ import org.apache.spark.metrics.source.CodegenMetrics import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.Platform import org.apache.spark.unsafe.types._ @@ -1037,12 +1038,10 @@ object CodeGenerator extends Logging { )) evaluator.setExtendedClass(classOf[GeneratedClass]) - lazy val formatted = CodeFormatter.format(code) - logDebug({ // Only add extra debugging info to byte code when we are going to print the source code. evaluator.setDebuggingInformation(true, true, false) - s"\n$formatted" + s"\n${CodeFormatter.format(code)}" }) try { @@ -1050,12 +1049,16 @@ object CodeGenerator extends Logging { recordCompilationStats(evaluator) } catch { case e: JaninoRuntimeException => - val msg = s"failed to compile: $e\n$formatted" + val msg = s"failed to compile: $e" logError(msg, e) + val maxLines = SQLConf.get.loggingMaxLinesForCodegen + logInfo(s"\n${CodeFormatter.format(code, maxLines)}") throw new JaninoRuntimeException(msg, e) case e: CompileException => - val msg = s"failed to compile: $e\n$formatted" + val msg = s"failed to compile: $e" logError(msg, e) + val maxLines = SQLConf.get.loggingMaxLinesForCodegen + logInfo(s"\n${CodeFormatter.format(code, maxLines)}") throw new CompileException(msg, e.getLocation) } evaluator.getClazz().newInstance().asInstanceOf[GeneratedClass] diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 55558ca9f700..b3bedb36e257 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -562,6 +562,14 @@ object SQLConf { .intConf .createWithDefault(20) + val CODEGEN_LOGGING_MAX_LINES = buildConf("spark.sql.codegen.logging.maxLines") + .internal() + .doc("The maximum number of codegen lines to log when errors occur. Use -1 for unlimited.") + .intConf + .checkValue(maxLines => maxLines >= -1, "The maximum must be a positive integer, 0 to " + + "disable logging or -1 to apply no limit.") + .createWithDefault(1000) + val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes") .doc("The maximum number of bytes to pack into a single partition when reading files.") .longConf @@ -1002,6 +1010,8 @@ class SQLConf extends Serializable with Logging { def maxCaseBranchesForCodegen: Int = getConf(MAX_CASES_BRANCHES) + def loggingMaxLinesForCodegen: Int = getConf(CODEGEN_LOGGING_MAX_LINES) + def tableRelationCacheSize: Int = getConf(StaticSQLConf.FILESOURCE_TABLE_RELATION_CACHE_SIZE) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala index bc5a8f078234..9d0a41661bea 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala @@ -20,18 +20,18 @@ package org.apache.spark.sql.catalyst.expressions.codegen import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.util._ - class CodeFormatterSuite extends SparkFunSuite { - def testCase(name: String)( - input: String, comment: Map[String, String] = Map.empty)(expected: String): Unit = { + def testCase(name: String)(input: String, + comment: Map[String, String] = Map.empty, maxLines: Int = -1)(expected: String): Unit = { test(name) { val sourceCode = new CodeAndComment(input.trim, comment) - if (CodeFormatter.format(sourceCode).trim !== expected.trim) { + if (CodeFormatter.format(sourceCode, maxLines).trim !== expected.trim) { fail( s""" |== FAIL: Formatted code doesn't match === - |${sideBySide(CodeFormatter.format(sourceCode).trim, expected.trim).mkString("\n")} + |${sideBySide(CodeFormatter.format(sourceCode, maxLines).trim, + expected.trim).mkString("\n")} """.stripMargin) } } @@ -129,6 +129,36 @@ class CodeFormatterSuite extends SparkFunSuite { """.stripMargin } + testCase("function calls with maxLines=0") ( + """ + |foo( + |a, + |b, + |c) + """.stripMargin, + maxLines = 0 + ) { + """ + |/* 001 */ [truncated to 0 lines (total lines is 4)] + """.stripMargin + } + + testCase("function calls with maxLines=2") ( + """ + |foo( + |a, + |b, + |c) + """.stripMargin, + maxLines = 2 + ) { + """ + |/* 001 */ foo( + |/* 002 */ a, + |/* 003 */ [truncated to 2 lines (total lines is 4)] + """.stripMargin + } + testCase("single line comments") { """ |// This is a comment about class A { { { ( (