From 803166c66ed00df88f4f9d629503d1a87c0d3f45 Mon Sep 17 00:00:00 2001 From: "pj.fanning" Date: Mon, 17 Jul 2017 18:03:18 +0100 Subject: [PATCH 1/6] [SPARK-20871] only log Janino code at debug level --- .../sql/catalyst/expressions/codegen/CodeGenerator.scala | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 7cf9daf628608..6dd416d549fbd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -1037,12 +1037,10 @@ object CodeGenerator extends Logging { )) evaluator.setExtendedClass(classOf[GeneratedClass]) - lazy val formatted = CodeFormatter.format(code) - logDebug({ // Only add extra debugging info to byte code when we are going to print the source code. evaluator.setDebuggingInformation(true, true, false) - s"\n$formatted" + s"\n${CodeFormatter.format(code)}" }) try { @@ -1050,11 +1048,11 @@ object CodeGenerator extends Logging { recordCompilationStats(evaluator) } catch { case e: JaninoRuntimeException => - val msg = s"failed to compile: $e\n$formatted" + val msg = s"failed to compile: $e" logError(msg, e) throw new JaninoRuntimeException(msg, e) case e: CompileException => - val msg = s"failed to compile: $e\n$formatted" + val msg = s"failed to compile: $e" logError(msg, e) throw new CompileException(msg, e.getLocation) } From 52b20f38f550dacc4896d061c5ac7f69ad56f875 Mon Sep 17 00:00:00 2001 From: "pj.fanning" Date: Mon, 17 Jul 2017 23:14:44 +0100 Subject: [PATCH 2/6] [SPARK-20871][SQL] log Janino code at info level (but include a max of 1000 lines) --- .../expressions/codegen/CodeFormatter.scala | 12 +++++++-- .../expressions/codegen/CodeGenerator.scala | 2 ++ .../codegen/CodeFormatterSuite.scala | 26 +++++++++++++++---- 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala index 05b7c96e44c02..8972ca44f6864 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala @@ -28,14 +28,22 @@ import java.util.regex.Matcher object CodeFormatter { val commentHolder = """\/\*(.+?)\*\/""".r - def format(code: CodeAndComment): String = { + def format(code: CodeAndComment): String = format(code, -1) + + def format(code: CodeAndComment, maxLines: Int): String = { val formatter = new CodeFormatter - code.body.split("\n").foreach { line => + val lines = code.body.split("\n") + val needToTruncate = maxLines >= 0 && lines.length > maxLines + val filteredLines = if (needToTruncate) lines.take(maxLines) else lines + filteredLines.foreach { line => val commentReplaced = commentHolder.replaceAllIn( line.trim, m => code.comment.get(m.group(1)).map(Matcher.quoteReplacement).getOrElse(m.group(0))) formatter.addLine(commentReplaced) } + if (needToTruncate) { + formatter.addLine(s"[truncated to $maxLines lines (total lines is ${lines.length})]") + } formatter.result() } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 6dd416d549fbd..85ab5e01f4989 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -1050,10 +1050,12 @@ object CodeGenerator extends Logging { case e: JaninoRuntimeException => val msg = s"failed to compile: $e" logError(msg, e) + logInfo(s"\n${CodeFormatter.format(code, 1000)}") throw new JaninoRuntimeException(msg, e) case e: CompileException => val msg = s"failed to compile: $e" logError(msg, e) + logInfo(s"\n${CodeFormatter.format(code, 1000)}") throw new CompileException(msg, e.getLocation) } evaluator.getClazz().newInstance().asInstanceOf[GeneratedClass] diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala index bc5a8f078234a..63b0977ca8df4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala @@ -20,18 +20,18 @@ package org.apache.spark.sql.catalyst.expressions.codegen import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.util._ - class CodeFormatterSuite extends SparkFunSuite { - def testCase(name: String)( - input: String, comment: Map[String, String] = Map.empty)(expected: String): Unit = { + def testCase(name: String)(input: String, + comment: Map[String, String] = Map.empty, maxLines: Int = -1)(expected: String): Unit = { test(name) { val sourceCode = new CodeAndComment(input.trim, comment) - if (CodeFormatter.format(sourceCode).trim !== expected.trim) { + if (CodeFormatter.format(sourceCode, maxLines).trim !== expected.trim) { fail( s""" |== FAIL: Formatted code doesn't match === - |${sideBySide(CodeFormatter.format(sourceCode).trim, expected.trim).mkString("\n")} + |${sideBySide(CodeFormatter.format(sourceCode, maxLines).trim, + expected.trim).mkString("\n")} """.stripMargin) } } @@ -129,6 +129,22 @@ class CodeFormatterSuite extends SparkFunSuite { """.stripMargin } + testCase("function calls with maxLines=2") ( + """ + |foo( + |a, + |b, + |c) + """.stripMargin, + maxLines = 2 + ) { + """ + |/* 001 */ foo( + |/* 002 */ a, + |/* 003 */ [truncated to 2 lines (total lines is 4)] + """.stripMargin + } + testCase("single line comments") { """ |// This is a comment about class A { { { ( ( From 2997ded8c42f6c28faa471136e8c6de0ab7f1e9f Mon Sep 17 00:00:00 2001 From: "pj.fanning" Date: Fri, 21 Jul 2017 09:41:39 +0100 Subject: [PATCH 3/6] Explicitly reference the maxLines param --- .../sql/catalyst/expressions/codegen/CodeGenerator.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 85ab5e01f4989..ed95fdb8c6995 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -1050,12 +1050,12 @@ object CodeGenerator extends Logging { case e: JaninoRuntimeException => val msg = s"failed to compile: $e" logError(msg, e) - logInfo(s"\n${CodeFormatter.format(code, 1000)}") + logInfo(s"\n${CodeFormatter.format(code, maxLines = 1000)}") throw new JaninoRuntimeException(msg, e) case e: CompileException => val msg = s"failed to compile: $e" logError(msg, e) - logInfo(s"\n${CodeFormatter.format(code, 1000)}") + logInfo(s"\n${CodeFormatter.format(code, maxLines = 1000)}") throw new CompileException(msg, e.getLocation) } evaluator.getClazz().newInstance().asInstanceOf[GeneratedClass] From 252f8ea36b69ea48a6dfd5dfe6175f49337ee8cf Mon Sep 17 00:00:00 2001 From: "pj.fanning" Date: Fri, 21 Jul 2017 13:15:32 +0100 Subject: [PATCH 4/6] [SPARK-20871][SQL] have just one format function with a default applied for maxLines --- .../sql/catalyst/expressions/codegen/CodeFormatter.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala index 8972ca44f6864..60e600d8dbd8f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala @@ -28,9 +28,7 @@ import java.util.regex.Matcher object CodeFormatter { val commentHolder = """\/\*(.+?)\*\/""".r - def format(code: CodeAndComment): String = format(code, -1) - - def format(code: CodeAndComment, maxLines: Int): String = { + def format(code: CodeAndComment, maxLines: Int = -1): String = { val formatter = new CodeFormatter val lines = code.body.split("\n") val needToTruncate = maxLines >= 0 && lines.length > maxLines From c42dc46653778d600a69f7dd978269492ec7bcde Mon Sep 17 00:00:00 2001 From: "pj.fanning" Date: Sun, 23 Jul 2017 00:32:57 +0100 Subject: [PATCH 5/6] [SPARK-20871][SQL] add SQLConf internal property: spark.sql.codegen.logging.maxLines --- .../sql/catalyst/expressions/codegen/CodeGenerator.scala | 7 +++++-- .../scala/org/apache/spark/sql/internal/SQLConf.scala | 8 ++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index ed95fdb8c6995..154210f0b8a6d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -39,6 +39,7 @@ import org.apache.spark.metrics.source.CodegenMetrics import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.Platform import org.apache.spark.unsafe.types._ @@ -1050,12 +1051,14 @@ object CodeGenerator extends Logging { case e: JaninoRuntimeException => val msg = s"failed to compile: $e" logError(msg, e) - logInfo(s"\n${CodeFormatter.format(code, maxLines = 1000)}") + val maxLines = new SQLConf().loggingMaxLinesForCodegen + logInfo(s"\n${CodeFormatter.format(code, maxLines)}") throw new JaninoRuntimeException(msg, e) case e: CompileException => val msg = s"failed to compile: $e" logError(msg, e) - logInfo(s"\n${CodeFormatter.format(code, maxLines = 1000)}") + val maxLines = new SQLConf().loggingMaxLinesForCodegen + logInfo(s"\n${CodeFormatter.format(code, maxLines)}") throw new CompileException(msg, e.getLocation) } evaluator.getClazz().newInstance().asInstanceOf[GeneratedClass] diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 55558ca9f700c..d689bb59326b6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -562,6 +562,12 @@ object SQLConf { .intConf .createWithDefault(20) + val CODEGEN_LOGGING_MAX_LINES = buildConf("spark.sql.codegen.logging.maxLines") + .internal() + .doc("The maximum number of codegen lines to log when errors occur.") + .intConf + .createWithDefault(1000) + val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes") .doc("The maximum number of bytes to pack into a single partition when reading files.") .longConf @@ -1002,6 +1008,8 @@ class SQLConf extends Serializable with Logging { def maxCaseBranchesForCodegen: Int = getConf(MAX_CASES_BRANCHES) + def loggingMaxLinesForCodegen: Int = getConf(CODEGEN_LOGGING_MAX_LINES) + def tableRelationCacheSize: Int = getConf(StaticSQLConf.FILESOURCE_TABLE_RELATION_CACHE_SIZE) From a795db225dbdf71efd3cceca906244644e70c218 Mon Sep 17 00:00:00 2001 From: "pj.fanning" Date: Sun, 23 Jul 2017 08:52:46 +0100 Subject: [PATCH 6/6] [SPARK-20871][SQL] add check on SQLConf internal property: spark.sql.codegen.logging.maxLines --- .../expressions/codegen/CodeGenerator.scala | 4 ++-- .../org/apache/spark/sql/internal/SQLConf.scala | 4 +++- .../expressions/codegen/CodeFormatterSuite.scala | 14 ++++++++++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 154210f0b8a6d..a014e2aa34820 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -1051,13 +1051,13 @@ object CodeGenerator extends Logging { case e: JaninoRuntimeException => val msg = s"failed to compile: $e" logError(msg, e) - val maxLines = new SQLConf().loggingMaxLinesForCodegen + val maxLines = SQLConf.get.loggingMaxLinesForCodegen logInfo(s"\n${CodeFormatter.format(code, maxLines)}") throw new JaninoRuntimeException(msg, e) case e: CompileException => val msg = s"failed to compile: $e" logError(msg, e) - val maxLines = new SQLConf().loggingMaxLinesForCodegen + val maxLines = SQLConf.get.loggingMaxLinesForCodegen logInfo(s"\n${CodeFormatter.format(code, maxLines)}") throw new CompileException(msg, e.getLocation) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index d689bb59326b6..b3bedb36e2575 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -564,8 +564,10 @@ object SQLConf { val CODEGEN_LOGGING_MAX_LINES = buildConf("spark.sql.codegen.logging.maxLines") .internal() - .doc("The maximum number of codegen lines to log when errors occur.") + .doc("The maximum number of codegen lines to log when errors occur. Use -1 for unlimited.") .intConf + .checkValue(maxLines => maxLines >= -1, "The maximum must be a positive integer, 0 to " + + "disable logging or -1 to apply no limit.") .createWithDefault(1000) val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala index 63b0977ca8df4..9d0a41661beaa 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala @@ -129,6 +129,20 @@ class CodeFormatterSuite extends SparkFunSuite { """.stripMargin } + testCase("function calls with maxLines=0") ( + """ + |foo( + |a, + |b, + |c) + """.stripMargin, + maxLines = 0 + ) { + """ + |/* 001 */ [truncated to 0 lines (total lines is 4)] + """.stripMargin + } + testCase("function calls with maxLines=2") ( """ |foo(