Skip to content

Commit e99b220

Browse files
sarutakdavies
authored andcommitted
[SPARK-15165] [SPARK-15205] [SQL] Introduce place holder for comments in generated code
## What changes were proposed in this pull request? This PR introduce place holder for comment in generated code and the purpose is same for #12939 but much safer. Generated code to be compiled doesn't include actual comments but includes place holder instead. Place holders in generated code will be replaced with actual comments only at the time of logging. Also, this PR can resolve SPARK-15205. ## How was this patch tested? Existing tests. Author: Kousuke Saruta <[email protected]> Closes #12979 from sarutak/SPARK-15205. (cherry picked from commit 22947cd) Signed-off-by: Davies Liu <[email protected]>
1 parent 1677fd3 commit e99b220

File tree

15 files changed

+95
-57
lines changed

15 files changed

+95
-57
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import org.apache.spark.sql.catalyst.InternalRow
2121
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
2222
import org.apache.spark.sql.catalyst.expressions.codegen._
2323
import org.apache.spark.sql.catalyst.trees.TreeNode
24-
import org.apache.spark.sql.catalyst.util.toCommentSafeString
2524
import org.apache.spark.sql.types._
2625

2726
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -97,15 +96,14 @@ abstract class Expression extends TreeNode[Expression] {
9796
ctx.subExprEliminationExprs.get(this).map { subExprState =>
9897
// This expression is repeated which means that the code to evaluate it has already been added
9998
// as a function before. In that case, we just re-use it.
100-
val code = s"/* ${toCommentSafeString(this.toString)} */"
101-
ExprCode(code, subExprState.isNull, subExprState.value)
99+
ExprCode(ctx.registerComment(this.toString), subExprState.isNull, subExprState.value)
102100
}.getOrElse {
103101
val isNull = ctx.freshName("isNull")
104102
val value = ctx.freshName("value")
105103
val ve = doGenCode(ctx, ExprCode("", isNull, value))
106104
if (ve.code.nonEmpty) {
107105
// Add `this` in the comment.
108-
ve.copy(s"/* ${toCommentSafeString(this.toString)} */\n" + ve.code.trim)
106+
ve.copy(code = s"${ctx.registerComment(this.toString)}\n" + ve.code.trim)
109107
} else {
110108
ve
111109
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,24 @@
1717

1818
package org.apache.spark.sql.catalyst.expressions.codegen
1919

20+
import org.apache.commons.lang3.StringUtils
21+
2022
/**
2123
* An utility class that indents a block of code based on the curly braces and parentheses.
2224
* This is used to prettify generated code when in debug mode (or exceptions).
2325
*
2426
* Written by Matei Zaharia.
2527
*/
2628
object CodeFormatter {
27-
def format(code: String): String = new CodeFormatter().addLines(code).result()
29+
def format(code: CodeAndComment): String = {
30+
new CodeFormatter().addLines(
31+
StringUtils.replaceEach(
32+
code.body,
33+
code.comment.keys.toArray,
34+
code.comment.values.toArray)
35+
).result
36+
}
37+
2838
def stripExtraNewLines(input: String): String = {
2939
val code = new StringBuilder
3040
var lastLine: String = "dummy"

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala

Lines changed: 53 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,11 @@ class CodegenContext {
199199
*/
200200
var freshNamePrefix = ""
201201

202+
/**
203+
* The map from a place holder to a corresponding comment
204+
*/
205+
private val placeHolderToComments = new mutable.HashMap[String, String]
206+
202207
/**
203208
* Returns a term name that is unique within this instance of a `CodegenContext`.
204209
*/
@@ -706,6 +711,35 @@ class CodegenContext {
706711
if (doSubexpressionElimination) subexpressionElimination(expressions)
707712
expressions.map(e => e.genCode(this))
708713
}
714+
715+
/**
716+
* get a map of the pair of a place holder and a corresponding comment
717+
*/
718+
def getPlaceHolderToComments(): collection.Map[String, String] = placeHolderToComments
719+
720+
/**
721+
* Register a multi-line comment and return the corresponding place holder
722+
*/
723+
private def registerMultilineComment(text: String): String = {
724+
val placeHolder = s"/*${freshName("c")}*/"
725+
val comment = text.split("(\r\n)|\r|\n").mkString("/**\n * ", "\n * ", "\n */")
726+
placeHolderToComments += (placeHolder -> comment)
727+
placeHolder
728+
}
729+
730+
/**
731+
* Register a comment and return the corresponding place holder
732+
*/
733+
def registerComment(text: String): String = {
734+
if (text.contains("\n") || text.contains("\r")) {
735+
registerMultilineComment(text)
736+
} else {
737+
val placeHolder = s"/*${freshName("c")}*/"
738+
val safeComment = s"// $text"
739+
placeHolderToComments += (placeHolder -> safeComment)
740+
placeHolder
741+
}
742+
}
709743
}
710744

711745
/**
@@ -716,6 +750,19 @@ abstract class GeneratedClass {
716750
def generate(references: Array[Any]): Any
717751
}
718752

753+
/**
754+
* A wrapper for the source code to be compiled by [[CodeGenerator]].
755+
*/
756+
class CodeAndComment(val body: String, val comment: collection.Map[String, String])
757+
extends Serializable {
758+
override def equals(that: Any): Boolean = that match {
759+
case t: CodeAndComment if t.body == body => true
760+
case _ => false
761+
}
762+
763+
override def hashCode(): Int = body.hashCode
764+
}
765+
719766
/**
720767
* A base class for generators of byte code to perform expression evaluation. Includes a set of
721768
* helpers for referring to Catalyst types and building trees that perform evaluation of individual
@@ -760,14 +807,14 @@ object CodeGenerator extends Logging {
760807
/**
761808
* Compile the Java source code into a Java class, using Janino.
762809
*/
763-
def compile(code: String): GeneratedClass = {
810+
def compile(code: CodeAndComment): GeneratedClass = {
764811
cache.get(code)
765812
}
766813

767814
/**
768815
* Compile the Java source code into a Java class, using Janino.
769816
*/
770-
private[this] def doCompile(code: String): GeneratedClass = {
817+
private[this] def doCompile(code: CodeAndComment): GeneratedClass = {
771818
val evaluator = new ClassBodyEvaluator()
772819
evaluator.setParentClassLoader(Utils.getContextOrSparkClassLoader)
773820
// Cannot be under package codegen, or fail with java.lang.InstantiationException
@@ -788,7 +835,7 @@ object CodeGenerator extends Logging {
788835
))
789836
evaluator.setExtendedClass(classOf[GeneratedClass])
790837

791-
def formatted = CodeFormatter.format(code)
838+
lazy val formatted = CodeFormatter.format(code)
792839

793840
logDebug({
794841
// Only add extra debugging info to byte code when we are going to print the source code.
@@ -797,7 +844,7 @@ object CodeGenerator extends Logging {
797844
})
798845

799846
try {
800-
evaluator.cook("generated.java", code)
847+
evaluator.cook("generated.java", code.body)
801848
} catch {
802849
case e: Exception =>
803850
val msg = s"failed to compile: $e\n$formatted"
@@ -819,8 +866,8 @@ object CodeGenerator extends Logging {
819866
private val cache = CacheBuilder.newBuilder()
820867
.maximumSize(100)
821868
.build(
822-
new CacheLoader[String, GeneratedClass]() {
823-
override def load(code: String): GeneratedClass = {
869+
new CacheLoader[CodeAndComment, GeneratedClass]() {
870+
override def load(code: CodeAndComment): GeneratedClass = {
824871
val startTime = System.nanoTime()
825872
val result = doCompile(code)
826873
val endTime = System.nanoTime()

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodegenFallback.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
package org.apache.spark.sql.catalyst.expressions.codegen
1919

2020
import org.apache.spark.sql.catalyst.expressions.{Expression, LeafExpression, Nondeterministic}
21-
import org.apache.spark.sql.catalyst.util.toCommentSafeString
2221

2322
/**
2423
* A trait that can be used to provide a fallback mode for expression code generation.
@@ -36,9 +35,10 @@ trait CodegenFallback extends Expression {
3635
val idx = ctx.references.length
3736
ctx.references += this
3837
val objectTerm = ctx.freshName("obj")
38+
val placeHolder = ctx.registerComment(this.toString)
3939
if (nullable) {
4040
ev.copy(code = s"""
41-
/* expression: ${toCommentSafeString(this.toString)} */
41+
$placeHolder
4242
Object $objectTerm = ((Expression) references[$idx]).eval($input);
4343
boolean ${ev.isNull} = $objectTerm == null;
4444
${ctx.javaType(this.dataType)} ${ev.value} = ${ctx.defaultValue(this.dataType)};
@@ -47,7 +47,7 @@ trait CodegenFallback extends Expression {
4747
}""")
4848
} else {
4949
ev.copy(code = s"""
50-
/* expression: ${toCommentSafeString(this.toString)} */
50+
$placeHolder
5151
Object $objectTerm = ((Expression) references[$idx]).eval($input);
5252
${ctx.javaType(this.dataType)} ${ev.value} = (${ctx.boxedType(this.dataType)}) $objectTerm;
5353
""", isNull = "false")

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP
9494
val allProjections = ctx.splitExpressions(ctx.INPUT_ROW, projectionCodes)
9595
val allUpdates = ctx.splitExpressions(ctx.INPUT_ROW, updates)
9696

97-
val code = s"""
97+
val codeBody = s"""
9898
public java.lang.Object generate(Object[] references) {
9999
return new SpecificMutableProjection(references);
100100
}
@@ -133,6 +133,7 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP
133133
}
134134
"""
135135

136+
val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())
136137
logDebug(s"code for ${expressions.mkString(",")}:\n${CodeFormatter.format(code)}")
137138

138139
val c = CodeGenerator.compile(code)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
113113
protected def create(ordering: Seq[SortOrder]): BaseOrdering = {
114114
val ctx = newCodeGenContext()
115115
val comparisons = genComparisons(ctx, ordering)
116-
val code = s"""
116+
val codeBody = s"""
117117
public SpecificOrdering generate(Object[] references) {
118118
return new SpecificOrdering(references);
119119
}
@@ -136,6 +136,7 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
136136
}
137137
}"""
138138

139+
val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())
139140
logDebug(s"Generated Ordering by ${ordering.mkString(",")}:\n${CodeFormatter.format(code)}")
140141

141142
CodeGenerator.compile(code).generate(ctx.references.toArray).asInstanceOf[BaseOrdering]

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ object GeneratePredicate extends CodeGenerator[Expression, (InternalRow) => Bool
4040
protected def create(predicate: Expression): ((InternalRow) => Boolean) = {
4141
val ctx = newCodeGenContext()
4242
val eval = predicate.genCode(ctx)
43-
val code = s"""
43+
val codeBody = s"""
4444
public SpecificPredicate generate(Object[] references) {
4545
return new SpecificPredicate(references);
4646
}
@@ -61,6 +61,7 @@ object GeneratePredicate extends CodeGenerator[Expression, (InternalRow) => Bool
6161
}
6262
}"""
6363

64+
val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())
6465
logDebug(s"Generated predicate '$predicate':\n${CodeFormatter.format(code)}")
6566

6667
val p = CodeGenerator.compile(code).generate(ctx.references.toArray).asInstanceOf[Predicate]

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection]
155155
"""
156156
}
157157
val allExpressions = ctx.splitExpressions(ctx.INPUT_ROW, expressionCodes)
158-
val code = s"""
158+
val codeBody = s"""
159159
public java.lang.Object generate(Object[] references) {
160160
return new SpecificSafeProjection(references);
161161
}
@@ -181,6 +181,7 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection]
181181
}
182182
"""
183183

184+
val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())
184185
logDebug(s"code for ${expressions.mkString(",")}:\n${CodeFormatter.format(code)}")
185186

186187
val c = CodeGenerator.compile(code)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
362362
val ctx = newCodeGenContext()
363363
val eval = createCode(ctx, expressions, subexpressionEliminationEnabled)
364364

365-
val code = s"""
365+
val codeBody = s"""
366366
public java.lang.Object generate(Object[] references) {
367367
return new SpecificUnsafeProjection(references);
368368
}
@@ -390,6 +390,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
390390
}
391391
"""
392392

393+
val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())
393394
logDebug(s"code for ${expressions.mkString(",")}:\n${CodeFormatter.format(code)}")
394395

395396
val c = CodeGenerator.compile(code)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ object GenerateUnsafeRowJoiner extends CodeGenerator[(StructType, StructType), U
157157
}.mkString("\n")
158158

159159
// ------------------------ Finally, put everything together --------------------------- //
160-
val code = s"""
160+
val codeBody = s"""
161161
|public java.lang.Object generate(Object[] references) {
162162
| return new SpecificUnsafeRowJoiner();
163163
|}
@@ -193,7 +193,7 @@ object GenerateUnsafeRowJoiner extends CodeGenerator[(StructType, StructType), U
193193
| }
194194
|}
195195
""".stripMargin
196-
196+
val code = new CodeAndComment(codeBody, Map.empty)
197197
logDebug(s"SpecificUnsafeRowJoiner($schema1, $schema2):\n${CodeFormatter.format(code)}")
198198

199199
val c = CodeGenerator.compile(code)

0 commit comments

Comments
 (0)