Skip to content

Commit 366cac6

Browse files
sameeragarwalrxin
authored andcommitted
[SPARK-14225][SQL] Cap the length of toCommentSafeString at 128 chars
## What changes were proposed in this pull request? Builds on #12022 and (a) appends "..." to truncated comment strings and (b) fixes indentation in lines after the commented strings if they happen to have a `(`, `{`, `)` or `}` ## How was this patch tested? Manually examined the generated code. Author: Sameer Agarwal <[email protected]> Closes #12044 from sameeragarwal/comment.
1 parent a7a93a1 commit 366cac6

File tree

3 files changed

+79
-8
lines changed

3 files changed

+79
-8
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,44 @@ object CodeFormatter {
4343

4444
private class CodeFormatter {
4545
private val code = new StringBuilder
46-
private var indentLevel = 0
4746
private val indentSize = 2
47+
48+
// Tracks the level of indentation in the current line.
49+
private var indentLevel = 0
4850
private var indentString = ""
4951
private var currentLine = 1
5052

53+
// Tracks the level of indentation in multi-line comment blocks.
54+
private var inCommentBlock = false
55+
private var indentLevelOutsideCommentBlock = indentLevel
56+
5157
private def addLine(line: String): Unit = {
52-
val indentChange =
53-
line.count(c => "({".indexOf(c) >= 0) - line.count(c => ")}".indexOf(c) >= 0)
54-
val newIndentLevel = math.max(0, indentLevel + indentChange)
58+
59+
// We currently infer the level of indentation of a given line based on a simple heuristic that
60+
// examines the number of parenthesis and braces in that line. This isn't the most robust
61+
// implementation but works for all code that we generate.
62+
val indentChange = line.count(c => "({".indexOf(c) >= 0) - line.count(c => ")}".indexOf(c) >= 0)
63+
var newIndentLevel = math.max(0, indentLevel + indentChange)
64+
65+
// Please note that while we try to format the comment blocks in exactly the same way as the
66+
// rest of the code, once the block ends, we reset the next line's indentation level to what it
67+
// was immediately before entering the comment block.
68+
if (!inCommentBlock) {
69+
if (line.startsWith("/*")) {
70+
// Handle multi-line comments
71+
inCommentBlock = true
72+
indentLevelOutsideCommentBlock = indentLevel
73+
} else if (line.startsWith("//")) {
74+
// Handle single line comments
75+
newIndentLevel = indentLevel
76+
}
77+
} else {
78+
if (line.endsWith("*/")) {
79+
inCommentBlock = false
80+
newIndentLevel = indentLevelOutsideCommentBlock
81+
}
82+
}
83+
5584
// Lines starting with '}' should be de-indented even if they contain '{' after;
5685
// in addition, lines ending with ':' are typically labels
5786
val thisLineIndent = if (line.startsWith("}") || line.startsWith(")") || line.endsWith(":")) {

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -155,10 +155,13 @@ package object util {
155155

156156
/**
157157
* Returns the string representation of this expression that is safe to be put in
158-
* code comments of generated code.
158+
* code comments of generated code. The length is capped at 128 characters.
159159
*/
160-
def toCommentSafeString(str: String): String =
161-
str.replace("*/", "\\*\\/").replace("\\u", "\\\\u")
160+
def toCommentSafeString(str: String): String = {
161+
val len = math.min(str.length, 128)
162+
val suffix = if (str.length > len) "..." else ""
163+
str.substring(0, len).replace("*/", "\\*\\/").replace("\\u", "\\\\u") + suffix
164+
}
162165

163166
/* FIX ME
164167
implicit class debugLogging(a: Any) {

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,20 @@
1818
package org.apache.spark.sql.catalyst.expressions.codegen
1919

2020
import org.apache.spark.SparkFunSuite
21+
import org.apache.spark.sql.catalyst.util._
2122

2223

2324
class CodeFormatterSuite extends SparkFunSuite {
2425

2526
def testCase(name: String)(input: String)(expected: String): Unit = {
2627
test(name) {
27-
assert(CodeFormatter.format(input).trim === expected.trim)
28+
if (CodeFormatter.format(input).trim !== expected.trim) {
29+
fail(
30+
s"""
31+
|== FAIL: Formatted code doesn't match ===
32+
|${sideBySide(CodeFormatter.format(input).trim, expected.trim).mkString("\n")}
33+
""".stripMargin)
34+
}
2835
}
2936
}
3037

@@ -93,4 +100,36 @@ class CodeFormatterSuite extends SparkFunSuite {
93100
|/* 004 */ c)
94101
""".stripMargin
95102
}
103+
104+
testCase("single line comments") {
105+
"""// This is a comment about class A { { { ( (
106+
|class A {
107+
|class body;
108+
|}""".stripMargin
109+
}{
110+
"""
111+
|/* 001 */ // This is a comment about class A { { { ( (
112+
|/* 002 */ class A {
113+
|/* 003 */ class body;
114+
|/* 004 */ }
115+
""".stripMargin
116+
}
117+
118+
testCase("multi-line comments") {
119+
""" /* This is a comment about
120+
|class A {
121+
|class body; ...*/
122+
|class A {
123+
|class body;
124+
|}""".stripMargin
125+
}{
126+
"""
127+
|/* 001 */ /* This is a comment about
128+
|/* 002 */ class A {
129+
|/* 003 */ class body; ...*/
130+
|/* 004 */ class A {
131+
|/* 005 */ class body;
132+
|/* 006 */ }
133+
""".stripMargin
134+
}
96135
}

0 commit comments

Comments
 (0)