Skip to content

Commit c0c3ec3

Browse files
sarutakdavies
authored andcommitted
[SPARK-15165] [SQL] Codegen can break because toCommentSafeString is not actually safe
## What changes were proposed in this pull request? toCommentSafeString method replaces "\u" with "\\\\u" to avoid codegen breaking. But if the even number of "\" is put before "u", like "\\\\u", in the string literal in the query, codegen can break. Following code causes compilation error. ``` val df = Seq(...).toDF df.select("'\\\\\\\\u002A/'").show ``` The reason of the compilation error is because "\\\\\\\\\\\\\\\\u002A/" is translated into "*/" (the end of comment). Due to this unsafety, arbitrary code can be injected like as follows. ``` val df = Seq(...).toDF // Inject "System.exit(1)" df.select("'\\\\\\\\u002A/{System.exit(1);}/*'").show ``` ## How was this patch tested? Added new test cases. Author: Kousuke Saruta <[email protected]> Author: sarutak <[email protected]> Closes #12939 from sarutak/SPARK-15165.
1 parent bebe5f9 commit c0c3ec3

File tree

3 files changed

+320
-1
lines changed

3 files changed

+320
-1
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,18 @@ package object util {
162162
def toCommentSafeString(str: String): String = {
163163
val len = math.min(str.length, 128)
164164
val suffix = if (str.length > len) "..." else ""
165-
str.substring(0, len).replace("*/", "\\*\\/").replace("\\u", "\\\\u") + suffix
165+
166+
// Unicode literals, like \u0022, should be escaped before
167+
// they are put in code comment to avoid codegen breaking.
168+
// To escape them, single "\" should be prepended to a series of "\" just before "u"
169+
// only when the number of "\" is odd.
170+
// For example, \u0022 should become to \\u0022
171+
// but \\u0022 should not become to \\\u0022 because the first backslash escapes the second one,
172+
// and \u0022 will remain, means not escaped.
173+
// Otherwise, the runtime Java compiler will fail to compile or code injection can be allowed.
174+
// For details, see SPARK-15165.
175+
str.substring(0, len).replace("*/", "*\\/")
176+
.replaceAll("(^|[^\\\\])(\\\\(\\\\\\\\)*u)", "$1\\\\$2") + suffix
166177
}
167178

168179
/* FIX ME

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,4 +194,48 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper {
194194
true,
195195
InternalRow(UTF8String.fromString("\\u")))
196196
}
197+
198+
test("check compilation error doesn't occur caused by specific literal") {
199+
// The end of comment (*/) should be escaped.
200+
GenerateUnsafeProjection.generate(
201+
Literal.create("*/Compilation error occurs/*", StringType) :: Nil)
202+
203+
// `\u002A` is `*` and `\u002F` is `/`
204+
// so if the end of comment consists of those characters in queries, we need to escape them.
205+
GenerateUnsafeProjection.generate(
206+
Literal.create("\\u002A/Compilation error occurs/*", StringType) :: Nil)
207+
GenerateUnsafeProjection.generate(
208+
Literal.create("\\\\u002A/Compilation error occurs/*", StringType) :: Nil)
209+
GenerateUnsafeProjection.generate(
210+
Literal.create("\\u002a/Compilation error occurs/*", StringType) :: Nil)
211+
GenerateUnsafeProjection.generate(
212+
Literal.create("\\\\u002a/Compilation error occurs/*", StringType) :: Nil)
213+
GenerateUnsafeProjection.generate(
214+
Literal.create("*\\u002FCompilation error occurs/*", StringType) :: Nil)
215+
GenerateUnsafeProjection.generate(
216+
Literal.create("*\\\\u002FCompilation error occurs/*", StringType) :: Nil)
217+
GenerateUnsafeProjection.generate(
218+
Literal.create("*\\002fCompilation error occurs/*", StringType) :: Nil)
219+
GenerateUnsafeProjection.generate(
220+
Literal.create("*\\\\002fCompilation error occurs/*", StringType) :: Nil)
221+
GenerateUnsafeProjection.generate(
222+
Literal.create("\\002A\\002FCompilation error occurs/*", StringType) :: Nil)
223+
GenerateUnsafeProjection.generate(
224+
Literal.create("\\\\002A\\002FCompilation error occurs/*", StringType) :: Nil)
225+
GenerateUnsafeProjection.generate(
226+
Literal.create("\\002A\\\\002FCompilation error occurs/*", StringType) :: Nil)
227+
228+
// \ u002X is an invalid unicode literal so it should be escaped.
229+
GenerateUnsafeProjection.generate(
230+
Literal.create("\\u002X/Compilation error occurs", StringType) :: Nil)
231+
GenerateUnsafeProjection.generate(
232+
Literal.create("\\\\u002X/Compilation error occurs", StringType) :: Nil)
233+
234+
// \ u001 is an invalid unicode literal so it should be escaped.
235+
GenerateUnsafeProjection.generate(
236+
Literal.create("\\u001/Compilation error occurs", StringType) :: Nil)
237+
GenerateUnsafeProjection.generate(
238+
Literal.create("\\\\u001/Compilation error occurs", StringType) :: Nil)
239+
240+
}
197241
}

sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala

Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2496,4 +2496,268 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
24962496
}
24972497
}
24982498

2499+
test("check code injection is prevented") {
2500+
// The end of comment (*/) should be escaped.
2501+
var literal =
2502+
"""|*/
2503+
|{
2504+
| new Object() {
2505+
| void f() { throw new RuntimeException("This exception is injected."); }
2506+
| }.f();
2507+
|}
2508+
|/*""".stripMargin
2509+
var expected =
2510+
"""|*/
2511+
|{
2512+
| new Object() {
2513+
| void f() { throw new RuntimeException("This exception is injected."); }
2514+
| }.f();
2515+
|}
2516+
|/*""".stripMargin
2517+
checkAnswer(
2518+
sql(s"SELECT '$literal' AS DUMMY"),
2519+
Row(s"$expected") :: Nil)
2520+
2521+
// `\u002A` is `*` and `\u002F` is `/`
2522+
// so if the end of comment consists of those characters in queries, we need to escape them.
2523+
literal =
2524+
"""|\\u002A/
2525+
|{
2526+
| new Object() {
2527+
| void f() { throw new RuntimeException("This exception is injected."); }
2528+
| }.f();
2529+
|}
2530+
|/*""".stripMargin
2531+
expected =
2532+
s"""|${"\\u002A/"}
2533+
|{
2534+
| new Object() {
2535+
| void f() { throw new RuntimeException("This exception is injected."); }
2536+
| }.f();
2537+
|}
2538+
|/*""".stripMargin
2539+
checkAnswer(
2540+
sql(s"SELECT '$literal' AS DUMMY"),
2541+
Row(s"$expected") :: Nil)
2542+
2543+
literal =
2544+
"""|\\\\u002A/
2545+
|{
2546+
| new Object() {
2547+
| void f() { throw new RuntimeException("This exception is injected."); }
2548+
| }.f();
2549+
|}
2550+
|/*""".stripMargin
2551+
expected =
2552+
"""|\\u002A/
2553+
|{
2554+
| new Object() {
2555+
| void f() { throw new RuntimeException("This exception is injected."); }
2556+
| }.f();
2557+
|}
2558+
|/*""".stripMargin
2559+
checkAnswer(
2560+
sql(s"SELECT '$literal' AS DUMMY"),
2561+
Row(s"$expected") :: Nil)
2562+
2563+
literal =
2564+
"""|\\u002a/
2565+
|{
2566+
| new Object() {
2567+
| void f() { throw new RuntimeException("This exception is injected."); }
2568+
| }.f();
2569+
|}
2570+
|/*""".stripMargin
2571+
expected =
2572+
s"""|${"\\u002a/"}
2573+
|{
2574+
| new Object() {
2575+
| void f() { throw new RuntimeException("This exception is injected."); }
2576+
| }.f();
2577+
|}
2578+
|/*""".stripMargin
2579+
checkAnswer(
2580+
sql(s"SELECT '$literal' AS DUMMY"),
2581+
Row(s"$expected") :: Nil)
2582+
2583+
literal =
2584+
"""|\\\\u002a/
2585+
|{
2586+
| new Object() {
2587+
| void f() { throw new RuntimeException("This exception is injected."); }
2588+
| }.f();
2589+
|}
2590+
|/*""".stripMargin
2591+
expected =
2592+
"""|\\u002a/
2593+
|{
2594+
| new Object() {
2595+
| void f() { throw new RuntimeException("This exception is injected."); }
2596+
| }.f();
2597+
|}
2598+
|/*""".stripMargin
2599+
checkAnswer(
2600+
sql(s"SELECT '$literal' AS DUMMY"),
2601+
Row(s"$expected") :: Nil)
2602+
2603+
literal =
2604+
"""|*\\u002F
2605+
|{
2606+
| new Object() {
2607+
| void f() { throw new RuntimeException("This exception is injected."); }
2608+
| }.f();
2609+
|}
2610+
|/*""".stripMargin
2611+
expected =
2612+
s"""|${"*\\u002F"}
2613+
|{
2614+
| new Object() {
2615+
| void f() { throw new RuntimeException("This exception is injected."); }
2616+
| }.f();
2617+
|}
2618+
|/*""".stripMargin
2619+
checkAnswer(
2620+
sql(s"SELECT '$literal' AS DUMMY"),
2621+
Row(s"$expected") :: Nil)
2622+
2623+
literal =
2624+
"""|*\\\\u002F
2625+
|{
2626+
| new Object() {
2627+
| void f() { throw new RuntimeException("This exception is injected."); }
2628+
| }.f();
2629+
|}
2630+
|/*""".stripMargin
2631+
expected =
2632+
"""|*\\u002F
2633+
|{
2634+
| new Object() {
2635+
| void f() { throw new RuntimeException("This exception is injected."); }
2636+
| }.f();
2637+
|}
2638+
|/*""".stripMargin
2639+
checkAnswer(
2640+
sql(s"SELECT '$literal' AS DUMMY"),
2641+
Row(s"$expected") :: Nil)
2642+
2643+
literal =
2644+
"""|*\\u002f
2645+
|{
2646+
| new Object() {
2647+
| void f() { throw new RuntimeException("This exception is injected."); }
2648+
| }.f();
2649+
|}
2650+
|/*""".stripMargin
2651+
expected =
2652+
s"""|${"*\\u002f"}
2653+
|{
2654+
| new Object() {
2655+
| void f() { throw new RuntimeException("This exception is injected."); }
2656+
| }.f();
2657+
|}
2658+
|/*""".stripMargin
2659+
checkAnswer(
2660+
sql(s"SELECT '$literal' AS DUMMY"),
2661+
Row(s"$expected") :: Nil)
2662+
2663+
literal =
2664+
"""|*\\\\u002f
2665+
|{
2666+
| new Object() {
2667+
| void f() { throw new RuntimeException("This exception is injected."); }
2668+
| }.f();
2669+
|}
2670+
|/*""".stripMargin
2671+
expected =
2672+
"""|*\\u002f
2673+
|{
2674+
| new Object() {
2675+
| void f() { throw new RuntimeException("This exception is injected."); }
2676+
| }.f();
2677+
|}
2678+
|/*""".stripMargin
2679+
checkAnswer(
2680+
sql(s"SELECT '$literal' AS DUMMY"),
2681+
Row(s"$expected") :: Nil)
2682+
2683+
literal =
2684+
"""|\\u002A\\u002F
2685+
|{
2686+
| new Object() {
2687+
| void f() { throw new RuntimeException("This exception is injected."); }
2688+
| }.f();
2689+
|}
2690+
|/*""".stripMargin
2691+
expected =
2692+
s"""|${"\\u002A\\u002F"}
2693+
|{
2694+
| new Object() {
2695+
| void f() { throw new RuntimeException("This exception is injected."); }
2696+
| }.f();
2697+
|}
2698+
|/*""".stripMargin
2699+
checkAnswer(
2700+
sql(s"SELECT '$literal' AS DUMMY"),
2701+
Row(s"$expected") :: Nil)
2702+
2703+
literal =
2704+
"""|\\\\u002A\\u002F
2705+
|{
2706+
| new Object() {
2707+
| void f() { throw new RuntimeException("This exception is injected."); }
2708+
| }.f();
2709+
|}
2710+
|/*""".stripMargin
2711+
expected =
2712+
s"""|${"\\\\u002A\\u002F"}
2713+
|{
2714+
| new Object() {
2715+
| void f() { throw new RuntimeException("This exception is injected."); }
2716+
| }.f();
2717+
|}
2718+
|/*""".stripMargin
2719+
checkAnswer(
2720+
sql(s"SELECT '$literal' AS DUMMY"),
2721+
Row(s"$expected") :: Nil)
2722+
2723+
literal =
2724+
"""|\\u002A\\\\u002F
2725+
|{
2726+
| new Object() {
2727+
| void f() { throw new RuntimeException("This exception is injected."); }
2728+
| }.f();
2729+
|}
2730+
|/*""".stripMargin
2731+
expected =
2732+
s"""|${"\\u002A\\\\u002F"}
2733+
|{
2734+
| new Object() {
2735+
| void f() { throw new RuntimeException("This exception is injected."); }
2736+
| }.f();
2737+
|}
2738+
|/*""".stripMargin
2739+
checkAnswer(
2740+
sql(s"SELECT '$literal' AS DUMMY"),
2741+
Row(s"$expected") :: Nil)
2742+
2743+
literal =
2744+
"""|\\\\u002A\\\\u002F
2745+
|{
2746+
| new Object() {
2747+
| void f() { throw new RuntimeException("This exception is injected."); }
2748+
| }.f();
2749+
|}
2750+
|/*""".stripMargin
2751+
expected =
2752+
"""|\\u002A\\u002F
2753+
|{
2754+
| new Object() {
2755+
| void f() { throw new RuntimeException("This exception is injected."); }
2756+
| }.f();
2757+
|}
2758+
|/*""".stripMargin
2759+
checkAnswer(
2760+
sql(s"SELECT '$literal' AS DUMMY"),
2761+
Row(s"$expected") :: Nil)
2762+
}
24992763
}

0 commit comments

Comments
 (0)