Skip to content

Commit f706ce3

Browse files
author
Eric Wasserman
committed
more efficient fix; add test
1 parent 8ef9b6a commit f706ce3

File tree

2 files changed

+22
-3
lines changed

2 files changed

+22
-3
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
package org.apache.spark.sql.catalyst.expressions
1919

20-
import java.io.{ByteArrayOutputStream, CharArrayWriter, StringWriter}
20+
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, CharArrayWriter, InputStreamReader, StringWriter}
2121

2222
import scala.util.parsing.combinator.RegexParsers
2323

@@ -149,7 +149,8 @@ case class GetJsonObject(json: Expression, path: Expression)
149149

150150
if (parsed.isDefined) {
151151
try {
152-
Utils.tryWithResource(jsonFactory.createParser(jsonStr.toString)) { parser =>
152+
Utils.tryWithResource(jsonFactory.createParser(new InputStreamReader(
153+
new ByteArrayInputStream(jsonStr.getBytes), "UTF-8"))) { parser =>
153154
val output = new ByteArrayOutputStream()
154155
val matched = Utils.tryWithResource(
155156
jsonFactory.createGenerator(output, JsonEncoding.UTF8)) { generator =>
@@ -393,7 +394,8 @@ case class JsonTuple(children: Seq[Expression])
393394
}
394395

395396
try {
396-
Utils.tryWithResource(jsonFactory.createParser(json.toString)) {
397+
Utils.tryWithResource(jsonFactory.createParser(new InputStreamReader(
398+
new ByteArrayInputStream(json.getBytes), "UTF-8"))) {
397399
parser => parseRow(parser, input)
398400
}
399401
} catch {

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
3939
|"fb:testid":"1234"}
4040
|""".stripMargin
4141

42+
/* Invalid JSON with leading NUL (U+0000) characters would trigger java.io.CharConversionException
43+
in Jackson's JsonFactory.createParser(byte[]) due to RFC-4627 encoding detection */
44+
// Malformed JSON fixture: three NUL characters, 'A', U+0001, then "AAA".
// Written with Unicode escapes because octal escapes are deprecated since Scala 2.11
// and rejected by Scala 2.13+; the resulting string is identical.
val badJson = "\u0000\u0000\u0000A\u0001AAA"
45+
4246
test("$.store.bicycle") {
4347
checkEvaluation(
4448
GetJsonObject(Literal(json), Literal("$.store.bicycle")),
@@ -224,6 +228,13 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
224228
null)
225229
}
226230

231+
test("SPARK-16548: character conversion") {
232+
checkEvaluation(
233+
GetJsonObject(Literal(badJson), Literal("$.a")),
234+
null
235+
)
236+
}
237+
227238
test("non foldable literal") {
228239
checkEvaluation(
229240
GetJsonObject(NonFoldableLiteral(json), NonFoldableLiteral("$.fb:testid")),
@@ -340,6 +351,12 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
340351
InternalRow(null, null, null, null, null))
341352
}
342353

354+
test("SPARK-16548: json_tuple - invalid json with leading nulls") {
355+
checkJsonTuple(
356+
JsonTuple(Literal(badJson) :: jsonTupleQuery),
357+
InternalRow(null, null, null, null, null))
358+
}
359+
343360
test("json_tuple - preserve newlines") {
344361
checkJsonTuple(
345362
JsonTuple(Literal("{\"a\":\"b\nc\"}") :: Literal("a") :: Nil),

0 commit comments

Comments
 (0)