Skip to content

Commit 149cd69

Browse files
gatorsmile authored and davies committed
[SPARK-12028] [SQL] get_json_object returns an incorrect result when the value is a null literal
When calling `get_json_object` for the following two cases, both results are `"null"`:

```scala
val tuple: Seq[(String, String)] = ("5", """{"f1": null}""") :: Nil
val df: DataFrame = tuple.toDF("key", "jstring")
val res = df.select(functions.get_json_object($"jstring", "$.f1")).collect()
```

```scala
val tuple2: Seq[(String, String)] = ("5", """{"f1": "null"}""") :: Nil
val df2: DataFrame = tuple2.toDF("key", "jstring")
val res3 = df2.select(functions.get_json_object($"jstring", "$.f1")).collect()
```

Only the second case, where the value is the string `"null"`, should return `"null"`; the first case, where the value is a JSON `null` literal, should return a null result. Fixed the problem and also added a test case.

Author: gatorsmile <[email protected]>

Closes #10018 from gatorsmile/get_json_object.
1 parent b992152 commit 149cd69

File tree

2 files changed

+25
-2
lines changed

2 files changed

+25
-2
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -298,8 +298,11 @@ case class GetJsonObject(json: Expression, path: Expression)
298298

299299
case (FIELD_NAME, Named(name) :: xs) if p.getCurrentName == name =>
300300
// exact field match
301-
p.nextToken()
302-
evaluatePath(p, g, style, xs)
301+
if (p.nextToken() != JsonToken.VALUE_NULL) {
302+
evaluatePath(p, g, style, xs)
303+
} else {
304+
false
305+
}
303306

304307
case (FIELD_NAME, Wildcard :: xs) =>
305308
// wildcard field match

sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,26 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
3939
("6", "[invalid JSON string]") ::
4040
Nil
4141

42+
test("function get_json_object - null") {
43+
val df: DataFrame = tuples.toDF("key", "jstring")
44+
val expected =
45+
Row("1", "value1", "value2", "3", null, "5.23") ::
46+
Row("2", "value12", "2", "value3", "4.01", null) ::
47+
Row("3", "value13", "2", "value33", "value44", "5.01") ::
48+
Row("4", null, null, null, null, null) ::
49+
Row("5", "", null, null, null, null) ::
50+
Row("6", null, null, null, null, null) ::
51+
Nil
52+
53+
checkAnswer(
54+
df.select($"key", functions.get_json_object($"jstring", "$.f1"),
55+
functions.get_json_object($"jstring", "$.f2"),
56+
functions.get_json_object($"jstring", "$.f3"),
57+
functions.get_json_object($"jstring", "$.f4"),
58+
functions.get_json_object($"jstring", "$.f5")),
59+
expected)
60+
}
61+
4262
test("json_tuple select") {
4363
val df: DataFrame = tuples.toDF("key", "jstring")
4464
val expected =

0 commit comments

Comments
 (0)