
Commit bafbd32

MaxGekk authored and daspalrahul committed
[SPARK-25514][SQL] Generating pretty JSON by to_json
## What changes were proposed in this pull request?

The PR introduces a new JSON option `pretty` which turns on the `DefaultPrettyPrinter` of Jackson's JSON generator. The new option is useful for exploring deeply nested columns and for converting JSON columns to a more readable representation (see the added test).

## How was this patch tested?

Added a round-trip test which converts a JSON string to its pretty representation via `from_json()` and `to_json()`.

Closes apache#22534 from MaxGekk/pretty-json.

Lead-authored-by: Maxim Gekk <[email protected]>
Co-authored-by: Maxim Gekk <[email protected]>
Signed-off-by: hyukjinkwon <[email protected]>
1 parent 315754c commit bafbd32

File tree

6 files changed: +40 -4 lines changed

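Before the per-file diffs, a minimal usage sketch of the new option, assuming Spark with this patch and a local session; the JSON literal mirrors the added test, and the column name `root`, object name, and app name are illustrative only.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{from_json, lit, schema_of_json, to_json}

object PrettyJsonRoundTrip {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("pretty-json-demo").getOrCreate()
    import spark.implicits._

    // One-line JSON in, pretty multi-line JSON out.
    val json = """[{"book":{"publisher":[{"country":"NL","year":[1981,1986,1999]}]}}]"""
    val df = Seq(json).toDF("root")

    val pretty = df.select(
      to_json(
        from_json($"root", schema_of_json(lit(json))),  // infer the schema from the literal
        Map("pretty" -> "true")).as("pretty_json"))     // enable Jackson's DefaultPrettyPrinter
    pretty.show(truncate = false)

    spark.stop()
  }
}
```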

R/pkg/R/functions.R

Lines changed: 3 additions & 2 deletions

@@ -198,8 +198,9 @@ NULL
 #' }
 #' @param ... additional argument(s). In \code{to_json} and \code{from_json}, this contains
 #'            additional named properties to control how it is converted, accepts the same
-#'            options as the JSON data source. In \code{arrays_zip}, this contains additional
-#'            Columns of arrays to be merged.
+#'            options as the JSON data source. Additionally \code{to_json} supports the "pretty"
+#'            option which enables pretty JSON generation. In \code{arrays_zip}, this contains
+#'            additional Columns of arrays to be merged.
 #' @name column_collection_functions
 #' @rdname column_collection_functions
 #' @family collection functions

python/pyspark/sql/functions.py

Lines changed: 3 additions & 1 deletion

@@ -2295,7 +2295,9 @@ def to_json(col, options={}):
     into a JSON string. Throws an exception, in the case of an unsupported type.
 
     :param col: name of column containing a struct, an array or a map.
-    :param options: options to control converting. accepts the same options as the JSON datasource
+    :param options: options to control converting. accepts the same options as the JSON datasource.
+                    Additionally the function supports the `pretty` option which enables
+                    pretty JSON generation.
 
     >>> from pyspark.sql import Row
     >>> from pyspark.sql.types import *

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala

Lines changed: 5 additions & 0 deletions

@@ -113,6 +113,11 @@ private[sql] class JSONOptions(
   }
   val lineSeparatorInWrite: String = lineSeparator.getOrElse("\n")
 
+  /**
+   * Generating JSON strings in pretty representation if the parameter is enabled.
+   */
+  val pretty: Boolean = parameters.get("pretty").map(_.toBoolean).getOrElse(false)
+
   /** Sets config options on a Jackson [[JsonFactory]]. */
   def setJacksonOptions(factory: JsonFactory): Unit = {
     factory.configure(JsonParser.Feature.ALLOW_COMMENTS, allowComments)
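The new `pretty` flag is parsed with plain Scala `String.toBoolean`; below is a standalone sketch (not Spark code) of that parsing behavior. Note that in `JSONOptions` itself the parameter map is case-insensitive for keys as well.

```scala
object PrettyOptionParsing {
  // Mirrors the parsing expression added above: a missing key defaults to false,
  // "true"/"false" values are accepted case-insensitively, and anything else
  // throws IllegalArgumentException from String.toBoolean.
  def parsePretty(parameters: Map[String, String]): Boolean =
    parameters.get("pretty").map(_.toBoolean).getOrElse(false)

  def main(args: Array[String]): Unit = {
    println(parsePretty(Map("pretty" -> "true")))   // true
    println(parsePretty(Map("pretty" -> "False")))  // false
    println(parsePretty(Map.empty))                 // false (option not set)
  }
}
```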

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala

Lines changed: 4 additions & 1 deletion

@@ -70,7 +70,10 @@ private[sql] class JacksonGenerator(
       s"Initial type ${dataType.catalogString} must be a ${MapType.simpleString}")
   }
 
-  private val gen = new JsonFactory().createGenerator(writer).setRootValueSeparator(null)
+  private val gen = {
+    val generator = new JsonFactory().createGenerator(writer).setRootValueSeparator(null)
+    if (options.pretty) generator.useDefaultPrettyPrinter() else generator
+  }
 
   private val lineSeparator: String = options.lineSeparatorInWrite
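For reference, a standalone Jackson sketch (not Spark code, assuming jackson-core on the classpath) of what `useDefaultPrettyPrinter()` changes; the field names echo the new test's data and are illustrative.

```scala
import java.io.StringWriter
import com.fasterxml.jackson.core.JsonFactory

object JacksonPrettySketch {
  def main(args: Array[String]): Unit = {
    val writer = new StringWriter()
    // Same factory call as in JacksonGenerator; the default pretty printer adds
    // newlines, two-space indentation and " : " field separators.
    val gen = new JsonFactory().createGenerator(writer).useDefaultPrettyPrinter()
    gen.writeStartObject()
    gen.writeStringField("country", "NL")
    gen.writeArrayFieldStart("year")
    Seq(1981, 1986, 1999).foreach(y => gen.writeNumber(y))
    gen.writeEndArray()
    gen.writeEndObject()
    gen.close()
    println(writer.toString)
  }
}
```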

sql/core/src/main/scala/org/apache/spark/sql/functions.scala

Lines changed: 4 additions & 0 deletions

@@ -3619,6 +3619,8 @@ object functions {
    * @param e a column containing a struct, an array or a map.
    * @param options options to control how the struct column is converted into a json string.
    *                accepts the same options and the json data source.
+   *                Additionally the function supports the `pretty` option which enables
+   *                pretty JSON generation.
    *
    * @group collection_funcs
    * @since 2.1.0
@@ -3635,6 +3637,8 @@ object functions {
    * @param e a column containing a struct, an array or a map.
    * @param options options to control how the struct column is converted into a json string.
    *                accepts the same options and the json data source.
+   *                Additionally the function supports the `pretty` option which enables
+   *                pretty JSON generation.
    *
    * @group collection_funcs
    * @since 2.1.0
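A hedged sketch of the Scala API documented above, serializing a struct column directly; the DataFrame, column names, and object name are illustrative, and only `Map("pretty" -> "true")` exercises the new option.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{struct, to_json}

object ToJsonPrettyUsage {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("to_json-pretty").getOrCreate()
    import spark.implicits._

    val df = Seq(("NL", Seq(1981, 1986, 1999))).toDF("country", "year")
    // to_json(e, options) with the new "pretty" option.
    val out = df.select(to_json(struct($"country", $"year"), Map("pretty" -> "true")).as("json"))
    out.show(truncate = false)

    spark.stop()
  }
}
```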

sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala

Lines changed: 21 additions & 0 deletions

@@ -518,4 +518,25 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
       jsonDF.select(to_json(from_json($"a", schema))),
       Seq(Row(json)))
   }
+
+  test("pretty print - roundtrip from_json -> to_json") {
+    val json = """[{"book":{"publisher":[{"country":"NL","year":[1981,1986,1999]}]}}]"""
+    val jsonDF = Seq(json).toDF("root")
+    val expected =
+      """[ {
+        |  "book" : {
+        |    "publisher" : [ {
+        |      "country" : "NL",
+        |      "year" : [ 1981, 1986, 1999 ]
+        |    } ]
+        |  }
+        |} ]""".stripMargin
+
+    checkAnswer(
+      jsonDF.select(
+        to_json(
+          from_json($"root", schema_of_json(lit(json))),
+          Map("pretty" -> "true"))),
+      Seq(Row(expected)))
+  }
 }
