
Commit bafbd32

MaxGekk authored and daspalrahul committed
[SPARK-25514][SQL] Generating pretty JSON by to_json
## What changes were proposed in this pull request?

The PR introduces a new JSON option `pretty` which turns on the `DefaultPrettyPrinter` of Jackson's JSON generator. The new option is useful for exploring deeply nested columns and for converting JSON columns to a more readable representation (see the added test).

## How was this patch tested?

Added a round-trip test which converts a JSON string to its pretty representation via `from_json()` and `to_json()`.

Closes apache#22534 from MaxGekk/pretty-json.

Lead-authored-by: Maxim Gekk <[email protected]>
Co-authored-by: Maxim Gekk <[email protected]>
Signed-off-by: hyukjinkwon <[email protected]>
1 parent 315754c commit bafbd32

File tree

6 files changed: +40 -4 lines changed

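Before the per-file diffs, a minimal usage sketch of the new option, assuming Spark with this patch and a local session; the JSON literal mirrors the added test, and the column name `root`, object name, and app name are illustrative only.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{from_json, lit, schema_of_json, to_json}

object PrettyJsonRoundTrip {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("pretty-json-demo").getOrCreate()
    import spark.implicits._

    // One-line JSON in, pretty multi-line JSON out.
    val json = """[{"book":{"publisher":[{"country":"NL","year":[1981,1986,1999]}]}}]"""
    val df = Seq(json).toDF("root")

    val pretty = df.select(
      to_json(
        from_json($"root", schema_of_json(lit(json))),  // infer the schema from the literal
        Map("pretty" -> "true")).as("pretty_json"))     // enable Jackson's DefaultPrettyPrinter
    pretty.show(truncate = false)

    spark.stop()
  }
}
```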

R/pkg/R/functions.R

Lines changed: 3 additions & 2 deletions

@@ -198,8 +198,9 @@ NULL
 #' }
 #' @param ... additional argument(s). In \code{to_json} and \code{from_json}, this contains
 #'            additional named properties to control how it is converted, accepts the same
-#'            options as the JSON data source. In \code{arrays_zip}, this contains additional
-#'            Columns of arrays to be merged.
+#'            options as the JSON data source. Additionally \code{to_json} supports the "pretty"
+#'            option which enables pretty JSON generation. In \code{arrays_zip}, this contains
+#'            additional Columns of arrays to be merged.
 #' @name column_collection_functions
 #' @rdname column_collection_functions
 #' @family collection functions

python/pyspark/sql/functions.py

Lines changed: 3 additions & 1 deletion

@@ -2295,7 +2295,9 @@ def to_json(col, options={}):
     into a JSON string. Throws an exception, in the case of an unsupported type.
 
     :param col: name of column containing a struct, an array or a map.
-    :param options: options to control converting. accepts the same options as the JSON datasource
+    :param options: options to control converting. accepts the same options as the JSON datasource.
+                    Additionally the function supports the `pretty` option which enables
+                    pretty JSON generation.
 
     >>> from pyspark.sql import Row
     >>> from pyspark.sql.types import *

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala

Lines changed: 5 additions & 0 deletions

@@ -113,6 +113,11 @@ private[sql] class JSONOptions(
   }
   val lineSeparatorInWrite: String = lineSeparator.getOrElse("\n")
 
+  /**
+   * Generating JSON strings in pretty representation if the parameter is enabled.
+   */
+  val pretty: Boolean = parameters.get("pretty").map(_.toBoolean).getOrElse(false)
+
   /** Sets config options on a Jackson [[JsonFactory]]. */
   def setJacksonOptions(factory: JsonFactory): Unit = {
     factory.configure(JsonParser.Feature.ALLOW_COMMENTS, allowComments)
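The new `pretty` flag is parsed with plain Scala `String.toBoolean`; below is a standalone sketch (not Spark code) of that parsing behavior. Note that in `JSONOptions` itself the parameter map is case-insensitive for keys as well.

```scala
object PrettyOptionParsing {
  // Mirrors the parsing expression added above: a missing key defaults to false,
  // "true"/"false" values are accepted case-insensitively, and anything else
  // throws IllegalArgumentException from String.toBoolean.
  def parsePretty(parameters: Map[String, String]): Boolean =
    parameters.get("pretty").map(_.toBoolean).getOrElse(false)

  def main(args: Array[String]): Unit = {
    println(parsePretty(Map("pretty" -> "true")))   // true
    println(parsePretty(Map("pretty" -> "False")))  // false
    println(parsePretty(Map.empty))                 // false (option not set)
  }
}
```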

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala

Lines changed: 4 additions & 1 deletion

@@ -70,7 +70,10 @@ private[sql] class JacksonGenerator(
       s"Initial type ${dataType.catalogString} must be a ${MapType.simpleString}")
   }
 
-  private val gen = new JsonFactory().createGenerator(writer).setRootValueSeparator(null)
+  private val gen = {
+    val generator = new JsonFactory().createGenerator(writer).setRootValueSeparator(null)
+    if (options.pretty) generator.useDefaultPrettyPrinter() else generator
+  }
 
   private val lineSeparator: String = options.lineSeparatorInWrite
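For reference, a standalone Jackson sketch (not Spark code, assuming jackson-core on the classpath) of what `useDefaultPrettyPrinter()` changes; the field names echo the new test's data and are illustrative.

```scala
import java.io.StringWriter
import com.fasterxml.jackson.core.JsonFactory

object JacksonPrettySketch {
  def main(args: Array[String]): Unit = {
    val writer = new StringWriter()
    // Same factory call as in JacksonGenerator; the default pretty printer adds
    // newlines, two-space indentation and " : " field separators.
    val gen = new JsonFactory().createGenerator(writer).useDefaultPrettyPrinter()
    gen.writeStartObject()
    gen.writeStringField("country", "NL")
    gen.writeArrayFieldStart("year")
    Seq(1981, 1986, 1999).foreach(y => gen.writeNumber(y))
    gen.writeEndArray()
    gen.writeEndObject()
    gen.close()
    println(writer.toString)
  }
}
```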

sql/core/src/main/scala/org/apache/spark/sql/functions.scala

Lines changed: 4 additions & 0 deletions

@@ -3619,6 +3619,8 @@ object functions {
    * @param e a column containing a struct, an array or a map.
    * @param options options to control how the struct column is converted into a json string.
    *                accepts the same options and the json data source.
+   *                Additionally the function supports the `pretty` option which enables
+   *                pretty JSON generation.
    *
    * @group collection_funcs
    * @since 2.1.0
@@ -3635,6 +3637,8 @@ object functions {
    * @param e a column containing a struct, an array or a map.
    * @param options options to control how the struct column is converted into a json string.
    *                accepts the same options and the json data source.
+   *                Additionally the function supports the `pretty` option which enables
+   *                pretty JSON generation.
    *
    * @group collection_funcs
    * @since 2.1.0
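A hedged sketch of the Scala API documented above, serializing a struct column directly; the DataFrame, column names, and object name are illustrative, and only `Map("pretty" -> "true")` exercises the new option.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{struct, to_json}

object ToJsonPrettyUsage {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("to_json-pretty").getOrCreate()
    import spark.implicits._

    val df = Seq(("NL", Seq(1981, 1986, 1999))).toDF("country", "year")
    // to_json(e, options) with the new "pretty" option.
    val out = df.select(to_json(struct($"country", $"year"), Map("pretty" -> "true")).as("json"))
    out.show(truncate = false)

    spark.stop()
  }
}
```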

sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala

Lines changed: 21 additions & 0 deletions

@@ -518,4 +518,25 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
       jsonDF.select(to_json(from_json($"a", schema))),
       Seq(Row(json)))
   }
+
+  test("pretty print - roundtrip from_json -> to_json") {
+    val json = """[{"book":{"publisher":[{"country":"NL","year":[1981,1986,1999]}]}}]"""
+    val jsonDF = Seq(json).toDF("root")
+    val expected =
+      """[ {
+        |  "book" : {
+        |    "publisher" : [ {
+        |      "country" : "NL",
+        |      "year" : [ 1981, 1986, 1999 ]
+        |    } ]
+        |  }
+        |} ]""".stripMargin
+
+    checkAnswer(
+      jsonDF.select(
+        to_json(
+          from_json($"root", schema_of_json(lit(json))),
+          Map("pretty" -> "true"))),
+      Seq(Row(expected)))
+  }
 }
