diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 572dee50127b8..6425c9d26bef3 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -198,8 +198,9 @@ NULL #' } #' @param ... additional argument(s). In \code{to_json} and \code{from_json}, this contains #' additional named properties to control how it is converted, accepts the same -#' options as the JSON data source. In \code{arrays_zip}, this contains additional -#' Columns of arrays to be merged. +#' options as the JSON data source. Additionally \code{to_json} supports the "pretty" +#' option which enables pretty JSON generation. In \code{arrays_zip}, this contains +#' additional Columns of arrays to be merged. #' @name column_collection_functions #' @rdname column_collection_functions #' @family collection functions diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 6da5237d18de4..1c3d9725b285b 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2295,7 +2295,9 @@ def to_json(col, options={}): into a JSON string. Throws an exception, in the case of an unsupported type. :param col: name of column containing a struct, an array or a map. - :param options: options to control converting. accepts the same options as the JSON datasource + :param options: options to control converting. accepts the same options as the JSON datasource. + Additionally the function supports the `pretty` option which enables + pretty JSON generation. >>> from pyspark.sql import Row >>> from pyspark.sql.types import * diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala index 47eeb70e00427..64152e04928d2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala @@ -113,6 +113,11 @@ private[sql] class JSONOptions( } val lineSeparatorInWrite: String = lineSeparator.getOrElse("\n") + /** + * Generating JSON strings in pretty representation if the parameter is enabled. + */ + val pretty: Boolean = parameters.get("pretty").map(_.toBoolean).getOrElse(false) + /** Sets config options on a Jackson [[JsonFactory]]. */ def setJacksonOptions(factory: JsonFactory): Unit = { factory.configure(JsonParser.Feature.ALLOW_COMMENTS, allowComments) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala index 9b86d865622dc..d02a2be8ddad6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala @@ -70,7 +70,10 @@ private[sql] class JacksonGenerator( s"Initial type ${dataType.catalogString} must be a ${MapType.simpleString}") } - private val gen = new JsonFactory().createGenerator(writer).setRootValueSeparator(null) + private val gen = { + val generator = new JsonFactory().createGenerator(writer).setRootValueSeparator(null) + if (options.pretty) generator.useDefaultPrettyPrinter() else generator + } private val lineSeparator: String = options.lineSeparatorInWrite diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 10b67d7a1ca54..4c58e77df485e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -3619,6 +3619,8 @@ object functions { * @param e a column containing a struct, an array or a map. * @param options options to control how the struct column is converted into a json string. * accepts the same options and the json data source. + * Additionally the function supports the `pretty` option which enables + * pretty JSON generation. * * @group collection_funcs * @since 2.1.0 @@ -3635,6 +3637,8 @@ object functions { * @param e a column containing a struct, an array or a map. * @param options options to control how the struct column is converted into a json string. * accepts the same options and the json data source. + * Additionally the function supports the `pretty` option which enables + * pretty JSON generation. * * @group collection_funcs * @since 2.1.0 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index fe4bf15fa3921..853bc182f2f4a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -518,4 +518,25 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext { jsonDF.select(to_json(from_json($"a", schema))), Seq(Row(json))) } + + test("pretty print - roundtrip from_json -> to_json") { + val json = """[{"book":{"publisher":[{"country":"NL","year":[1981,1986,1999]}]}}]""" + val jsonDF = Seq(json).toDF("root") + val expected = + """[ { + | "book" : { + | "publisher" : [ { + | "country" : "NL", + | "year" : [ 1981, 1986, 1999 ] + | } ] + | } + |} ]""".stripMargin + + checkAnswer( + jsonDF.select( + to_json( + from_json($"root", schema_of_json(lit(json))), + Map("pretty" -> "true"))), + Seq(Row(expected))) + } }