From 826e8c3dd5b7e54ebdceee74f30798e1b01bcaed Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sun, 23 Sep 2018 20:49:58 +0200 Subject: [PATCH 1/4] Added a round trip test - from_json and to_json --- .../scala/org/apache/spark/sql/JsonFunctionsSuite.scala | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index fe4bf15fa3921..9b484f520fbd5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -518,4 +518,13 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext { jsonDF.select(to_json(from_json($"a", schema))), Seq(Row(json))) } + + test("pretty print - roundtrip from_json -> to_json") { + val json = """[{"book":{"publisher":[{"country":"NL","year":[1981,1986,1999]}]}}]""" + val jsonDF = Seq(json).toDF("root") + + checkAnswer( + jsonDF.select(to_json(from_json($"root", schema_of_json(lit(json))))), + Seq(Row(json))) + } } From 051c8fd47741637fc9ace6afc059b4b1d18471f5 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sun, 23 Sep 2018 21:02:05 +0200 Subject: [PATCH 2/4] Support the pretty option --- .../spark/sql/catalyst/json/JSONOptions.scala | 5 +++++ .../sql/catalyst/json/JacksonGenerator.scala | 5 ++++- .../apache/spark/sql/JsonFunctionsSuite.scala | 16 ++++++++++++++-- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala index 47eeb70e00427..694c0f1f34396 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala @@ -113,6 +113,11 @@ private[sql] class JSONOptions( } val lineSeparatorInWrite: String = 
lineSeparator.getOrElse("\n") + /** + * Generating JSON strings in pretty representation if the parameter enabled. + */ + val pretty: Boolean = parameters.get("pretty").map(_.toBoolean).getOrElse(false) + /** Sets config options on a Jackson [[JsonFactory]]. */ def setJacksonOptions(factory: JsonFactory): Unit = { factory.configure(JsonParser.Feature.ALLOW_COMMENTS, allowComments) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala index 9b86d865622dc..d02a2be8ddad6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala @@ -70,7 +70,10 @@ private[sql] class JacksonGenerator( s"Initial type ${dataType.catalogString} must be a ${MapType.simpleString}") } - private val gen = new JsonFactory().createGenerator(writer).setRootValueSeparator(null) + private val gen = { + val generator = new JsonFactory().createGenerator(writer).setRootValueSeparator(null) + if (options.pretty) generator.useDefaultPrettyPrinter() else generator + } private val lineSeparator: String = options.lineSeparatorInWrite diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 9b484f520fbd5..853bc182f2f4a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -522,9 +522,21 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext { test("pretty print - roundtrip from_json -> to_json") { val json = """[{"book":{"publisher":[{"country":"NL","year":[1981,1986,1999]}]}}]""" val jsonDF = Seq(json).toDF("root") + val expected = + """[ { + | "book" : { + | "publisher" : [ { + | "country" : "NL", + | "year" : [ 1981, 1986, 
1999 ] + | } ] + | } + |} ]""".stripMargin checkAnswer( - jsonDF.select(to_json(from_json($"root", schema_of_json(lit(json))))), - Seq(Row(json))) + jsonDF.select( + to_json( + from_json($"root", schema_of_json(lit(json))), + Map("pretty" -> "true"))), + Seq(Row(expected))) } } From f2d7b7e72b8dbba4043cce7d99ae63870479269c Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 24 Sep 2018 20:04:50 +0200 Subject: [PATCH 3/4] Description of the pretty option --- R/pkg/R/functions.R | 5 +++-- python/pyspark/sql/functions.py | 4 +++- sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 4 ++++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 572dee50127b8..6425c9d26bef3 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -198,8 +198,9 @@ NULL #' } #' @param ... additional argument(s). In \code{to_json} and \code{from_json}, this contains #' additional named properties to control how it is converted, accepts the same -#' options as the JSON data source. In \code{arrays_zip}, this contains additional -#' Columns of arrays to be merged. +#' options as the JSON data source. Additionally \code{to_json} supports the "pretty" +#' option which enables pretty JSON generation. In \code{arrays_zip}, this contains +#' additional Columns of arrays to be merged. #' @name column_collection_functions #' @rdname column_collection_functions #' @family collection functions diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 6da5237d18de4..1c3d9725b285b 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2295,7 +2295,9 @@ def to_json(col, options={}): into a JSON string. Throws an exception, in the case of an unsupported type. :param col: name of column containing a struct, an array or a map. - :param options: options to control converting. accepts the same options as the JSON datasource + :param options: options to control converting. 
accepts the same options as the JSON datasource. + Additionally the function supports the `pretty` option which enables + pretty JSON generation. >>> from pyspark.sql import Row >>> from pyspark.sql.types import * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 10b67d7a1ca54..4c58e77df485e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -3619,6 +3619,8 @@ object functions { * @param e a column containing a struct, an array or a map. * @param options options to control how the struct column is converted into a json string. * accepts the same options and the json data source. + * Additionally the function supports the `pretty` option which enables + * pretty JSON generation. * * @group collection_funcs * @since 2.1.0 @@ -3635,6 +3637,8 @@ object functions { * @param e a column containing a struct, an array or a map. * @param options options to control how the struct column is converted into a json string. * accepts the same options and the json data source. + * Additionally the function supports the `pretty` option which enables + * pretty JSON generation. 
* * @group collection_funcs * @since 2.1.0 From 80bb0a180173f0f84ac2f1638b067c82f3c96a25 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 25 Sep 2018 11:02:54 +0200 Subject: [PATCH 4/4] Update JSONOptions.scala Fix a typo --- .../scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala index 694c0f1f34396..64152e04928d2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala @@ -114,7 +114,7 @@ private[sql] class JSONOptions( val lineSeparatorInWrite: String = lineSeparator.getOrElse("\n") /** - * Generating JSON strings in pretty representation if the parameter enabled. + * Generating JSON strings in pretty representation if the parameter is enabled. */ val pretty: Boolean = parameters.get("pretty").map(_.toBoolean).getOrElse(false)