From 739bbf0a2ebe5306e700e6d8b1971018b2709303 Mon Sep 17 00:00:00 2001 From: Branden Smith Date: Mon, 28 Jan 2019 13:04:22 +0000 Subject: [PATCH 1/2] add tests from #23665 --- .../src/test/resources/test-data/with-empty-line.json | 7 +++++++ .../sql/execution/datasources/json/JsonSuite.scala | 11 +++++++++++ 2 files changed, 18 insertions(+) create mode 100644 sql/core/src/test/resources/test-data/with-empty-line.json diff --git a/sql/core/src/test/resources/test-data/with-empty-line.json b/sql/core/src/test/resources/test-data/with-empty-line.json new file mode 100644 index 0000000000000..41573aa02a59e --- /dev/null +++ b/sql/core/src/test/resources/test-data/with-empty-line.json @@ -0,0 +1,7 @@ +{ "a" : 1 , "b" : 2 , "c" : 3 } + + { "a" : 4 , "b" : 5 , "c" : 6 } + +{ "a" : 7 , "b" : 8 , "c" : 9 } + + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 49dd9c22e831b..16c776d77a3c6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -2426,6 +2426,17 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { countForMalformedJSON(0, Seq("")) } + test("SPARK-26745: count() for non-multiline input with empty lines") { + val df = spark.read.json(testFile("test-data/with-empty-line.json")) + val withEmptyLineData = Row(1, 2, 3) :: Row(4, 5, 6) :: Row(7, 8, 9) :: Nil + // important to do this .count() first, prior to caching/persisting/computing/collecting, to + // test the non-parsed-count pathway + assert(df.count() === withEmptyLineData.length, + "JSON DataFrame unparsed-count should exclude whitespace-only lines") + df.createOrReplaceTempView("jsonWithEmptyLineTable") + checkAnswer(sql("select * from jsonWithEmptyLineTable"), withEmptyLineData) + } + test("SPARK-25040: empty strings should be disallowed") { def failedOnEmptyString(dataType: DataType): Unit = { val df = spark.read.schema(s"a ${dataType.catalogString}") From 63ab5bd53bb81a16f6a23f7d76f091feb36e5031 Mon Sep 17 00:00:00 2001 From: Branden Smith Date: Wed, 6 Feb 2019 01:43:51 +0000 Subject: [PATCH 2/2] internalize testdata for JsonSuite SPARK-26745 test --- .../resources/test-data/with-empty-line.json | 7 ------- .../execution/datasources/json/JsonSuite.scala | 17 +++++++++-------- 2 files changed, 9 insertions(+), 15 deletions(-) delete mode 100644 sql/core/src/test/resources/test-data/with-empty-line.json diff --git a/sql/core/src/test/resources/test-data/with-empty-line.json b/sql/core/src/test/resources/test-data/with-empty-line.json deleted file mode 100644 index 41573aa02a59e..0000000000000 --- a/sql/core/src/test/resources/test-data/with-empty-line.json +++ /dev/null @@ -1,7 +0,0 @@ -{ "a" : 1 , "b" : 2 , "c" : 3 } - - { "a" : 4 , "b" : 5 , "c" : 6 } - -{ "a" : 7 , "b" : 8 , "c" : 9 } - - diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 16c776d77a3c6..69761775e3d58 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -2427,14 +2427,15 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { } test("SPARK-26745: count() for non-multiline input with empty lines") { - val df = spark.read.json(testFile("test-data/with-empty-line.json")) - val withEmptyLineData = Row(1, 2, 3) :: Row(4, 5, 6) :: Row(7, 8, 9) :: Nil - // important to do this .count() first, prior to caching/persisting/computing/collecting, to - // test the non-parsed-count pathway - assert(df.count() === withEmptyLineData.length, - "JSON DataFrame unparsed-count should exclude whitespace-only lines") - df.createOrReplaceTempView("jsonWithEmptyLineTable") - checkAnswer(sql("select * from jsonWithEmptyLineTable"), withEmptyLineData) + withTempPath { tempPath => + val path = tempPath.getCanonicalPath + Seq("""{ "a" : 1 }""", "", """ { "a" : 2 }""", " \t ") + .toDS() + .repartition(1) + .write + .text(path) + assert(spark.read.json(path).count() === 2) + } } test("SPARK-25040: empty strings should be disallowed") {