From 083fe0f4d57436b0bc939d40a7c9715e21b4b18f Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Sun, 22 May 2016 21:31:57 +0900
Subject: [PATCH 1/3] Add tests for writing and reading back empty data for
 Parquet, Json and Text data sources

---
 .../sql/sources/JsonHadoopFsRelationSuite.scala    | 15 +++++++++++++++
 .../sources/ParquetHadoopFsRelationSuite.scala     | 16 ++++++++++++++++
 .../SimpleTextHadoopFsRelationSuite.scala          | 16 ++++++++++++++++
 3 files changed, 47 insertions(+)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala
index ef37787137d0..d1eee4f115d5 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala
@@ -108,4 +108,19 @@ class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
       )
     }
   }
+
+  test("Write and read back empty data") {
+    withTempPath { path =>
+      val emptyDf = spark.range(10).limit(0).toDF()
+      emptyDf.write
+        .format(dataSourceName)
+        .save(path.getCanonicalPath)
+
+      val copyEmptyDf = spark.read
+        .format(dataSourceName)
+        .load(path.getCanonicalPath)
+
+      assert(copyEmptyDf.count() === 0)
+    }
+  }
 }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala
index 4b4852c1d793..a34c26db81fa 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala
@@ -228,4 +228,20 @@ class ParquetHadoopFsRelationSuite extends HadoopFsRelationTest {
       }
     }
   }
+
+
+  test("Write and read back empty data") {
+    withTempPath { path =>
+      val emptyDf = spark.range(10).limit(0).toDF()
+      emptyDf.write
+        .format(dataSourceName)
+        .save(path.getCanonicalPath)
+
+      val copyEmptyDf = spark.read
+        .format(dataSourceName)
+        .load(path.getCanonicalPath)
+
+      checkAnswer(emptyDf, copyEmptyDf)
+    }
+  }
 }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextHadoopFsRelationSuite.scala
index fa64c7dcfab6..999099e085c8 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextHadoopFsRelationSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextHadoopFsRelationSuite.scala
@@ -88,4 +88,20 @@ class SimpleTextHadoopFsRelationSuite extends HadoopFsRelationTest with Predicat
       assert(SimpleTextRelation.lastHadoopConf.get.get("some-random-read-option") == "hahah-READ")
     }
   }
+
+  test("Write and read back empty data") {
+    withTempPath { path =>
+      val emptyDf = spark.range(10).limit(0).toDF()
+      emptyDf.write
+        .format(dataSourceName)
+        .save(path.getCanonicalPath)
+
+      val copyEmptyDf = spark.read
+        .format(dataSourceName)
+        .option("dataSchema", emptyDf.schema.json)
+        .load(path.getCanonicalPath)
+
+      checkAnswer(emptyDf, copyEmptyDf)
+    }
+  }
 }

From d45009467f09b2853f0c0e0e4963386ae42c60a9 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Sun, 22 May 2016 21:36:02 +0900
Subject: [PATCH 2/3] Remove extra newline between tests

---
 .../apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala
index a34c26db81fa..cbb2ed38ea45 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala
@@ -229,7 +229,6 @@ class ParquetHadoopFsRelationSuite extends HadoopFsRelationTest {
     }
   }
 
-
   test("Write and read back empty data") {
     withTempPath { path =>
       val emptyDf = spark.range(10).limit(0).toDF()

From c51fbe3243fe4c4c6929d7e462f760d708cf43b4 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Tue, 24 May 2016 17:52:30 +0900
Subject: [PATCH 3/3] Move `text` test to `TextSuite` which was in wrong
 location.

---
 .../execution/datasources/text/TextSuite.scala | 16 ++++++++++++++++
 .../SimpleTextHadoopFsRelationSuite.scala      | 16 ----------------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala
index b5e51e963f1b..ec0ee21bc3bb 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala
@@ -126,6 +126,22 @@ class TextSuite extends QueryTest with SharedSQLContext {
     }
   }
 
+  test("Write and read back empty data") {
+    withTempPath { path =>
+      val df = spark.read.format("text").load(testFile)
+      val emptyDf = df.limit(0)
+      emptyDf.write
+        .format("text")
+        .save(path.getCanonicalPath)
+
+      val copyEmptyDf = spark.read
+        .format("text")
+        .load(path.getCanonicalPath)
+
+      checkAnswer(emptyDf, copyEmptyDf)
+    }
+  }
+
   private def testFile: String = {
     Thread.currentThread().getContextClassLoader.getResource("text-suite.txt").toString
   }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextHadoopFsRelationSuite.scala
index 999099e085c8..fa64c7dcfab6 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextHadoopFsRelationSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextHadoopFsRelationSuite.scala
@@ -88,20 +88,4 @@ class SimpleTextHadoopFsRelationSuite extends HadoopFsRelationTest with Predicat
       assert(SimpleTextRelation.lastHadoopConf.get.get("some-random-read-option") == "hahah-READ")
     }
   }
-
-  test("Write and read back empty data") {
-    withTempPath { path =>
-      val emptyDf = spark.range(10).limit(0).toDF()
-      emptyDf.write
-        .format(dataSourceName)
-        .save(path.getCanonicalPath)
-
-      val copyEmptyDf = spark.read
-        .format(dataSourceName)
-        .option("dataSchema", emptyDf.schema.json)
-        .load(path.getCanonicalPath)
-
-      checkAnswer(emptyDf, copyEmptyDf)
-    }
-  }
 }
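
Note: every test added above follows the same write-then-read round trip on an empty DataFrame. Below is a minimal standalone sketch of that pattern, assuming a local SparkSession and the built-in "parquet" source. The object name EmptyDataRoundTrip and the temp-directory handling are illustrative only and not part of the patches; the suites instead run under the HadoopFsRelationTest harness, which supplies `spark`, `dataSourceName`, and `withTempPath`.

import java.nio.file.Files

import org.apache.spark.sql.SparkSession

object EmptyDataRoundTrip {
  def main(args: Array[String]): Unit = {
    // Local session for the sketch; the test suites get `spark` from the harness.
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("empty-data-round-trip")
      .getOrCreate()

    // An empty DataFrame that still carries a schema (a single `id` column).
    val emptyDf = spark.range(10).limit(0).toDF()

    // Illustrative output location; the suites use `withTempPath` instead.
    val path = Files.createTempDirectory("empty-data").resolve("out").toString

    // Write zero rows; the source is still expected to persist file metadata.
    emptyDf.write.format("parquet").save(path)

    // Read the data back and check that the round trip preserves emptiness.
    val copyEmptyDf = spark.read.format("parquet").load(path)
    assert(copyEmptyDf.count() == 0)

    spark.stop()
  }
}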