From 8d021e47e9a4e95ade99d617c77ef1e17245a796 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Fri, 17 Jun 2016 11:24:42 -0700 Subject: [PATCH 1/8] test cases --- .../datasources/parquet/ParquetOptions.scala | 2 +- .../execution/datasources/csv/CSVSuite.scala | 12 ++++++++++ .../datasources/json/JsonSuite.scala | 12 ++++++++++ .../datasources/parquet/ParquetIOSuite.scala | 23 +++++++++++++++++++ .../datasources/text/TextSuite.scala | 23 +++++++++++++++++++ .../spark/sql/hive/orc/OrcOptions.scala | 2 +- 6 files changed, 72 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala index dd2e915e7b7f9..34aafaf355e19 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala @@ -40,7 +40,7 @@ private[sql] class ParquetOptions( if (!shortParquetCompressionCodecNames.contains(codecName)) { val availableCodecs = shortParquetCompressionCodecNames.keys.map(_.toLowerCase) throw new IllegalArgumentException(s"Codec [$codecName] " + - s"is not available. Available codecs are ${availableCodecs.mkString(", ")}.") + s"is not available. Known codecs are ${availableCodecs.mkString(", ")}.") } shortParquetCompressionCodecNames(codecName).name() } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index f170065132acd..34b8a342dc671 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -342,6 +342,18 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { } } + test("save API - empty path or illegal path") { + var e = intercept[IllegalArgumentException] { + spark.range(1).coalesce(1).write.format("csv").save() + }.getMessage + assert(e.contains("'path' is not specified")) + + e = intercept[IllegalArgumentException] { + spark.range(1).coalesce(1).write.csv("") + }.getMessage + assert(e.contains("Can not create a Path from an empty string")) + } + test("save csv with quote") { withTempDir { dir => val csvDir = new File(dir, "csv").getCanonicalPath diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 9f35c02d48762..24c955dfd2e09 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -1662,4 +1662,16 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { assert(df.schema.size === 2) df.collect() } + + test("save API - empty path or illegal path") { + var e = intercept[IllegalArgumentException] { + spark.range(1).coalesce(1).write.format("json").save() + }.getMessage + assert(e.contains("'path' is not specified")) + + e = intercept[IllegalArgumentException] { + spark.range(1).coalesce(1).write.json("") + }.getMessage + assert(e.contains("Can not create a Path from an empty string")) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index fc9ce6bb3041b..4eeddd4282a5e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -413,6 +413,29 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext { } } + test("save API - empty path or illegal path") { + var e = intercept[IllegalArgumentException] { + spark.range(1).coalesce(1).write.format("parquet").save() + }.getMessage + assert(e.contains("'path' is not specified")) + + e = intercept[IllegalArgumentException] { + spark.range(1).coalesce(1).write.parquet("") + }.getMessage + assert(e.contains("Can not create a Path from an empty string")) + } + + test("illegal compression") { + withTempDir { dir => + val path = dir.getCanonicalPath + val df = spark.range(0, 10) + val e = intercept[IllegalArgumentException] { + df.write.option("compression", "illegal").mode("overwrite").format("parquet").save(path) + }.getMessage + assert(e.contains("Codec [illegal] is not available. Known codecs are")) + } + } + test("SPARK-6315 regression test") { // Spark 1.1 and prior versions write Spark schema as case class string into Parquet metadata. // This has been deprecated by JSON format since 1.2. Notice that, 1.3 further refactored data diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala index 71d3da915840a..54bd58c307adb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala @@ -154,6 +154,29 @@ class TextSuite extends QueryTest with SharedSQLContext { } } + test("save API - empty path or illegal path") { + var e = intercept[IllegalArgumentException] { + spark.range(1).coalesce(1).write.format("text").save() + }.getMessage + assert(e.contains("'path' is not specified")) + + e = intercept[IllegalArgumentException] { + spark.range(1).coalesce(1).write.text("") + }.getMessage + assert(e.contains("Can not create a Path from an empty string")) + } + + test("illegal compression") { + withTempDir { dir => + val path = dir.getCanonicalPath + val df = spark.range(0, 10).selectExpr("CAST(id AS STRING) AS s") + val e = intercept[IllegalArgumentException] { + df.write.option("compression", "illegal").mode("overwrite").format("text").save(path) + }.getMessage + assert(e.contains("Codec [illegal] is not available. Known codecs are")) + } + } + private def testFile: String = { Thread.currentThread().getContextClassLoader.getResource("text-suite.txt").toString } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcOptions.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcOptions.scala index 91cf0dc960d58..32f709b3b667a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcOptions.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcOptions.scala @@ -35,7 +35,7 @@ private[orc] class OrcOptions( if (!shortOrcCompressionCodecNames.contains(codecName)) { val availableCodecs = shortOrcCompressionCodecNames.keys.map(_.toLowerCase) throw new IllegalArgumentException(s"Codec [$codecName] " + - s"is not available. Available codecs are ${availableCodecs.mkString(", ")}.") + s"is not available. Known codecs are ${availableCodecs.mkString(", ")}.") } shortOrcCompressionCodecNames(codecName) } From 5e4a3c666dfb767215130df1a778e5f97d438c54 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Fri, 17 Jun 2016 12:58:56 -0700 Subject: [PATCH 2/8] add test cases. --- .../sql/execution/datasources/json/JsonSuite.scala | 11 +++++++++++ .../apache/spark/sql/hive/orc/OrcSourceSuite.scala | 4 ++++ 2 files changed, 15 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 24c955dfd2e09..220e052cfa07c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -1674,4 +1674,15 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { }.getMessage assert(e.contains("Can not create a Path from an empty string")) } + + test("illegal compression") { + withTempDir { dir => + val path = dir.getCanonicalPath + val df = spark.range(0, 10) + val e = intercept[IllegalArgumentException] { + df.write.option("compression", "illegal").mode("overwrite").format("json").save(path) + }.getMessage + assert(e.contains("Codec [illegal] is not available. Known codecs are")) + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala index 871b9e02eb382..0720609a11a8b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala @@ -146,6 +146,10 @@ abstract class OrcSuite extends QueryTest with TestHiveSingleton with BeforeAndA sql("DROP TABLE IF EXISTS orcNullValues") } + + + + } class OrcSourceSuite extends OrcSuite { From 26437151ff0db4c0010510de047f81b1808890f4 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Fri, 17 Jun 2016 16:48:23 -0700 Subject: [PATCH 3/8] fix and test cases --- .../execution/datasources/DataSource.scala | 4 ++ .../execution/datasources/csv/CSVSuite.scala | 14 ++++++ .../datasources/json/JsonSuite.scala | 13 +++++ .../datasources/parquet/ParquetIOSuite.scala | 13 +++++ .../datasources/text/TextSuite.scala | 6 +++ .../sql/test/DataFrameReaderWriterSuite.scala | 17 +++++++ .../spark/sql/hive/orc/OrcSourceSuite.scala | 48 ++++++++++++++++++- 7 files changed, 113 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index 7f3683fc98197..a4aa3bc70a5a4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -34,6 +34,7 @@ import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat import org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider import org.apache.spark.sql.execution.datasources.json.JsonFileFormat import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat +import org.apache.spark.sql.execution.datasources.text.TextFileFormat import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources._ @@ -376,6 +377,9 @@ case class DataSource( StructType(schema.filterNot(f => partitionColumns.exists(equality(_, f.name)))) }.orElse { + if (allPaths.isEmpty && !format.isInstanceOf[TextFileFormat]) { + throw new IllegalArgumentException(s"'path' is not specified") + } format.inferSchema( sparkSession, caseInsensitiveOptions, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 34b8a342dc671..1f532f7e782bb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -354,6 +354,20 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { assert(e.contains("Can not create a Path from an empty string")) } + test("load API - empty path") { + val expectedErrorMsg = "'path' is not specified" + var e = intercept[IllegalArgumentException] { + spark.read.csv() + }.getMessage + assert(e.contains(expectedErrorMsg)) + + e = intercept[IllegalArgumentException] { + spark.read.format("csv").load() + }.getMessage + assert(e.contains(expectedErrorMsg)) + } + + test("save csv with quote") { withTempDir { dir => val csvDir = new File(dir, "csv").getCanonicalPath diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 220e052cfa07c..65bac05219867 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -1675,6 +1675,19 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { assert(e.contains("Can not create a Path from an empty string")) } + test("load API - empty path") { + val expectedErrorMsg = "'path' is not specified" + var e = intercept[IllegalArgumentException] { + spark.read.json() + }.getMessage + assert(e.contains(expectedErrorMsg)) + + e = intercept[IllegalArgumentException] { + spark.read.format("json").load() + }.getMessage + assert(e.contains(expectedErrorMsg)) + } + test("illegal compression") { withTempDir { dir => val path = dir.getCanonicalPath diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index 4eeddd4282a5e..8eebd90ff5366 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -425,6 +425,19 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext { assert(e.contains("Can not create a Path from an empty string")) } + test("load API - empty path") { + val expectedErrorMsg = "'path' is not specified" + var e = intercept[IllegalArgumentException] { + spark.read.parquet() + }.getMessage + assert(e.contains(expectedErrorMsg)) + + e = intercept[IllegalArgumentException] { + spark.read.format("parquet").load() + }.getMessage + assert(e.contains(expectedErrorMsg)) + } + test("illegal compression") { withTempDir { dir => val path = dir.getCanonicalPath diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala index 54bd58c307adb..c71d0633d2bca 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala @@ -177,6 +177,12 @@ class TextSuite extends QueryTest with SharedSQLContext { } } + test("load API - empty path") { + val res = Seq.empty[String].toDF("value") + checkAnswer(spark.read.text(), res) + checkAnswer(spark.read.textFile().toDF(), res) + } + private def testFile: String = { Thread.currentThread().getContextClassLoader.getResource("text-suite.txt").toString } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala index 98e57b38044f2..4bb6c1c410f9b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala @@ -217,6 +217,23 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext { } } + test("illegal format name") { + val e = intercept[AnalysisException] { + spark.read.format("illegal").load("/test") + } + assert(e.getMessage.contains("Failed to find data source: illegal")) + } + + test("empty partitionBy") { + withTempDir { dir => + val path = dir.getCanonicalPath + val input = spark.range(10).toDF() + input.write.format("parquet").mode("overwrite").partitionBy().save(path) + val output = spark.read.parquet(path) + checkAnswer(input, output) + } + } + test("prevent all column partitioning") { withTempDir { dir => val path = dir.getCanonicalPath diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala index 0720609a11a8b..a70147e62cea3 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala @@ -21,15 +21,17 @@ import java.io.File import org.scalatest.BeforeAndAfterAll -import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.sources._ +import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types._ import org.apache.spark.util.Utils case class OrcData(intField: Int, stringField: String) -abstract class OrcSuite extends QueryTest with TestHiveSingleton with BeforeAndAfterAll { +abstract class OrcSuite extends QueryTest + with TestHiveSingleton with SQLTestUtils with BeforeAndAfterAll { import spark._ var orcTableDir: File = null @@ -147,9 +149,51 @@ abstract class OrcSuite extends QueryTest with TestHiveSingleton with BeforeAndA sql("DROP TABLE IF EXISTS orcNullValues") } + test("prevent all column partitioning") { + withTempDir { dir => + val path = dir.getCanonicalPath + val e = intercept[AnalysisException] { + spark.range(10).write.format("orc").mode("overwrite").partitionBy("id").save(path) + }.getMessage + assert(e.contains("Cannot use all columns for partition columns")) + } + } + test("save API - empty path or illegal path") { + var e = intercept[IllegalArgumentException] { + spark.range(1).coalesce(1).write.format("orc").save() + }.getMessage + assert(e.contains("'path' is not specified")) + e = intercept[IllegalArgumentException] { + spark.range(1).coalesce(1).write.orc("") + }.getMessage + assert(e.contains("Can not create a Path from an empty string")) + } + test("load API - empty path") { + val expectedErrorMsg = "'path' is not specified" + var e = intercept[IllegalArgumentException] { + spark.read.orc() + }.getMessage + assert(e.contains(expectedErrorMsg)) + + e = intercept[IllegalArgumentException] { + spark.read.format("orc").load().show() + }.getMessage + assert(e.contains(expectedErrorMsg)) + } + + test("illegal compression") { + withTempDir { dir => + val path = dir.getCanonicalPath + val df = spark.range(0, 10) + val e = intercept[IllegalArgumentException] { + df.write.option("compression", "illegal").mode("overwrite").format("orc").save(path) + }.getMessage + assert(e.contains("Codec [illegal] is not available. Known codecs are")) + } + } } class OrcSourceSuite extends OrcSuite { From 3007fe66d03a6a40dc530c13d44c27030118a8a4 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Tue, 21 Jun 2016 06:27:16 -0700 Subject: [PATCH 4/8] more test case --- .../sql/test/DataFrameReaderWriterSuite.scala | 37 ++++----------- .../spark/sql/hive/orc/OrcSourceSuite.scala | 47 ++++++++++++++++++- 2 files changed, 56 insertions(+), 28 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala index a1e90e29c78e7..7c9819d4e8be5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala @@ -212,7 +212,7 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext with Be } test("illegal format name") { - val e = intercept[AnalysisException] { + val e = intercept[ClassNotFoundException] { spark.read.format("illegal").load("/test") } assert(e.getMessage.contains("Failed to find data source: illegal")) @@ -231,12 +231,10 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext with Be test("prevent all column partitioning") { withTempDir { dir => val path = dir.getCanonicalPath - intercept[AnalysisException] { + val e = intercept[AnalysisException] { spark.range(10).write.format("parquet").mode("overwrite").partitionBy("id").save(path) - } - intercept[AnalysisException] { - spark.range(10).write.format("orc").mode("overwrite").partitionBy("id").save(path) - } + }.getMessage + assert(e.contains("Cannot use all columns for partition columns")) } } @@ -318,9 +316,10 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext with Be val schema = df.schema // Reader, without user specified schema - intercept[AnalysisException] { + val e = intercept[IllegalArgumentException] { testRead(spark.read.json(), Seq.empty, schema) - } + }.getMessage + assert(e.contains("'path' is not specified")) testRead(spark.read.json(dir), data, schema) testRead(spark.read.json(dir, dir), data ++ data, schema) testRead(spark.read.json(Seq(dir, dir): _*), data ++ data, schema) @@ -344,9 +343,10 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext with Be val schema = df.schema // Reader, without user specified schema - intercept[AnalysisException] { + val e = intercept[IllegalArgumentException] { testRead(spark.read.parquet(), Seq.empty, schema) - } + }.getMessage + assert(e.contains("'path' is not specified")) testRead(spark.read.parquet(dir), data, schema) testRead(spark.read.parquet(dir, dir), data ++ data, schema) testRead(spark.read.parquet(Seq(dir, dir): _*), data ++ data, schema) @@ -363,23 +363,6 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext with Be spark.read.schema(userSchema).parquet(Seq(dir, dir): _*), expData ++ expData, userSchema) } - /** - * This only tests whether API compiles, but does not run it as orc() - * cannot be run without Hive classes. - */ - ignore("orc - API") { - // Reader, with user specified schema - // Refer to csv-specific test suites for behavior without user specified schema - spark.read.schema(userSchema).orc() - spark.read.schema(userSchema).orc(dir) - spark.read.schema(userSchema).orc(dir, dir, dir) - spark.read.schema(userSchema).orc(Seq(dir, dir): _*) - Option(dir).map(spark.read.schema(userSchema).orc) - - // Writer - spark.range(10).write.orc(dir) - } - private def testRead( df: => DataFrame, expectedResult: Seq[String], diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala index a70147e62cea3..e04b19995397a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala @@ -21,7 +21,7 @@ import java.io.File import org.scalatest.BeforeAndAfterAll -import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql._ import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.sources._ import org.apache.spark.sql.test.SQLTestUtils @@ -197,6 +197,8 @@ abstract class OrcSuite extends QueryTest } class OrcSourceSuite extends OrcSuite { + import spark.implicits._ + override def beforeAll(): Unit = { super.beforeAll() @@ -217,6 +219,49 @@ class OrcSourceSuite extends OrcSuite { """.stripMargin) } + test("orc - API") { + val userSchema = new StructType().add("s", StringType) + val data = Seq("1", "2", "3") + val dir = Utils.createTempDir(namePrefix = "input").getCanonicalPath + + // Writer + spark.createDataset(data).toDF("str").write.mode(SaveMode.Overwrite).orc(dir) + val df = spark.read.orc(dir) + checkAnswer(df, spark.createDataset(data).toDF()) + val schema = df.schema + + // Reader, without user specified schema + intercept[IllegalArgumentException] { + testRead(spark.read.orc(), Seq.empty, schema) + } + testRead(spark.read.orc(dir), data, schema) + testRead(spark.read.orc(dir, dir), data ++ data, schema) + testRead(spark.read.orc(Seq(dir, dir): _*), data ++ data, schema) + // Test explicit calls to single arg method - SPARK-16009 + testRead(Option(dir).map(spark.read.orc).get, data, schema) + + // Reader, with user specified schema, should just apply user schema on the file data + testRead(spark.read.schema(userSchema).orc(), Seq.empty, userSchema) + spark.read.schema(userSchema).orc(dir).printSchema() + + spark.read.schema(userSchema).orc(dir).explain(true) + + spark.read.schema(userSchema).orc().show() + spark.read.schema(userSchema).orc(dir).show() + val expData = Seq[String](null, null, null) + testRead(spark.read.schema(userSchema).orc(dir), expData, userSchema) + testRead(spark.read.schema(userSchema).orc(dir, dir), expData ++ expData, userSchema) + testRead(spark.read.schema(userSchema).orc(Seq(dir, dir): _*), expData ++ expData, userSchema) + + } + private def testRead( + df: => DataFrame, + expectedResult: Seq[String], + expectedSchema: StructType): Unit = { + checkAnswer(df, spark.createDataset(expectedResult).toDF()) + assert(df.schema === expectedSchema) + } + test("SPARK-12218 Converting conjunctions into ORC SearchArguments") { // The `LessThan` should be converted while the `StringContains` shouldn't val schema = new StructType( From a1ae7249322c17ea09be4e968535dc115b2acb64 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Tue, 21 Jun 2016 23:12:56 -0700 Subject: [PATCH 5/8] fix test case --- .../spark/sql/hive/orc/OrcSourceSuite.scala | 76 ++++++++++--------- 1 file changed, 39 insertions(+), 37 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala index e04b19995397a..e8d4dd4fa15ff 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala @@ -21,6 +21,7 @@ import java.io.File import org.scalatest.BeforeAndAfterAll +import org.apache.spark.SparkException import org.apache.spark.sql._ import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.sources._ @@ -32,7 +33,7 @@ case class OrcData(intField: Int, stringField: String) abstract class OrcSuite extends QueryTest with TestHiveSingleton with SQLTestUtils with BeforeAndAfterAll { - import spark._ + import spark.implicits._ var orcTableDir: File = null var orcTableAsDir: File = null @@ -194,30 +195,6 @@ abstract class OrcSuite extends QueryTest assert(e.contains("Codec [illegal] is not available. Known codecs are")) } } -} - -class OrcSourceSuite extends OrcSuite { - import spark.implicits._ - - override def beforeAll(): Unit = { - super.beforeAll() - - spark.sql( - s"""CREATE TEMPORARY TABLE normal_orc_source - |USING org.apache.spark.sql.hive.orc - |OPTIONS ( - | PATH '${new File(orcTableAsDir.getAbsolutePath).getCanonicalPath}' - |) - """.stripMargin) - - spark.sql( - s"""CREATE TEMPORARY TABLE normal_orc_as_source - |USING org.apache.spark.sql.hive.orc - |OPTIONS ( - | PATH '${new File(orcTableAsDir.getAbsolutePath).getCanonicalPath}' - |) - """.stripMargin) - } test("orc - API") { val userSchema = new StructType().add("s", StringType) @@ -240,20 +217,23 @@ class OrcSourceSuite extends OrcSuite { // Test explicit calls to single arg method - SPARK-16009 testRead(Option(dir).map(spark.read.orc).get, data, schema) - // Reader, with user specified schema, should just apply user schema on the file data + // Reader, with user specified schema, report an exception as schema in file different + // from user schema. testRead(spark.read.schema(userSchema).orc(), Seq.empty, userSchema) - spark.read.schema(userSchema).orc(dir).printSchema() - - spark.read.schema(userSchema).orc(dir).explain(true) - - spark.read.schema(userSchema).orc().show() - spark.read.schema(userSchema).orc(dir).show() - val expData = Seq[String](null, null, null) - testRead(spark.read.schema(userSchema).orc(dir), expData, userSchema) - testRead(spark.read.schema(userSchema).orc(dir, dir), expData ++ expData, userSchema) - testRead(spark.read.schema(userSchema).orc(Seq(dir, dir): _*), expData ++ expData, userSchema) - + var e = intercept[SparkException] { + testRead(spark.read.schema(userSchema).orc(dir), Seq.empty, userSchema) + }.getMessage + assert(e.contains("Field \"s\" does not exist")) + e = intercept[SparkException] { + testRead(spark.read.schema(userSchema).orc(dir, dir), Seq.empty, userSchema) + }.getMessage + assert(e.contains("Field \"s\" does not exist")) + e = intercept[SparkException] { + testRead(spark.read.schema(userSchema).orc(Seq(dir, dir): _*), Seq.empty, userSchema) + }.getMessage + assert(e.contains("Field \"s\" does not exist")) } + private def testRead( df: => DataFrame, expectedResult: Seq[String], @@ -261,6 +241,28 @@ class OrcSourceSuite extends OrcSuite { checkAnswer(df, spark.createDataset(expectedResult).toDF()) assert(df.schema === expectedSchema) } +} + +class OrcSourceSuite extends OrcSuite { + override def beforeAll(): Unit = { + super.beforeAll() + + spark.sql( + s"""CREATE TEMPORARY TABLE normal_orc_source + |USING org.apache.spark.sql.hive.orc + |OPTIONS ( + | PATH '${new File(orcTableAsDir.getAbsolutePath).getCanonicalPath}' + |) + """.stripMargin) + + spark.sql( + s"""CREATE TEMPORARY TABLE normal_orc_as_source + |USING org.apache.spark.sql.hive.orc + |OPTIONS ( + | PATH '${new File(orcTableAsDir.getAbsolutePath).getCanonicalPath}' + |) + """.stripMargin) + } test("SPARK-12218 Converting conjunctions into ORC SearchArguments") { // The `LessThan` should be converted while the `StringContains` shouldn't From 635046a10cc059a6ae8756fb7bc7167f5621255c Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Wed, 22 Jun 2016 09:04:51 -0700 Subject: [PATCH 6/8] fix test case --- .../sql/hive/MetastoreDataSourcesSuite.scala | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index b028d49aff58d..5195cb754f399 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -537,18 +537,18 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv } test("path required error") { - assert( - intercept[AnalysisException] { - sparkSession.catalog.createExternalTable( - "createdJsonTable", - "org.apache.spark.sql.json", - Map.empty[String, String]) - - table("createdJsonTable") - }.getMessage.contains("Unable to infer schema"), - "We should complain that path is not specified.") + withTable("createdJsonTable") { + assert( + intercept[IllegalArgumentException] { + sparkSession.catalog.createExternalTable( + "createdJsonTable", + "org.apache.spark.sql.json", + Map.empty[String, String]) - sql("DROP TABLE IF EXISTS createdJsonTable") + table("createdJsonTable") + }.getMessage.contains("'path' is not specified"), + "We should complain that path is not specified.") + } } test("scan a parquet table created through a CTAS statement") { From b6bdf921eb947d615a346275477e69c1798d7217 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sat, 12 Nov 2016 10:34:56 -0800 Subject: [PATCH 7/8] fix nit --- .../org/apache/spark/sql/execution/datasources/DataSource.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index e1ddaee7b12c4..878520ed9f252 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -324,7 +324,7 @@ case class DataSource( StructType(schema.filterNot(f => partitionColumns.exists(equality(_, f.name)))) }.orElse { if (allPaths.isEmpty && !format.isInstanceOf[TextFileFormat]) { - throw new IllegalArgumentException(s"'path' is not specified") + throw new IllegalArgumentException("'path' is not specified") } format.inferSchema( sparkSession, From 45110370fb1889f244a6750ef2a18dbc9f1ba9c2 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sat, 12 Nov 2016 21:00:36 -0800 Subject: [PATCH 8/8] fix test cases --- R/pkg/inst/tests/testthat/test_sparkSQL.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index ee48baa59c7af..6b76cc3e37bd5 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -2659,7 +2659,7 @@ test_that("Call DataFrameWriter.save() API in Java without path and check argume # It makes sure that we can omit path argument in write.df API and then it calls # DataFrameWriter.save() without path. expect_error(write.df(df, source = "csv"), - "Error in save : illegal argument - Expected exactly one path to be specified") + "Error in save : illegal argument - 'path' is not specified") expect_error(write.json(df, jsonPath), "Error in json : analysis error - path file:.*already exists") expect_error(write.text(df, jsonPath), @@ -2684,8 +2684,7 @@ test_that("Call DataFrameWriter.load() API in Java without path and check argume # It makes sure that we can omit path argument in read.df API and then it calls # DataFrameWriter.load() without path. expect_error(read.df(source = "json"), - paste("Error in loadDF : analysis error - Unable to infer schema for JSON at .", - "It must be specified manually")) + paste("Error in loadDF : illegal argument - 'path' is not specified")) expect_error(read.df("arbitrary_path"), "Error in loadDF : analysis error - Path does not exist") expect_error(read.json("arbitrary_path"), "Error in json : analysis error - Path does not exist") expect_error(read.text("arbitrary_path"), "Error in text : analysis error - Path does not exist")