From 6898f4a9a317a4116c426b4c7e53e4eb4fe3bf60 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Thu, 13 Oct 2016 00:51:01 -0700 Subject: [PATCH 1/2] Added test cases. --- .../org/apache/spark/sql/DataFrameSuite.scala | 18 ++++++++++++++++++ .../sql/sources/CreateTableAsSelectSuite.scala | 2 +- .../sql/hive/MetastoreRelationSuite.scala | 18 ++++++++++++++++-- 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 16cc368208485..4f5328d6c5fc3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -1615,4 +1615,22 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { qe.assertAnalyzed() } } + + test("SPARK-17409: Do Not Optimize Query in CTAS More Than Once") { + withTable("bar") { + withTempView("foo") { + withSQLConf(SQLConf.DEFAULT_DATA_SOURCE_NAME.key -> "json") { + sql("select 0 as id").createOrReplaceTempView("foo") + val df = sql("select * from foo group by id") + // If we optimize the query in CTAS more than once, the following saveAsTable will fail + // with the error: `GROUP BY position 0 is not in select list (valid range is [1, 1])` + df.write.mode("overwrite").saveAsTable("bar") + checkAnswer(spark.table("bar"), Row(0) :: Nil) + val tableMetadata = spark.sessionState.catalog.getTableMetadata(TableIdentifier("bar")) + assert(tableMetadata.provider == Some("json"), + "the expected table is a data source table using json") + } + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala index c39005f6a1063..5cc9467395adc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala @@ -238,7 +238,7 @@ class CreateTableAsSelectSuite } } - test("CTAS of decimal calculation") { + test("SPARK-17409: CTAS of decimal calculation") { withTable("tab2") { withTempView("tab1") { spark.range(99, 101).createOrReplaceTempView("tab1") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreRelationSuite.scala index 2f3055dcac4c5..f07df3ca27d7a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreRelationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreRelationSuite.scala @@ -17,12 +17,14 @@ package org.apache.spark.sql.hive -import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.{QueryTest, Row} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} +import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.{IntegerType, StructField, StructType} -class MetastoreRelationSuite extends SparkFunSuite { +class MetastoreRelationSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { test("makeCopy and toJSON should work") { val table = CatalogTable( identifier = TableIdentifier("test", Some("db")), @@ -36,4 +38,16 @@ class MetastoreRelationSuite extends SparkFunSuite { // No exception should be thrown relation.toJSON } + + test("SPARK-17409: Do Not Optimize Query in CTAS More Than Once") { + withTable("bar") { + withTempView("foo") { + sql("select 0 as id").createOrReplaceTempView("foo") + sql("CREATE TABLE bar AS SELECT * FROM foo group by id") + checkAnswer(spark.table("bar"), Row(0) :: Nil) + val tableMetadata = spark.sessionState.catalog.getTableMetadata(TableIdentifier("bar")) + assert(tableMetadata.provider == Some("hive"), "the expected table is a Hive serde table") + } + } + } } From 765d104ea1471343cabc426ba974d3a035ac8f62 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Fri, 14 Oct 2016 14:50:52 -0700 Subject: [PATCH 2/2] address comments --- .../src/test/scala/org/apache/spark/sql/DataFrameSuite.scala | 2 +- .../org/apache/spark/sql/hive/MetastoreRelationSuite.scala | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 4f5328d6c5fc3..2f6b8b7a2a408 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -1616,7 +1616,7 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { } } - test("SPARK-17409: Do Not Optimize Query in CTAS More Than Once") { + test("SPARK-17409: Do Not Optimize Query in CTAS (Data source tables) More Than Once") { withTable("bar") { withTempView("foo") { withSQLConf(SQLConf.DEFAULT_DATA_SOURCE_NAME.key -> "json") { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreRelationSuite.scala index f07df3ca27d7a..bf11df25888c1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreRelationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreRelationSuite.scala @@ -39,10 +39,12 @@ class MetastoreRelationSuite extends QueryTest with SQLTestUtils with TestHiveSi relation.toJSON } - test("SPARK-17409: Do Not Optimize Query in CTAS More Than Once") { + test("SPARK-17409: Do Not Optimize Query in CTAS (Hive Serde Table) More Than Once") { withTable("bar") { withTempView("foo") { sql("select 0 as id").createOrReplaceTempView("foo") + // If we optimize the query in CTAS more than once, the following saveAsTable will fail + // with the error: `GROUP BY position 0 is not in select list (valid range is [1, 1])` sql("CREATE TABLE bar AS SELECT * FROM foo group by id") checkAnswer(spark.table("bar"), Row(0) :: Nil) val tableMetadata = spark.sessionState.catalog.getTableMetadata(TableIdentifier("bar"))