Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1599,6 +1599,24 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
assert(df.persist.take(1).apply(0).toSeq(100).asInstanceOf[Long] == 100)
}

test("SPARK-17409: Do Not Optimize Query in CTAS (Data source tables) More Than Once") {
withTable("bar") {
withTempView("foo") {
withSQLConf(SQLConf.DEFAULT_DATA_SOURCE_NAME.key -> "json") {
sql("select 0 as id").createOrReplaceTempView("foo")
val df = sql("select * from foo group by id")
// If we optimize the query in CTAS more than once, the following saveAsTable will fail
// with the error: `GROUP BY position 0 is not in select list (valid range is [1, 1])`
df.write.mode("overwrite").saveAsTable("bar")
checkAnswer(spark.table("bar"), Row(0) :: Nil)
val tableMetadata = spark.sessionState.catalog.getTableMetadata(TableIdentifier("bar"))
assert(tableMetadata.provider == Some("json"),
"the expected table is a data source table using json")
}
}
}
}

test("copy results for sampling with replacement") {
val df = Seq((1, 0), (2, 0), (3, 0)).toDF("a", "b")
val sampleDf = df.sample(true, 2.00)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ class CreateTableAsSelectSuite
}
}

test("CTAS of decimal calculation") {
test("SPARK-17409: CTAS of decimal calculation") {
withTable("tab2") {
withTempView("tab1") {
spark.range(99, 101).createOrReplaceTempView("tab1")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@

package org.apache.spark.sql.hive

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.{QueryTest, Row}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

class MetastoreRelationSuite extends SparkFunSuite {
class MetastoreRelationSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
test("makeCopy and toJSON should work") {
val table = CatalogTable(
identifier = TableIdentifier("test", Some("db")),
Expand All @@ -36,4 +38,18 @@ class MetastoreRelationSuite extends SparkFunSuite {
// No exception should be thrown
relation.toJSON
}

test("SPARK-17409: Do Not Optimize Query in CTAS (Hive Serde Table) More Than Once") {
withTable("bar") {
withTempView("foo") {
sql("select 0 as id").createOrReplaceTempView("foo")
// If we optimize the query in CTAS more than once, the following saveAsTable will fail
// with the error: `GROUP BY position 0 is not in select list (valid range is [1, 1])`
sql("CREATE TABLE bar AS SELECT * FROM foo group by id")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also put the comment of https://github.com/apache/spark/pull/15459/files#diff-5d2ebf4e9ca5a990136b276859769289R1626 at here? Also mention that this test is for hive format table?

checkAnswer(spark.table("bar"), Row(0) :: Nil)
val tableMetadata = spark.sessionState.catalog.getTableMetadata(TableIdentifier("bar"))
assert(tableMetadata.provider == Some("hive"), "the expected table is a Hive serde table")
}
}
}
}