-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-16036][SPARK-16037][SPARK-16034][SQL] Follow up code clean up and improvement #13766
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -435,7 +435,7 @@ case class DataSource( | |
| // If we are appending to a table that already exists, make sure the partitioning matches | ||
| // up. If we fail to load the table for whatever reason, ignore the check. | ||
| if (mode == SaveMode.Append) { | ||
| val existingColumns = Try { | ||
| val existingPartitionColumns = Try { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Rename the variable since it is for partition columns, not all columns. |
||
| resolveRelation() | ||
| .asInstanceOf[HadoopFsRelation] | ||
| .location | ||
|
|
@@ -444,13 +444,14 @@ case class DataSource( | |
| .fieldNames | ||
| .toSeq | ||
| }.getOrElse(Seq.empty[String]) | ||
| // TODO: Case sensitivity. | ||
| val sameColumns = | ||
| existingColumns.map(_.toLowerCase) == partitionColumns.map(_.toLowerCase) | ||
| if (existingColumns.size > 0 && !sameColumns) { | ||
| existingPartitionColumns.map(_.toLowerCase()) == partitionColumns.map(_.toLowerCase()) | ||
| if (existingPartitionColumns.size > 0 && !sameColumns) { | ||
| throw new AnalysisException( | ||
| s"""Requested partitioning does not match existing partitioning. | ||
| |Existing partitioning columns: | ||
| | ${existingColumns.mkString(", ")} | ||
| | ${existingPartitionColumns.mkString(", ")} | ||
| |Requested partitioning columns: | ||
| | ${partitionColumns.mkString(", ")} | ||
| |""".stripMargin) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -67,7 +67,7 @@ private[sql] class ResolveDataSource(sparkSession: SparkSession) extends Rule[Lo | |
| * table. It also does data type casting and field renaming, to make sure that the columns to be | ||
| * inserted have the correct data type and fields have the correct names. | ||
| */ | ||
| private[sql] object PreprocessTableInsertion extends Rule[LogicalPlan] { | ||
| private[sql] case class PreprocessTableInsertion(conf: SQLConf) extends Rule[LogicalPlan] { | ||
| private def preprocess( | ||
| insert: InsertIntoTable, | ||
| tblName: String, | ||
|
|
@@ -84,7 +84,13 @@ private[sql] object PreprocessTableInsertion extends Rule[LogicalPlan] { | |
| if (insert.partition.nonEmpty) { | ||
| // the query's partitioning must match the table's partitioning | ||
| // this is set for queries like: insert into ... partition (one = "a", two = <expr>) | ||
| if (insert.partition.keySet != partColNames.toSet) { | ||
| val samePartitionColumns = | ||
| if (conf.caseSensitiveAnalysis) { | ||
| insert.partition.keySet == partColNames.toSet | ||
| } else { | ||
| insert.partition.keySet.map(_.toLowerCase) == partColNames.map(_.toLowerCase).toSet | ||
| } | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is just a best effort to respect the case sensitivity setting.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Partitioning columns are special since we use them to create directories in the file system. Once directories are created, the case sensitivity setting will not affect anything.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Actually, since the parser already turns column names in the partition spec into their lower-case form, this change does not really affect anything when we use case-insensitive resolution. |
||
| if (!samePartitionColumns) { | ||
| throw new AnalysisException( | ||
| s""" | ||
| |Requested partitioning does not match the table $tblName: | ||
|
|
@@ -94,7 +100,8 @@ private[sql] object PreprocessTableInsertion extends Rule[LogicalPlan] { | |
| } | ||
| expectedColumns.map(castAndRenameChildOutput(insert, _)).getOrElse(insert) | ||
| } else { | ||
| // All partition columns are dynamic because this InsertIntoTable had no partitioning | ||
| // All partition columns are dynamic because the InsertIntoTable command does | ||
| // not explicitly specify partitioning columns. | ||
| expectedColumns.map(castAndRenameChildOutput(insert, _)).getOrElse(insert) | ||
| .copy(partition = partColNames.map(_ -> None).toMap) | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -372,4 +372,24 @@ class InsertIntoHiveTableSuite extends QueryTest with TestHiveSingleton with Bef | |
| assert(!logical.resolved, "Should not resolve: missing partition data") | ||
| } | ||
| } | ||
|
|
||
| testPartitionedTable( | ||
| "SPARK-16036: better error message when insert into a table with mismatch schema") { | ||
| tableName => | ||
| val e = intercept[AnalysisException] { | ||
| sql(s"INSERT INTO TABLE $tableName PARTITION(b=1, c=2) SELECT 1, 2, 3") | ||
| } | ||
| assert(e.message.contains("the number of columns are different")) | ||
| } | ||
|
|
||
| testPartitionedTable( | ||
| "SPARK-16037: INSERT statement should match columns by position") { | ||
| tableName => | ||
| withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") { | ||
| sql(s"INSERT INTO TABLE $tableName SELECT 1, 2 AS c, 3 AS b") | ||
| checkAnswer(sql(s"SELECT a, b, c FROM $tableName"), Row(1, 2, 3)) | ||
| sql(s"INSERT OVERWRITE TABLE $tableName SELECT 1, 2, 3") | ||
| checkAnswer(sql(s"SELECT a, b, c FROM $tableName"), Row(1, 2, 3)) | ||
| } | ||
| } | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1033,6 +1033,41 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { | |
| sql("SELECT * FROM boom").queryExecution.analyzed | ||
| } | ||
|
|
||
| test("SPARK-3810: PreprocessTableInsertion static partitioning support") { | ||
| val analyzedPlan = { | ||
| loadTestTable("srcpart") | ||
| sql("DROP TABLE IF EXISTS withparts") | ||
| sql("CREATE TABLE withparts LIKE srcpart") | ||
| sql("INSERT INTO TABLE withparts PARTITION(ds='1', hr='2') SELECT key, value FROM src") | ||
| .queryExecution.analyzed | ||
| } | ||
|
|
||
| assertResult(1, "Duplicated project detected\n" + analyzedPlan) { | ||
| analyzedPlan.collect { | ||
| case _: Project => () | ||
| }.size | ||
| } | ||
| } | ||
|
|
||
| test("SPARK-3810: PreprocessTableInsertion dynamic partitioning support") { | ||
| val analyzedPlan = { | ||
| loadTestTable("srcpart") | ||
| sql("DROP TABLE IF EXISTS withparts") | ||
| sql("CREATE TABLE withparts LIKE srcpart") | ||
| sql("SET hive.exec.dynamic.partition.mode=nonstrict") | ||
|
|
||
| sql("CREATE TABLE IF NOT EXISTS withparts LIKE srcpart") | ||
| sql("INSERT INTO TABLE withparts PARTITION(ds, hr) SELECT key, value, '1', '2' FROM src") | ||
| .queryExecution.analyzed | ||
| } | ||
|
|
||
| assertResult(2, "Duplicated project detected\n" + analyzedPlan) { | ||
| analyzedPlan.collect { | ||
| case _: Project => () | ||
| }.size | ||
| } | ||
| } | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add these two tests back. |
||
|
|
||
| test("parse HQL set commands") { | ||
| // Adapted from its SQL counterpart. | ||
| val testKey = "spark.sql.key.usedfortestonly" | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See https://github.com/apache/spark/blob/branch-2.0/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala#L194