From b46659b692bdded6ef677a7c06564f7a4402fbcf Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 22 Dec 2020 17:56:42 +0800 Subject: [PATCH 1/2] [SPARK-33879][SQL] Char Varchar values fails w/ match error as partition columns --- .../sql/catalyst/catalog/interface.scala | 3 ++- .../spark/sql/CharVarcharTestSuite.scala | 20 +++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 5cb237688f875..df441370cf67b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -161,7 +161,8 @@ case class CatalogTablePartition( } else { spec(field.name) } - Cast(Literal(partValue), field.dataType, Option(timeZoneId)).eval() + val dt = CharVarcharUtils.replaceCharVarcharWithString(field.dataType) + Cast(Literal(partValue), dt, Option(timeZoneId)).eval() }) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index b0f1198e46440..11d50b4e3d6d1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -356,6 +356,26 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { } } + test("char type comparison: partition pruning") { + withTable("t") { + sql(s"CREATE TABLE t(i INT, c1 CHAR(2), c2 VARCHAR(5)) USING $format PARTITIONED BY (c1, c2)") + sql("INSERT INTO t VALUES (1, 'a', 'a')") + Seq(("c1 = 'a'", true), + ("'a' = c1", true), + ("c1 = 'a '", true), + ("c1 > 'a'", false), + ("c1 IN ('a', 'b')", true), + ("c2 = 'a '", false), + ("c2 = 'a'", true), + ("c2 IN ('a', 'b')", true)).foreach { case (con, res) => + val df = spark.table("t") + withClue(con) { + checkAnswer(df.where(con), df.where(res.toString)) + } + } + } + } + test("char type comparison: join") { withTable("t1", "t2") { sql(s"CREATE TABLE t1(c CHAR(2)) USING $format") From 6e3e39b0ae523a3c1f0534d682be484e680dc3bf Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 23 Dec 2020 01:02:09 +0800 Subject: [PATCH 2/2] prunePartitionsByFilter --- .../spark/sql/catalyst/catalog/ExternalCatalogUtils.scala | 4 +++- .../org/apache/spark/sql/catalyst/catalog/interface.scala | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala index ae3b75dc3334b..00445a1614257 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, BoundReference, Expression, Predicate} +import org.apache.spark.sql.catalyst.util.CharVarcharUtils object ExternalCatalogUtils { // This duplicates default value of Hive `ConfVars.DEFAULTPARTITIONNAME`, since catalyst doesn't @@ -135,7 +136,8 @@ object ExternalCatalogUtils { if (predicates.isEmpty) { inputPartitions } else { - val partitionSchema = catalogTable.partitionSchema + val partitionSchema = CharVarcharUtils.replaceCharVarcharWithStringInSchema( + catalogTable.partitionSchema) val partitionColumnNames = catalogTable.partitionColumnNames.toSet val nonPartitionPruningPredicates = predicates.filterNot { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index df441370cf67b..5cb237688f875 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -161,8 +161,7 @@ case class CatalogTablePartition( } else { spec(field.name) } - val dt = CharVarcharUtils.replaceCharVarcharWithString(field.dataType) - Cast(Literal(partValue), dt, Option(timeZoneId)).eval() + Cast(Literal(partValue), field.dataType, Option(timeZoneId)).eval() }) } }