Skip to content

Commit 2287f56

Browse files
yaooqinn authored and HyukjinKwon
committed
[SPARK-33879][SQL] Char/Varchar values fail w/ match error as partition columns
### What changes were proposed in this pull request? ```sql spark-sql> select * from t10 where c0='abcd'; 20/12/22 15:43:38 ERROR SparkSQLDriver: Failed in [select * from t10 where c0='abcd'] scala.MatchError: CharType(10) (of class org.apache.spark.sql.types.CharType) at org.apache.spark.sql.catalyst.expressions.CastBase.cast(Cast.scala:815) at org.apache.spark.sql.catalyst.expressions.CastBase.cast$lzycompute(Cast.scala:842) at org.apache.spark.sql.catalyst.expressions.CastBase.cast(Cast.scala:842) at org.apache.spark.sql.catalyst.expressions.CastBase.nullSafeEval(Cast.scala:844) at org.apache.spark.sql.catalyst.expressions.UnaryExpression.eval(Expression.scala:476) at org.apache.spark.sql.catalyst.catalog.CatalogTablePartition.$anonfun$toRow$2(interface.scala:164) at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238) at scala.collection.Iterator.foreach(Iterator.scala:941) at scala.collection.Iterator.foreach$(Iterator.scala:941) at scala.collection.AbstractIterator.foreach(Iterator.scala:1429) at scala.collection.IterableLike.foreach(IterableLike.scala:74) at scala.collection.IterableLike.foreach$(IterableLike.scala:73) at org.apache.spark.sql.types.StructType.foreach(StructType.scala:102) at scala.collection.TraversableLike.map(TraversableLike.scala:238) at scala.collection.TraversableLike.map$(TraversableLike.scala:231) at org.apache.spark.sql.types.StructType.map(StructType.scala:102) at org.apache.spark.sql.catalyst.catalog.CatalogTablePartition.toRow(interface.scala:158) at org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils$.$anonfun$prunePartitionsByFilter$3(ExternalCatalogUtils.scala:157) at org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils$.$anonfun$prunePartitionsByFilter$3$adapted(ExternalCatalogUtils.scala:156) ``` c0 is a partition column, and the query fails in the partition pruning rule. In this PR, we replace char/varchar w/ string type before the CAST happens. ### Why are the changes needed?
bugfix, see the case above ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? yes, new tests Closes #30887 from yaooqinn/SPARK-33879. Authored-by: Kent Yao <[email protected]> Signed-off-by: HyukjinKwon <[email protected]>
1 parent e853f06 commit 2287f56

File tree

2 files changed

+23
-1
lines changed

2 files changed

+23
-1
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import org.apache.spark.sql.AnalysisException
2626
import org.apache.spark.sql.catalyst.analysis.Resolver
2727
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
2828
import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, BoundReference, Expression, Predicate}
29+
import org.apache.spark.sql.catalyst.util.CharVarcharUtils
2930

3031
object ExternalCatalogUtils {
3132
// This duplicates default value of Hive `ConfVars.DEFAULTPARTITIONNAME`, since catalyst doesn't
@@ -135,7 +136,8 @@ object ExternalCatalogUtils {
135136
if (predicates.isEmpty) {
136137
inputPartitions
137138
} else {
138-
val partitionSchema = catalogTable.partitionSchema
139+
val partitionSchema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(
140+
catalogTable.partitionSchema)
139141
val partitionColumnNames = catalogTable.partitionColumnNames.toSet
140142

141143
val nonPartitionPruningPredicates = predicates.filterNot {

sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,26 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils {
356356
}
357357
}
358358

359+
test("char type comparison: partition pruning") {
360+
withTable("t") {
361+
sql(s"CREATE TABLE t(i INT, c1 CHAR(2), c2 VARCHAR(5)) USING $format PARTITIONED BY (c1, c2)")
362+
sql("INSERT INTO t VALUES (1, 'a', 'a')")
363+
Seq(("c1 = 'a'", true),
364+
("'a' = c1", true),
365+
("c1 = 'a '", true),
366+
("c1 > 'a'", false),
367+
("c1 IN ('a', 'b')", true),
368+
("c2 = 'a '", false),
369+
("c2 = 'a'", true),
370+
("c2 IN ('a', 'b')", true)).foreach { case (con, res) =>
371+
val df = spark.table("t")
372+
withClue(con) {
373+
checkAnswer(df.where(con), df.where(res.toString))
374+
}
375+
}
376+
}
377+
}
378+
359379
test("char type comparison: join") {
360380
withTable("t1", "t2") {
361381
sql(s"CREATE TABLE t1(c CHAR(2)) USING $format")

0 commit comments

Comments
 (0)