Skip to content

Commit 2287f56

Browse files
yaooqinn authored and HyukjinKwon
committed
[SPARK-33879][SQL] Char/Varchar values fail w/ match error as partition columns
### What changes were proposed in this pull request? ```sql spark-sql> select * from t10 where c0='abcd'; 20/12/22 15:43:38 ERROR SparkSQLDriver: Failed in [select * from t10 where c0='abcd'] scala.MatchError: CharType(10) (of class org.apache.spark.sql.types.CharType) at org.apache.spark.sql.catalyst.expressions.CastBase.cast(Cast.scala:815) at org.apache.spark.sql.catalyst.expressions.CastBase.cast$lzycompute(Cast.scala:842) at org.apache.spark.sql.catalyst.expressions.CastBase.cast(Cast.scala:842) at org.apache.spark.sql.catalyst.expressions.CastBase.nullSafeEval(Cast.scala:844) at org.apache.spark.sql.catalyst.expressions.UnaryExpression.eval(Expression.scala:476) at org.apache.spark.sql.catalyst.catalog.CatalogTablePartition.$anonfun$toRow$2(interface.scala:164) at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238) at scala.collection.Iterator.foreach(Iterator.scala:941) at scala.collection.Iterator.foreach$(Iterator.scala:941) at scala.collection.AbstractIterator.foreach(Iterator.scala:1429) at scala.collection.IterableLike.foreach(IterableLike.scala:74) at scala.collection.IterableLike.foreach$(IterableLike.scala:73) at org.apache.spark.sql.types.StructType.foreach(StructType.scala:102) at scala.collection.TraversableLike.map(TraversableLike.scala:238) at scala.collection.TraversableLike.map$(TraversableLike.scala:231) at org.apache.spark.sql.types.StructType.map(StructType.scala:102) at org.apache.spark.sql.catalyst.catalog.CatalogTablePartition.toRow(interface.scala:158) at org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils$.$anonfun$prunePartitionsByFilter$3(ExternalCatalogUtils.scala:157) at org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils$.$anonfun$prunePartitionsByFilter$3$adapted(ExternalCatalogUtils.scala:156) ``` c0 is a partition column, and the query fails in the partition pruning rule. In this PR, we replace char/varchar w/ string type before the CAST happens. ### Why are the changes needed?
bugfix, see the case above ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? yes, new tests Closes #30887 from yaooqinn/SPARK-33879. Authored-by: Kent Yao <[email protected]> Signed-off-by: HyukjinKwon <[email protected]>
1 parent e853f06 commit 2287f56

File tree

2 files changed

+23
-1
lines changed

2 files changed

+23
-1
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import org.apache.spark.sql.AnalysisException
2626
import org.apache.spark.sql.catalyst.analysis.Resolver
2727
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
2828
import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, BoundReference, Expression, Predicate}
29+
import org.apache.spark.sql.catalyst.util.CharVarcharUtils
2930

3031
object ExternalCatalogUtils {
3132
// This duplicates default value of Hive `ConfVars.DEFAULTPARTITIONNAME`, since catalyst doesn't
@@ -135,7 +136,8 @@ object ExternalCatalogUtils {
135136
if (predicates.isEmpty) {
136137
inputPartitions
137138
} else {
138-
val partitionSchema = catalogTable.partitionSchema
139+
val partitionSchema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(
140+
catalogTable.partitionSchema)
139141
val partitionColumnNames = catalogTable.partitionColumnNames.toSet
140142

141143
val nonPartitionPruningPredicates = predicates.filterNot {

sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,26 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils {
356356
}
357357
}
358358

359+
test("char type comparison: partition pruning") {
360+
withTable("t") {
361+
sql(s"CREATE TABLE t(i INT, c1 CHAR(2), c2 VARCHAR(5)) USING $format PARTITIONED BY (c1, c2)")
362+
sql("INSERT INTO t VALUES (1, 'a', 'a')")
363+
Seq(("c1 = 'a'", true),
364+
("'a' = c1", true),
365+
("c1 = 'a '", true),
366+
("c1 > 'a'", false),
367+
("c1 IN ('a', 'b')", true),
368+
("c2 = 'a '", false),
369+
("c2 = 'a'", true),
370+
("c2 IN ('a', 'b')", true)).foreach { case (con, res) =>
371+
val df = spark.table("t")
372+
withClue(con) {
373+
checkAnswer(df.where(con), df.where(res.toString))
374+
}
375+
}
376+
}
377+
}
378+
359379
test("char type comparison: join") {
360380
withTable("t1", "t2") {
361381
sql(s"CREATE TABLE t1(c CHAR(2)) USING $format")

0 commit comments

Comments
 (0)