Skip to content

Commit 68265ac

Browse files
cloud-fan authored and yhuai committed
[SPARK-12841][SQL][BRANCH-1.6] fix cast in filter
In SPARK-10743 we wrap cast with `UnresolvedAlias` to give `Cast` a better alias if possible. However, for cases like filter, the `UnresolvedAlias` can't be resolved, and we don't actually need a better alias in this case. This PR moves the cast wrapping logic to `Column.named` so that we only do it when we need an alias name. Backport #10781 to 1.6. Author: Wenchen Fan <[email protected]> Closes #10819 from cloud-fan/bug.
1 parent d43704d commit 68265ac

File tree

3 files changed

+18
-8
lines changed

3 files changed

+18
-8
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ class Analyzer(
148148
private def assignAliases(exprs: Seq[NamedExpression]) = {
149149
exprs.zipWithIndex.map {
150150
case (expr, i) =>
151-
expr transform {
151+
expr transformUp {
152152
case u @ UnresolvedAlias(child) => child match {
153153
case ne: NamedExpression => ne
154154
case e if !e.resolved => u

sql/core/src/main/scala/org/apache/spark/sql/Column.scala

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,15 @@ class Column(protected[sql] val expr: Expression) extends Logging {
132132
case explode: Explode => MultiAlias(explode, Nil)
133133
case jt: JsonTuple => MultiAlias(jt, Nil)
134134

135+
// If we have a top level Cast, there is a chance to give it a better alias, if there is a
136+
// NamedExpression under this Cast.
137+
case c: Cast => c.transformUp {
138+
case Cast(ne: NamedExpression, to) => UnresolvedAlias(Cast(ne, to))
139+
} match {
140+
case ne: NamedExpression => ne
141+
case other => Alias(expr, expr.prettyString)()
142+
}
143+
135144
case expr: Expression => Alias(expr, expr.prettyString)()
136145
}
137146

@@ -931,13 +940,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
931940
* @group expr_ops
932941
* @since 1.3.0
933942
*/
934-
def cast(to: DataType): Column = withExpr {
935-
expr match {
936-
// keeps the name of expression if possible when do cast.
937-
case ne: NamedExpression => UnresolvedAlias(Cast(expr, to))
938-
case _ => Cast(expr, to)
939-
}
940-
}
943+
def cast(to: DataType): Column = withExpr { Cast(expr, to) }
941944

942945
/**
943946
* Casts the column to a different data type, using the canonical string representation

sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -991,6 +991,7 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
991991
test("SPARK-10743: keep the name of expression if possible when do cast") {
992992
val df = (1 to 10).map(Tuple1.apply).toDF("i").as("src")
993993
assert(df.select($"src.i".cast(StringType)).columns.head === "i")
994+
assert(df.select($"src.i".cast(StringType).cast(IntegerType)).columns.head === "i")
994995
}
995996

996997
test("SPARK-11301: fix case sensitivity for filter on partitioned columns") {
@@ -1163,4 +1164,10 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
11631164
val primitiveUDF = udf((i: Int) => i * 2)
11641165
checkAnswer(df.select(primitiveUDF($"age")), Row(44) :: Row(null) :: Nil)
11651166
}
1167+
1168+
test("SPARK-12841: cast in filter") {
1169+
checkAnswer(
1170+
Seq(1 -> "a").toDF("i", "j").filter($"i".cast(StringType) === "1"),
1171+
Row(1, "a"))
1172+
}
11661173
}

0 commit comments

Comments
 (0)