Skip to content

Commit e00eace

Browse files
cloud-fandongjoon-hyun
authored andcommitted
[SPARK-47561][SQL] Fix analyzer rule order issues about Alias
### What changes were proposed in this pull request? We found two analyzer rule execution order issues in our internal workloads: - `CreateStruct.apply` creates `NamePlaceholder` for unresolved `NamedExpression`. However, with certain rule execution order, the `NamedExpression` may be removed (e.g. remove unnecessary `Alias`) before `NamePlaceholder` is resolved, then `NamePlaceholder` can't be resolved anymore. - UNPIVOT uses `UnresolvedAlias` to wrap `UnresolvedAttribute`. There is a conflict about how to determine the final alias name. If `ResolveAliases` runs first, then `UnresolvedAlias` will be removed and eventually the alias will be `b` for nested column `a.b`. If `ResolveReferences` runs first, then we resolve `a.b` first and then `UnresolvedAlias` will determine the alias as `a.b` not `b`. This PR fixes the two issues - `CreateStruct.apply` should determine the field name immediately if the input is `Alias` - The parser rule for UNPIVOT should follow how we parse SELECT and return `UnresolvedAttribute` directly without the `UnresolvedAlias` wrapper. It's a bit risky to fix the order issue between `ResolveAliases` and `ResolveReferences` as it can change the final query schema, we will save it for later. ### Why are the changes needed? fix unstable analyzer behavior with different rule execution orders. ### Does this PR introduce _any_ user-facing change? Yes, some failed queries can run now. The issue for UNPIVOT only affects the error message. ### How was this patch tested? verified by our internal workloads. The repro query is quite complicated to trigger a certain rule execution order so we won't add tests for it. The fix is quite obvious. ### Was this patch authored or co-authored using generative AI tooling? no Closes #45718 from cloud-fan/rule. Authored-by: Wenchen Fan <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 8e20f8e commit e00eace

File tree

3 files changed

+20
-22
lines changed

3 files changed

+20
-22
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,7 @@ object CreateStruct {
374374
// alias name inside CreateNamedStruct.
375375
case (u: UnresolvedAttribute, _) => Seq(Literal(u.nameParts.last), u)
376376
case (u @ UnresolvedExtractValue(_, e: Literal), _) if e.dataType == StringType => Seq(e, u)
377+
case (a: Alias, _) => Seq(Literal(a.name), a)
377378
case (e: NamedExpression, _) if e.resolved => Seq(Literal(e.name), e)
378379
case (e: NamedExpression, _) => Seq(NamePlaceholder, e)
379380
case (g @ GetStructField(_, _, Some(name)), _) => Seq(Literal(name), g)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1346,7 +1346,7 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging {
13461346
* Create an Unpivot column.
13471347
*/
13481348
override def visitUnpivotColumn(ctx: UnpivotColumnContext): NamedExpression = withOrigin(ctx) {
1349-
UnresolvedAlias(UnresolvedAttribute(visitMultipartIdentifier(ctx.multipartIdentifier)))
1349+
UnresolvedAttribute(visitMultipartIdentifier(ctx.multipartIdentifier))
13501350
}
13511351

13521352
/**

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/UnpivotParserSuite.scala

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class UnpivotParserSuite extends AnalysisTest {
3939
"SELECT * FROM t UNPIVOT (val FOR col in (a, b))",
4040
Unpivot(
4141
None,
42-
Some(Seq(Seq(UnresolvedAlias($"a")), Seq(UnresolvedAlias($"b")))),
42+
Some(Seq(Seq($"a"), Seq($"b"))),
4343
None,
4444
"col",
4545
Seq("val"),
@@ -59,7 +59,7 @@ class UnpivotParserSuite extends AnalysisTest {
5959
sql,
6060
Unpivot(
6161
None,
62-
Some(Seq(Seq(UnresolvedAlias($"a")), Seq(UnresolvedAlias($"b")))),
62+
Some(Seq(Seq($"a"), Seq($"b"))),
6363
Some(Seq(Some("A"), None)),
6464
"col",
6565
Seq("val"),
@@ -76,7 +76,7 @@ class UnpivotParserSuite extends AnalysisTest {
7676
"SELECT * FROM t UNPIVOT ((val1, val2) FOR col in ((a, b), (c, d)))",
7777
Unpivot(
7878
None,
79-
Some(Seq(Seq($"a", $"b").map(UnresolvedAlias(_)), Seq($"c", $"d").map(UnresolvedAlias(_)))),
79+
Some(Seq(Seq($"a", $"b"), Seq($"c", $"d"))),
8080
None,
8181
"col",
8282
Seq("val1", "val2"),
@@ -96,10 +96,7 @@ class UnpivotParserSuite extends AnalysisTest {
9696
sql,
9797
Unpivot(
9898
None,
99-
Some(Seq(
100-
Seq($"a", $"b").map(UnresolvedAlias(_)),
101-
Seq($"c", $"d").map(UnresolvedAlias(_))
102-
)),
99+
Some(Seq(Seq($"a", $"b"), Seq($"c", $"d"))),
103100
Some(Seq(Some("first"), None)),
104101
"col",
105102
Seq("val1", "val2"),
@@ -132,7 +129,7 @@ class UnpivotParserSuite extends AnalysisTest {
132129
sql,
133130
Unpivot(
134131
None,
135-
Some(Seq(Seq(UnresolvedAlias($"a")), Seq(UnresolvedAlias($"b")))),
132+
Some(Seq(Seq($"a"), Seq($"b"))),
136133
None,
137134
"col",
138135
Seq("val"),
@@ -169,7 +166,7 @@ class UnpivotParserSuite extends AnalysisTest {
169166
"SELECT * FROM t UNPIVOT EXCLUDE NULLS (val FOR col in (a, b))",
170167
Unpivot(
171168
None,
172-
Some(Seq(Seq(UnresolvedAlias($"a")), Seq(UnresolvedAlias($"b")))),
169+
Some(Seq(Seq($"a"), Seq($"b"))),
173170
None,
174171
"col",
175172
Seq("val"),
@@ -184,7 +181,7 @@ class UnpivotParserSuite extends AnalysisTest {
184181
"SELECT * FROM t UNPIVOT INCLUDE NULLS (val FOR col in (a, b))",
185182
Unpivot(
186183
None,
187-
Some(Seq(Seq(UnresolvedAlias($"a")), Seq(UnresolvedAlias($"b")))),
184+
Some(Seq(Seq($"a"), Seq($"b"))),
188185
None,
189186
"col",
190187
Seq("val"),
@@ -199,7 +196,7 @@ class UnpivotParserSuite extends AnalysisTest {
199196
"SELECT * FROM t1 UNPIVOT (val FOR col in (a, b)) JOIN t2",
200197
Unpivot(
201198
None,
202-
Some(Seq(Seq(UnresolvedAlias($"a")), Seq(UnresolvedAlias($"b")))),
199+
Some(Seq(Seq($"a"), Seq($"b"))),
203200
None,
204201
"col",
205202
Seq("val"),
@@ -211,7 +208,7 @@ class UnpivotParserSuite extends AnalysisTest {
211208
"SELECT * FROM t1 JOIN t2 UNPIVOT (val FOR col in (a, b))",
212209
Unpivot(
213210
None,
214-
Some(Seq(Seq(UnresolvedAlias($"a")), Seq(UnresolvedAlias($"b")))),
211+
Some(Seq(Seq($"a"), Seq($"b"))),
215212
None,
216213
"col",
217214
Seq("val"),
@@ -224,7 +221,7 @@ class UnpivotParserSuite extends AnalysisTest {
224221
table("t1").join(
225222
Unpivot(
226223
None,
227-
Some(Seq(Seq(UnresolvedAlias($"a")), Seq(UnresolvedAlias($"b")))),
224+
Some(Seq(Seq($"a"), Seq($"b"))),
228225
None,
229226
"col",
230227
Seq("val"),
@@ -239,7 +236,7 @@ class UnpivotParserSuite extends AnalysisTest {
239236
"SELECT * FROM t1 UNPIVOT (val FOR col in (a, b)), t2",
240237
Unpivot(
241238
None,
242-
Some(Seq(Seq(UnresolvedAlias($"a")), Seq(UnresolvedAlias($"b")))),
239+
Some(Seq(Seq($"a"), Seq($"b"))),
243240
None,
244241
"col",
245242
Seq("val"),
@@ -251,7 +248,7 @@ class UnpivotParserSuite extends AnalysisTest {
251248
"SELECT * FROM t1, t2 UNPIVOT (val FOR col in (a, b))",
252249
Unpivot(
253250
None,
254-
Some(Seq(Seq(UnresolvedAlias($"a")), Seq(UnresolvedAlias($"b")))),
251+
Some(Seq(Seq($"a"), Seq($"b"))),
255252
None,
256253
"col",
257254
Seq("val"),
@@ -267,7 +264,7 @@ class UnpivotParserSuite extends AnalysisTest {
267264
table("t1").join(
268265
Unpivot(
269266
None,
270-
Some(Seq(Seq(UnresolvedAlias($"a")), Seq(UnresolvedAlias($"b")))),
267+
Some(Seq(Seq($"a"), Seq($"b"))),
271268
None,
272269
"col",
273270
Seq("val"),
@@ -282,7 +279,7 @@ class UnpivotParserSuite extends AnalysisTest {
282279
table("t1").join(
283280
Unpivot(
284281
None,
285-
Some(Seq(Seq(UnresolvedAlias($"a")), Seq(UnresolvedAlias($"b")))),
282+
Some(Seq(Seq($"a"), Seq($"b"))),
286283
None,
287284
"col",
288285
Seq("val"),
@@ -296,7 +293,7 @@ class UnpivotParserSuite extends AnalysisTest {
296293
"SELECT * FROM t1, t2 JOIN t3 UNPIVOT (val FOR col in (a, b))",
297294
Unpivot(
298295
None,
299-
Some(Seq(Seq(UnresolvedAlias($"a")), Seq(UnresolvedAlias($"b")))),
296+
Some(Seq(Seq($"a"), Seq($"b"))),
300297
None,
301298
"col",
302299
Seq("val"),
@@ -311,7 +308,7 @@ class UnpivotParserSuite extends AnalysisTest {
311308
table("t1").join(
312309
Unpivot(
313310
None,
314-
Some(Seq(Seq(UnresolvedAlias($"a")), Seq(UnresolvedAlias($"b")))),
311+
Some(Seq(Seq($"a"), Seq($"b"))),
315312
None,
316313
"col",
317314
Seq("val"),
@@ -326,13 +323,13 @@ class UnpivotParserSuite extends AnalysisTest {
326323
"SELECT * FROM t1 UNPIVOT (val FOR col in (a, b)) UNPIVOT (val FOR col in (a, b))",
327324
Unpivot(
328325
None,
329-
Some(Seq(Seq(UnresolvedAlias($"a")), Seq(UnresolvedAlias($"b")))),
326+
Some(Seq(Seq($"a"), Seq($"b"))),
330327
None,
331328
"col",
332329
Seq("val"),
333330
Unpivot(
334331
None,
335-
Some(Seq(Seq(UnresolvedAlias($"a")), Seq(UnresolvedAlias($"b")))),
332+
Some(Seq(Seq($"a"), Seq($"b"))),
336333
None,
337334
"col",
338335
Seq("val"),

0 commit comments

Comments
 (0)