Skip to content

Commit d5c33c6

Browse files
committed
[SPARK-48307][SQL][FOLLOWUP] Allow outer references in un-referenced CTE relations
### What changes were proposed in this pull request?

This is a followup of #46617. Subquery expressions have a number of correlation checks that need to match certain plan shapes. We broke this by leaving `WithCTE` in the plan for un-referenced CTE relations. This PR fixes the issue by skipping CTE plan nodes in correlated subquery expression checks.

### Why are the changes needed?

Bug fix.

### Does this PR introduce _any_ user-facing change?

No, the bug is not released yet.

### How was this patch tested?

New tests.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #46869 from cloud-fan/check.

Authored-by: Wenchen Fan <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
1 parent 490a4b3 commit d5c33c6

File tree

9 files changed

+181
-0
lines changed

9 files changed

+181
-0
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1371,6 +1371,13 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB
13711371
aggregated,
13721372
canContainOuter && SQLConf.get.getConf(SQLConf.DECORRELATE_OFFSET_ENABLED))
13731373

1374+
// We always inline CTE relations before analysis check, and only un-referenced CTE
1375+
// relations will be kept in the plan. Here we should simply skip them and check the
1376+
// children, as un-referenced CTE relations won't be executed anyway and don't need to
1377+
// be restricted by the current subquery correlation limitations.
1378+
case _: WithCTE | _: CTERelationDef =>
1379+
plan.children.foreach(p => checkPlan(p, aggregated, canContainOuter))
1380+
13741381
// Category 4: Any other operators not in the above 3 categories
13751382
// cannot be on a correlation path, that is they are allowed only
13761383
// under a correlation point but they and their descendant operators

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -911,6 +911,10 @@ case class WithCTE(plan: LogicalPlan, cteDefs: Seq[CTERelationDef]) extends Logi
911911
def withNewPlan(newPlan: LogicalPlan): WithCTE = {
912912
withNewChildren(children.init :+ newPlan).asInstanceOf[WithCTE]
913913
}
914+
915+
override def maxRows: Option[Long] = plan.maxRows
916+
917+
override def maxRowsPerPartition: Option[Long] = plan.maxRowsPerPartition
914918
}
915919

916920
/**

sql/core/src/test/resources/sql-tests/analyzer-results/cte-legacy.sql.out

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,30 @@ Project [scalar-subquery#x [] AS scalarsubquery()#x]
4343
+- OneRowRelation
4444

4545

46+
-- !query
47+
SELECT (
48+
WITH unreferenced AS (SELECT id)
49+
SELECT 1
50+
) FROM range(1)
51+
-- !query analysis
52+
Project [scalar-subquery#x [] AS scalarsubquery()#x]
53+
: +- Project [1 AS 1#x]
54+
: +- OneRowRelation
55+
+- Range (0, 1, step=1)
56+
57+
58+
-- !query
59+
SELECT (
60+
WITH unreferenced AS (SELECT 1)
61+
SELECT id
62+
) FROM range(1)
63+
-- !query analysis
64+
Project [scalar-subquery#x [id#xL] AS scalarsubquery(id)#xL]
65+
: +- Project [outer(id#xL)]
66+
: +- OneRowRelation
67+
+- Range (0, 1, step=1)
68+
69+
4670
-- !query
4771
SELECT * FROM
4872
(

sql/core/src/test/resources/sql-tests/analyzer-results/cte-nested.sql.out

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,40 @@ Project [scalar-subquery#x [] AS scalarsubquery()#x]
5858
+- OneRowRelation
5959

6060

61+
-- !query
62+
SELECT (
63+
WITH unreferenced AS (SELECT id)
64+
SELECT 1
65+
) FROM range(1)
66+
-- !query analysis
67+
Project [scalar-subquery#x [id#xL] AS scalarsubquery(id)#x]
68+
: +- WithCTE
69+
: :- CTERelationDef xxxx, false
70+
: : +- SubqueryAlias unreferenced
71+
: : +- Project [outer(id#xL)]
72+
: : +- OneRowRelation
73+
: +- Project [1 AS 1#x]
74+
: +- OneRowRelation
75+
+- Range (0, 1, step=1)
76+
77+
78+
-- !query
79+
SELECT (
80+
WITH unreferenced AS (SELECT 1)
81+
SELECT id
82+
) FROM range(1)
83+
-- !query analysis
84+
Project [scalar-subquery#x [id#xL] AS scalarsubquery(id)#xL]
85+
: +- WithCTE
86+
: :- CTERelationDef xxxx, false
87+
: : +- SubqueryAlias unreferenced
88+
: : +- Project [1 AS 1#x]
89+
: : +- OneRowRelation
90+
: +- Project [outer(id#xL)]
91+
: +- OneRowRelation
92+
+- Range (0, 1, step=1)
93+
94+
6195
-- !query
6296
SELECT * FROM
6397
(

sql/core/src/test/resources/sql-tests/analyzer-results/cte-nonlegacy.sql.out

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,40 @@ Project [scalar-subquery#x [] AS scalarsubquery()#x]
5858
+- OneRowRelation
5959

6060

61+
-- !query
62+
SELECT (
63+
WITH unreferenced AS (SELECT id)
64+
SELECT 1
65+
) FROM range(1)
66+
-- !query analysis
67+
Project [scalar-subquery#x [id#xL] AS scalarsubquery(id)#x]
68+
: +- WithCTE
69+
: :- CTERelationDef xxxx, false
70+
: : +- SubqueryAlias unreferenced
71+
: : +- Project [outer(id#xL)]
72+
: : +- OneRowRelation
73+
: +- Project [1 AS 1#x]
74+
: +- OneRowRelation
75+
+- Range (0, 1, step=1)
76+
77+
78+
-- !query
79+
SELECT (
80+
WITH unreferenced AS (SELECT 1)
81+
SELECT id
82+
) FROM range(1)
83+
-- !query analysis
84+
Project [scalar-subquery#x [id#xL] AS scalarsubquery(id)#xL]
85+
: +- WithCTE
86+
: :- CTERelationDef xxxx, false
87+
: : +- SubqueryAlias unreferenced
88+
: : +- Project [1 AS 1#x]
89+
: : +- OneRowRelation
90+
: +- Project [outer(id#xL)]
91+
: +- OneRowRelation
92+
+- Range (0, 1, step=1)
93+
94+
6195
-- !query
6296
SELECT * FROM
6397
(

sql/core/src/test/resources/sql-tests/inputs/cte-nested.sql

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,18 @@ SELECT (
1717
SELECT * FROM t
1818
);
1919

20+
-- un-referenced CTE in subquery expression: outer reference in CTE relation
21+
SELECT (
22+
WITH unreferenced AS (SELECT id)
23+
SELECT 1
24+
) FROM range(1);
25+
26+
-- un-referenced CTE in subquery expression: outer reference in CTE main query
27+
SELECT (
28+
WITH unreferenced AS (SELECT 1)
29+
SELECT id
30+
) FROM range(1);
31+
2032
-- Make sure CTE in subquery is scoped to that subquery rather than global
2133
-- the 2nd half of the union should fail because the cte is scoped to the first half
2234
SELECT * FROM

sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,28 @@ struct<scalarsubquery():int>
3333
1
3434

3535

36+
-- !query
37+
SELECT (
38+
WITH unreferenced AS (SELECT id)
39+
SELECT 1
40+
) FROM range(1)
41+
-- !query schema
42+
struct<scalarsubquery():int>
43+
-- !query output
44+
1
45+
46+
47+
-- !query
48+
SELECT (
49+
WITH unreferenced AS (SELECT 1)
50+
SELECT id
51+
) FROM range(1)
52+
-- !query schema
53+
struct<scalarsubquery(id):bigint>
54+
-- !query output
55+
0
56+
57+
3658
-- !query
3759
SELECT * FROM
3860
(

sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,28 @@ struct<scalarsubquery():int>
3333
1
3434

3535

36+
-- !query
37+
SELECT (
38+
WITH unreferenced AS (SELECT id)
39+
SELECT 1
40+
) FROM range(1)
41+
-- !query schema
42+
struct<scalarsubquery(id):int>
43+
-- !query output
44+
1
45+
46+
47+
-- !query
48+
SELECT (
49+
WITH unreferenced AS (SELECT 1)
50+
SELECT id
51+
) FROM range(1)
52+
-- !query schema
53+
struct<scalarsubquery(id):bigint>
54+
-- !query output
55+
0
56+
57+
3658
-- !query
3759
SELECT * FROM
3860
(

sql/core/src/test/resources/sql-tests/results/cte-nonlegacy.sql.out

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,28 @@ struct<scalarsubquery():int>
3333
1
3434

3535

36+
-- !query
37+
SELECT (
38+
WITH unreferenced AS (SELECT id)
39+
SELECT 1
40+
) FROM range(1)
41+
-- !query schema
42+
struct<scalarsubquery(id):int>
43+
-- !query output
44+
1
45+
46+
47+
-- !query
48+
SELECT (
49+
WITH unreferenced AS (SELECT 1)
50+
SELECT id
51+
) FROM range(1)
52+
-- !query schema
53+
struct<scalarsubquery(id):bigint>
54+
-- !query output
55+
0
56+
57+
3658
-- !query
3759
SELECT * FROM
3860
(

0 commit comments

Comments
 (0)