You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
[SPARK-16621][SQL] Generate stable SQLs in SQLBuilder
## What changes were proposed in this pull request?
Currently, the generated SQL has unstable IDs for generated attributes.
Stable generated SQL makes the queries easier to understand and to test.
This PR provides stable SQL generation through the following changes:
- Provide unique ids for generated subqueries, `gen_subquery_xxx`.
- Provide unique and stable ids for generated attributes, `gen_attr_xxx`.
**Before**
```scala
scala> new org.apache.spark.sql.catalyst.SQLBuilder(sql("select 1")).toSQL
res0: String = SELECT `gen_attr_0` AS `1` FROM (SELECT 1 AS `gen_attr_0`) AS gen_subquery_0
scala> new org.apache.spark.sql.catalyst.SQLBuilder(sql("select 1")).toSQL
res1: String = SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4`) AS gen_subquery_0
```
**After**
```scala
scala> new org.apache.spark.sql.catalyst.SQLBuilder(sql("select 1")).toSQL
res1: String = SELECT `gen_attr_0` AS `1` FROM (SELECT 1 AS `gen_attr_0`) AS gen_subquery_0
scala> new org.apache.spark.sql.catalyst.SQLBuilder(sql("select 1")).toSQL
res2: String = SELECT `gen_attr_0` AS `1` FROM (SELECT 1 AS `gen_attr_0`) AS gen_subquery_0
```
## How was this patch tested?
Pass the existing Jenkins tests.
Author: Dongjoon Hyun <[email protected]>
Closes #14257 from dongjoon-hyun/SPARK-16621.
SELECT`gen_attr`AS`count(value)`FROM (SELECT`gen_attr`FROM (SELECTcount(`gen_attr`) AS`gen_attr`, max(`gen_attr`) AS`gen_attr`FROM (SELECT`key`AS`gen_attr`, `value`AS`gen_attr`FROM`default`.`parquet_t1`) AS gen_subquery_0 GROUP BY`gen_attr`HAVING (`gen_attr`> CAST(0ASBIGINT))) AS gen_subquery_1) AS gen_subquery_2
4
+
SELECT`gen_attr_0`AS`count(value)`FROM (SELECT`gen_attr_0`FROM (SELECTcount(`gen_attr_3`) AS`gen_attr_0`, max(`gen_attr_2`) AS`gen_attr_1`FROM (SELECT`key`AS`gen_attr_2`, `value`AS`gen_attr_3`FROM`default`.`parquet_t1`) AS gen_subquery_0 GROUP BY`gen_attr_2`HAVING (`gen_attr_1`> CAST(0ASBIGINT))) AS gen_subquery_1) AS gen_subquery_2
SELECT`gen_attr`AS`count(value)`FROM (SELECT`gen_attr`FROM (SELECTcount(`gen_attr`) AS`gen_attr`, max(`gen_attr`) AS`gen_attr`FROM (SELECT`key`AS`gen_attr`, `value`AS`gen_attr`FROM`default`.`parquet_t1`) AS gen_subquery_0 GROUP BY`gen_attr`ORDER BY`gen_attr`ASC) AS gen_subquery_1) AS gen_subquery_2
4
+
SELECT`gen_attr_0`AS`count(value)`FROM (SELECT`gen_attr_0`FROM (SELECTcount(`gen_attr_3`) AS`gen_attr_0`, max(`gen_attr_2`) AS`gen_attr_1`FROM (SELECT`key`AS`gen_attr_2`, `value`AS`gen_attr_3`FROM`default`.`parquet_t1`) AS gen_subquery_0 GROUP BY`gen_attr_2`ORDER BY`gen_attr_1`ASC) AS gen_subquery_1) AS gen_subquery_2
SELECT`gen_attr`AS`count(value)`FROM (SELECT`gen_attr`FROM (SELECTcount(`gen_attr`) AS`gen_attr`, `gen_attr`AS`gen_attr`, max(`gen_attr`) AS`gen_attr`FROM (SELECT`key`AS`gen_attr`, `value`AS`gen_attr`FROM`default`.`parquet_t1`) AS gen_subquery_0 GROUP BY`gen_attr`ORDER BY`gen_attr`ASC, `gen_attr`ASC) AS gen_subquery_1) AS gen_subquery_2
4
+
SELECT`gen_attr_0`AS`count(value)`FROM (SELECT`gen_attr_0`FROM (SELECTcount(`gen_attr_4`) AS`gen_attr_0`, `gen_attr_3`AS`gen_attr_1`, max(`gen_attr_3`) AS`gen_attr_2`FROM (SELECT`key`AS`gen_attr_3`, `value`AS`gen_attr_4`FROM`default`.`parquet_t1`) AS gen_subquery_0 GROUP BY`gen_attr_3`ORDER BY`gen_attr_1`ASC, `gen_attr_2`ASC) AS gen_subquery_1) AS gen_subquery_2
SELECT`gen_attr`AS`(max(c) + count(a) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))`FROM (SELECT (`gen_attr`+`gen_attr`) AS`gen_attr`FROM (SELECT gen_subquery_1.`gen_attr`, gen_subquery_1.`gen_attr`, count(`gen_attr`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS`gen_attr`FROM (SELECTmax(`gen_attr`) AS`gen_attr`, `gen_attr`FROM (SELECT`a`AS`gen_attr`, `b`AS`gen_attr`, `c`AS`gen_attr`, `d`AS`gen_attr`FROM`default`.`parquet_t2`) AS gen_subquery_0 GROUP BY`gen_attr`, `gen_attr`) AS gen_subquery_1) AS gen_subquery_2) AS gen_subquery_3
4
+
SELECT`gen_attr_0`AS`(max(c) + count(a) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))`FROM (SELECT (`gen_attr_1`+`gen_attr_2`) AS`gen_attr_0`FROM (SELECT gen_subquery_1.`gen_attr_1`, gen_subquery_1.`gen_attr_3`, count(`gen_attr_3`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS`gen_attr_2`FROM (SELECTmax(`gen_attr_5`) AS`gen_attr_1`, `gen_attr_3`FROM (SELECT`a`AS`gen_attr_3`, `b`AS`gen_attr_4`, `c`AS`gen_attr_5`, `d`AS`gen_attr_6`FROM`default`.`parquet_t2`) AS gen_subquery_0 GROUP BY`gen_attr_3`, `gen_attr_4`) AS gen_subquery_1) AS gen_subquery_2) AS gen_subquery_3
SELECT`gen_attr`AS`CASE WHEN ((id % CAST(2 AS BIGINT)) > CAST(0 AS BIGINT)) THEN 0 WHEN ((id % CAST(2 AS BIGINT)) = CAST(0 AS BIGINT)) THEN 1 END`FROM (SELECT CASE WHEN ((`gen_attr` % CAST(2ASBIGINT)) > CAST(0ASBIGINT)) THEN 0 WHEN ((`gen_attr` % CAST(2ASBIGINT)) = CAST(0ASBIGINT)) THEN 1 END AS`gen_attr`FROM (SELECT`id`AS`gen_attr`FROM`default`.`parquet_t0`) AS gen_subquery_0) AS gen_subquery_1
4
+
SELECT`gen_attr_0`AS`CASE WHEN ((id % CAST(2 AS BIGINT)) > CAST(0 AS BIGINT)) THEN 0 WHEN ((id % CAST(2 AS BIGINT)) = CAST(0 AS BIGINT)) THEN 1 END`FROM (SELECT CASE WHEN ((`gen_attr_1` % CAST(2ASBIGINT)) > CAST(0ASBIGINT)) THEN 0 WHEN ((`gen_attr_1` % CAST(2ASBIGINT)) = CAST(0ASBIGINT)) THEN 1 END AS`gen_attr_0`FROM (SELECT`id`AS`gen_attr_1`FROM`default`.`parquet_t0`) AS gen_subquery_0) AS gen_subquery_1
SELECT `gen_attr` AS `CASE WHEN ((id % CAST(2 AS BIGINT)) > CAST(0 AS BIGINT)) THEN 0 ELSE 1 END` FROM (SELECT CASE WHEN ((`gen_attr` % CAST(2 AS BIGINT)) > CAST(0 AS BIGINT)) THEN 0 ELSE 1 END AS `gen_attr` FROM (SELECT `id` AS `gen_attr` FROM `default`.`parquet_t0`) AS gen_subquery_0) AS gen_subquery_1
4
+
SELECT `gen_attr_0` AS `CASE WHEN ((id % CAST(2 AS BIGINT)) > CAST(0 AS BIGINT)) THEN 0 ELSE 1 END` FROM (SELECT CASE WHEN ((`gen_attr_1` % CAST(2 AS BIGINT)) > CAST(0 AS BIGINT)) THEN 0 ELSE 1 END AS `gen_attr_0` FROM (SELECT `id` AS `gen_attr_1` FROM `default`.`parquet_t0`) AS gen_subquery_0) AS gen_subquery_1
SELECT`gen_attr`AS`CASE WHEN (id = CAST(0 AS BIGINT)) THEN foo WHEN (id = CAST(1 AS BIGINT)) THEN bar END`FROM (SELECT CASE WHEN (`gen_attr`= CAST(0ASBIGINT)) THEN "foo" WHEN (`gen_attr`= CAST(1ASBIGINT)) THEN "bar" END AS`gen_attr`FROM (SELECT`id`AS`gen_attr`FROM`default`.`parquet_t0`) AS gen_subquery_0) AS gen_subquery_1
4
+
SELECT`gen_attr_0`AS`CASE WHEN (id = CAST(0 AS BIGINT)) THEN foo WHEN (id = CAST(1 AS BIGINT)) THEN bar END`FROM (SELECT CASE WHEN (`gen_attr_1`= CAST(0ASBIGINT)) THEN "foo" WHEN (`gen_attr_1`= CAST(1ASBIGINT)) THEN "bar" END AS`gen_attr_0`FROM (SELECT`id`AS`gen_attr_1`FROM`default`.`parquet_t0`) AS gen_subquery_0) AS gen_subquery_1
SELECT`gen_attr`AS`CASE WHEN (id = CAST(0 AS BIGINT)) THEN foo WHEN (id = CAST(1 AS BIGINT)) THEN bar ELSE baz END`FROM (SELECT CASE WHEN (`gen_attr`= CAST(0ASBIGINT)) THEN "foo" WHEN (`gen_attr`= CAST(1ASBIGINT)) THEN "bar" ELSE "baz" END AS`gen_attr`FROM (SELECT`id`AS`gen_attr`FROM`default`.`parquet_t0`) AS gen_subquery_0) AS gen_subquery_1
4
+
SELECT`gen_attr_0`AS`CASE WHEN (id = CAST(0 AS BIGINT)) THEN foo WHEN (id = CAST(1 AS BIGINT)) THEN bar ELSE baz END`FROM (SELECT CASE WHEN (`gen_attr_1`= CAST(0ASBIGINT)) THEN "foo" WHEN (`gen_attr_1`= CAST(1ASBIGINT)) THEN "bar" ELSE "baz" END AS`gen_attr_0`FROM (SELECT`id`AS`gen_attr_1`FROM`default`.`parquet_t0`) AS gen_subquery_0) AS gen_subquery_1
SELECT `gen_attr` AS `id` FROM (SELECT `gen_attr` FROM (SELECT `id` AS `gen_attr` FROM `default`.`parquet_t0`) AS gen_subquery_0 CLUSTER BY `gen_attr`) AS parquet_t0
4
+
SELECT `gen_attr_0` AS `id` FROM (SELECT `gen_attr_0` FROM (SELECT `id` AS `gen_attr_0` FROM `default`.`parquet_t0`) AS gen_subquery_0 CLUSTER BY `gen_attr_0`) AS parquet_t0
0 commit comments