Skip to content

Commit 296fe49

Browse files
committed
[SPARK-40260][SQL] Use error classes in the compilation errors of GROUP BY a position
### What changes were proposed in this pull request? In the PR, I propose to add the following new error classes: - GROUP_BY_POS_OUT_OF_RANGE - GROUP_BY_POS_REFERS_AGG_EXPR and migrate 2 compilation exceptions related to GROUP BY a position onto them. ### Why are the changes needed? The migration onto error classes makes the errors searchable in docs, and allows editing the text of error messages without modifying the source code. ### Does this PR introduce _any_ user-facing change? Yes, in some sense, because it modifies user-facing error messages. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite" $ build/sbt "core/testOnly *SparkThrowableSuite" ``` Closes #37712 from MaxGekk/group-ref-agg-error. Lead-authored-by: Max Gekk <[email protected]> Co-authored-by: Maxim Gekk <[email protected]> Signed-off-by: Max Gekk <[email protected]>
1 parent cee30cb commit 296fe49

File tree

6 files changed

+107
-17
lines changed

6 files changed

+107
-17
lines changed

core/src/main/resources/error/error-classes.json

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,18 @@
136136
"Grouping sets size cannot be greater than <maxSize>"
137137
]
138138
},
139+
"GROUP_BY_POS_OUT_OF_RANGE" : {
140+
"message" : [
141+
"GROUP BY position <index> is not in select list (valid range is [1, <size>])."
142+
],
143+
"sqlState" : "42000"
144+
},
145+
"GROUP_BY_POS_REFERS_AGG_EXPR" : {
146+
"message" : [
147+
"GROUP BY <index> refers to an expression <aggExpr> that contains an aggregate function. Aggregate functions are not allowed in GROUP BY."
148+
],
149+
"sqlState" : "42000"
150+
},
139151
"INCOMPARABLE_PIVOT_COLUMN" : {
140152
"message" : [
141153
"Invalid pivot column <columnName>. Pivot columns must be comparable."

sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ class AnalysisException protected[sql] (
100100
line = origin.line,
101101
startPosition = origin.startPosition,
102102
errorClass = Some(errorClass),
103-
errorSubClass = Some(errorSubClass),
103+
errorSubClass = Option(errorSubClass),
104104
messageParameters = messageParameters)
105105

106106
def copy(

sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -366,14 +366,15 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
366366
def groupByPositionRefersToAggregateFunctionError(
367367
index: Int,
368368
expr: Expression): Throwable = {
369-
new AnalysisException(s"GROUP BY $index refers to an expression that is or contains " +
370-
"an aggregate function. Aggregate functions are not allowed in GROUP BY, " +
371-
s"but got ${expr.sql}")
369+
new AnalysisException(
370+
errorClass = "GROUP_BY_POS_REFERS_AGG_EXPR",
371+
messageParameters = Array(index.toString, expr.sql))
372372
}
373373

374374
def groupByPositionRangeError(index: Int, size: Int): Throwable = {
375-
new AnalysisException(s"GROUP BY position $index is not in select list " +
376-
s"(valid range is [1, $size])")
375+
new AnalysisException(
376+
errorClass = "GROUP_BY_POS_OUT_OF_RANGE",
377+
messageParameters = Array(index.toString, size.toString))
377378
}
378379

379380
def generatorNotExpectedError(name: FunctionIdentifier, classCanonicalName: String): Throwable = {

sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out

Lines changed: 72 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,14 @@ select a, b from data group by -1
9292
struct<>
9393
-- !query output
9494
org.apache.spark.sql.AnalysisException
95-
GROUP BY position -1 is not in select list (valid range is [1, 2]); line 1 pos 31
95+
{
96+
"errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
97+
"sqlState" : "42000",
98+
"messageParameters" : {
99+
"index" : "-1",
100+
"size" : "2"
101+
}
102+
}
96103

97104

98105
-- !query
@@ -101,7 +108,14 @@ select a, b from data group by 0
101108
struct<>
102109
-- !query output
103110
org.apache.spark.sql.AnalysisException
104-
GROUP BY position 0 is not in select list (valid range is [1, 2]); line 1 pos 31
111+
{
112+
"errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
113+
"sqlState" : "42000",
114+
"messageParameters" : {
115+
"index" : "0",
116+
"size" : "2"
117+
}
118+
}
105119

106120

107121
-- !query
@@ -110,7 +124,14 @@ select a, b from data group by 3
110124
struct<>
111125
-- !query output
112126
org.apache.spark.sql.AnalysisException
113-
GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 31
127+
{
128+
"errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
129+
"sqlState" : "42000",
130+
"messageParameters" : {
131+
"index" : "3",
132+
"size" : "2"
133+
}
134+
}
114135

115136

116137
-- !query
@@ -119,7 +140,14 @@ select a, b, sum(b) from data group by 3
119140
struct<>
120141
-- !query output
121142
org.apache.spark.sql.AnalysisException
122-
GROUP BY 3 refers to an expression that is or contains an aggregate function. Aggregate functions are not allowed in GROUP BY, but got sum(data.b) AS `sum(b)`; line 1 pos 39
143+
{
144+
"errorClass" : "GROUP_BY_POS_REFERS_AGG_EXPR",
145+
"sqlState" : "42000",
146+
"messageParameters" : {
147+
"index" : "3",
148+
"aggExpr" : "sum(data.b) AS `sum(b)`"
149+
}
150+
}
123151

124152

125153
-- !query
@@ -128,7 +156,14 @@ select a, b, sum(b) + 2 from data group by 3
128156
struct<>
129157
-- !query output
130158
org.apache.spark.sql.AnalysisException
131-
GROUP BY 3 refers to an expression that is or contains an aggregate function. Aggregate functions are not allowed in GROUP BY, but got (sum(data.b) + CAST(2 AS BIGINT)) AS `(sum(b) + 2)`; line 1 pos 43
159+
{
160+
"errorClass" : "GROUP_BY_POS_REFERS_AGG_EXPR",
161+
"sqlState" : "42000",
162+
"messageParameters" : {
163+
"index" : "3",
164+
"aggExpr" : "(sum(data.b) + CAST(2 AS BIGINT)) AS `(sum(b) + 2)`"
165+
}
166+
}
132167

133168

134169
-- !query
@@ -349,7 +384,14 @@ select a, b, count(1) from data group by a, -1
349384
struct<>
350385
-- !query output
351386
org.apache.spark.sql.AnalysisException
352-
GROUP BY position -1 is not in select list (valid range is [1, 3]); line 1 pos 44
387+
{
388+
"errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
389+
"sqlState" : "42000",
390+
"messageParameters" : {
391+
"index" : "-1",
392+
"size" : "3"
393+
}
394+
}
353395

354396

355397
-- !query
@@ -358,7 +400,14 @@ select a, b, count(1) from data group by a, 3
358400
struct<>
359401
-- !query output
360402
org.apache.spark.sql.AnalysisException
361-
GROUP BY 3 refers to an expression that is or contains an aggregate function. Aggregate functions are not allowed in GROUP BY, but got count(1) AS `count(1)`; line 1 pos 44
403+
{
404+
"errorClass" : "GROUP_BY_POS_REFERS_AGG_EXPR",
405+
"sqlState" : "42000",
406+
"messageParameters" : {
407+
"index" : "3",
408+
"aggExpr" : "count(1) AS `count(1)`"
409+
}
410+
}
362411

363412

364413
-- !query
@@ -367,7 +416,14 @@ select a, b, count(1) from data group by cube(-1, 2)
367416
struct<>
368417
-- !query output
369418
org.apache.spark.sql.AnalysisException
370-
GROUP BY position -1 is not in select list (valid range is [1, 3]); line 1 pos 46
419+
{
420+
"errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
421+
"sqlState" : "42000",
422+
"messageParameters" : {
423+
"index" : "-1",
424+
"size" : "3"
425+
}
426+
}
371427

372428

373429
-- !query
@@ -376,7 +432,14 @@ select a, b, count(1) from data group by cube(1, 3)
376432
struct<>
377433
-- !query output
378434
org.apache.spark.sql.AnalysisException
379-
GROUP BY 3 refers to an expression that is or contains an aggregate function. Aggregate functions are not allowed in GROUP BY, but got count(1) AS `count(1)`; line 1 pos 49
435+
{
436+
"errorClass" : "GROUP_BY_POS_REFERS_AGG_EXPR",
437+
"sqlState" : "42000",
438+
"messageParameters" : {
439+
"index" : "3",
440+
"aggExpr" : "count(1) AS `count(1)`"
441+
}
442+
}
380443

381444

382445
-- !query

sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,14 @@ SELECT c, count(*) FROM test_missing_target GROUP BY 3
207207
struct<>
208208
-- !query output
209209
org.apache.spark.sql.AnalysisException
210-
GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 53
210+
{
211+
"errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
212+
"sqlState" : "42000",
213+
"messageParameters" : {
214+
"index" : "3",
215+
"size" : "2"
216+
}
217+
}
211218

212219

213220
-- !query

sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,14 @@ SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY 3
210210
struct<>
211211
-- !query output
212212
org.apache.spark.sql.AnalysisException
213-
GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 63
213+
{
214+
"errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
215+
"sqlState" : "42000",
216+
"messageParameters" : {
217+
"index" : "3",
218+
"size" : "2"
219+
}
220+
}
214221

215222

216223
-- !query

0 commit comments

Comments
 (0)