Skip to content

Commit 34c624e

Browse files
dtenedor HyukjinKwon
authored and committed
[SPARK-42874][SQL] Enable new golden file test framework for analysis for all input files
### What changes were proposed in this pull request? This PR enables the new golden file test framework for analysis for all input files. Background: * In #40449 we added the ability to exercise the analyzer on the SQL queries in existing golden files in the `sql/core/src/test/resources/sql-tests/inputs` directory, writing separate output test files in the new `sql/core/src/test/resources/sql-tests/analyzer-results` directory in addition to the original output directory for full end-to-end query execution results. * That PR also added an allowlist of input files to include in this new dual-run mode. * In this PR, we remove that allowlist and exercise the new dual-run mode for all the input files. We also extend the analyzer testing to support separate test cases in ANSI-mode, TimestampNTZ, and UDFs. ### Why are the changes needed? This improves test coverage and helps prevent accidental regressions in the future as we edit the code. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? This PR adds testing only. Closes #40496 from dtenedor/add-all-test-files. Authored-by: Daniel Tenedorio <[email protected]> Signed-off-by: Hyukjin Kwon <[email protected]>
1 parent c75b689 commit 34c624e

File tree

251 files changed

+154290
-51
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

251 files changed

+154290
-51
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,16 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]]
238238
transformAllExpressionsWithPruning(AlwaysProcess.fn, UnknownRuleId)(rule)
239239
}
240240

241+
/**
242+
* A variant of [[transformAllExpressions]] which considers plan nodes inside subqueries as well.
243+
*/
244+
def transformAllExpressionsWithSubqueries(
245+
rule: PartialFunction[Expression, Expression]): this.type = {
246+
transformWithSubqueries {
247+
case q => q.transformExpressions(rule).asInstanceOf[PlanType]
248+
}.asInstanceOf[this.type]
249+
}
250+
241251
/**
242252
* Returns the result of running [[transformExpressionsWithPruning]] on this node
243253
* and all its children. Note that this method skips expressions inside subqueries.

sql/core/src/test/resources/sql-tests/analyzer-results/ansi/array.sql.out

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -129,13 +129,7 @@ select
129129
array_contains(timestamp_array, timestamp '2016-11-15 20:54:00.000'), array_contains(timestamp_array, timestamp '2016-01-01 20:54:00.000')
130130
from primitive_arrays
131131
-- !query analysis
132-
Project [array_contains(boolean_array#x, true) AS array_contains(boolean_array, true)#x, array_contains(boolean_array#x, false) AS array_contains(boolean_array, false)#x, array_contains(tinyint_array#x, 2) AS array_contains(tinyint_array, 2)#x, array_contains(tinyint_array#x, 0) AS array_contains(tinyint_array, 0)#x, array_contains(smallint_array#x, 2) AS array_contains(smallint_array, 2)#x, array_contains(smallint_array#x, 0) AS array_contains(smallint_array, 0)#x, array_contains(int_array#x, 2) AS array_contains(int_array, 2)#x, array_contains(int_array#x, 0) AS array_contains(int_array, 0)#x, array_contains(bigint_array#x, 2) AS array_contains(bigint_array, 2)#x, array_contains(bigint_array#x, 0) AS array_contains(bigint_array, 0)#x, array_contains(decimal_array#x, 9223372036854775809) AS array_contains(decimal_array, 9223372036854775809)#x, array_contains(decimal_array#x, cast(1 as decimal(19,0))) AS array_contains(decimal_array, 1)#x, array_contains(double_array#x, 2.0) AS array_contains(double_array, 2.0)#x, array_contains(double_array#x, 0.0) AS array_contains(double_array, 0.0)#x, array_contains(float_array#x, cast(2.0 as float)) AS array_contains(float_array, 2.0)#x, array_contains(float_array#x, cast(0.0 as float)) AS array_contains(float_array, 0.0)#x, array_contains(date_array#x, 2016-03-14) AS array_contains(date_array, DATE '2016-03-14')#x, array_contains(date_array#x, 2016-01-01) AS array_contains(date_array, DATE '2016-01-01')#x, array_contains(timestamp_array#x, 2016-11-15 20:54:00) AS array_contains(timestamp_array, TIMESTAMP '2016-11-15 20:54:00')#x, array_contains(timestamp_array#x, 2016-01-01 20:54:00) AS array_contains(timestamp_array, TIMESTAMP '2016-01-01 20:54:00')#x]
133-
+- SubqueryAlias primitive_arrays
134-
+- View (`primitive_arrays`, [boolean_array#x,tinyint_array#x,smallint_array#x,int_array#x,bigint_array#x,decimal_array#x,double_array#x,float_array#x,date_array#x,timestamp_array#x])
135-
+- Project [cast(boolean_array#x as array<boolean>) AS boolean_array#x, cast(tinyint_array#x as array<tinyint>) AS tinyint_array#x, cast(smallint_array#x as array<smallint>) AS smallint_array#x, cast(int_array#x as array<int>) AS int_array#x, cast(bigint_array#x as array<bigint>) AS bigint_array#x, cast(decimal_array#x as array<decimal(19,0)>) AS decimal_array#x, cast(double_array#x as array<double>) AS double_array#x, cast(float_array#x as array<float>) AS float_array#x, cast(date_array#x as array<date>) AS date_array#x, cast(timestamp_array#x as array<timestamp>) AS timestamp_array#x]
136-
+- Project [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x]
137-
+- SubqueryAlias primitive_arrays
138-
+- LocalRelation [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x]
132+
[Analyzer test output redacted due to nondeterminism]
139133

140134

141135
-- !query
@@ -235,7 +229,7 @@ select
235229
size(timestamp_array)
236230
from primitive_arrays
237231
-- !query analysis
238-
Project [size(boolean_array#x, true) AS size(boolean_array)#x, size(tinyint_array#x, true) AS size(tinyint_array)#x, size(smallint_array#x, true) AS size(smallint_array)#x, size(int_array#x, true) AS size(int_array)#x, size(bigint_array#x, true) AS size(bigint_array)#x, size(decimal_array#x, true) AS size(decimal_array)#x, size(double_array#x, true) AS size(double_array)#x, size(float_array#x, true) AS size(float_array)#x, size(date_array#x, true) AS size(date_array)#x, size(timestamp_array#x, true) AS size(timestamp_array)#x]
232+
Project [size(boolean_array#x, false) AS size(boolean_array)#x, size(tinyint_array#x, false) AS size(tinyint_array)#x, size(smallint_array#x, false) AS size(smallint_array)#x, size(int_array#x, false) AS size(int_array)#x, size(bigint_array#x, false) AS size(bigint_array)#x, size(decimal_array#x, false) AS size(decimal_array)#x, size(double_array#x, false) AS size(double_array)#x, size(float_array#x, false) AS size(float_array)#x, size(date_array#x, false) AS size(date_array)#x, size(timestamp_array#x, false) AS size(timestamp_array)#x]
239233
+- SubqueryAlias primitive_arrays
240234
+- View (`primitive_arrays`, [boolean_array#x,tinyint_array#x,smallint_array#x,int_array#x,bigint_array#x,decimal_array#x,double_array#x,float_array#x,date_array#x,timestamp_array#x])
241235
+- Project [cast(boolean_array#x as array<boolean>) AS boolean_array#x, cast(tinyint_array#x as array<tinyint>) AS tinyint_array#x, cast(smallint_array#x as array<smallint>) AS smallint_array#x, cast(int_array#x as array<int>) AS int_array#x, cast(bigint_array#x as array<bigint>) AS bigint_array#x, cast(decimal_array#x as array<decimal(19,0)>) AS decimal_array#x, cast(double_array#x as array<double>) AS double_array#x, cast(float_array#x as array<float>) AS float_array#x, cast(date_array#x as array<date>) AS date_array#x, cast(timestamp_array#x as array<timestamp>) AS timestamp_array#x]
@@ -247,70 +241,70 @@ Project [size(boolean_array#x, true) AS size(boolean_array)#x, size(tinyint_arra
247241
-- !query
248242
select element_at(array(1, 2, 3), 5)
249243
-- !query analysis
250-
Project [element_at(array(1, 2, 3), 5, None, false) AS element_at(array(1, 2, 3), 5)#x]
244+
Project [element_at(array(1, 2, 3), 5, None, true) AS element_at(array(1, 2, 3), 5)#x]
251245
+- OneRowRelation
252246

253247

254248
-- !query
255249
select element_at(array(1, 2, 3), -5)
256250
-- !query analysis
257-
Project [element_at(array(1, 2, 3), -5, None, false) AS element_at(array(1, 2, 3), -5)#x]
251+
Project [element_at(array(1, 2, 3), -5, None, true) AS element_at(array(1, 2, 3), -5)#x]
258252
+- OneRowRelation
259253

260254

261255
-- !query
262256
select element_at(array(1, 2, 3), 0)
263257
-- !query analysis
264-
Project [element_at(array(1, 2, 3), 0, None, false) AS element_at(array(1, 2, 3), 0)#x]
258+
Project [element_at(array(1, 2, 3), 0, None, true) AS element_at(array(1, 2, 3), 0)#x]
265259
+- OneRowRelation
266260

267261

268262
-- !query
269263
select elt(4, '123', '456')
270264
-- !query analysis
271-
Project [elt(4, 123, 456, false) AS elt(4, 123, 456)#x]
265+
Project [elt(4, 123, 456, true) AS elt(4, 123, 456)#x]
272266
+- OneRowRelation
273267

274268

275269
-- !query
276270
select elt(0, '123', '456')
277271
-- !query analysis
278-
Project [elt(0, 123, 456, false) AS elt(0, 123, 456)#x]
272+
Project [elt(0, 123, 456, true) AS elt(0, 123, 456)#x]
279273
+- OneRowRelation
280274

281275

282276
-- !query
283277
select elt(-1, '123', '456')
284278
-- !query analysis
285-
Project [elt(-1, 123, 456, false) AS elt(-1, 123, 456)#x]
279+
Project [elt(-1, 123, 456, true) AS elt(-1, 123, 456)#x]
286280
+- OneRowRelation
287281

288282

289283
-- !query
290284
select elt(null, '123', '456')
291285
-- !query analysis
292-
Project [elt(cast(null as int), 123, 456, false) AS elt(NULL, 123, 456)#x]
286+
Project [elt(cast(null as int), 123, 456, true) AS elt(NULL, 123, 456)#x]
293287
+- OneRowRelation
294288

295289

296290
-- !query
297291
select elt(null, '123', null)
298292
-- !query analysis
299-
Project [elt(cast(null as int), 123, cast(null as string), false) AS elt(NULL, 123, NULL)#x]
293+
Project [elt(cast(null as int), 123, cast(null as string), true) AS elt(NULL, 123, NULL)#x]
300294
+- OneRowRelation
301295

302296

303297
-- !query
304298
select elt(1, '123', null)
305299
-- !query analysis
306-
Project [elt(1, 123, cast(null as string), false) AS elt(1, 123, NULL)#x]
300+
Project [elt(1, 123, cast(null as string), true) AS elt(1, 123, NULL)#x]
307301
+- OneRowRelation
308302

309303

310304
-- !query
311305
select elt(2, '123', null)
312306
-- !query analysis
313-
Project [elt(2, 123, cast(null as string), false) AS elt(2, 123, NULL)#x]
307+
Project [elt(2, 123, cast(null as string), true) AS elt(2, 123, NULL)#x]
314308
+- OneRowRelation
315309

316310

@@ -383,21 +377,21 @@ org.apache.spark.sql.AnalysisException
383377
-- !query
384378
select size(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10)))
385379
-- !query analysis
386-
Project [size(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10), 0, 1, 2), true) AS size(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10)))#x]
380+
Project [size(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10), 0, 1, 2), false) AS size(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10)))#x]
387381
+- OneRowRelation
388382

389383

390384
-- !query
391385
select size(arrays_zip(array(), array(1, 2, 3), array(4), array(7, 8, 9, 10)))
392386
-- !query analysis
393-
Project [size(arrays_zip(array(), array(1, 2, 3), array(4), array(7, 8, 9, 10), 0, 1, 2, 3), true) AS size(arrays_zip(array(), array(1, 2, 3), array(4), array(7, 8, 9, 10)))#x]
387+
Project [size(arrays_zip(array(), array(1, 2, 3), array(4), array(7, 8, 9, 10), 0, 1, 2, 3), false) AS size(arrays_zip(array(), array(1, 2, 3), array(4), array(7, 8, 9, 10)))#x]
394388
+- OneRowRelation
395389

396390

397391
-- !query
398392
select size(arrays_zip(array(1, 2, 3), array(4), null, array(7, 8, 9, 10)))
399393
-- !query analysis
400-
Project [size(arrays_zip(array(1, 2, 3), array(4), null, array(7, 8, 9, 10), 0, 1, 2, 3), true) AS size(arrays_zip(array(1, 2, 3), array(4), NULL, array(7, 8, 9, 10)))#x]
394+
Project [size(arrays_zip(array(1, 2, 3), array(4), null, array(7, 8, 9, 10), 0, 1, 2, 3), false) AS size(arrays_zip(array(1, 2, 3), array(4), NULL, array(7, 8, 9, 10)))#x]
401395
+- OneRowRelation
402396

403397

0 commit comments

Comments
 (0)