Skip to content

Commit 399aa01

Browse files
jiangxb1987cloud-fan
authored andcommitted
[SPARK-21366][SQL][TEST] Add sql test for window functions
## What changes were proposed in this pull request? Add sql test for window functions, also remove uncecessary test cases in `WindowQuerySuite`. ## How was this patch tested? Added `window.sql` and the corresponding output file. Author: Xingbo Jiang <[email protected]> Closes #18591 from jiangxb1987/window. (cherry picked from commit 66d2168) Signed-off-by: Wenchen Fan <[email protected]>
1 parent edcd9fb commit 399aa01

File tree

3 files changed

+273
-27
lines changed

3 files changed

+273
-27
lines changed
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
-- Test data.
2+
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
3+
(null, "a"), (1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"), (null, null), (3, null)
4+
AS testData(val, cate);
5+
6+
-- RowsBetween
7+
SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val ROWS CURRENT ROW) FROM testData
8+
ORDER BY cate, val;
9+
SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val
10+
ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) FROM testData ORDER BY cate, val;
11+
12+
-- RangeBetween
13+
SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val RANGE 1 PRECEDING) FROM testData
14+
ORDER BY cate, val;
15+
SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val
16+
RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val;
17+
18+
-- RangeBetween with reverse OrderBy
19+
SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val DESC
20+
RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val;
21+
22+
-- Window functions
23+
SELECT val, cate,
24+
max(val) OVER w AS max,
25+
min(val) OVER w AS min,
26+
min(val) OVER w AS min,
27+
count(val) OVER w AS count,
28+
sum(val) OVER w AS sum,
29+
avg(val) OVER w AS avg,
30+
stddev(val) OVER w AS stddev,
31+
first_value(val) OVER w AS first_value,
32+
first_value(val, true) OVER w AS first_value_ignore_null,
33+
first_value(val, false) OVER w AS first_value_contain_null,
34+
last_value(val) OVER w AS last_value,
35+
last_value(val, true) OVER w AS last_value_ignore_null,
36+
last_value(val, false) OVER w AS last_value_contain_null,
37+
rank() OVER w AS rank,
38+
dense_rank() OVER w AS dense_rank,
39+
cume_dist() OVER w AS cume_dist,
40+
percent_rank() OVER w AS percent_rank,
41+
ntile(2) OVER w AS ntile,
42+
row_number() OVER w AS row_number,
43+
var_pop(val) OVER w AS var_pop,
44+
var_samp(val) OVER w AS var_samp,
45+
approx_count_distinct(val) OVER w AS approx_count_distinct
46+
FROM testData
47+
WINDOW w AS (PARTITION BY cate ORDER BY val)
48+
ORDER BY cate, val;
49+
50+
-- Null inputs
51+
SELECT val, cate, avg(null) OVER(PARTITION BY cate ORDER BY val) FROM testData ORDER BY cate, val;
52+
53+
-- OrderBy not specified
54+
SELECT val, cate, row_number() OVER(PARTITION BY cate) FROM testData ORDER BY cate, val;
55+
56+
-- Over clause is empty
57+
SELECT val, cate, sum(val) OVER(), avg(val) OVER() FROM testData ORDER BY cate, val;
58+
59+
-- first_value()/last_value() over ()
60+
SELECT val, cate,
61+
first_value(false) OVER w AS first_value,
62+
first_value(true, true) OVER w AS first_value_ignore_null,
63+
first_value(false, false) OVER w AS first_value_contain_null,
64+
last_value(false) OVER w AS last_value,
65+
last_value(true, true) OVER w AS last_value_ignore_null,
66+
last_value(false, false) OVER w AS last_value_contain_null
67+
FROM testData
68+
WINDOW w AS ()
69+
ORDER BY cate, val;
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
-- Automatically generated by SQLQueryTestSuite
2+
-- Number of queries: 11
3+
4+
5+
-- !query 0
6+
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
7+
(null, "a"), (1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"), (null, null), (3, null)
8+
AS testData(val, cate)
9+
-- !query 0 schema
10+
struct<>
11+
-- !query 0 output
12+
13+
14+
15+
-- !query 1
16+
SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val ROWS CURRENT ROW) FROM testData
17+
ORDER BY cate, val
18+
-- !query 1 schema
19+
struct<val:int,cate:string,count(val) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST ROWS BETWEEN CURRENT ROW AND CURRENT ROW):bigint>
20+
-- !query 1 output
21+
NULL NULL 0
22+
3 NULL 1
23+
NULL a 0
24+
1 a 1
25+
1 a 1
26+
2 a 1
27+
1 b 1
28+
2 b 1
29+
3 b 1
30+
31+
32+
-- !query 2
33+
SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val
34+
ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) FROM testData ORDER BY cate, val
35+
-- !query 2 schema
36+
struct<val:int,cate:string,sum(val) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING):bigint>
37+
-- !query 2 output
38+
NULL NULL 3
39+
3 NULL 3
40+
NULL a 1
41+
1 a 2
42+
1 a 4
43+
2 a 4
44+
1 b 3
45+
2 b 6
46+
3 b 6
47+
48+
49+
-- !query 3
50+
SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val RANGE 1 PRECEDING) FROM testData
51+
ORDER BY cate, val
52+
-- !query 3 schema
53+
struct<val:int,cate:string,count(val) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN 1 PRECEDING AND CURRENT ROW):bigint>
54+
-- !query 3 output
55+
NULL NULL 0
56+
3 NULL 1
57+
NULL a 0
58+
1 a 2
59+
1 a 2
60+
2 a 3
61+
1 b 1
62+
2 b 2
63+
3 b 2
64+
65+
66+
-- !query 4
67+
SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val
68+
RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val
69+
-- !query 4 schema
70+
struct<val:int,cate:string,sum(val) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING):bigint>
71+
-- !query 4 output
72+
NULL NULL NULL
73+
3 NULL 3
74+
NULL a NULL
75+
1 a 4
76+
1 a 4
77+
2 a 2
78+
1 b 3
79+
2 b 5
80+
3 b 3
81+
82+
83+
-- !query 5
84+
SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val DESC
85+
RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val
86+
-- !query 5 schema
87+
struct<val:int,cate:string,sum(val) OVER (PARTITION BY cate ORDER BY val DESC NULLS LAST RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING):bigint>
88+
-- !query 5 output
89+
NULL NULL NULL
90+
3 NULL 3
91+
NULL a NULL
92+
1 a 2
93+
1 a 2
94+
2 a 4
95+
1 b 1
96+
2 b 3
97+
3 b 5
98+
99+
100+
-- !query 6
101+
SELECT val, cate,
102+
max(val) OVER w AS max,
103+
min(val) OVER w AS min,
104+
min(val) OVER w AS min,
105+
count(val) OVER w AS count,
106+
sum(val) OVER w AS sum,
107+
avg(val) OVER w AS avg,
108+
stddev(val) OVER w AS stddev,
109+
first_value(val) OVER w AS first_value,
110+
first_value(val, true) OVER w AS first_value_ignore_null,
111+
first_value(val, false) OVER w AS first_value_contain_null,
112+
last_value(val) OVER w AS last_value,
113+
last_value(val, true) OVER w AS last_value_ignore_null,
114+
last_value(val, false) OVER w AS last_value_contain_null,
115+
rank() OVER w AS rank,
116+
dense_rank() OVER w AS dense_rank,
117+
cume_dist() OVER w AS cume_dist,
118+
percent_rank() OVER w AS percent_rank,
119+
ntile(2) OVER w AS ntile,
120+
row_number() OVER w AS row_number,
121+
var_pop(val) OVER w AS var_pop,
122+
var_samp(val) OVER w AS var_samp,
123+
approx_count_distinct(val) OVER w AS approx_count_distinct
124+
FROM testData
125+
WINDOW w AS (PARTITION BY cate ORDER BY val)
126+
ORDER BY cate, val
127+
-- !query 6 schema
128+
struct<val:int,cate:string,max:int,min:int,min:int,count:bigint,sum:bigint,avg:double,stddev:double,first_value:int,first_value_ignore_null:int,first_value_contain_null:int,last_value:int,last_value_ignore_null:int,last_value_contain_null:int,rank:int,dense_rank:int,cume_dist:double,percent_rank:double,ntile:int,row_number:int,var_pop:double,var_samp:double,approx_count_distinct:bigint>
129+
-- !query 6 output
130+
NULL NULL NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.5 0.0 1 1 NULL NULL 0
131+
3 NULL 3 3 3 1 3 3.0 NaN NULL 3 NULL 3 3 3 2 2 1.0 1.0 2 2 0.0 NaN 1
132+
NULL a NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.25 0.0 1 1 NULL NULL 0
133+
1 a 1 1 1 2 2 1.0 0.0 NULL 1 NULL 1 1 1 2 2 0.75 0.3333333333333333 1 2 0.0 0.0 1
134+
1 a 1 1 1 2 2 1.0 0.0 NULL 1 NULL 1 1 1 2 2 0.75 0.3333333333333333 2 3 0.0 0.0 1
135+
2 a 2 1 1 3 4 1.3333333333333333 0.5773502691896258 NULL 1 NULL 2 2 2 4 3 1.0 1.0 2 4 0.22222222222222224 0.33333333333333337 2
136+
1 b 1 1 1 1 1 1.0 NaN 1 1 1 1 1 1 1 1 0.3333333333333333 0.0 1 1 0.0 NaN 1
137+
2 b 2 1 1 2 3 1.5 0.7071067811865476 1 1 1 2 2 2 2 2 0.6666666666666666 0.5 1 2 0.25 0.5 2
138+
3 b 3 1 1 3 6 2.0 1.0 1 1 1 3 3 3 3 3 1.0 1.0 2 3 0.6666666666666666 1.0 3
139+
140+
141+
-- !query 7
142+
SELECT val, cate, avg(null) OVER(PARTITION BY cate ORDER BY val) FROM testData ORDER BY cate, val
143+
-- !query 7 schema
144+
struct<val:int,cate:string,avg(CAST(NULL AS DOUBLE)) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double>
145+
-- !query 7 output
146+
NULL NULL NULL
147+
3 NULL NULL
148+
NULL a NULL
149+
1 a NULL
150+
1 a NULL
151+
2 a NULL
152+
1 b NULL
153+
2 b NULL
154+
3 b NULL
155+
156+
157+
-- !query 8
158+
SELECT val, cate, row_number() OVER(PARTITION BY cate) FROM testData ORDER BY cate, val
159+
-- !query 8 schema
160+
struct<>
161+
-- !query 8 output
162+
org.apache.spark.sql.AnalysisException
163+
Window function row_number() requires window to be ordered, please add ORDER BY clause. For example SELECT row_number()(value_expr) OVER (PARTITION BY window_partition ORDER BY window_ordering) from table;
164+
165+
166+
-- !query 9
167+
SELECT val, cate, sum(val) OVER(), avg(val) OVER() FROM testData ORDER BY cate, val
168+
-- !query 9 schema
169+
struct<val:int,cate:string,sum(CAST(val AS BIGINT)) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):bigint,avg(CAST(val AS BIGINT)) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double>
170+
-- !query 9 output
171+
NULL NULL 13 1.8571428571428572
172+
3 NULL 13 1.8571428571428572
173+
NULL a 13 1.8571428571428572
174+
1 a 13 1.8571428571428572
175+
1 a 13 1.8571428571428572
176+
2 a 13 1.8571428571428572
177+
1 b 13 1.8571428571428572
178+
2 b 13 1.8571428571428572
179+
3 b 13 1.8571428571428572
180+
181+
182+
-- !query 10
183+
SELECT val, cate,
184+
first_value(false) OVER w AS first_value,
185+
first_value(true, true) OVER w AS first_value_ignore_null,
186+
first_value(false, false) OVER w AS first_value_contain_null,
187+
last_value(false) OVER w AS last_value,
188+
last_value(true, true) OVER w AS last_value_ignore_null,
189+
last_value(false, false) OVER w AS last_value_contain_null
190+
FROM testData
191+
WINDOW w AS ()
192+
ORDER BY cate, val
193+
-- !query 10 schema
194+
struct<val:int,cate:string,first_value:boolean,first_value_ignore_null:boolean,first_value_contain_null:boolean,last_value:boolean,last_value_ignore_null:boolean,last_value_contain_null:boolean>
195+
-- !query 10 output
196+
NULL NULL false true false false true false
197+
3 NULL false true false false true false
198+
NULL a false true false false true false
199+
1 a false true false false true false
200+
1 a false true false false true false
201+
2 a false true false false true false
202+
1 b false true false false true false
203+
2 b false true false false true false
204+
3 b false true false false true false

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -232,31 +232,4 @@ class WindowQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleto
232232
Row("Manufacturer#5", "almond azure blanched chiffon midnight", 23, 315.9225931564038, 315.9225931564038, 46, 99807.08486666666, -0.9978877469246935, -5664.856666666666)))
233233
// scalastyle:on
234234
}
235-
236-
test("null arguments") {
237-
checkAnswer(sql("""
238-
|select p_mfgr, p_name, p_size,
239-
|sum(null) over(distribute by p_mfgr sort by p_name) as sum,
240-
|avg(null) over(distribute by p_mfgr sort by p_name) as avg
241-
|from part
242-
""".stripMargin),
243-
sql("""
244-
|select p_mfgr, p_name, p_size,
245-
|null as sum,
246-
|null as avg
247-
|from part
248-
""".stripMargin))
249-
}
250-
251-
test("SPARK-16646: LAST_VALUE(FALSE) OVER ()") {
252-
checkAnswer(sql("SELECT LAST_VALUE(FALSE) OVER ()"), Row(false))
253-
checkAnswer(sql("SELECT LAST_VALUE(FALSE, FALSE) OVER ()"), Row(false))
254-
checkAnswer(sql("SELECT LAST_VALUE(TRUE, TRUE) OVER ()"), Row(true))
255-
}
256-
257-
test("SPARK-16646: FIRST_VALUE(FALSE) OVER ()") {
258-
checkAnswer(sql("SELECT FIRST_VALUE(FALSE) OVER ()"), Row(false))
259-
checkAnswer(sql("SELECT FIRST_VALUE(FALSE, FALSE) OVER ()"), Row(false))
260-
checkAnswer(sql("SELECT FIRST_VALUE(TRUE, TRUE) OVER ()"), Row(true))
261-
}
262235
}

0 commit comments

Comments
 (0)