Skip to content

Commit 7d43b4c

Browse files
nsycacmonkey
authored andcommitted
[SPARK-18873][SQL][TEST] New test cases for scalar subquery (part 2 of 2) - scalar subquery in predicate context
## What changes were proposed in this pull request? This PR adds new test cases for scalar subquery in predicate context ## How was this patch tested? The test result is compared with the result run from another SQL engine (in this case is IBM DB2). If the result are equivalent, we assume the result is correct. Author: Nattavut Sutyanyong <[email protected]> Closes apache#16798 from nsyca/18873-2.
1 parent b06a4fc commit 7d43b4c

File tree

4 files changed

+701
-66
lines changed

4 files changed

+701
-66
lines changed

sql/core/src/test/resources/sql-tests/inputs/scalar-subquery.sql

Lines changed: 0 additions & 20 deletions
This file was deleted.
Lines changed: 271 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,271 @@
1+
-- A test suite for scalar subquery in predicate context
2+
3+
CREATE OR REPLACE TEMPORARY VIEW p AS VALUES (1, 1) AS T(pk, pv);
4+
CREATE OR REPLACE TEMPORARY VIEW c AS VALUES (1, 1) AS T(ck, cv);
5+
6+
-- SPARK-18814.1: Simplified version of TPCDS-Q32
7+
SELECT pk, cv
8+
FROM p, c
9+
WHERE p.pk = c.ck
10+
AND c.cv = (SELECT avg(c1.cv)
11+
FROM c c1
12+
WHERE c1.ck = p.pk);
13+
14+
-- SPARK-18814.2: Adding stack of aggregates
15+
SELECT pk, cv
16+
FROM p, c
17+
WHERE p.pk = c.ck
18+
AND c.cv = (SELECT max(avg)
19+
FROM (SELECT c1.cv, avg(c1.cv) avg
20+
FROM c c1
21+
WHERE c1.ck = p.pk
22+
GROUP BY c1.cv));
23+
24+
create temporary view t1 as select * from values
25+
('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 00:00:00.000', date '2014-04-04'),
26+
('val1b', 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
27+
('val1a', 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'),
28+
('val1a', 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'),
29+
('val1c', 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'),
30+
('val1d', null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null),
31+
('val1d', null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null),
32+
('val1e', 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'),
33+
('val1e', 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'),
34+
('val1d', 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'),
35+
('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'),
36+
('val1e', 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04')
37+
as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i);
38+
39+
create temporary view t2 as select * from values
40+
('val2a', 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'),
41+
('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
42+
('val1b', 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'),
43+
('val1c', 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'),
44+
('val1b', null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null),
45+
('val2e', 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'),
46+
('val1f', 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
47+
('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'),
48+
('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'),
49+
('val1c', 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'),
50+
('val1e', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'),
51+
('val1f', 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'),
52+
('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null)
53+
as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i);
54+
55+
create temporary view t3 as select * from values
56+
('val3a', 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'),
57+
('val3a', 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
58+
('val1b', 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
59+
('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
60+
('val1b', 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'),
61+
('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'),
62+
('val3c', 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'),
63+
('val3c', 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'),
64+
('val1b', null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null),
65+
('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null),
66+
('val3b', 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
67+
('val3b', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04')
68+
as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i);
69+
70+
-- Group 1: scalar subquery in predicate context
71+
-- no correlation
72+
-- TC 01.01
73+
SELECT t1a, t1b
74+
FROM t1
75+
WHERE t1c = (SELECT max(t2c)
76+
FROM t2);
77+
78+
-- TC 01.02
79+
SELECT t1a, t1d, t1f
80+
FROM t1
81+
WHERE t1c = (SELECT max(t2c)
82+
FROM t2)
83+
AND t1b > (SELECT min(t3b)
84+
FROM t3);
85+
86+
-- TC 01.03
87+
SELECT t1a, t1h
88+
FROM t1
89+
WHERE t1c = (SELECT max(t2c)
90+
FROM t2)
91+
OR t1b = (SELECT min(t3b)
92+
FROM t3
93+
WHERE t3b > 10);
94+
95+
-- TC 01.04
96+
-- scalar subquery over outer join
97+
SELECT t1a, t1b, t2d
98+
FROM t1 LEFT JOIN t2
99+
ON t1a = t2a
100+
WHERE t1b = (SELECT min(t3b)
101+
FROM t3);
102+
103+
-- TC 01.05
104+
-- test casting
105+
SELECT t1a, t1b, t1g
106+
FROM t1
107+
WHERE t1c + 5 = (SELECT max(t2e)
108+
FROM t2);
109+
110+
-- TC 01.06
111+
-- test casting
112+
SELECT t1a, t1h
113+
FROM t1
114+
WHERE date(t1h) = (SELECT min(t2i)
115+
FROM t2);
116+
117+
-- TC 01.07
118+
-- same table, expressions in scalar subquery
119+
SELECT t2d, t1a
120+
FROM t1, t2
121+
WHERE t1b = t2b
122+
AND t2c + 1 = (SELECT max(t2c) + 1
123+
FROM t2, t1
124+
WHERE t2b = t1b);
125+
126+
-- TC 01.08
127+
-- same table
128+
SELECT DISTINCT t2a, max_t1g
129+
FROM t2, (SELECT max(t1g) max_t1g, t1a
130+
FROM t1
131+
GROUP BY t1a) t1
132+
WHERE t2a = t1a
133+
AND max_t1g = (SELECT max(t1g)
134+
FROM t1);
135+
136+
-- TC 01.09
137+
-- more than one scalar subquery
138+
SELECT t3b, t3c
139+
FROM t3
140+
WHERE (SELECT max(t3c)
141+
FROM t3
142+
WHERE t3b > 10) >=
143+
(SELECT min(t3b)
144+
FROM t3
145+
WHERE t3c > 0)
146+
AND (t3b is null or t3c is null);
147+
148+
-- Group 2: scalar subquery in predicate context
149+
-- with correlation
150+
-- TC 02.01
151+
SELECT t1a
152+
FROM t1
153+
WHERE t1a < (SELECT max(t2a)
154+
FROM t2
155+
WHERE t2c = t1c
156+
GROUP BY t2c);
157+
158+
-- TC 02.02
159+
SELECT t1a, t1c
160+
FROM t1
161+
WHERE (SELECT max(t2a)
162+
FROM t2
163+
WHERE t2c = t1c
164+
GROUP BY t2c) IS NULL;
165+
166+
-- TC 02.03
167+
SELECT t1a
168+
FROM t1
169+
WHERE t1a = (SELECT max(t2a)
170+
FROM t2
171+
WHERE t2c = t1c
172+
GROUP BY t2c
173+
HAVING count(*) >= 0)
174+
OR t1i > '2014-12-31';
175+
176+
-- TC 02.04
177+
-- t1 on the right of an outer join
178+
-- can be reduced to inner join
179+
SELECT count(t1a)
180+
FROM t1 RIGHT JOIN t2
181+
ON t1d = t2d
182+
WHERE t1a < (SELECT max(t2a)
183+
FROM t2
184+
WHERE t2c = t1c
185+
GROUP BY t2c);
186+
187+
-- TC 02.05
188+
SELECT t1a
189+
FROM t1
190+
WHERE t1b <= (SELECT max(t2b)
191+
FROM t2
192+
WHERE t2c = t1c
193+
GROUP BY t2c)
194+
AND t1b >= (SELECT min(t2b)
195+
FROM t2
196+
WHERE t2c = t1c
197+
GROUP BY t2c);
198+
199+
-- TC 02.06
200+
-- set op
201+
SELECT t1a
202+
FROM t1
203+
WHERE t1a <= (SELECT max(t2a)
204+
FROM t2
205+
WHERE t2c = t1c
206+
GROUP BY t2c)
207+
INTERSECT
208+
SELECT t1a
209+
FROM t1
210+
WHERE t1a >= (SELECT min(t2a)
211+
FROM t2
212+
WHERE t2c = t1c
213+
GROUP BY t2c);
214+
215+
-- TC 02.07.01
216+
-- set op
217+
SELECT t1a
218+
FROM t1
219+
WHERE t1a <= (SELECT max(t2a)
220+
FROM t2
221+
WHERE t2c = t1c
222+
GROUP BY t2c)
223+
UNION ALL
224+
SELECT t1a
225+
FROM t1
226+
WHERE t1a >= (SELECT min(t2a)
227+
FROM t2
228+
WHERE t2c = t1c
229+
GROUP BY t2c);
230+
231+
-- TC 02.07.02
232+
-- set op
233+
SELECT t1a
234+
FROM t1
235+
WHERE t1a <= (SELECT max(t2a)
236+
FROM t2
237+
WHERE t2c = t1c
238+
GROUP BY t2c)
239+
UNION DISTINCT
240+
SELECT t1a
241+
FROM t1
242+
WHERE t1a >= (SELECT min(t2a)
243+
FROM t2
244+
WHERE t2c = t1c
245+
GROUP BY t2c);
246+
247+
-- TC 02.08
248+
-- set op
249+
SELECT t1a
250+
FROM t1
251+
WHERE t1a <= (SELECT max(t2a)
252+
FROM t2
253+
WHERE t2c = t1c
254+
GROUP BY t2c)
255+
MINUS
256+
SELECT t1a
257+
FROM t1
258+
WHERE t1a >= (SELECT min(t2a)
259+
FROM t2
260+
WHERE t2c = t1c
261+
GROUP BY t2c);
262+
263+
-- TC 02.09
264+
-- in HAVING clause
265+
SELECT t1a
266+
FROM t1
267+
GROUP BY t1a, t1c
268+
HAVING max(t1b) <= (SELECT max(t2b)
269+
FROM t2
270+
WHERE t2c = t1c
271+
GROUP BY t2c);

sql/core/src/test/resources/sql-tests/results/scalar-subquery.sql.out

Lines changed: 0 additions & 46 deletions
This file was deleted.

0 commit comments

Comments
 (0)