Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,285 @@
-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
--
-- Window Functions Testing
-- https://github.com/postgres/postgres/blob/REL_12_STABLE/src/test/regress/sql/window.sql#L320-562

-- Fixture table referenced by the salary / enroll_date window queries in this file.
-- NOTE(review): no INSERT into this table is visible in this file -- confirm
-- whether the fixture is intentionally left empty.
CREATE TABLE empsalary (
    depname     string,
    empno       int,
    salary      int,
    enroll_date date
) USING parquet;

-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp
-- CREATE TEMP VIEW v_window AS
-- SELECT i, min(i) over (order by i range between '1 day' preceding and '10 days' following) as min_i
-- FROM range(now(), now()+'100 days', '1 hour') i;

-- RANGE offset PRECEDING/FOLLOWING tests

-- Sum over the rows whose `four` lies 1 to 2 before the current row's value
-- in ascending order.
SELECT
    sum(unique1) over (
        order by four
        range between 2 preceding and 1 preceding
    ),
    unique1,
    four
FROM tenk1
WHERE unique1 < 10;

-- Same frame, but with `four` ordered descending.
SELECT
    sum(unique1) over (
        order by four desc
        range between 2 preceding and 1 preceding
    ),
    unique1,
    four
FROM tenk1
WHERE unique1 < 10;

-- [SPARK-28428] Spark `exclude` always expecting `()`
-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 preceding exclude no others),
-- unique1, four
-- FROM tenk1 WHERE unique1 < 10;

-- [SPARK-28428] Spark `exclude` always expecting `()`
-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 preceding exclude current row),
-- unique1, four
-- FROM tenk1 WHERE unique1 < 10;

-- [SPARK-28428] Spark `exclude` always expecting `()`
-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 preceding exclude group),
-- unique1, four
-- FROM tenk1 WHERE unique1 < 10;

-- [SPARK-28428] Spark `exclude` always expecting `()`
-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 preceding exclude ties),
-- unique1, four
-- FROM tenk1 WHERE unique1 < 10;

-- [SPARK-28428] Spark `exclude` always expecting `()`
-- SELECT sum(unique1) over (order by four range between 2 preceding and 6 following exclude ties),
-- unique1, four
-- FROM tenk1 WHERE unique1 < 10;

-- [SPARK-28428] Spark `exclude` always expecting `()`
-- SELECT sum(unique1) over (order by four range between 2 preceding and 6 following exclude group),
-- unique1, four
-- FROM tenk1 WHERE unique1 < 10;

-- RANGE frame evaluated independently inside each `four` partition,
-- ordered by `unique1`, spanning 5 below to 6 above the current value.
SELECT
    sum(unique1) over (
        partition by four
        order by unique1
        range between 5 preceding and 6 following
    ),
    unique1,
    four
FROM tenk1
WHERE unique1 < 10;

-- [SPARK-28428] Spark `exclude` always expecting `()`
-- SELECT sum(unique1) over (partition by four order by unique1 range between 5 preceding and 6 following
-- exclude current row),unique1, four
-- FROM tenk1 WHERE unique1 < 10;

-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp
-- select sum(salary) over (order by enroll_date range between '1 year' preceding and '1 year' following),
-- salary, enroll_date from empsalary;

-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp
-- select sum(salary) over (order by enroll_date desc range between '1 year' preceding and '1 year' following),
-- salary, enroll_date from empsalary;

-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp
-- select sum(salary) over (order by enroll_date desc range between '1 year' following and '1 year' following),
-- salary, enroll_date from empsalary;

-- [SPARK-28428] Spark `exclude` always expecting `()`
-- select sum(salary) over (order by enroll_date range between '1 year' preceding and '1 year' following
-- exclude current row), salary, enroll_date from empsalary;

-- [SPARK-28428] Spark `exclude` always expecting `()`
-- select sum(salary) over (order by enroll_date range between '1 year' preceding and '1 year' following
-- exclude group), salary, enroll_date from empsalary;

-- [SPARK-28428] Spark `exclude` always expecting `()`
-- select sum(salary) over (order by enroll_date range between '1 year' preceding and '1 year' following
-- exclude ties), salary, enroll_date from empsalary;

-- [SPARK-27951] ANSI SQL: NTH_VALUE function
-- select first_value(salary) over(order by salary range between 1000 preceding and 1000 following),
-- lead(salary) over(order by salary range between 1000 preceding and 1000 following),
-- nth_value(salary, 1) over(order by salary range between 1000 preceding and 1000 following),
-- salary from empsalary;

-- Review note (Member): Nit: It would be better if the SQL keywords are all in UPPERCASE.
-- Review reply (Contributor Author): I can make them uppercase, but, are you sure? I think
-- that in PostgreSQL they are not always uppercase, such that the diff will be totally different.
--
-- last() and lag() are both evaluated over the same RANGE frame of
-- 1000 below to 1000 above the current salary.
select last(salary) over(order by salary range between 1000 preceding and 1000 following),
lag(salary) over(order by salary range between 1000 preceding and 1000 following),
salary from empsalary;

-- [SPARK-27951] ANSI SQL: NTH_VALUE function
-- select first_value(salary) over(order by salary range between 1000 following and 3000 following
-- exclude current row),
-- lead(salary) over(order by salary range between 1000 following and 3000 following exclude ties),
-- nth_value(salary, 1) over(order by salary range between 1000 following and 3000 following
-- exclude ties),
-- salary from empsalary;

-- [SPARK-28428] Spark `exclude` always expecting `()`
-- select last(salary) over(order by salary range between 1000 following and 3000 following
-- exclude group),
-- lag(salary) over(order by salary range between 1000 following and 3000 following exclude group),
-- salary from empsalary;

-- [SPARK-28428] Spark `exclude` always expecting `()`
-- select first(salary) over(order by enroll_date range between unbounded preceding and '1 year' following
-- exclude ties),
-- last(salary) over(order by enroll_date range between unbounded preceding and '1 year' following),
-- salary, enroll_date from empsalary;

-- [SPARK-28428] Spark `exclude` always expecting `()`
-- select first(salary) over(order by enroll_date range between unbounded preceding and '1 year' following
-- exclude ties),
-- last(salary) over(order by enroll_date range between unbounded preceding and '1 year' following
-- exclude ties),
-- salary, enroll_date from empsalary;

-- [SPARK-28428] Spark `exclude` always expecting `()`
-- select first(salary) over(order by enroll_date range between unbounded preceding and '1 year' following
-- exclude group),
-- last(salary) over(order by enroll_date range between unbounded preceding and '1 year' following
-- exclude group),
-- salary, enroll_date from empsalary;

-- [SPARK-28428] Spark `exclude` always expecting `()`
-- select first(salary) over(order by enroll_date range between unbounded preceding and '1 year' following
-- exclude current row),
-- last(salary) over(order by enroll_date range between unbounded preceding and '1 year' following
-- exclude current row),
-- salary, enroll_date from empsalary;

-- RANGE offset PRECEDING/FOLLOWING with null values
-- Each query below uses the same input: ids 1..5 (with y = id) plus two rows
-- whose id is NULL (y = 42 and y = 43). Only the sort direction and the NULL
-- placement of the window ordering change from one query to the next.

-- ascending, NULLs first
select
    ss.id,
    ss.y,
    first(ss.y) over w,
    last(ss.y) over w
from (
    select x.id, x.id as y from range(1,6) as x
    union all select null, 42
    union all select null, 43
) as ss
window w as (
    order by ss.id asc nulls first
    range between 2 preceding and 2 following
);

-- ascending, NULLs last
select
    ss.id,
    ss.y,
    first(ss.y) over w,
    last(ss.y) over w
from (
    select x.id, x.id as y from range(1,6) as x
    union all select null, 42
    union all select null, 43
) as ss
window w as (
    order by ss.id asc nulls last
    range between 2 preceding and 2 following
);

-- descending, NULLs first
select
    ss.id,
    ss.y,
    first(ss.y) over w,
    last(ss.y) over w
from (
    select x.id, x.id as y from range(1,6) as x
    union all select null, 42
    union all select null, 43
) as ss
window w as (
    order by ss.id desc nulls first
    range between 2 preceding and 2 following
);

-- descending, NULLs last
select
    ss.id,
    ss.y,
    first(ss.y) over w,
    last(ss.y) over w
from (
    select x.id, x.id as y from range(1,6) as x
    union all select null, 42
    union all select null, 43
) as ss
window w as (
    order by ss.id desc nulls last
    range between 2 preceding and 2 following
);

-- Check overflow behavior for various integer sizes

-- ids just below 32767 with a large FOLLOWING offset
select
    x.id,
    last(x.id) over (
        order by x.id
        range between current row and 2147450884 following
    )
from range(32764, 32767) as x;

-- id just above -32767, descending, with a large FOLLOWING offset
select
    x.id,
    last(x.id) over (
        order by x.id desc
        range between current row and 2147450885 following
    )
from range(-32766, -32765) as x;

-- ids just below 2147483647
select
    x.id,
    last(x.id) over (
        order by x.id
        range between current row and 4 following
    )
from range(2147483644, 2147483647) as x;

-- id just above -2147483647, descending
select
    x.id,
    last(x.id) over (
        order by x.id desc
        range between current row and 5 following
    )
from range(-2147483646, -2147483645) as x;

-- ids just below 9223372036854775807
select
    x.id,
    last(x.id) over (
        order by x.id
        range between current row and 4 following
    )
from range(9223372036854775804, 9223372036854775807) as x;

-- id just above -9223372036854775807, descending
select
    x.id,
    last(x.id) over (
        order by x.id desc
        range between current row and 5 following
    )
from range(-9223372036854775806, -9223372036854775805) as x;

-- Test in_range for other numeric datatypes

-- Fixture for the in_range tests on non-integer ordering columns.
-- Column types follow the column names: f_float4 is a 4-byte float, f_float8
-- an 8-byte double, and f_numeric an exact decimal. The decimal keeps two
-- fractional digits so that the fixture values 1.1 and 1.12 remain distinct
-- instead of being truncated to the same integer.
create table numerics (
    id int,
    f_float4 float,
    f_float8 double,
    f_numeric decimal(10, 2)
) using parquet;

-- Seven finite sample rows; each row writes the same literal to all three
-- numeric columns so the per-column queries can be compared side by side.
insert into numerics values
    (1, -3, -3, -3),
    (2, -1, -1, -1),
    (3, 0, 0, 0),
    (4, 1.1, 1.1, 1.1),
    (5, 1.12, 1.12, 1.12),
    (6, 2, 2, 2),
    (7, 100, 100, 100);
-- Disabled rows with non-finite values (kept commented out):
-- (8, 'infinity', 'infinity', '1000'),
-- (9, 'NaN', 'NaN', 'NaN'),
-- (0, '-infinity', '-infinity', '-1000'); -- numeric type lacks infinities

-- f_float4: integer offsets on both sides
select
    id,
    f_float4,
    first(id) over w,
    last(id) over w
from numerics
window w as (
    order by f_float4
    range between 1 preceding and 1 following
);

-- f_float4: integer offset below, fractional offset above
select
    id,
    f_float4,
    first(id) over w,
    last(id) over w
from numerics
window w as (
    order by f_float4
    range between 1 preceding and 1.1 following
);

-- f_float4: 'inf' string offsets on both sides
select
    id,
    f_float4,
    first(id) over w,
    last(id) over w
from numerics
window w as (
    order by f_float4
    range between 'inf' preceding and 'inf' following
);

-- f_float4: 'NaN' as the upper offset
select
    id,
    f_float4,
    first(id) over w,
    last(id) over w
from numerics
window w as (
    order by f_float4
    range between 1.1 preceding and 'NaN' following
); -- error, NaN disallowed

-- f_float8: integer offsets on both sides
select
    id,
    f_float8,
    first(id) over w,
    last(id) over w
from numerics
window w as (
    order by f_float8
    range between 1 preceding and 1 following
);

-- f_float8: integer offset below, fractional offset above
select
    id,
    f_float8,
    first(id) over w,
    last(id) over w
from numerics
window w as (
    order by f_float8
    range between 1 preceding and 1.1 following
);

-- f_float8: 'inf' string offsets on both sides
select
    id,
    f_float8,
    first(id) over w,
    last(id) over w
from numerics
window w as (
    order by f_float8
    range between 'inf' preceding and 'inf' following
);

-- f_float8: 'NaN' as the upper offset
select
    id,
    f_float8,
    first(id) over w,
    last(id) over w
from numerics
window w as (
    order by f_float8
    range between 1.1 preceding and 'NaN' following
); -- error, NaN disallowed

-- f_numeric: integer offsets on both sides
select
    id,
    f_numeric,
    first(id) over w,
    last(id) over w
from numerics
window w as (
    order by f_numeric
    range between 1 preceding and 1 following
);

-- f_numeric: integer offset below, fractional offset above
select
    id,
    f_numeric,
    first(id) over w,
    last(id) over w
from numerics
window w as (
    order by f_numeric
    range between 1 preceding and 1.1 following
);

-- f_numeric ordering column combined with an explicitly double-typed upper
-- offset. The explicit cast is what distinguishes this case from the plain
-- `1.1 following` query on the same column.
select id, f_numeric, first(id) over w, last(id) over w
from numerics
window w as (order by f_numeric range between
1 preceding and cast(1.1 as double) following); -- currently unsupported

-- f_numeric: 'NaN' as the upper offset
select
    id,
    f_numeric,
    first(id) over w,
    last(id) over w
from numerics
window w as (
    order by f_numeric
    range between 1.1 preceding and 'NaN' following
); -- error, NaN disallowed

-- Tear down the two fixture tables created by this file.
DROP TABLE empsalary;
DROP TABLE numerics;
Loading