From 2d86681e0ba8bc5ef13f5630fc640d4042ddba7a Mon Sep 17 00:00:00 2001 From: DylanGuedes Date: Mon, 14 Oct 2019 21:29:24 -0300 Subject: [PATCH] adds window_part2.sql Signed-off-by: DylanGuedes --- .../inputs/postgreSQL/window_part2.sql | 285 +++++++++++ .../results/postgreSQL/window_part2.sql.out | 479 ++++++++++++++++++ 2 files changed, 764 insertions(+) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part2.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part2.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part2.sql new file mode 100644 index 000000000000..728e8cab0c3b --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part2.sql @@ -0,0 +1,285 @@ +-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group +-- +-- Window Functions Testing, part 2: RANGE frames with offset PRECEDING/FOLLOWING bounds. NOTE(review): empsalary is created below but this file never inserts into it, and tenk1 is not created here (presumably supplied by the test harness, TODO confirm) +-- https://github.com/postgres/postgres/blob/REL_12_STABLE/src/test/regress/sql/window.sql#L320-562 + +CREATE TABLE empsalary ( + depname string, + empno integer, + salary int, + enroll_date date +) USING parquet; + +-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp +-- CREATE TEMP VIEW v_window AS +-- SELECT i, min(i) over (order by i range between '1 day' preceding and '10 days' following) as min_i +-- FROM range(now(), now()+'100 days', '1 hour') i; + +-- RANGE offset PRECEDING/FOLLOWING tests + +SELECT sum(unique1) over (order by four range between 2 preceding and 1 preceding), +unique1, four +FROM tenk1 WHERE unique1 < 10; + +SELECT sum(unique1) over (order by four desc range between 2 preceding and 1 preceding), +unique1, four +FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 preceding exclude no others), +-- unique1, four +-- FROM tenk1 WHERE 
unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 preceding exclude current row), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 preceding exclude group), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 preceding exclude ties), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (order by four range between 2 preceding and 6 following exclude ties), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (order by four range between 2 preceding and 6 following exclude group), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +SELECT sum(unique1) over (partition by four order by unique1 range between 5 preceding and 6 following), +unique1, four +FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (partition by four order by unique1 range between 5 preceding and 6 following +-- exclude current row),unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp +-- select sum(salary) over (order by enroll_date range between '1 year' preceding and '1 year' following), +-- salary, enroll_date from empsalary; + +-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp +-- select sum(salary) over (order by enroll_date desc range between '1 year' preceding and '1 year' following), +-- salary, enroll_date from empsalary; + +-- [SPARK-28429] SQL Datetime util 
function being casted to double instead of timestamp +-- select sum(salary) over (order by enroll_date desc range between '1 year' following and '1 year' following), +-- salary, enroll_date from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select sum(salary) over (order by enroll_date range between '1 year' preceding and '1 year' following +-- exclude current row), salary, enroll_date from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select sum(salary) over (order by enroll_date range between '1 year' preceding and '1 year' following +-- exclude group), salary, enroll_date from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select sum(salary) over (order by enroll_date range between '1 year' preceding and '1 year' following +-- exclude ties), salary, enroll_date from empsalary; + +-- [SPARK-27951] ANSI SQL: NTH_VALUE function +-- select first_value(salary) over(order by salary range between 1000 preceding and 1000 following), +-- lead(salary) over(order by salary range between 1000 preceding and 1000 following), +-- nth_value(salary, 1) over(order by salary range between 1000 preceding and 1000 following), +-- salary from empsalary; + +select last(salary) over(order by salary range between 1000 preceding and 1000 following), +lag(salary) over(order by salary range between 1000 preceding and 1000 following), +salary from empsalary; + +-- [SPARK-27951] ANSI SQL: NTH_VALUE function (this query also uses frame `exclude` clauses, so SPARK-28428 applies to it as well) +-- select first_value(salary) over(order by salary range between 1000 following and 3000 following +-- exclude current row), +-- lead(salary) over(order by salary range between 1000 following and 3000 following exclude ties), +-- nth_value(salary, 1) over(order by salary range between 1000 following and 3000 following +-- exclude ties), +-- salary from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select last(salary) over(order by salary range between 1000 following and 3000 
following +-- exclude group), +-- lag(salary) over(order by salary range between 1000 following and 3000 following exclude group), +-- salary from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select first(salary) over(order by enroll_date range between unbounded preceding and '1 year' following +-- exclude ties), +-- last(salary) over(order by enroll_date range between unbounded preceding and '1 year' following), +-- salary, enroll_date from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select first(salary) over(order by enroll_date range between unbounded preceding and '1 year' following +-- exclude ties), +-- last(salary) over(order by enroll_date range between unbounded preceding and '1 year' following +-- exclude ties), +-- salary, enroll_date from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select first(salary) over(order by enroll_date range between unbounded preceding and '1 year' following +-- exclude group), +-- last(salary) over(order by enroll_date range between unbounded preceding and '1 year' following +-- exclude group), +-- salary, enroll_date from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select first(salary) over(order by enroll_date range between unbounded preceding and '1 year' following +-- exclude current row), +-- last(salary) over(order by enroll_date range between unbounded preceding and '1 year' following +-- exclude current row), +-- salary, enroll_date from empsalary; + +-- RANGE offset PRECEDING/FOLLOWING with null values: the subquery appends two rows whose id is NULL (y = 42 and 43) to ids 1..5, and the four queries cover asc/desc ordering combined with nulls first/last +select ss.id, ss.y, + first(ss.y) over w, + last(ss.y) over w +from + (select x.id, x.id as y from range(1,6) as x + union all select null, 42 + union all select null, 43) ss +window w as + (order by ss.id asc nulls first range between 2 preceding and 2 following); + +select ss.id, ss.y, + first(ss.y) over w, + last(ss.y) over w +from + (select x.id, x.id as y from range(1,6) as x + union all select null, 
42 + union all select null, 43) ss +window w as + (order by ss.id asc nulls last range between 2 preceding and 2 following); + +select ss.id, ss.y, + first(ss.y) over w, + last(ss.y) over w +from + (select x.id, x.id as y from range(1,6) as x + union all select null, 42 + union all select null, 43) ss +window w as + (order by ss.id desc nulls first range between 2 preceding and 2 following); + +select ss.id, ss.y, + first(ss.y) over w, + last(ss.y) over w +from + (select x.id, x.id as y from range(1,6) as x + union all select null, 42 + union all select null, 43) ss +window w as + (order by ss.id desc nulls last range between 2 preceding and 2 following); + +-- Check overflow behavior for various integer sizes (the expected results record `long overflow` only for the two queries at the 64-bit limits - the 16-bit and 32-bit boundary queries return rows) + +select x.id, last(x.id) over (order by x.id range between current row and 2147450884 following) +from range(32764, 32767) x; + +select x.id, last(x.id) over (order by x.id desc range between current row and 2147450885 following) +from range(-32766, -32765) x; + +select x.id, last(x.id) over (order by x.id range between current row and 4 following) +from range(2147483644, 2147483647) x; + +select x.id, last(x.id) over (order by x.id desc range between current row and 5 following) +from range(-2147483646, -2147483645) x; + +select x.id, last(x.id) over (order by x.id range between current row and 4 following) +from range(9223372036854775804, 9223372036854775807) x; + +select x.id, last(x.id) over (order by x.id desc range between current row and 5 following) +from range(-9223372036854775806, -9223372036854775805) x; + +-- Test in_range for other numeric datatypes. NOTE(review): f_float8 is declared float like f_float4, and f_numeric is declared int, so 1.1 and 1.12 show up as 1 in the f_numeric results - confirm double/decimal were not intended + +create table numerics ( + id int, + f_float4 float, + f_float8 float, + f_numeric int +) using parquet; + +insert into numerics values +(1, -3, -3, -3), +(2, -1, -1, -1), +(3, 0, 0, 0), +(4, 1.1, 1.1, 1.1), +(5, 1.12, 1.12, 1.12), +(6, 2, 2, 2), +(7, 100, 100, 100); +-- (8, 'infinity', 'infinity', '1000'), +-- (9, 'NaN', 'NaN', 'NaN'), +-- (0, '-infinity', '-infinity', '-1000'); -- numeric type 
lacks infinities + +select id, f_float4, first(id) over w, last(id) over w +from numerics +window w as (order by f_float4 range between + 1 preceding and 1 following); + +select id, f_float4, first(id) over w, last(id) over w +from numerics +window w as (order by f_float4 range between + 1 preceding and 1.1 following); + +select id, f_float4, first(id) over w, last(id) over w +from numerics +window w as (order by f_float4 range between + 'inf' preceding and 'inf' following); + +select id, f_float4, first(id) over w, last(id) over w +from numerics +window w as (order by f_float4 range between + 1.1 preceding and 'NaN' following); -- error, NaN disallowed in PostgreSQL (the expected Spark results record rows for this query, not an error) + +select id, f_float8, first(id) over w, last(id) over w +from numerics +window w as (order by f_float8 range between + 1 preceding and 1 following); + +select id, f_float8, first(id) over w, last(id) over w +from numerics +window w as (order by f_float8 range between + 1 preceding and 1.1 following); + +select id, f_float8, first(id) over w, last(id) over w +from numerics +window w as (order by f_float8 range between + 'inf' preceding and 'inf' following); + +select id, f_float8, first(id) over w, last(id) over w +from numerics +window w as (order by f_float8 range between + 1.1 preceding and 'NaN' following); -- error, NaN disallowed in PostgreSQL (the expected Spark results record rows for this query, not an error) + +select id, f_numeric, first(id) over w, last(id) over w +from numerics +window w as (order by f_numeric range between + 1 preceding and 1 following); + +select id, f_numeric, first(id) over w, last(id) over w +from numerics +window w as (order by f_numeric range between + 1 preceding and 1.1 following); + +select id, f_numeric, first(id) over w, last(id) over w +from numerics +window w as (order by f_numeric range between + 1 preceding and 1.1 following); -- currently unsupported upstream. NOTE(review): this statement and its expected results are identical to the preceding query, confirm whether a distinguishing cast was lost in the port + +select id, f_numeric, first(id) over w, last(id) over w +from numerics +window w as (order by f_numeric range between + 1.1 preceding and 'NaN' following); -- error, NaN disallowed in PostgreSQL (the expected Spark results record rows for this query, not an error) + +drop table 
empsalary; +drop table numerics; diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out new file mode 100644 index 000000000000..0d2c78847b97 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out @@ -0,0 +1,479 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 31 + + +-- !query 0 +CREATE TABLE empsalary ( + depname string, + empno integer, + salary int, + enroll_date date +) USING parquet +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +SELECT sum(unique1) over (order by four range between 2 preceding and 1 preceding), +unique1, four +FROM tenk1 WHERE unique1 < 10 +-- !query 1 schema +struct +-- !query 1 output +12 1 1 +12 5 1 +12 9 1 +23 3 3 +23 7 3 +27 2 2 +27 6 2 +NULL 0 0 +NULL 4 0 +NULL 8 0 + + +-- !query 2 +SELECT sum(unique1) over (order by four desc range between 2 preceding and 1 preceding), +unique1, four +FROM tenk1 WHERE unique1 < 10 +-- !query 2 schema +struct +-- !query 2 output +10 2 2 +10 6 2 +18 1 1 +18 5 1 +18 9 1 +23 0 0 +23 4 0 +23 8 0 +NULL 3 3 +NULL 7 3 + + +-- !query 3 +SELECT sum(unique1) over (partition by four order by unique1 range between 5 preceding and 6 following), +unique1, four +FROM tenk1 WHERE unique1 < 10 +-- !query 3 schema +struct +-- !query 3 output +10 3 3 +10 7 3 +12 4 0 +12 8 0 +14 9 1 +15 5 1 +4 0 0 +6 1 1 +8 2 2 +8 6 2 + + +-- !query 4 +select last(salary) over(order by salary range between 1000 preceding and 1000 following), +lag(salary) over(order by salary range between 1000 preceding and 1000 following), +salary from empsalary +-- !query 4 schema +struct<> +-- !query 4 output +org.apache.spark.sql.AnalysisException +Window Frame specifiedwindowframe(RangeFrame, -1000, 1000) must match the required frame specifiedwindowframe(RowFrame, -1, -1); + + +-- !query 5 +select ss.id, ss.y, + first(ss.y) over w, + last(ss.y) 
over w +from + (select x.id, x.id as y from range(1,6) as x + union all select null, 42 + union all select null, 43) ss +window w as + (order by ss.id asc nulls first range between 2 preceding and 2 following) +-- !query 5 schema +struct +-- !query 5 output +1 1 1 3 +2 2 1 4 +3 3 1 5 +4 4 2 5 +5 5 3 5 +NULL 42 42 43 +NULL 43 42 43 + + +-- !query 6 +select ss.id, ss.y, + first(ss.y) over w, + last(ss.y) over w +from + (select x.id, x.id as y from range(1,6) as x + union all select null, 42 + union all select null, 43) ss +window w as + (order by ss.id asc nulls last range between 2 preceding and 2 following) +-- !query 6 schema +struct +-- !query 6 output +1 1 1 3 +2 2 1 4 +3 3 1 5 +4 4 2 5 +5 5 3 5 +NULL 42 42 43 +NULL 43 42 43 + + +-- !query 7 +select ss.id, ss.y, + first(ss.y) over w, + last(ss.y) over w +from + (select x.id, x.id as y from range(1,6) as x + union all select null, 42 + union all select null, 43) ss +window w as + (order by ss.id desc nulls first range between 2 preceding and 2 following) +-- !query 7 schema +struct +-- !query 7 output +1 1 3 1 +2 2 4 1 +3 3 5 1 +4 4 5 2 +5 5 5 3 +NULL 42 42 43 +NULL 43 42 43 + + +-- !query 8 +select ss.id, ss.y, + first(ss.y) over w, + last(ss.y) over w +from + (select x.id, x.id as y from range(1,6) as x + union all select null, 42 + union all select null, 43) ss +window w as + (order by ss.id desc nulls last range between 2 preceding and 2 following) +-- !query 8 schema +struct +-- !query 8 output +1 1 3 1 +2 2 4 1 +3 3 5 1 +4 4 5 2 +5 5 5 3 +NULL 42 42 43 +NULL 43 42 43 + + +-- !query 9 +select x.id, last(x.id) over (order by x.id range between current row and 2147450884 following) +from range(32764, 32767) x +-- !query 9 schema +struct +-- !query 9 output +32764 32766 +32765 32766 +32766 32766 + + +-- !query 10 +select x.id, last(x.id) over (order by x.id desc range between current row and 2147450885 following) +from range(-32766, -32765) x +-- !query 10 schema +struct +-- !query 10 output +-32766 -32766 + + 
+-- !query 11 +select x.id, last(x.id) over (order by x.id range between current row and 4 following) +from range(2147483644, 2147483647) x +-- !query 11 schema +struct +-- !query 11 output +2147483644 2147483646 +2147483645 2147483646 +2147483646 2147483646 + + +-- !query 12 +select x.id, last(x.id) over (order by x.id desc range between current row and 5 following) +from range(-2147483646, -2147483645) x +-- !query 12 schema +struct +-- !query 12 output +-2147483646 -2147483646 + + +-- !query 13 +select x.id, last(x.id) over (order by x.id range between current row and 4 following) +from range(9223372036854775804, 9223372036854775807) x +-- !query 13 schema +struct<> +-- !query 13 output +java.lang.ArithmeticException +long overflow + + +-- !query 14 +select x.id, last(x.id) over (order by x.id desc range between current row and 5 following) +from range(-9223372036854775806, -9223372036854775805) x +-- !query 14 schema +struct<> +-- !query 14 output +java.lang.ArithmeticException +long overflow + + +-- !query 15 +create table numerics ( + id int, + f_float4 float, + f_float8 float, + f_numeric int +) using parquet +-- !query 15 schema +struct<> +-- !query 15 output + + + +-- !query 16 +insert into numerics values +(1, -3, -3, -3), +(2, -1, -1, -1), +(3, 0, 0, 0), +(4, 1.1, 1.1, 1.1), +(5, 1.12, 1.12, 1.12), +(6, 2, 2, 2), +(7, 100, 100, 100) +-- !query 16 schema +struct<> +-- !query 16 output + + + +-- !query 17 +select id, f_float4, first(id) over w, last(id) over w +from numerics +window w as (order by f_float4 range between + 1 preceding and 1 following) +-- !query 17 schema +struct +-- !query 17 output +1 -3.0 1 1 +2 -1.0 2 3 +3 0.0 2 3 +4 1.1 4 6 +5 1.12 4 6 +6 2.0 4 6 +7 100.0 7 7 + + +-- !query 18 +select id, f_float4, first(id) over w, last(id) over w +from numerics +window w as (order by f_float4 range between + 1 preceding and 1.1 following) +-- !query 18 schema +struct +-- !query 18 output +1 -3.0 1 1 +2 -1.0 2 3 +3 0.0 2 4 +4 1.1 4 6 +5 1.12 4 6 +6 
2.0 4 6 +7 100.0 7 7 + + +-- !query 19 +select id, f_float4, first(id) over w, last(id) over w +from numerics +window w as (order by f_float4 range between + 'inf' preceding and 'inf' following) +-- !query 19 schema +struct +-- !query 19 output +1 -3.0 1 7 +2 -1.0 1 7 +3 0.0 1 7 +4 1.1 1 7 +5 1.12 1 7 +6 2.0 1 7 +7 100.0 1 7 + + +-- !query 20 +select id, f_float4, first(id) over w, last(id) over w +from numerics +window w as (order by f_float4 range between + 1.1 preceding and 'NaN' following) +-- !query 20 schema +struct +-- !query 20 output +1 -3.0 1 7 +2 -1.0 2 7 +3 0.0 2 7 +4 1.1 3 7 +5 1.12 4 7 +6 2.0 4 7 +7 100.0 7 7 + + +-- !query 21 +select id, f_float8, first(id) over w, last(id) over w +from numerics +window w as (order by f_float8 range between + 1 preceding and 1 following) +-- !query 21 schema +struct +-- !query 21 output +1 -3.0 1 1 +2 -1.0 2 3 +3 0.0 2 3 +4 1.1 4 6 +5 1.12 4 6 +6 2.0 4 6 +7 100.0 7 7 + + +-- !query 22 +select id, f_float8, first(id) over w, last(id) over w +from numerics +window w as (order by f_float8 range between + 1 preceding and 1.1 following) +-- !query 22 schema +struct +-- !query 22 output +1 -3.0 1 1 +2 -1.0 2 3 +3 0.0 2 4 +4 1.1 4 6 +5 1.12 4 6 +6 2.0 4 6 +7 100.0 7 7 + + +-- !query 23 +select id, f_float8, first(id) over w, last(id) over w +from numerics +window w as (order by f_float8 range between + 'inf' preceding and 'inf' following) +-- !query 23 schema +struct +-- !query 23 output +1 -3.0 1 7 +2 -1.0 1 7 +3 0.0 1 7 +4 1.1 1 7 +5 1.12 1 7 +6 2.0 1 7 +7 100.0 1 7 + + +-- !query 24 +select id, f_float8, first(id) over w, last(id) over w +from numerics +window w as (order by f_float8 range between + 1.1 preceding and 'NaN' following) +-- !query 24 schema +struct +-- !query 24 output +1 -3.0 1 7 +2 -1.0 2 7 +3 0.0 2 7 +4 1.1 3 7 +5 1.12 4 7 +6 2.0 4 7 +7 100.0 7 7 + + +-- !query 25 +select id, f_numeric, first(id) over w, last(id) over w +from numerics +window w as (order by f_numeric range between + 1 preceding and 1 
following) +-- !query 25 schema +struct +-- !query 25 output +1 -3 1 1 +2 -1 2 3 +3 0 2 5 +4 1 3 6 +5 1 3 6 +6 2 4 6 +7 100 7 7 + + +-- !query 26 +select id, f_numeric, first(id) over w, last(id) over w +from numerics +window w as (order by f_numeric range between + 1 preceding and 1.1 following) +-- !query 26 schema +struct +-- !query 26 output +1 -3 1 1 +2 -1 2 3 +3 0 2 5 +4 1 3 6 +5 1 3 6 +6 2 4 6 +7 100 7 7 + + +-- !query 27 +select id, f_numeric, first(id) over w, last(id) over w +from numerics +window w as (order by f_numeric range between + 1 preceding and 1.1 following) +-- !query 27 schema +struct +-- !query 27 output +1 -3 1 1 +2 -1 2 3 +3 0 2 5 +4 1 3 6 +5 1 3 6 +6 2 4 6 +7 100 7 7 + + +-- !query 28 +select id, f_numeric, first(id) over w, last(id) over w +from numerics +window w as (order by f_numeric range between + 1.1 preceding and 'NaN' following) +-- !query 28 schema +struct +-- !query 28 output +1 -3 NULL NULL +2 -1 NULL NULL +3 0 NULL NULL +4 1 NULL NULL +5 1 NULL NULL +6 2 NULL NULL +7 100 NULL NULL + + +-- !query 29 +drop table empsalary +-- !query 29 schema +struct<> +-- !query 29 output + + + +-- !query 30 +drop table numerics +-- !query 30 schema +struct<> +-- !query 30 output +