From e04d7b60e9dcc5ce18b34596998aab050e796a17 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 14 Jul 2019 23:18:29 +0800 Subject: [PATCH 1/3] add select_implicit.sql --- .../inputs/pgSQL/select_implicit.sql | 158 ++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/pgSQL/select_implicit.sql diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/select_implicit.sql b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/select_implicit.sql new file mode 100644 index 0000000000000..2878cfc7d77e1 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/select_implicit.sql @@ -0,0 +1,158 @@ +-- +-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group +-- +-- +-- SELECT_IMPLICIT +-- Test cases for queries with ordering terms missing from the target list. +-- This used to be called "junkfilter.sql". +-- The parser uses the term "resjunk" to handle these cases. +-- - thomas 1998-07-09 +-- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select.sql +-- + +-- load test data +CREATE TABLE test_missing_target (a int, b int, c char(8), d char); +INSERT INTO test_missing_target VALUES (0, 1, 'XXXX', 'A'); +INSERT INTO test_missing_target VALUES (1, 2, 'ABAB', 'b'); +INSERT INTO test_missing_target VALUES (2, 2, 'ABAB', 'c'); +INSERT INTO test_missing_target VALUES (3, 3, 'BBBB', 'D'); +INSERT INTO test_missing_target VALUES (4, 3, 'BBBB', 'e'); +INSERT INTO test_missing_target VALUES (5, 3, 'bbbb', 'F'); +INSERT INTO test_missing_target VALUES (6, 4, 'cccc', 'g'); +INSERT INTO test_missing_target VALUES (7, 4, 'cccc', 'h'); +INSERT INTO test_missing_target VALUES (8, 4, 'CCCC', 'I'); +INSERT INTO test_missing_target VALUES (9, 4, 'CCCC', 'j'); + + +-- w/ existing GROUP BY target +SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c; + +-- w/o existing GROUP BY target using a relation name in GROUP BY clause +SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c; + +-- w/o existing GROUP BY target and w/o existing a different ORDER BY target +-- failure expected +SELECT count(*) FROM test_missing_target GROUP BY a ORDER BY b; + +-- w/o existing GROUP BY target and w/o existing same ORDER BY target +SELECT count(*) FROM test_missing_target GROUP BY b ORDER BY b; + +-- w/ existing GROUP BY target using a relation name in target +SELECT test_missing_target.b, count(*) + FROM test_missing_target GROUP BY b ORDER BY b; + +-- w/o existing GROUP BY target +SELECT c FROM test_missing_target ORDER BY a; + +-- w/o existing ORDER BY target +SELECT count(*) FROM test_missing_target GROUP BY b ORDER BY b desc; + +-- group using reference number +SELECT count(*) FROM test_missing_target ORDER BY 1 desc; + +-- order using reference number +SELECT c, count(*) FROM test_missing_target GROUP BY 1 ORDER BY 1; + +-- group using reference number out of range +-- failure expected +SELECT c, count(*) FROM test_missing_target GROUP BY 3; + +-- group w/o existing GROUP BY and ORDER BY target under ambiguous condition +-- failure expected +SELECT count(*) FROM test_missing_target x, test_missing_target y + WHERE x.a = y.a + GROUP BY b ORDER BY b; + +-- order w/ target under ambiguous condition +-- failure NOT expected +SELECT a, a FROM test_missing_target + ORDER BY a; + +-- order expression w/ target under ambiguous condition +-- failure NOT expected +SELECT a/2, a/2 FROM test_missing_target + ORDER BY a/2; + +-- group expression w/ target under ambiguous condition +-- failure NOT expected +SELECT a/2, a/2 FROM test_missing_target + GROUP BY a/2 ORDER BY a/2; + +-- group w/ existing GROUP BY target under ambiguous condition +SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y + WHERE x.a = y.a + GROUP BY x.b ORDER BY x.b; + +-- group w/o existing GROUP BY target under ambiguous condition +SELECT count(*) FROM test_missing_target x, test_missing_target y + WHERE x.a = y.a + GROUP BY x.b ORDER BY x.b; + +-- group w/o existing GROUP BY target under ambiguous condition +-- into a table +SELECT count(*) INTO TABLE test_missing_target2 +FROM test_missing_target x, test_missing_target y + WHERE x.a = y.a + GROUP BY x.b ORDER BY x.b; +SELECT * FROM test_missing_target2; + + +-- Functions and expressions + +-- w/ existing GROUP BY target +SELECT a%2, count(b) FROM test_missing_target +GROUP BY test_missing_target.a%2 +ORDER BY test_missing_target.a%2; + +-- w/o existing GROUP BY target using a relation name in GROUP BY clause +SELECT count(c) FROM test_missing_target +GROUP BY lower(test_missing_target.c) +ORDER BY lower(test_missing_target.c); + +-- w/o existing GROUP BY target and w/o existing a different ORDER BY target +-- failure expected +SELECT count(a) FROM test_missing_target GROUP BY a ORDER BY b; + +-- w/o existing GROUP BY target and w/o existing same ORDER BY target +SELECT count(b) FROM test_missing_target GROUP BY b/2 ORDER BY b/2; + +-- w/ existing GROUP BY target using a relation name in target +SELECT lower(test_missing_target.c), count(c) + FROM test_missing_target GROUP BY lower(c) ORDER BY lower(c); + +-- w/o existing GROUP BY target +SELECT a FROM test_missing_target ORDER BY upper(d); + +-- w/o existing ORDER BY target +SELECT count(b) FROM test_missing_target + GROUP BY (b + 1) / 2 ORDER BY (b + 1) / 2 desc; + +-- group w/o existing GROUP BY and ORDER BY target under ambiguous condition +-- failure expected +SELECT count(x.a) FROM test_missing_target x, test_missing_target y + WHERE x.a = y.a + GROUP BY b/2 ORDER BY b/2; + +-- group w/ existing GROUP BY target under ambiguous condition +SELECT x.b/2, count(x.b) FROM test_missing_target x, test_missing_target y + WHERE x.a = y.a + GROUP BY x.b/2 ORDER BY x.b/2; + +-- group w/o existing GROUP BY target under ambiguous condition +-- failure expected due to ambiguous b in count(b) +SELECT count(b) FROM test_missing_target x, test_missing_target y + WHERE x.a = y.a + GROUP BY x.b/2; + +-- group w/o existing GROUP BY target under ambiguous condition +-- into a table +SELECT count(x.b) INTO TABLE test_missing_target3 +FROM test_missing_target x, test_missing_target y + WHERE x.a = y.a + GROUP BY x.b/2 ORDER BY x.b/2; +SELECT * FROM test_missing_target3; + +-- Cleanup +DROP TABLE test_missing_target; +DROP TABLE test_missing_target2; +DROP TABLE test_missing_target3; From a183e2d911fa1f7d5b16da706a8ae24cbf434549 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 14 Jul 2019 23:36:58 +0800 Subject: [PATCH 2/3] Add result --- .../inputs/pgSQL/select_implicit.sql | 30 +- .../results/pgSQL/select_implicit.sql.out | 425 ++++++++++++++++++ 2 files changed, 441 insertions(+), 14 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/results/pgSQL/select_implicit.sql.out diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/select_implicit.sql b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/select_implicit.sql index 2878cfc7d77e1..54b3083a9f4a2 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/select_implicit.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/select_implicit.sql @@ -7,11 +7,11 @@ -- This used to be called "junkfilter.sql". -- The parser uses the term "resjunk" to handle these cases. -- - thomas 1998-07-09 --- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select.sql +-- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select_implicit.sql -- -- load test data -CREATE TABLE test_missing_target (a int, b int, c char(8), d char); +CREATE TABLE test_missing_target (a int, b int, c string, d string) using parquet; INSERT INTO test_missing_target VALUES (0, 1, 'XXXX', 'A'); INSERT INTO test_missing_target VALUES (1, 2, 'ABAB', 'b'); INSERT INTO test_missing_target VALUES (2, 2, 'ABAB', 'c'); @@ -88,13 +88,14 @@ SELECT count(*) FROM test_missing_target x, test_missing_target y WHERE x.a = y.a GROUP BY x.b ORDER BY x.b; +-- [SPARK-28329] SELECT INTO syntax -- group w/o existing GROUP BY target under ambiguous condition -- into a table -SELECT count(*) INTO TABLE test_missing_target2 -FROM test_missing_target x, test_missing_target y - WHERE x.a = y.a - GROUP BY x.b ORDER BY x.b; -SELECT * FROM test_missing_target2; +-- SELECT count(*) INTO TABLE test_missing_target2 +-- FROM test_missing_target x, test_missing_target y +-- WHERE x.a = y.a +-- GROUP BY x.b ORDER BY x.b; +-- SELECT * FROM test_missing_target2; -- Functions and expressions @@ -144,15 +145,16 @@ SELECT count(b) FROM test_missing_target x, test_missing_target y WHERE x.a = y.a GROUP BY x.b/2; +-- [SPARK-28329] SELECT INTO syntax -- group w/o existing GROUP BY target under ambiguous condition -- into a table -SELECT count(x.b) INTO TABLE test_missing_target3 -FROM test_missing_target x, test_missing_target y - WHERE x.a = y.a - GROUP BY x.b/2 ORDER BY x.b/2; -SELECT * FROM test_missing_target3; +-- SELECT count(x.b) INTO TABLE test_missing_target3 +-- FROM test_missing_target x, test_missing_target y +-- WHERE x.a = y.a +-- GROUP BY x.b/2 ORDER BY x.b/2; +-- SELECT * FROM test_missing_target3; -- Cleanup DROP TABLE test_missing_target; -DROP TABLE test_missing_target2; -DROP TABLE test_missing_target3; +-- DROP TABLE test_missing_target2; +-- DROP TABLE test_missing_target3; diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/select_implicit.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/select_implicit.sql.out new file mode 100644 index 0000000000000..e9ba62801d6a2 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/pgSQL/select_implicit.sql.out @@ -0,0 +1,425 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 38 + + +-- !query 0 +CREATE TABLE test_missing_target (a int, b int, c string, d string) using parquet +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +INSERT INTO test_missing_target VALUES (0, 1, 'XXXX', 'A') +-- !query 1 schema +struct<> +-- !query 1 output + + + +-- !query 2 +INSERT INTO test_missing_target VALUES (1, 2, 'ABAB', 'b') +-- !query 2 schema +struct<> +-- !query 2 output + + + +-- !query 3 +INSERT INTO test_missing_target VALUES (2, 2, 'ABAB', 'c') +-- !query 3 schema +struct<> +-- !query 3 output + + + +-- !query 4 +INSERT INTO test_missing_target VALUES (3, 3, 'BBBB', 'D') +-- !query 4 schema +struct<> +-- !query 4 output + + + +-- !query 5 +INSERT INTO test_missing_target VALUES (4, 3, 'BBBB', 'e') +-- !query 5 schema +struct<> +-- !query 5 output + + + +-- !query 6 +INSERT INTO test_missing_target VALUES (5, 3, 'bbbb', 'F') +-- !query 6 schema +struct<> +-- !query 6 output + + + +-- !query 7 +INSERT INTO test_missing_target VALUES (6, 4, 'cccc', 'g') +-- !query 7 schema +struct<> +-- !query 7 output + + + +-- !query 8 +INSERT INTO test_missing_target VALUES (7, 4, 'cccc', 'h') +-- !query 8 schema +struct<> +-- !query 8 output + + + +-- !query 9 +INSERT INTO test_missing_target VALUES (8, 4, 'CCCC', 'I') +-- !query 9 schema +struct<> +-- !query 9 output + + + +-- !query 10 +INSERT INTO test_missing_target VALUES (9, 4, 'CCCC', 'j') +-- !query 10 schema +struct<> +-- !query 10 output + + + +-- !query 11 +SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c +-- !query 11 schema +struct +-- !query 11 output +ABAB 2 +BBBB 2 +CCCC 2 +XXXX 1 +bbbb 1 +cccc 2 + + +-- !query 12 +SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c +-- !query 12 schema +struct +-- !query 12 output +2 +2 +2 +1 +1 +2 + + +-- !query 13 +SELECT count(*) FROM test_missing_target GROUP BY a ORDER BY b +-- !query 13 schema +struct<> +-- !query 13 output +org.apache.spark.sql.AnalysisException +cannot resolve '`b`' given input columns: [count(1)]; line 1 pos 61 + + +-- !query 14 +SELECT count(*) FROM test_missing_target GROUP BY b ORDER BY b +-- !query 14 schema +struct +-- !query 14 output +1 +2 +3 +4 + + +-- !query 15 +SELECT test_missing_target.b, count(*) + FROM test_missing_target GROUP BY b ORDER BY b +-- !query 15 schema +struct +-- !query 15 output +1 1 +2 2 +3 3 +4 4 + + +-- !query 16 +SELECT c FROM test_missing_target ORDER BY a +-- !query 16 schema +struct +-- !query 16 output +XXXX +ABAB +ABAB +BBBB +BBBB +bbbb +cccc +cccc +CCCC +CCCC + + +-- !query 17 +SELECT count(*) FROM test_missing_target GROUP BY b ORDER BY b desc +-- !query 17 schema +struct +-- !query 17 output +4 +3 +2 +1 + + +-- !query 18 +SELECT count(*) FROM test_missing_target ORDER BY 1 desc +-- !query 18 schema +struct +-- !query 18 output +10 + + +-- !query 19 +SELECT c, count(*) FROM test_missing_target GROUP BY 1 ORDER BY 1 +-- !query 19 schema +struct +-- !query 19 output +ABAB 2 +BBBB 2 +CCCC 2 +XXXX 1 +bbbb 1 +cccc 2 + + +-- !query 20 +SELECT c, count(*) FROM test_missing_target GROUP BY 3 +-- !query 20 schema +struct<> +-- !query 20 output +org.apache.spark.sql.AnalysisException +GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 53 + + +-- !query 21 +SELECT count(*) FROM test_missing_target x, test_missing_target y + WHERE x.a = y.a + GROUP BY b ORDER BY b +-- !query 21 schema +struct<> +-- !query 21 output +org.apache.spark.sql.AnalysisException +Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10 + + +-- !query 22 +SELECT a, a FROM test_missing_target + ORDER BY a +-- !query 22 schema +struct +-- !query 22 output +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 + + +-- !query 23 +SELECT a/2, a/2 FROM test_missing_target + ORDER BY a/2 +-- !query 23 schema +struct<(CAST(a AS DOUBLE) / CAST(2 AS DOUBLE)):double,(CAST(a AS DOUBLE) / CAST(2 AS DOUBLE)):double> +-- !query 23 output +0.0 0.0 +0.5 0.5 +1.0 1.0 +1.5 1.5 +2.0 2.0 +2.5 2.5 +3.0 3.0 +3.5 3.5 +4.0 4.0 +4.5 4.5 + + +-- !query 24 +SELECT a/2, a/2 FROM test_missing_target + GROUP BY a/2 ORDER BY a/2 +-- !query 24 schema +struct<(CAST(a AS DOUBLE) / CAST(2 AS DOUBLE)):double,(CAST(a AS DOUBLE) / CAST(2 AS DOUBLE)):double> +-- !query 24 output +0.0 0.0 +0.5 0.5 +1.0 1.0 +1.5 1.5 +2.0 2.0 +2.5 2.5 +3.0 3.0 +3.5 3.5 +4.0 4.0 +4.5 4.5 + + +-- !query 25 +SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y + WHERE x.a = y.a + GROUP BY x.b ORDER BY x.b +-- !query 25 schema +struct +-- !query 25 output +1 1 +2 2 +3 3 +4 4 + + +-- !query 26 +SELECT count(*) FROM test_missing_target x, test_missing_target y + WHERE x.a = y.a + GROUP BY x.b ORDER BY x.b +-- !query 26 schema +struct +-- !query 26 output +1 +2 +3 +4 + + +-- !query 27 +SELECT a%2, count(b) FROM test_missing_target +GROUP BY test_missing_target.a%2 +ORDER BY test_missing_target.a%2 +-- !query 27 schema +struct<(a % 2):int,count(b):bigint> +-- !query 27 output +0 5 +1 5 + + +-- !query 28 +SELECT count(c) FROM test_missing_target +GROUP BY lower(test_missing_target.c) +ORDER BY lower(test_missing_target.c) +-- !query 28 schema +struct +-- !query 28 output +2 +3 +4 +1 + + +-- !query 29 +SELECT count(a) FROM test_missing_target GROUP BY a ORDER BY b +-- !query 29 schema +struct<> +-- !query 29 output +org.apache.spark.sql.AnalysisException +cannot resolve '`b`' given input columns: [count(a)]; line 1 pos 61 + + +-- !query 30 +SELECT count(b) FROM test_missing_target GROUP BY b/2 ORDER BY b/2 +-- !query 30 schema +struct +-- !query 30 output +1 +2 +3 +4 + + +-- !query 31 +SELECT lower(test_missing_target.c), count(c) + FROM test_missing_target GROUP BY lower(c) ORDER BY lower(c) +-- !query 31 schema +struct +-- !query 31 output +abab 2 +bbbb 3 +cccc 4 +xxxx 1 + + +-- !query 32 +SELECT a FROM test_missing_target ORDER BY upper(d) +-- !query 32 schema +struct +-- !query 32 output +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 + + +-- !query 33 +SELECT count(b) FROM test_missing_target + GROUP BY (b + 1) / 2 ORDER BY (b + 1) / 2 desc +-- !query 33 schema +struct +-- !query 33 output +4 +3 +2 +1 + + +-- !query 34 +SELECT count(x.a) FROM test_missing_target x, test_missing_target y + WHERE x.a = y.a + GROUP BY b/2 ORDER BY b/2 +-- !query 34 schema +struct<> +-- !query 34 output +org.apache.spark.sql.AnalysisException +Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10 + + +-- !query 35 +SELECT x.b/2, count(x.b) FROM test_missing_target x, test_missing_target y + WHERE x.a = y.a + GROUP BY x.b/2 ORDER BY x.b/2 +-- !query 35 schema +struct<(CAST(b AS DOUBLE) / CAST(2 AS DOUBLE)):double,count(b):bigint> +-- !query 35 output +0.5 1 +1.0 2 +1.5 3 +2.0 4 + + +-- !query 36 +SELECT count(b) FROM test_missing_target x, test_missing_target y + WHERE x.a = y.a + GROUP BY x.b/2 +-- !query 36 schema +struct<> +-- !query 36 output +org.apache.spark.sql.AnalysisException +Reference 'b' is ambiguous, could be: x.b, y.b.; line 1 pos 13 + + +-- !query 37 +DROP TABLE test_missing_target +-- !query 37 schema +struct<> +-- !query 37 output + From c00b9d84e02bb6bd2f3d6bfa04649478ae370f0f Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 17 Jul 2019 00:03:23 +0800 Subject: [PATCH 3/3] Enable PREFER_INTEGRAL_DIVISION for PostgreSQL test --- .../results/pgSQL/select_implicit.sql.out | 55 ++++++++----------- 1 file changed, 23 insertions(+), 32 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/select_implicit.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/select_implicit.sql.out index e9ba62801d6a2..0675820b381da 100644 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/select_implicit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/pgSQL/select_implicit.sql.out @@ -239,36 +239,31 @@ struct SELECT a/2, a/2 FROM test_missing_target ORDER BY a/2 -- !query 23 schema -struct<(CAST(a AS DOUBLE) / CAST(2 AS DOUBLE)):double,(CAST(a AS DOUBLE) / CAST(2 AS DOUBLE)):double> +struct<(a div 2):int,(a div 2):int> -- !query 23 output -0.0 0.0 -0.5 0.5 -1.0 1.0 -1.5 1.5 -2.0 2.0 -2.5 2.5 -3.0 3.0 -3.5 3.5 -4.0 4.0 -4.5 4.5 +0 0 +0 0 +1 1 +1 1 +2 2 +2 2 +3 3 +3 3 +4 4 +4 4 -- !query 24 SELECT a/2, a/2 FROM test_missing_target GROUP BY a/2 ORDER BY a/2 -- !query 24 schema -struct<(CAST(a AS DOUBLE) / CAST(2 AS DOUBLE)):double,(CAST(a AS DOUBLE) / CAST(2 AS DOUBLE)):double> +struct<(a div 2):int,(a div 2):int> -- !query 24 output -0.0 0.0 -0.5 0.5 -1.0 1.0 -1.5 1.5 -2.0 2.0 -2.5 2.5 -3.0 3.0 -3.5 3.5 -4.0 4.0 -4.5 4.5 +0 0 +1 1 +2 2 +3 3 +4 4 -- !query 25 @@ -336,8 +331,7 @@ SELECT count(b) FROM test_missing_target GROUP BY b/2 ORDER BY b/2 struct -- !query 30 output 1 -2 -3 +5 4 @@ -376,10 +370,8 @@ SELECT count(b) FROM test_missing_target -- !query 33 schema struct -- !query 33 output -4 +7 3 -2 -1 -- !query 34 @@ -398,12 +390,11 @@ SELECT x.b/2, count(x.b) FROM test_missing_target x, test_missing_target y WHERE x.a = y.a GROUP BY x.b/2 ORDER BY x.b/2 -- !query 35 schema -struct<(CAST(b AS DOUBLE) / CAST(2 AS DOUBLE)):double,count(b):bigint> +struct<(b div 2):int,count(b):bigint> -- !query 35 output -0.5 1 -1.0 2 -1.5 3 -2.0 4 +0 1 +1 5 +2 4 -- !query 36