diff --git a/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql b/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql index f62b10ca0037..79815099c59a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql @@ -4,17 +4,17 @@ select -100; select +230; select -5.2; select +6.8e0; -select -key, +key from testdata where key = 2; -select -(key + 1), - key + 1, +(key + 5) from testdata where key = 1; -select -max(key), +max(key) from testdata; +select -key, +key from uniqueRowData where key = 2; +select -(key + 1), - key + 1, +(key + 5) from uniqueRowData where key = 1; +select -max(key), +max(key) from uniqueRowData; select - (-10); -select + (-key) from testdata where key = 32; -select - (+max(key)) from testdata; +select + (-key) from uniqueRowData where key = 32; +select - (+max(key)) from uniqueRowData; select - - 3; select - + 20; select + + 100; -select - - max(key) from testdata; -select + - key from testdata where key = 33; +select - - max(key) from uniqueRowData; +select + - key from uniqueRowData where key = 33; -- div select 5 / 2; diff --git a/sql/core/src/test/resources/sql-tests/inputs/limit.sql b/sql/core/src/test/resources/sql-tests/inputs/limit.sql index 2ea35f7f3a5c..a37c631f015e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/limit.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/limit.sql @@ -1,23 +1,23 @@ -- limit on various data types -select * from testdata limit 2; +select * from uniqueRowData limit 2; select * from arraydata limit 2; select * from mapdata limit 2; -- foldable non-literal in limit -select * from testdata limit 2 + 1; +select * from uniqueRowData limit 2 + 1; -select * from testdata limit CAST(1 AS int); +select * from uniqueRowData limit CAST(1 AS int); -- limit must be non-negative -select * from testdata limit -1; +select * from uniqueRowData limit -1; -- limit must be foldable -select * from testdata limit key > 
3; +select * from uniqueRowData limit key > 3; -- limit must be integer -select * from testdata limit true; -select * from testdata limit 'a'; +select * from uniqueRowData limit true; +select * from uniqueRowData limit 'a'; -- limit within a subquery select * from (select * from range(10) limit 5) where id > 3; diff --git a/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql b/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql new file mode 100644 index 000000000000..dc8c858ae2dd --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql @@ -0,0 +1,334 @@ +-- A data set containing lowercase column names and column values +CREATE OR REPLACE TEMPORARY VIEW lowerCaseData AS SELECT * FROM VALUES +(1, "a"), +(2, "b"), +(3, "c"), +(4, "d") +as lowerCaseData(n, l); + +-- A data set containing uppercase column names and column values +CREATE OR REPLACE TEMPORARY VIEW upperCaseData AS SELECT * FROM VALUES +(1, "A"), +(2, "B"), +(3, "C"), +(4, "D"), +(5, "E"), +(6, "F") +as upperCaseData(N, L); + +-- A data set containing null +CREATE OR REPLACE TEMPORARY VIEW nullData AS SELECT * FROM VALUES +(201, null), +(86, "val_86"), +(null, "val_null"), +(165, "val_165"), +(null, null), +(330, "val_330"), +(165, null) +as nullData(key, value); + +-- A data set containing duplicate rows +CREATE OR REPLACE TEMPORARY VIEW duplicateRowData AS SELECT * FROM VALUES +(251, "val_251"), +(86, "val_86"), +(165, "val_165"), +(330, "val_330"), +(165, "val_165") +as duplicateRowData(key, value); + +-- basic full outer join +SELECT * FROM + (SELECT * FROM upperCaseData WHERE N <= 4) leftTable FULL OUTER JOIN + (SELECT * FROM upperCaseData WHERE N >= 3) rightTable + ON leftTable.N = rightTable.N; + +-- basic right outer join +SELECT * FROM lowercasedata l RIGHT OUTER JOIN uppercasedata u ON l.n = u.N; + +-- basic left outer join +SELECT * FROM uppercasedata u LEFT OUTER JOIN lowercasedata l ON l.n = u.N; + +-- left-outer join over two nested table expressions 
+SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 + ) a + LEFT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c; + +-- right-outer join over two nested table expressions +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 + ) a + RIGHT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c; + +-- full-outer join over two nested table expressions +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 + ) a + FULL OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c; + +-- full-outer join + left-outer join over nested table expressions +SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 10 and src1.key < 150 + ) a + FULL OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 150 and src2.key < 300 + ) b + ON (a.c1 = b.c3) + LEFT OUTER JOIN + ( + SELECT src3.key AS c5, src3.value AS c6 + FROM duplicateRowData src3 WHERE src3.key > 200 and src3.key < 400 + ) c + ON (a.c1 = c.c5) +) c; + +-- left-outer join + join condition + filter +SELECT c.c1, c.c2, c.c3, 
c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 + ) a + LEFT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c +where c.c3 IS NULL AND c.c1 IS NOT NULL; + +-- full outer join over Aggregate +SELECT a.key, a.value, b.key, b.value +FROM + ( + SELECT src1.key as key, count(src1.value) AS value + FROM duplicateRowData src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value + FROM nullData src2 group by src2.key + ) b +ON (a.key = b.key); + +-- full outer join + multi distinct +SELECT a.key, a.value, b.key, b.value1, b.value2 +FROM + ( + SELECT src1.key as key, count(src1.value) AS value + FROM duplicateRowData src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value1, + count(distinct(src2.key)) AS value2 + FROM nullData src2 group by src2.key + ) b +ON (a.key = b.key); + +-- inner join + right-outer join #1 +SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 +FROM ( + SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, + src3.value as v3 + FROM duplicateRowData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200) + RIGHT OUTER JOIN duplicateRowData src3 ON (src1.key = src3.key AND src3.key < 300) + SORT BY k1,v1,k2,v2,k3,v3 +)a; + +-- inner join + right-outer join #2 +SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 +FROM ( + SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, + src3.value as v3 + FROM duplicateRowData src1 + JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key < 100) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src1.key = src3.key AND src3.key < 300) + SORT BY k1,v1,k2,v2,k3,v3 +)a; + 
+-- left outer join + right outer join +SELECT * +FROM + duplicateRowData src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) +SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- left outer + right outer +SELECT * FROM duplicateRowData src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- right outer + right outer +SELECT * FROM duplicateRowData src1 + RIGHT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- left outer + left outer +SELECT * FROM duplicateRowData src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- right outer + left outer +SELECT * FROM duplicateRowData src1 + RIGHT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- inner + left outer +SELECT * FROM duplicateRowData src1 + JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN duplicateRowData src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, 
src3.value; + +-- inner + right outer +SELECT * FROM duplicateRowData src1 + JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- left outer + inner +SELECT * FROM duplicateRowData src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- right outer + inner +SELECT * FROM duplicateRowData src1 + RIGHT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- left outer join with sorted by nested table expression +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +LEFT OUTER JOIN +(SELECT nullData.* FROM nullData sort by value) Y +ON (x.key = Y.key) +select Y.key,Y.value; + +-- right outer join with sorted by nested table expression +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +RIGHT OUTER JOIN +(SELECT nullData.* FROM nullData sort by value) Y +ON (x.key = Y.key) +select Y.key,Y.value; + +-- inner + left outer with sorted by nested table expression +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value; + +-- left + left outer with sorted by nested table expression +FROM +(SELECT nullData.* FROM nullData sort by key) x +LEFT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort 
by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT nullData.* FROM nullData sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value; + +-- left + right outer with sorted by nested table expression +FROM +(SELECT nullData.* FROM nullData sort by key) x +LEFT OUTER JOIN +(SELECT nullData.* FROM nullData sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value; + +-- right + right outer with sorted by nested table expression +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +RIGHT OUTER JOIN +(SELECT nullData.* FROM nullData sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value; + +-- right outer + inner with sorted by nested table expression +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +RIGHT OUTER JOIN +(SELECT nullData.* FROM nullData sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value; diff --git a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out index 6abe048af477..c1d3e6a898a5 100644 --- a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out @@ -35,7 +35,7 @@ struct<6.8:double> -- !query 4 -select -key, +key from testdata where key = 2 +select -key, +key from uniqueRowData where key = 2 -- !query 4 schema struct<(- key):int,key:int> -- !query 4 output @@ -43,7 +43,7 @@ struct<(- key):int,key:int> -- !query 5 -select -(key + 1), - key + 1, +(key + 5) from testdata where key = 1 +select -(key + 1), - key + 1, +(key + 5) from uniqueRowData where key = 1 -- !query 5 schema struct<(- (key + 1)):int,((- key) + 1):int,(key + 5):int> -- 
!query 5 output @@ -51,7 +51,7 @@ struct<(- (key + 1)):int,((- key) + 1):int,(key + 5):int> -- !query 6 -select -max(key), +max(key) from testdata +select -max(key), +max(key) from uniqueRowData -- !query 6 schema struct<(- max(key)):int,max(key):int> -- !query 6 output @@ -67,7 +67,7 @@ struct<(- -10):int> -- !query 8 -select + (-key) from testdata where key = 32 +select + (-key) from uniqueRowData where key = 32 -- !query 8 schema struct<(- key):int> -- !query 8 output @@ -75,7 +75,7 @@ struct<(- key):int> -- !query 9 -select - (+max(key)) from testdata +select - (+max(key)) from uniqueRowData -- !query 9 schema struct<(- max(key)):int> -- !query 9 output @@ -107,7 +107,7 @@ struct<100:int> -- !query 13 -select - - max(key) from testdata +select - - max(key) from uniqueRowData -- !query 13 schema struct<(- (- max(key))):int> -- !query 13 output @@ -115,7 +115,7 @@ struct<(- (- max(key))):int> -- !query 14 -select + - key from testdata where key = 33 +select + - key from uniqueRowData where key = 33 -- !query 14 schema struct<(- key):int> -- !query 14 output diff --git a/sql/core/src/test/resources/sql-tests/results/limit.sql.out b/sql/core/src/test/resources/sql-tests/results/limit.sql.out index cb4e4d04810d..e3d1a8bc8717 100644 --- a/sql/core/src/test/resources/sql-tests/results/limit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/limit.sql.out @@ -3,7 +3,7 @@ -- !query 0 -select * from testdata limit 2 +select * from uniqueRowData limit 2 -- !query 0 schema struct -- !query 0 output @@ -30,7 +30,7 @@ struct> -- !query 3 -select * from testdata limit 2 + 1 +select * from uniqueRowData limit 2 + 1 -- !query 3 schema struct -- !query 3 output @@ -40,7 +40,7 @@ struct -- !query 4 -select * from testdata limit CAST(1 AS int) +select * from uniqueRowData limit CAST(1 AS int) -- !query 4 schema struct -- !query 4 output @@ -48,7 +48,7 @@ struct -- !query 5 -select * from testdata limit -1 +select * from uniqueRowData limit -1 -- !query 5 schema struct<> 
-- !query 5 output @@ -57,16 +57,16 @@ The limit expression must be equal to or greater than 0, but got -1; -- !query 6 -select * from testdata limit key > 3 +select * from uniqueRowData limit key > 3 -- !query 6 schema struct<> -- !query 6 output org.apache.spark.sql.AnalysisException -The limit expression must evaluate to a constant value, but got (testdata.`key` > 3); +The limit expression must evaluate to a constant value, but got (uniquerowdata.`key` > 3); -- !query 7 -select * from testdata limit true +select * from uniqueRowData limit true -- !query 7 schema struct<> -- !query 7 output @@ -75,7 +75,7 @@ The limit expression must be integer type, but got boolean; -- !query 8 -select * from testdata limit 'a' +select * from uniqueRowData limit 'a' -- !query 8 schema struct<> -- !query 8 output diff --git a/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out b/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out new file mode 100644 index 000000000000..d1f4eed68ecb --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out @@ -0,0 +1,643 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 32 + + +-- !query 0 +CREATE OR REPLACE TEMPORARY VIEW lowerCaseData AS SELECT * FROM VALUES +(1, "a"), +(2, "b"), +(3, "c"), +(4, "d") +as lowerCaseData(n, l) +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +CREATE OR REPLACE TEMPORARY VIEW upperCaseData AS SELECT * FROM VALUES +(1, "A"), +(2, "B"), +(3, "C"), +(4, "D"), +(5, "E"), +(6, "F") +as upperCaseData(N, L) +-- !query 1 schema +struct<> +-- !query 1 output + + + +-- !query 2 +CREATE OR REPLACE TEMPORARY VIEW nullData AS SELECT * FROM VALUES +(201, null), +(86, "val_86"), +(null, "val_null"), +(165, "val_165"), +(null, null), +(330, "val_330"), +(165, null) +as nullData(key, value) +-- !query 2 schema +struct<> +-- !query 2 output + + + +-- !query 3 +CREATE OR REPLACE TEMPORARY VIEW duplicateRowData AS SELECT * FROM VALUES 
+(251, "val_251"), +(86, "val_86"), +(165, "val_165"), +(330, "val_330"), +(165, "val_165") +as duplicateRowData(key, value) +-- !query 3 schema +struct<> +-- !query 3 output + + + +-- !query 4 +SELECT * FROM + (SELECT * FROM upperCaseData WHERE N <= 4) leftTable FULL OUTER JOIN + (SELECT * FROM upperCaseData WHERE N >= 3) rightTable + ON leftTable.N = rightTable.N +-- !query 4 schema +struct +-- !query 4 output +1 A NULL NULL +2 B NULL NULL +3 C 3 C +4 D 4 D +NULL NULL 5 E +NULL NULL 6 F + + +-- !query 5 +SELECT * FROM lowercasedata l RIGHT OUTER JOIN uppercasedata u ON l.n = u.N +-- !query 5 schema +struct +-- !query 5 output +1 a 1 A +2 b 2 B +3 c 3 C +4 d 4 D +NULL NULL 5 E +NULL NULL 6 F + + +-- !query 6 +SELECT * FROM uppercasedata u LEFT OUTER JOIN lowercasedata l ON l.n = u.N +-- !query 6 schema +struct +-- !query 6 output +1 A 1 a +2 B 2 b +3 C 3 c +4 D 4 d +5 E NULL NULL +6 F NULL NULL + + +-- !query 7 +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 + ) a + LEFT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c +-- !query 7 schema +struct +-- !query 7 output +165 val_165 NULL NULL +165 val_165 NULL NULL +251 val_251 251 val_251 + + +-- !query 8 +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 + ) a + RIGHT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c +-- !query 8 schema +struct +-- !query 8 output +251 val_251 251 val_251 +NULL NULL 330 val_330 + + +-- !query 9 +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT 
a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 + ) a + FULL OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c +-- !query 9 schema +struct +-- !query 9 output +165 val_165 NULL NULL +165 val_165 NULL NULL +251 val_251 251 val_251 +NULL NULL 330 val_330 + + +-- !query 10 +SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 10 and src1.key < 150 + ) a + FULL OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 150 and src2.key < 300 + ) b + ON (a.c1 = b.c3) + LEFT OUTER JOIN + ( + SELECT src3.key AS c5, src3.value AS c6 + FROM duplicateRowData src3 WHERE src3.key > 200 and src3.key < 400 + ) c + ON (a.c1 = c.c5) +) c +-- !query 10 schema +struct +-- !query 10 output +86 val_86 NULL NULL NULL NULL +NULL NULL 165 val_165 NULL NULL +NULL NULL 165 val_165 NULL NULL +NULL NULL 251 val_251 NULL NULL + + +-- !query 11 +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 + ) a + LEFT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c +where c.c3 IS NULL AND c.c1 IS NOT NULL +-- !query 11 schema +struct +-- !query 11 output +165 val_165 NULL NULL +165 val_165 NULL NULL + + +-- !query 12 +SELECT a.key, a.value, b.key, b.value +FROM + ( + SELECT src1.key as key, count(src1.value) AS value + FROM duplicateRowData src1 group by src1.key + ) a + FULL OUTER JOIN + ( + 
SELECT src2.key as key, count(distinct(src2.value)) AS value + FROM nullData src2 group by src2.key + ) b +ON (a.key = b.key) +-- !query 12 schema +struct +-- !query 12 output +165 2 165 1 +251 1 NULL NULL +330 1 330 1 +86 1 86 1 +NULL NULL 201 0 +NULL NULL NULL 1 + + +-- !query 13 +SELECT a.key, a.value, b.key, b.value1, b.value2 +FROM + ( + SELECT src1.key as key, count(src1.value) AS value + FROM duplicateRowData src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value1, + count(distinct(src2.key)) AS value2 + FROM nullData src2 group by src2.key + ) b +ON (a.key = b.key) +-- !query 13 schema +struct +-- !query 13 output +165 2 165 1 1 +251 1 NULL NULL NULL +330 1 330 1 1 +86 1 86 1 1 +NULL NULL 201 0 1 +NULL NULL NULL 1 0 + + +-- !query 14 +SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 +FROM ( + SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, + src3.value as v3 + FROM duplicateRowData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200) + RIGHT OUTER JOIN duplicateRowData src3 ON (src1.key = src3.key AND src3.key < 300) + SORT BY k1,v1,k2,v2,k3,v3 +)a +-- !query 14 schema +struct +-- !query 14 output +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +86 val_86 86 val_86 86 val_86 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 + + +-- !query 15 +SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 +FROM ( + SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, + src3.value as v3 + FROM duplicateRowData src1 + JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key < 100) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src1.key = src3.key 
AND src3.key < 300) + SORT BY k1,v1,k2,v2,k3,v3 +)a +-- !query 15 schema +struct +-- !query 15 output +86 val_86 86 val_86 86 val_86 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 + + +-- !query 16 +SELECT * +FROM + duplicateRowData src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) +SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 16 schema +struct +-- !query 16 output +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 +NULL NULL NULL NULL 86 val_86 + + +-- !query 17 +SELECT * FROM duplicateRowData src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 17 schema +struct +-- !query 17 output +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 +NULL NULL NULL NULL 86 val_86 + + +-- !query 18 +SELECT * FROM duplicateRowData src1 + RIGHT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 18 schema +struct +-- !query 18 output +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 86 val_86 86 val_86 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 + + +-- !query 19 +SELECT * FROM duplicateRowData 
src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 19 schema +struct +-- !query 19 output +165 val_165 NULL NULL NULL NULL +165 val_165 NULL NULL NULL NULL +251 val_251 NULL NULL NULL NULL +330 val_330 NULL NULL NULL NULL +86 val_86 NULL NULL NULL NULL + + +-- !query 20 +SELECT * FROM duplicateRowData src1 + RIGHT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 20 schema +struct +-- !query 20 output +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 251 val_251 NULL NULL +NULL NULL 330 val_330 NULL NULL +NULL NULL 86 val_86 86 val_86 + + +-- !query 21 +SELECT * FROM duplicateRowData src1 + JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN duplicateRowData src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 21 schema +struct +-- !query 21 output + + + +-- !query 22 +SELECT * FROM duplicateRowData src1 + JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 22 schema +struct +-- !query 22 output +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 +NULL NULL NULL NULL 86 val_86 + + +-- !query 23 +SELECT * 
FROM duplicateRowData src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 23 schema +struct +-- !query 23 output + + + +-- !query 24 +SELECT * FROM duplicateRowData src1 + RIGHT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 24 schema +struct +-- !query 24 output +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 86 val_86 86 val_86 + + +-- !query 25 +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +LEFT OUTER JOIN +(SELECT nullData.* FROM nullData sort by value) Y +ON (x.key = Y.key) +select Y.key,Y.value +-- !query 25 schema +struct +-- !query 25 output +165 NULL +165 NULL +165 val_165 +165 val_165 +330 val_330 +86 val_86 +NULL NULL + + +-- !query 26 +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +RIGHT OUTER JOIN +(SELECT nullData.* FROM nullData sort by value) Y +ON (x.key = Y.key) +select Y.key,Y.value +-- !query 26 schema +struct +-- !query 26 output +165 NULL +165 NULL +165 val_165 +165 val_165 +201 NULL +330 val_330 +86 val_86 +NULL NULL +NULL val_null + + +-- !query 27 +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value +-- !query 27 schema +struct +-- !query 27 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 
val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 28 +FROM +(SELECT nullData.* FROM nullData sort by key) x +LEFT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT nullData.* FROM nullData sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value +-- !query 28 schema +struct +-- !query 28 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +330 val_330 +86 val_86 +NULL NULL +NULL NULL +NULL NULL + + +-- !query 29 +FROM +(SELECT nullData.* FROM nullData sort by key) x +LEFT OUTER JOIN +(SELECT nullData.* FROM nullData sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value +-- !query 29 schema +struct +-- !query 29 output +165 NULL +165 NULL +165 NULL +165 NULL +165 val_165 +165 val_165 +165 val_165 +165 val_165 +330 val_330 +86 val_86 +NULL NULL + + +-- !query 30 +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +RIGHT OUTER JOIN +(SELECT nullData.* FROM nullData sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value +-- !query 30 schema +struct +-- !query 30 output +165 NULL +165 NULL +165 NULL +165 NULL +165 val_165 +165 val_165 +165 val_165 +165 val_165 +330 val_330 +86 val_86 +NULL NULL + + +-- !query 31 +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +RIGHT OUTER JOIN +(SELECT nullData.* FROM nullData sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value +-- !query 31 schema +struct +-- !query 31 output +165 NULL +165 NULL +165 NULL +165 NULL +165 val_165 +165 val_165 +165 val_165 +165 val_165 +330 val_330 +86 val_86 diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index eac266cba55b..edaaac569c4c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -734,49 +734,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } } - test("left outer join") { - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - checkAnswer( - sql("SELECT * FROM uppercasedata LEFT OUTER JOIN lowercasedata ON n = N"), - Row(1, "A", 1, "a") :: - Row(2, "B", 2, "b") :: - Row(3, "C", 3, "c") :: - Row(4, "D", 4, "d") :: - Row(5, "E", null, null) :: - Row(6, "F", null, null) :: Nil) - } - } - - test("right outer join") { - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - checkAnswer( - sql("SELECT * FROM lowercasedata RIGHT OUTER JOIN uppercasedata ON n = N"), - Row(1, "a", 1, "A") :: - Row(2, "b", 2, "B") :: - Row(3, "c", 3, "C") :: - Row(4, "d", 4, "D") :: - Row(null, null, 5, "E") :: - Row(null, null, 6, "F") :: Nil) - } - } - - test("full outer join") { - checkAnswer( - sql( - """ - |SELECT * FROM - | (SELECT * FROM upperCaseData WHERE N <= 4) leftTable FULL OUTER JOIN - | (SELECT * FROM upperCaseData WHERE N >= 3) rightTable - | ON leftTable.N = rightTable.N - """.stripMargin), - Row(1, "A", null, null) :: - Row(2, "B", null, null) :: - Row(3, "C", 3, "C") :: - Row (4, "D", 4, "D") :: - Row(null, null, 5, "E") :: - Row(null, null, 6, "F") :: Nil) - } - test("SPARK-11111 null-safe join should not use cartesian product") { val df = sql("select count(*) from testData a join testData b on (a.key <=> b.key)") val cp = df.queryExecution.sparkPlan.collect { @@ -808,23 +765,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { Row(2, "b", 2) :: Nil) } - test("mixed-case keywords") { - checkAnswer( - sql( - """ - |SeleCT * from - | (select * from upperCaseData WherE N <= 4) leftTable fuLL OUtER joiN - | 
(sElEcT * FROM upperCaseData whERe N >= 3) rightTable - | oN leftTable.N = rightTable.N - """.stripMargin), - Row(1, "A", null, null) :: - Row(2, "B", null, null) :: - Row(3, "C", 3, "C") :: - Row(4, "D", 4, "D") :: - Row(null, null, 5, "E") :: - Row(null, null, 6, "F") :: Nil) - } - test("select with table name as qualifier") { checkAnswer( sql("SELECT testData.value FROM testData WHERE testData.key = 1"), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 55d5a56f1040..f1879a436b98 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -243,12 +243,16 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { private def loadTestData(session: SparkSession): Unit = { import session.implicits._ - (1 to 100).map(i => (i, i.toString)).toDF("key", "value").createOrReplaceTempView("testdata") + // A data set containing non-duplicate column values + (1 to 100).map(i => (i, i.toString)).toDF("key", "value") + .createOrReplaceTempView("uniqueRowData") + // A data set containing a complex data type: ARRAY ((Seq(1, 2, 3), Seq(Seq(1, 2, 3))) :: (Seq(2, 3, 4), Seq(Seq(2, 3, 4))) :: Nil) .toDF("arraycol", "nestedarraycol") .createOrReplaceTempView("arraydata") + // A data set containing a complex data type: MAP (Tuple1(Map(1 -> "a1", 2 -> "b1", 3 -> "c1", 4 -> "d1", 5 -> "e1")) :: Tuple1(Map(1 -> "a2", 2 -> "b2", 3 -> "c2", 4 -> "d2")) :: Tuple1(Map(1 -> "a3", 2 -> "b3", 3 -> "c3")) ::