From dc36ce9a00e99ab8ec8bf60da85fa80e84729a25 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Tue, 23 Aug 2016 23:12:32 -0700 Subject: [PATCH 1/2] test cases --- .../resources/sql-tests/inputs/arithmetic.sql | 14 +- .../test/resources/sql-tests/inputs/limit.sql | 14 +- .../resources/sql-tests/inputs/outer-join.sql | 296 +++++++++ .../sql-tests/results/arithmetic.sql.out | 14 +- .../resources/sql-tests/results/limit.sql.out | 16 +- .../sql-tests/results/outer-join.sql.out | 582 ++++++++++++++++++ .../org/apache/spark/sql/SQLQuerySuite.scala | 60 -- .../apache/spark/sql/SQLQueryTestSuite.scala | 64 +- 8 files changed, 970 insertions(+), 90 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/outer-join.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/outer-join.sql.out diff --git a/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql b/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql index f62b10ca0037..79815099c59a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql @@ -4,17 +4,17 @@ select -100; select +230; select -5.2; select +6.8e0; -select -key, +key from testdata where key = 2; -select -(key + 1), - key + 1, +(key + 5) from testdata where key = 1; -select -max(key), +max(key) from testdata; +select -key, +key from uniqueRowData where key = 2; +select -(key + 1), - key + 1, +(key + 5) from uniqueRowData where key = 1; +select -max(key), +max(key) from uniqueRowData; select - (-10); -select + (-key) from testdata where key = 32; -select - (+max(key)) from testdata; +select + (-key) from uniqueRowData where key = 32; +select - (+max(key)) from uniqueRowData; select - - 3; select - + 20; select + + 100; -select - - max(key) from testdata; -select + - key from testdata where key = 33; +select - - max(key) from uniqueRowData; +select + - key from uniqueRowData where key = 33; -- div select 5 / 2; diff --git 
a/sql/core/src/test/resources/sql-tests/inputs/limit.sql b/sql/core/src/test/resources/sql-tests/inputs/limit.sql index 2ea35f7f3a5c..a37c631f015e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/limit.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/limit.sql @@ -1,23 +1,23 @@ -- limit on various data types -select * from testdata limit 2; +select * from uniqueRowData limit 2; select * from arraydata limit 2; select * from mapdata limit 2; -- foldable non-literal in limit -select * from testdata limit 2 + 1; +select * from uniqueRowData limit 2 + 1; -select * from testdata limit CAST(1 AS int); +select * from uniqueRowData limit CAST(1 AS int); -- limit must be non-negative -select * from testdata limit -1; +select * from uniqueRowData limit -1; -- limit must be foldable -select * from testdata limit key > 3; +select * from uniqueRowData limit key > 3; -- limit must be integer -select * from testdata limit true; -select * from testdata limit 'a'; +select * from uniqueRowData limit true; +select * from uniqueRowData limit 'a'; -- limit within a subquery select * from (select * from range(10) limit 5) where id > 3; diff --git a/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql b/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql new file mode 100644 index 000000000000..f51fae718b6e --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql @@ -0,0 +1,296 @@ +-- basic full outer join +SELECT * FROM + (SELECT * FROM upperCaseData WHERE N <= 4) leftTable FULL OUTER JOIN + (SELECT * FROM upperCaseData WHERE N >= 3) rightTable + ON leftTable.N = rightTable.N; + +-- basic right outer join +SELECT * FROM lowercasedata l RIGHT OUTER JOIN uppercasedata u ON l.n = u.N; + +-- basic left outer join +SELECT * FROM uppercasedata u LEFT OUTER JOIN lowercasedata l ON l.n = u.N; + +-- left-outer join over two nested table expressions +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + 
FROM + ( + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 + ) a + LEFT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c; + +-- right-outer join over two nested table expressions +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 + ) a + RIGHT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c; + +-- full-outer join over two nested table expressions +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 + ) a + FULL OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c; + +-- full-outer join + left-outer join over nested table expressions +SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 10 and src1.key < 150 + ) a + FULL OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 150 and src2.key < 300 + ) b + ON (a.c1 = b.c3) + LEFT OUTER JOIN + ( + SELECT src3.key AS c5, src3.value AS c6 + FROM duplicateRowData src3 WHERE src3.key > 200 and src3.key < 400 + ) c + ON (a.c1 = c.c5) +) c; + +-- left-outer join + join condition + filter +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key 
AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 + ) a + LEFT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c +where c.c3 IS NULL AND c.c1 IS NOT NULL; + +-- full outer join over Aggregate +SELECT a.key, a.value, b.key, b.value +FROM + ( + SELECT src1.key as key, count(src1.value) AS value + FROM duplicateRowData src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value + FROM nullData src2 group by src2.key + ) b +ON (a.key = b.key); + +-- full outer join + multi distinct +SELECT a.key, a.value, b.key, b.value1, b.value2 +FROM + ( + SELECT src1.key as key, count(src1.value) AS value + FROM duplicateRowData src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value1, + count(distinct(src2.key)) AS value2 + FROM nullData src2 group by src2.key + ) b +ON (a.key = b.key); + +-- inner join + right-outer join #1 +SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 +FROM ( + SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, + src3.value as v3 + FROM duplicateRowData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200) + RIGHT OUTER JOIN duplicateRowData src3 ON (src1.key = src3.key AND src3.key < 300) + SORT BY k1,v1,k2,v2,k3,v3 +)a; + +-- inner join + right-outer join #2 +SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 +FROM ( + SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, + src3.value as v3 + FROM duplicateRowData src1 + JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key < 100) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src1.key = src3.key AND src3.key < 300) + SORT BY k1,v1,k2,v2,k3,v3 +)a; + +-- left outer join + right outer join +SELECT * +FROM + duplicateRowData src1 + LEFT OUTER JOIN 
duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) +SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- left outer + right outer +SELECT * FROM duplicateRowData src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- right outer + right outer +SELECT * FROM duplicateRowData src1 + RIGHT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- left outer + left outer +SELECT * FROM duplicateRowData src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- right outer + left outer +SELECT * FROM duplicateRowData src1 + RIGHT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- inner + left outer +SELECT * FROM duplicateRowData src1 + JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN duplicateRowData src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- inner + right outer +SELECT * FROM duplicateRowData src1 + JOIN duplicateRowData src2 
ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- left outer + inner +SELECT * FROM duplicateRowData src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- right outer + inner +SELECT * FROM duplicateRowData src1 + RIGHT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- left outer join with sorted by nested table expression +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +LEFT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +ON (x.key = Y.key) +select Y.key,Y.value; + +-- right outer join with sorted by nested table expression +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +RIGHT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +ON (x.key = Y.key) +select Y.key,Y.value; + +-- inner + left outer with sorted by nested table expression +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value; + +-- left + left outer with sorted by nested table expression +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +LEFT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT
duplicateRowData.* FROM duplicateRowData sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value; + +-- left + right outer with sorted by nested table expression +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +LEFT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value; + +-- right + right outer with sorted by nested table expression +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +RIGHT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value; + +-- right outer + inner with sorted by nested table expression +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +RIGHT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value; diff --git a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out index 6abe048af477..c1d3e6a898a5 100644 --- a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out @@ -35,7 +35,7 @@ struct<6.8:double> -- !query 4 -select -key, +key from testdata where key = 2 +select -key, +key from uniqueRowData where key = 2 -- !query 4 schema struct<(- key):int,key:int> -- !query 4 output @@ -43,7 +43,7 @@ struct<(- key):int,key:int> -- !query 5 -select -(key + 1), - key + 1, +(key + 5) from testdata where key = 1 +select -(key + 1), - key + 1, +(key + 5) from uniqueRowData where key = 1 -- !query 5 schema struct<(- (key + 1)):int,((- key) + 
1):int,(key + 5):int> -- !query 5 output @@ -51,7 +51,7 @@ struct<(- (key + 1)):int,((- key) + 1):int,(key + 5):int> -- !query 6 -select -max(key), +max(key) from testdata +select -max(key), +max(key) from uniqueRowData -- !query 6 schema struct<(- max(key)):int,max(key):int> -- !query 6 output @@ -67,7 +67,7 @@ struct<(- -10):int> -- !query 8 -select + (-key) from testdata where key = 32 +select + (-key) from uniqueRowData where key = 32 -- !query 8 schema struct<(- key):int> -- !query 8 output @@ -75,7 +75,7 @@ struct<(- key):int> -- !query 9 -select - (+max(key)) from testdata +select - (+max(key)) from uniqueRowData -- !query 9 schema struct<(- max(key)):int> -- !query 9 output @@ -107,7 +107,7 @@ struct<100:int> -- !query 13 -select - - max(key) from testdata +select - - max(key) from uniqueRowData -- !query 13 schema struct<(- (- max(key))):int> -- !query 13 output @@ -115,7 +115,7 @@ struct<(- (- max(key))):int> -- !query 14 -select + - key from testdata where key = 33 +select + - key from uniqueRowData where key = 33 -- !query 14 schema struct<(- key):int> -- !query 14 output diff --git a/sql/core/src/test/resources/sql-tests/results/limit.sql.out b/sql/core/src/test/resources/sql-tests/results/limit.sql.out index cb4e4d04810d..e3d1a8bc8717 100644 --- a/sql/core/src/test/resources/sql-tests/results/limit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/limit.sql.out @@ -3,7 +3,7 @@ -- !query 0 -select * from testdata limit 2 +select * from uniqueRowData limit 2 -- !query 0 schema struct -- !query 0 output @@ -30,7 +30,7 @@ struct> -- !query 3 -select * from testdata limit 2 + 1 +select * from uniqueRowData limit 2 + 1 -- !query 3 schema struct -- !query 3 output @@ -40,7 +40,7 @@ struct -- !query 4 -select * from testdata limit CAST(1 AS int) +select * from uniqueRowData limit CAST(1 AS int) -- !query 4 schema struct -- !query 4 output @@ -48,7 +48,7 @@ struct -- !query 5 -select * from testdata limit -1 +select * from uniqueRowData limit -1 -- 
!query 5 schema struct<> -- !query 5 output @@ -57,16 +57,16 @@ The limit expression must be equal to or greater than 0, but got -1; -- !query 6 -select * from testdata limit key > 3 +select * from uniqueRowData limit key > 3 -- !query 6 schema struct<> -- !query 6 output org.apache.spark.sql.AnalysisException -The limit expression must evaluate to a constant value, but got (testdata.`key` > 3); +The limit expression must evaluate to a constant value, but got (uniquerowdata.`key` > 3); -- !query 7 -select * from testdata limit true +select * from uniqueRowData limit true -- !query 7 schema struct<> -- !query 7 output @@ -75,7 +75,7 @@ The limit expression must be integer type, but got boolean; -- !query 8 -select * from testdata limit 'a' +select * from uniqueRowData limit 'a' -- !query 8 schema struct<> -- !query 8 output diff --git a/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out b/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out new file mode 100644 index 000000000000..c478745cc203 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out @@ -0,0 +1,582 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 28 + + +-- !query 0 +SELECT * FROM + (SELECT * FROM upperCaseData WHERE N <= 4) leftTable FULL OUTER JOIN + (SELECT * FROM upperCaseData WHERE N >= 3) rightTable + ON leftTable.N = rightTable.N +-- !query 0 schema +struct +-- !query 0 output +1 A NULL NULL +2 B NULL NULL +3 C 3 C +4 D 4 D +NULL NULL 5 E +NULL NULL 6 F + + +-- !query 1 +SELECT * FROM lowercasedata l RIGHT OUTER JOIN uppercasedata u ON l.n = u.N +-- !query 1 schema +struct +-- !query 1 output +1 a 1 A +2 b 2 B +3 c 3 C +4 d 4 D +NULL NULL 5 E +NULL NULL 6 F + + +-- !query 2 +SELECT * FROM uppercasedata u LEFT OUTER JOIN lowercasedata l ON l.n = u.N +-- !query 2 schema +struct +-- !query 2 output +1 A 1 a +2 B 2 b +3 C 3 c +4 D 4 d +5 E NULL NULL +6 F NULL NULL + + +-- !query 3 +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( 
+ SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 + ) a + LEFT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c +-- !query 3 schema +struct +-- !query 3 output +165 val_165 NULL NULL +165 val_165 NULL NULL +251 val_251 251 val_251 + + +-- !query 4 +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 + ) a + RIGHT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c +-- !query 4 schema +struct +-- !query 4 output +251 val_251 251 val_251 +NULL NULL 330 val_330 + + +-- !query 5 +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 + ) a + FULL OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c +-- !query 5 schema +struct +-- !query 5 output +165 val_165 NULL NULL +165 val_165 NULL NULL +251 val_251 251 val_251 +NULL NULL 330 val_330 + + +-- !query 6 +SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 10 and src1.key < 150 + ) a + FULL OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 150 and src2.key < 300 + ) b + ON (a.c1 = b.c3) + LEFT OUTER JOIN + ( + SELECT src3.key AS c5, 
src3.value AS c6 + FROM duplicateRowData src3 WHERE src3.key > 200 and src3.key < 400 + ) c + ON (a.c1 = c.c5) +) c +-- !query 6 schema +struct +-- !query 6 output +86 val_86 NULL NULL NULL NULL +NULL NULL 165 val_165 NULL NULL +NULL NULL 165 val_165 NULL NULL +NULL NULL 251 val_251 NULL NULL + + +-- !query 7 +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 + ) a + LEFT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c +where c.c3 IS NULL AND c.c1 IS NOT NULL +-- !query 7 schema +struct +-- !query 7 output +165 val_165 NULL NULL +165 val_165 NULL NULL + + +-- !query 8 +SELECT a.key, a.value, b.key, b.value +FROM + ( + SELECT src1.key as key, count(src1.value) AS value + FROM duplicateRowData src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value + FROM nullData src2 group by src2.key + ) b +ON (a.key = b.key) +-- !query 8 schema +struct +-- !query 8 output +165 2 165 1 +251 1 NULL NULL +330 1 330 1 +86 1 86 1 +NULL NULL 201 0 +NULL NULL NULL 1 + + +-- !query 9 +SELECT a.key, a.value, b.key, b.value1, b.value2 +FROM + ( + SELECT src1.key as key, count(src1.value) AS value + FROM duplicateRowData src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value1, + count(distinct(src2.key)) AS value2 + FROM nullData src2 group by src2.key + ) b +ON (a.key = b.key) +-- !query 9 schema +struct +-- !query 9 output +165 2 165 1 1 +251 1 NULL NULL NULL +330 1 330 1 1 +86 1 86 1 1 +NULL NULL 201 0 1 +NULL NULL NULL 1 0 + + +-- !query 10 +SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 +FROM ( + SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, + src3.value as v3 + FROM 
duplicateRowData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200) + RIGHT OUTER JOIN duplicateRowData src3 ON (src1.key = src3.key AND src3.key < 300) + SORT BY k1,v1,k2,v2,k3,v3 +)a +-- !query 10 schema +struct +-- !query 10 output +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +86 val_86 86 val_86 86 val_86 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 + + +-- !query 11 +SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 +FROM ( + SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, + src3.value as v3 + FROM duplicateRowData src1 + JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key < 100) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src1.key = src3.key AND src3.key < 300) + SORT BY k1,v1,k2,v2,k3,v3 +)a +-- !query 11 schema +struct +-- !query 11 output +86 val_86 86 val_86 86 val_86 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 + + +-- !query 12 +SELECT * +FROM + duplicateRowData src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) +SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 12 schema +struct +-- !query 12 output +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 +NULL NULL NULL NULL 86 val_86 + + +-- !query 13 +SELECT * FROM duplicateRowData src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER 
JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 13 schema +struct +-- !query 13 output +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 +NULL NULL NULL NULL 86 val_86 + + +-- !query 14 +SELECT * FROM duplicateRowData src1 + RIGHT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 14 schema +struct +-- !query 14 output +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 86 val_86 86 val_86 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 + + +-- !query 15 +SELECT * FROM duplicateRowData src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 15 schema +struct +-- !query 15 output +165 val_165 NULL NULL NULL NULL +165 val_165 NULL NULL NULL NULL +251 val_251 NULL NULL NULL NULL +330 val_330 NULL NULL NULL NULL +86 val_86 NULL NULL NULL NULL + + +-- !query 16 +SELECT * FROM duplicateRowData src1 + RIGHT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 16 schema +struct +-- !query 16 output +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 
val_165 165 val_165 +NULL NULL 251 val_251 NULL NULL +NULL NULL 330 val_330 NULL NULL +NULL NULL 86 val_86 86 val_86 + + +-- !query 17 +SELECT * FROM duplicateRowData src1 + JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN duplicateRowData src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 17 schema +struct +-- !query 17 output + + + +-- !query 18 +SELECT * FROM duplicateRowData src1 + JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 18 schema +struct +-- !query 18 output +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 +NULL NULL NULL NULL 86 val_86 + + +-- !query 19 +SELECT * FROM duplicateRowData src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 19 schema +struct +-- !query 19 output + + + +-- !query 20 +SELECT * FROM duplicateRowData src1 + RIGHT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 20 schema +struct +-- !query 20 output +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 86 val_86 86 val_86 + + +-- !query 21 +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +LEFT OUTER JOIN 
+(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +ON (x.key = Y.key) +select Y.key,Y.value +-- !query 21 schema +struct +-- !query 21 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 22 +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +RIGHT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +ON (x.key = Y.key) +select Y.key,Y.value +-- !query 22 schema +struct +-- !query 22 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 23 +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value +-- !query 23 schema +struct +-- !query 23 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 24 +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +LEFT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value +-- !query 24 schema +struct +-- !query 24 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 25 +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +LEFT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value +-- !query 25 schema +struct +-- !query 25 output +165 val_165 +165 
val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 26 +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +RIGHT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value +-- !query 26 schema +struct +-- !query 26 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 27 +FROM +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +RIGHT OUTER JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value +-- !query 27 schema +struct +-- !query 27 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index eac266cba55b..edaaac569c4c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -734,49 +734,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } } - test("left outer join") { - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - checkAnswer( - sql("SELECT * FROM uppercasedata LEFT OUTER JOIN lowercasedata ON n = N"), - Row(1, "A", 1, "a") :: - Row(2, "B", 2, "b") :: - Row(3, "C", 3, "c") :: - Row(4, "D", 4, "d") :: - Row(5, "E", null, null) :: - Row(6, "F", null, null) :: Nil) - } - } - - test("right outer join") { - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - checkAnswer( - 
sql("SELECT * FROM lowercasedata RIGHT OUTER JOIN uppercasedata ON n = N"), - Row(1, "a", 1, "A") :: - Row(2, "b", 2, "B") :: - Row(3, "c", 3, "C") :: - Row(4, "d", 4, "D") :: - Row(null, null, 5, "E") :: - Row(null, null, 6, "F") :: Nil) - } - } - - test("full outer join") { - checkAnswer( - sql( - """ - |SELECT * FROM - | (SELECT * FROM upperCaseData WHERE N <= 4) leftTable FULL OUTER JOIN - | (SELECT * FROM upperCaseData WHERE N >= 3) rightTable - | ON leftTable.N = rightTable.N - """.stripMargin), - Row(1, "A", null, null) :: - Row(2, "B", null, null) :: - Row(3, "C", 3, "C") :: - Row (4, "D", 4, "D") :: - Row(null, null, 5, "E") :: - Row(null, null, 6, "F") :: Nil) - } - test("SPARK-11111 null-safe join should not use cartesian product") { val df = sql("select count(*) from testData a join testData b on (a.key <=> b.key)") val cp = df.queryExecution.sparkPlan.collect { @@ -808,23 +765,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { Row(2, "b", 2) :: Nil) } - test("mixed-case keywords") { - checkAnswer( - sql( - """ - |SeleCT * from - | (select * from upperCaseData WherE N <= 4) leftTable fuLL OUtER joiN - | (sElEcT * FROM upperCaseData whERe N >= 3) rightTable - | oN leftTable.N = rightTable.N - """.stripMargin), - Row(1, "A", null, null) :: - Row(2, "B", null, null) :: - Row(3, "C", 3, "C") :: - Row(4, "D", 4, "D") :: - Row(null, null, 5, "E") :: - Row(null, null, 6, "F") :: Nil) - } - test("select with table name as qualifier") { checkAnswer( sql("SELECT testData.value FROM testData WHERE testData.key = 1"), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 55d5a56f1040..29a45080bdc1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -243,12 +243,21 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { private 
def loadTestData(session: SparkSession): Unit = { import session.implicits._ - (1 to 100).map(i => (i, i.toString)).toDF("key", "value").createOrReplaceTempView("testdata") + // A data set containing non-duplicate column values + (1 to 100).map(i => (i, i.toString)).toDF("key", "value") + .createOrReplaceTempView("uniqueRowData") + // A data set containing duplicate values for each column (but all the rows are unique) + Seq((1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)) + .toDF("a", "b") + .createOrReplaceTempView("duplicateColumnValueData") + + // A data set containing a complex data type: ARRAY ((Seq(1, 2, 3), Seq(Seq(1, 2, 3))) :: (Seq(2, 3, 4), Seq(Seq(2, 3, 4))) :: Nil) .toDF("arraycol", "nestedarraycol") .createOrReplaceTempView("arraydata") + // A data set containing a complex data type: MAP (Tuple1(Map(1 -> "a1", 2 -> "b1", 3 -> "c1", 4 -> "d1", 5 -> "e1")) :: Tuple1(Map(1 -> "a2", 2 -> "b2", 3 -> "c2", 4 -> "d2")) :: Tuple1(Map(1 -> "a3", 2 -> "b3", 3 -> "c3")) :: @@ -256,6 +265,59 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { Tuple1(Map(1 -> "a5")) :: Nil) .toDF("mapcol") .createOrReplaceTempView("mapdata") + + // A data set containing uppercase column names and column values + Seq((1, "a"), (2, "b"), (3, "c"), (4, "d")) + .toDF("n", "l") + .createOrReplaceTempView("lowerCaseData") + + // A data set containing uppercase column names and column values + Seq((1, "A"), (2, "B"), (3, "C"), (4, "D"), (5, "E"), (6, "F")) + .toDF("N", "L") + .createOrReplaceTempView("upperCaseData") + + // A data set containing duplicate rows + Seq((251, "val_251"), (86, "val_86"), (165, "val_165"), (330, "val_330"), (165, "val_165")) + .toDF("key", "value") + .createOrReplaceTempView("duplicateRowData") + + // A data set containing null + session.sql( + """ + |CREATE OR REPLACE TEMPORARY VIEW nullData AS SELECT * FROM VALUES + |(201, null), + |(86, "val_86"), + |(null, "val_null"), + |(165, "val_165"), + |(null, null), + |(330, "val_330"), + |(165, null) 
+ |as nullData(key, value) + """.stripMargin) + + // A data set with logical partition columns ("ds" and "hr") + Seq((251, "val_251", "2008-04-08", "11"), + (251, "val_251", "2008-04-09", "11"), + (251, "val_251", "2008-04-08", "12"), + (251, "val_251", "2008-04-09", "12"), + (86, "val_86", "2008-04-08", "11"), + (86, "val_86", "2008-04-09", "11"), + (86, "val_86", "2008-04-08", "12"), + (86, "val_86", "2008-04-09", "12"), + (165, "val_165", "2008-04-08", "11"), + (165, "val_165", "2008-04-09", "11"), + (165, "val_165", "2008-04-08", "12"), + (165, "val_165", "2008-04-09", "12"), + (330, "val_330", "2008-04-08", "11"), + (330, "val_330", "2008-04-09", "11"), + (330, "val_330", "2008-04-08", "12"), + (330, "val_330", "2008-04-09", "12"), + (165, "val_165", "2008-04-08", "11"), + (165, "val_165", "2008-04-09", "11"), + (165, "val_165", "2008-04-08", "12"), + (165, "val_165", "2008-04-09", "12")) + .toDF("key", "value", "ds", "hr") + .createOrReplaceTempView("partitionedData") } private val originalTimeZone = TimeZone.getDefault From 02d500befd049b07b9f7883b83476c33085dfbe2 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Wed, 24 Aug 2016 00:40:50 -0700 Subject: [PATCH 2/2] improve the test cases --- .../resources/sql-tests/inputs/outer-join.sql | 54 +++- .../sql-tests/results/outer-join.sql.out | 289 +++++++++++------- .../apache/spark/sql/SQLQueryTestSuite.scala | 58 ---- 3 files changed, 221 insertions(+), 180 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql b/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql index f51fae718b6e..dc8c858ae2dd 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql @@ -1,3 +1,41 @@ +-- A data set containing lowercase column names and column values +CREATE OR REPLACE TEMPORARY VIEW lowerCaseData AS SELECT * FROM VALUES +(1, "a"), +(2, "b"), +(3, "c"), +(4, "d") +as lowerCaseData(n, l); + +-- A data set
containing uppercase column names and column values +CREATE OR REPLACE TEMPORARY VIEW upperCaseData AS SELECT * FROM VALUES +(1, "A"), +(2, "B"), +(3, "C"), +(4, "D"), +(5, "E"), +(6, "F") +as upperCaseData(N, L); + +-- A data set containing null +CREATE OR REPLACE TEMPORARY VIEW nullData AS SELECT * FROM VALUES +(201, null), +(86, "val_86"), +(null, "val_null"), +(165, "val_165"), +(null, null), +(330, "val_330"), +(165, null) +as nullData(key, value); + +-- A data set containing duplicate rows +CREATE OR REPLACE TEMPORARY VIEW duplicateRowData AS SELECT * FROM VALUES +(251, "val_251"), +(86, "val_86"), +(165, "val_165"), +(330, "val_330"), +(165, "val_165") +as duplicateRowData(key, value); + -- basic full outer join SELECT * FROM (SELECT * FROM upperCaseData WHERE N <= 4) leftTable FULL OUTER JOIN @@ -228,7 +266,7 @@ SELECT * FROM duplicateRowData src1 FROM (SELECT duplicateRowData.* FROM duplicateRowData sort by key) x LEFT OUTER JOIN -(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +(SELECT nullData.* FROM nullData sort by value) Y ON (x.key = Y.key) select Y.key,Y.value; @@ -236,7 +274,7 @@ select Y.key,Y.value; FROM (SELECT duplicateRowData.* FROM duplicateRowData sort by key) x RIGHT OUTER JOIN -(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +(SELECT nullData.* FROM nullData sort by value) Y ON (x.key = Y.key) select Y.key,Y.value; @@ -253,20 +291,20 @@ select Y.key,Y.value; -- left + left outer with sorted by nested table expression FROM -(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +(SELECT nullData.* FROM nullData sort by key) x LEFT OUTER JOIN (SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y ON (x.key = Y.key) LEFT OUTER JOIN -(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z +(SELECT nullData.* FROM nullData sort by value) Z ON (x.key = Z.key) select Y.key,Y.value; -- left + right outer with sorted by nested table expression FROM -(SELECT duplicateRowData.* FROM 
duplicateRowData sort by key) x +(SELECT nullData.* FROM nullData sort by key) x LEFT OUTER JOIN -(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +(SELECT nullData.* FROM nullData sort by value) Y ON (x.key = Y.key) RIGHT OUTER JOIN (SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z @@ -277,7 +315,7 @@ select Y.key,Y.value; FROM (SELECT duplicateRowData.* FROM duplicateRowData sort by key) x RIGHT OUTER JOIN -(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +(SELECT nullData.* FROM nullData sort by value) Y ON (x.key = Y.key) RIGHT OUTER JOIN (SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z @@ -288,7 +326,7 @@ select Y.key,Y.value; FROM (SELECT duplicateRowData.* FROM duplicateRowData sort by key) x RIGHT OUTER JOIN -(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +(SELECT nullData.* FROM nullData sort by value) Y ON (x.key = Y.key) JOIN (SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z diff --git a/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out b/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out index c478745cc203..d1f4eed68ecb 100644 --- a/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out @@ -1,15 +1,73 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 28 +-- Number of queries: 32 -- !query 0 +CREATE OR REPLACE TEMPORARY VIEW lowerCaseData AS SELECT * FROM VALUES +(1, "a"), +(2, "b"), +(3, "c"), +(4, "d") +as lowerCaseData(n, l) +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +CREATE OR REPLACE TEMPORARY VIEW upperCaseData AS SELECT * FROM VALUES +(1, "A"), +(2, "B"), +(3, "C"), +(4, "D"), +(5, "E"), +(6, "F") +as upperCaseData(N, L) +-- !query 1 schema +struct<> +-- !query 1 output + + + +-- !query 2 +CREATE OR REPLACE TEMPORARY VIEW nullData AS SELECT * FROM VALUES +(201, null), +(86, "val_86"), 
+(null, "val_null"), +(165, "val_165"), +(null, null), +(330, "val_330"), +(165, null) +as nullData(key, value) +-- !query 2 schema +struct<> +-- !query 2 output + + + +-- !query 3 +CREATE OR REPLACE TEMPORARY VIEW duplicateRowData AS SELECT * FROM VALUES +(251, "val_251"), +(86, "val_86"), +(165, "val_165"), +(330, "val_330"), +(165, "val_165") +as duplicateRowData(key, value) +-- !query 3 schema +struct<> +-- !query 3 output + + + +-- !query 4 SELECT * FROM (SELECT * FROM upperCaseData WHERE N <= 4) leftTable FULL OUTER JOIN (SELECT * FROM upperCaseData WHERE N >= 3) rightTable ON leftTable.N = rightTable.N --- !query 0 schema +-- !query 4 schema struct --- !query 0 output +-- !query 4 output 1 A NULL NULL 2 B NULL NULL 3 C 3 C @@ -18,11 +76,11 @@ NULL NULL 5 E NULL NULL 6 F --- !query 1 +-- !query 5 SELECT * FROM lowercasedata l RIGHT OUTER JOIN uppercasedata u ON l.n = u.N --- !query 1 schema +-- !query 5 schema struct --- !query 1 output +-- !query 5 output 1 a 1 A 2 b 2 B 3 c 3 C @@ -31,11 +89,11 @@ NULL NULL 5 E NULL NULL 6 F --- !query 2 +-- !query 6 SELECT * FROM uppercasedata u LEFT OUTER JOIN lowercasedata l ON l.n = u.N --- !query 2 schema +-- !query 6 schema struct --- !query 2 output +-- !query 6 output 1 A 1 a 2 B 2 b 3 C 3 c @@ -44,7 +102,7 @@ struct 6 F NULL NULL --- !query 3 +-- !query 7 SELECT c.c1, c.c2, c.c3, c.c4 FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 @@ -60,15 +118,15 @@ FROM ( ) b ON (a.c1 = b.c3) ) c --- !query 3 schema +-- !query 7 schema struct --- !query 3 output +-- !query 7 output 165 val_165 NULL NULL 165 val_165 NULL NULL 251 val_251 251 val_251 --- !query 4 +-- !query 8 SELECT c.c1, c.c2, c.c3, c.c4 FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 @@ -84,14 +142,14 @@ FROM ( ) b ON (a.c1 = b.c3) ) c --- !query 4 schema +-- !query 8 schema struct --- !query 4 output +-- !query 8 output 251 val_251 251 val_251 NULL NULL 330 val_330 --- !query 5 +-- !query 9 SELECT c.c1, c.c2, c.c3, c.c4 FROM ( SELECT 
a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 @@ -107,16 +165,16 @@ FROM ( ) b ON (a.c1 = b.c3) ) c --- !query 5 schema +-- !query 9 schema struct --- !query 5 output +-- !query 9 output 165 val_165 NULL NULL 165 val_165 NULL NULL 251 val_251 251 val_251 NULL NULL 330 val_330 --- !query 6 +-- !query 10 SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 @@ -138,16 +196,16 @@ FROM ( ) c ON (a.c1 = c.c5) ) c --- !query 6 schema +-- !query 10 schema struct --- !query 6 output +-- !query 10 output 86 val_86 NULL NULL NULL NULL NULL NULL 165 val_165 NULL NULL NULL NULL 165 val_165 NULL NULL NULL NULL 251 val_251 NULL NULL --- !query 7 +-- !query 11 SELECT c.c1, c.c2, c.c3, c.c4 FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 @@ -164,14 +222,14 @@ FROM ( ON (a.c1 = b.c3) ) c where c.c3 IS NULL AND c.c1 IS NOT NULL --- !query 7 schema +-- !query 11 schema struct --- !query 7 output +-- !query 11 output 165 val_165 NULL NULL 165 val_165 NULL NULL --- !query 8 +-- !query 12 SELECT a.key, a.value, b.key, b.value FROM ( @@ -184,9 +242,9 @@ FROM FROM nullData src2 group by src2.key ) b ON (a.key = b.key) --- !query 8 schema +-- !query 12 schema struct --- !query 8 output +-- !query 12 output 165 2 165 1 251 1 NULL NULL 330 1 330 1 @@ -195,7 +253,7 @@ NULL NULL 201 0 NULL NULL NULL 1 --- !query 9 +-- !query 13 SELECT a.key, a.value, b.key, b.value1, b.value2 FROM ( @@ -209,9 +267,9 @@ FROM FROM nullData src2 group by src2.key ) b ON (a.key = b.key) --- !query 9 schema +-- !query 13 schema struct --- !query 9 output +-- !query 13 output 165 2 165 1 1 251 1 NULL NULL NULL 330 1 330 1 1 @@ -220,7 +278,7 @@ NULL NULL 201 0 1 NULL NULL NULL 1 0 --- !query 10 +-- !query 14 SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 FROM ( SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, @@ -229,9 +287,9 @@ FROM ( RIGHT OUTER JOIN duplicateRowData src3 ON (src1.key = src3.key 
AND src3.key < 300) SORT BY k1,v1,k2,v2,k3,v3 )a --- !query 10 schema +-- !query 14 schema struct --- !query 10 output +-- !query 14 output 165 val_165 165 val_165 165 val_165 165 val_165 165 val_165 165 val_165 165 val_165 165 val_165 165 val_165 @@ -245,7 +303,7 @@ NULL NULL NULL NULL 251 val_251 NULL NULL NULL NULL 330 val_330 --- !query 11 +-- !query 15 SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 FROM ( SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, @@ -257,9 +315,9 @@ FROM ( ON (src1.key = src3.key AND src3.key < 300) SORT BY k1,v1,k2,v2,k3,v3 )a --- !query 11 schema +-- !query 15 schema struct --- !query 11 output +-- !query 15 output 86 val_86 86 val_86 86 val_86 NULL NULL NULL NULL 165 val_165 NULL NULL NULL NULL 165 val_165 @@ -267,7 +325,7 @@ NULL NULL NULL NULL 251 val_251 NULL NULL NULL NULL 330 val_330 --- !query 12 +-- !query 16 SELECT * FROM duplicateRowData src1 @@ -276,9 +334,9 @@ FROM RIGHT OUTER JOIN duplicateRowData src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value --- !query 12 schema +-- !query 16 schema struct --- !query 12 output +-- !query 16 output NULL NULL NULL NULL 165 val_165 NULL NULL NULL NULL 165 val_165 NULL NULL NULL NULL 251 val_251 @@ -286,16 +344,16 @@ NULL NULL NULL NULL 330 val_330 NULL NULL NULL NULL 86 val_86 --- !query 13 +-- !query 17 SELECT * FROM duplicateRowData src1 LEFT OUTER JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) RIGHT OUTER JOIN duplicateRowData src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value --- !query 13 schema +-- !query 17 schema struct --- !query 13 output +-- !query 17 output NULL NULL NULL NULL 165 val_165 NULL NULL NULL NULL 165 val_165 NULL NULL NULL NULL 251 val_251 @@ -303,16 +361,16 @@ NULL NULL NULL NULL 330 val_330 NULL NULL NULL NULL 86 val_86 --- !query 14 +-- !query 
18 SELECT * FROM duplicateRowData src1 RIGHT OUTER JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) RIGHT OUTER JOIN duplicateRowData src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value --- !query 14 schema +-- !query 18 schema struct --- !query 14 output +-- !query 18 output NULL NULL 165 val_165 165 val_165 NULL NULL 165 val_165 165 val_165 NULL NULL 165 val_165 165 val_165 @@ -322,16 +380,16 @@ NULL NULL NULL NULL 251 val_251 NULL NULL NULL NULL 330 val_330 --- !query 15 +-- !query 19 SELECT * FROM duplicateRowData src1 LEFT OUTER JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) LEFT OUTER JOIN duplicateRowData src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value --- !query 15 schema +-- !query 19 schema struct --- !query 15 output +-- !query 19 output 165 val_165 NULL NULL NULL NULL 165 val_165 NULL NULL NULL NULL 251 val_251 NULL NULL NULL NULL @@ -339,16 +397,16 @@ struct 86 val_86 NULL NULL NULL NULL --- !query 16 +-- !query 20 SELECT * FROM duplicateRowData src1 RIGHT OUTER JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) LEFT OUTER JOIN duplicateRowData src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value --- !query 16 schema +-- !query 20 schema struct --- !query 16 output +-- !query 20 output NULL NULL 165 val_165 165 val_165 NULL NULL 165 val_165 165 val_165 NULL NULL 165 val_165 165 val_165 @@ -358,25 +416,25 @@ NULL NULL 330 val_330 NULL NULL NULL NULL 86 val_86 86 val_86 --- !query 17 +-- !query 21 SELECT * FROM duplicateRowData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) LEFT OUTER JOIN duplicateRowData src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, 
src1.value, src2.key, src2.value, src3.key, src3.value --- !query 17 schema +-- !query 21 schema struct --- !query 17 output +-- !query 21 output --- !query 18 +-- !query 22 SELECT * FROM duplicateRowData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) RIGHT OUTER JOIN duplicateRowData src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value --- !query 18 schema +-- !query 22 schema struct --- !query 18 output +-- !query 22 output NULL NULL NULL NULL 165 val_165 NULL NULL NULL NULL 165 val_165 NULL NULL NULL NULL 251 val_251 @@ -384,29 +442,29 @@ NULL NULL NULL NULL 330 val_330 NULL NULL NULL NULL 86 val_86 --- !query 19 +-- !query 23 SELECT * FROM duplicateRowData src1 LEFT OUTER JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) JOIN duplicateRowData src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value --- !query 19 schema +-- !query 23 schema struct --- !query 19 output +-- !query 23 output --- !query 20 +-- !query 24 SELECT * FROM duplicateRowData src1 RIGHT OUTER JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) JOIN duplicateRowData src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value --- !query 20 schema +-- !query 24 schema struct --- !query 20 output +-- !query 24 output NULL NULL 165 val_165 165 val_165 NULL NULL 165 val_165 165 val_165 NULL NULL 165 val_165 165 val_165 @@ -414,45 +472,47 @@ NULL NULL 165 val_165 165 val_165 NULL NULL 86 val_86 86 val_86 --- !query 21 +-- !query 25 FROM (SELECT duplicateRowData.* FROM duplicateRowData sort by key) x LEFT OUTER JOIN -(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +(SELECT nullData.* FROM nullData sort by value) Y ON (x.key = Y.key) select Y.key,Y.value --- !query 
21 schema +-- !query 25 schema struct --- !query 21 output -165 val_165 -165 val_165 +-- !query 25 output +165 NULL +165 NULL 165 val_165 165 val_165 -251 val_251 330 val_330 86 val_86 +NULL NULL --- !query 22 +-- !query 26 FROM (SELECT duplicateRowData.* FROM duplicateRowData sort by key) x RIGHT OUTER JOIN -(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +(SELECT nullData.* FROM nullData sort by value) Y ON (x.key = Y.key) select Y.key,Y.value --- !query 22 schema +-- !query 26 schema struct --- !query 22 output -165 val_165 -165 val_165 +-- !query 26 output +165 NULL +165 NULL 165 val_165 165 val_165 -251 val_251 +201 NULL 330 val_330 86 val_86 +NULL NULL +NULL val_null --- !query 23 +-- !query 27 FROM (SELECT duplicateRowData.* FROM duplicateRowData sort by key) x JOIN @@ -462,9 +522,9 @@ LEFT OUTER JOIN (SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z ON (x.key = Z.key) select Y.key,Y.value --- !query 23 schema +-- !query 27 schema struct --- !query 23 output +-- !query 27 output 165 val_165 165 val_165 165 val_165 @@ -478,19 +538,19 @@ struct 86 val_86 --- !query 24 +-- !query 28 FROM -(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +(SELECT nullData.* FROM nullData sort by key) x LEFT OUTER JOIN (SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y ON (x.key = Y.key) LEFT OUTER JOIN -(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z +(SELECT nullData.* FROM nullData sort by value) Z ON (x.key = Z.key) select Y.key,Y.value --- !query 24 schema +-- !query 28 schema struct --- !query 24 output +-- !query 28 output 165 val_165 165 val_165 165 val_165 @@ -499,84 +559,85 @@ struct 165 val_165 165 val_165 165 val_165 -251 val_251 330 val_330 86 val_86 +NULL NULL +NULL NULL +NULL NULL --- !query 25 +-- !query 29 FROM -(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x +(SELECT nullData.* FROM nullData sort by key) x LEFT OUTER JOIN -(SELECT duplicateRowData.* FROM 
duplicateRowData sort by value) Y +(SELECT nullData.* FROM nullData sort by value) Y ON (x.key = Y.key) RIGHT OUTER JOIN (SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z ON (x.key = Z.key) select Y.key,Y.value --- !query 25 schema +-- !query 29 schema struct --- !query 25 output -165 val_165 -165 val_165 -165 val_165 -165 val_165 +-- !query 29 output +165 NULL +165 NULL +165 NULL +165 NULL 165 val_165 165 val_165 165 val_165 165 val_165 -251 val_251 330 val_330 86 val_86 +NULL NULL --- !query 26 +-- !query 30 FROM (SELECT duplicateRowData.* FROM duplicateRowData sort by key) x RIGHT OUTER JOIN -(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +(SELECT nullData.* FROM nullData sort by value) Y ON (x.key = Y.key) RIGHT OUTER JOIN (SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z ON (x.key = Z.key) select Y.key,Y.value --- !query 26 schema +-- !query 30 schema struct --- !query 26 output -165 val_165 +-- !query 30 output +165 NULL +165 NULL +165 NULL +165 NULL 165 val_165 165 val_165 165 val_165 165 val_165 -165 val_165 -165 val_165 -165 val_165 -251 val_251 330 val_330 86 val_86 +NULL NULL --- !query 27 +-- !query 31 FROM (SELECT duplicateRowData.* FROM duplicateRowData sort by key) x RIGHT OUTER JOIN -(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y +(SELECT nullData.* FROM nullData sort by value) Y ON (x.key = Y.key) JOIN (SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z ON (x.key = Z.key) select Y.key,Y.value --- !query 27 schema +-- !query 31 schema struct --- !query 27 output -165 val_165 -165 val_165 +-- !query 31 output +165 NULL +165 NULL +165 NULL +165 NULL 165 val_165 165 val_165 165 val_165 165 val_165 -165 val_165 -165 val_165 -251 val_251 330 val_330 86 val_86 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 29a45080bdc1..f1879a436b98 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -247,11 +247,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { (1 to 100).map(i => (i, i.toString)).toDF("key", "value") .createOrReplaceTempView("uniqueRowData") - // A data set containing duplicate values for each column (but all the rows are unique) - Seq((1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)) - .toDF("a", "b") - .createOrReplaceTempView("duplicateColumnValueData") - // A data set containing a complex data type: ARRAY ((Seq(1, 2, 3), Seq(Seq(1, 2, 3))) :: (Seq(2, 3, 4), Seq(Seq(2, 3, 4))) :: Nil) .toDF("arraycol", "nestedarraycol") @@ -265,59 +260,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { Tuple1(Map(1 -> "a5")) :: Nil) .toDF("mapcol") .createOrReplaceTempView("mapdata") - - // A data set containing uppercase column names and column values - Seq((1, "a"), (2, "b"), (3, "c"), (4, "d")) - .toDF("n", "l") - .createOrReplaceTempView("lowerCaseData") - - // A data set containing uppercase column names and column values - Seq((1, "A"), (2, "B"), (3, "C"), (4, "D"), (5, "E"), (6, "F")) - .toDF("N", "L") - .createOrReplaceTempView("upperCaseData") - - // A data set containing duplicate rows - Seq((251, "val_251"), (86, "val_86"), (165, "val_165"), (330, "val_330"), (165, "val_165")) - .toDF("key", "value") - .createOrReplaceTempView("duplicateRowData") - - // A data set containing null - session.sql( - """ - |CREATE OR REPLACE TEMPORARY VIEW nullData AS SELECT * FROM VALUES - |(201, null), - |(86, "val_86"), - |(null, "val_null"), - |(165, "val_165"), - |(null, null), - |(330, "val_330"), - |(165, null) - |as nullData(key, value) - """.stripMargin) - - // A data set with logical partition columns ("ds" and "hr") - Seq((251, "val_251", "2008-04-08", "11"), - (251, "val_251", "2008-04-09", "11"), - (251, "val_251", "2008-04-08", "12"), - (251, "val_251", "2008-04-09", 
"12"), - (86, "val_86", "2008-04-08", "11"), - (86, "val_86", "2008-04-09", "11"), - (86, "val_86", "2008-04-08", "12"), - (86, "val_86", "2008-04-09", "12"), - (165, "val_165", "2008-04-08", "11"), - (165, "val_165", "2008-04-09", "11"), - (165, "val_165", "2008-04-08", "12"), - (165, "val_165", "2008-04-09", "12"), - (330, "val_330", "2008-04-08", "11"), - (330, "val_330", "2008-04-09", "11"), - (330, "val_330", "2008-04-08", "12"), - (330, "val_330", "2008-04-09", "12"), - (165, "val_165", "2008-04-08", "11"), - (165, "val_165", "2008-04-09", "11"), - (165, "val_165", "2008-04-08", "12"), - (165, "val_165", "2008-04-09", "12")) - .toDF("key", "value", "ds", "hr") - .createOrReplaceTempView("partitionedData") } private val originalTimeZone = TimeZone.getDefault