diff --git a/sql/core/src/test/resources/ssb/1.1.sql b/sql/core/src/test/resources/ssb/1.1.sql new file mode 100644 index 0000000000000..62da3029469e4 --- /dev/null +++ b/sql/core/src/test/resources/ssb/1.1.sql @@ -0,0 +1,6 @@ +select sum(lo_extendedprice*lo_discount) as revenue + from lineorder, date + where lo_orderdate = d_datekey + and d_year = 1993 + and lo_discount between 1 and 3 + and lo_quantity < 25 diff --git a/sql/core/src/test/resources/ssb/1.2.sql b/sql/core/src/test/resources/ssb/1.2.sql new file mode 100644 index 0000000000000..1657bfd4b2a32 --- /dev/null +++ b/sql/core/src/test/resources/ssb/1.2.sql @@ -0,0 +1,6 @@ +select sum(lo_extendedprice*lo_discount) as revenue + from lineorder, date + where lo_orderdate = d_datekey + and d_yearmonthnum = 199401 + and lo_discount between 4 and 6 + and lo_quantity between 26 and 35 diff --git a/sql/core/src/test/resources/ssb/1.3.sql b/sql/core/src/test/resources/ssb/1.3.sql new file mode 100644 index 0000000000000..e9bbf51f051dd --- /dev/null +++ b/sql/core/src/test/resources/ssb/1.3.sql @@ -0,0 +1,6 @@ +select sum(lo_extendedprice*lo_discount) as revenue + from lineorder, date + where lo_orderdate = d_datekey + and d_weeknuminyear = 6 and d_year = 1994 + and lo_discount between 5 and 7 + and lo_quantity between 36 and 40 diff --git a/sql/core/src/test/resources/ssb/2.1.sql b/sql/core/src/test/resources/ssb/2.1.sql new file mode 100644 index 0000000000000..00d402785320b --- /dev/null +++ b/sql/core/src/test/resources/ssb/2.1.sql @@ -0,0 +1,9 @@ +select sum(lo_revenue), d_year, p_brand1 + from lineorder, date, part, supplier + where lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and p_category = 'MFGR#12' + and s_region = 'AMERICA' + group by d_year, p_brand1 + order by d_year, p_brand1 diff --git a/sql/core/src/test/resources/ssb/2.2.sql b/sql/core/src/test/resources/ssb/2.2.sql new file mode 100644 index 0000000000000..c53a925f75ddb --- /dev/null +++ b/sql/core/src/test/resources/ssb/2.2.sql @@ -0,0 +1,9 @@ +select sum(lo_revenue), d_year, p_brand1 + from lineorder, date, part, supplier + where lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and p_brand1 between 'MFGR#2221' and 'MFGR#2228' + and s_region = 'ASIA' + group by d_year, p_brand1 + order by d_year, p_brand1 diff --git a/sql/core/src/test/resources/ssb/2.3.sql b/sql/core/src/test/resources/ssb/2.3.sql new file mode 100644 index 0000000000000..e36530c65e428 --- /dev/null +++ b/sql/core/src/test/resources/ssb/2.3.sql @@ -0,0 +1,9 @@ +select sum(lo_revenue), d_year, p_brand1 + from lineorder, date, part, supplier + where lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and p_brand1 = 'MFGR#2221' + and s_region = 'EUROPE' + group by d_year, p_brand1 + order by d_year, p_brand1 diff --git a/sql/core/src/test/resources/ssb/3.1.sql b/sql/core/src/test/resources/ssb/3.1.sql new file mode 100644 index 0000000000000..663ec3fb3d5fa --- /dev/null +++ b/sql/core/src/test/resources/ssb/3.1.sql @@ -0,0 +1,10 @@ +select c_nation, s_nation, d_year, sum(lo_revenue) as revenue + from customer, lineorder, supplier, date + where lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_orderdate = d_datekey + and c_region = 'ASIA' + and s_region = 'ASIA' + and d_year >= 1992 and d_year <= 1997 + group by c_nation, s_nation, d_year + order by d_year asc, revenue desc diff --git a/sql/core/src/test/resources/ssb/3.2.sql b/sql/core/src/test/resources/ssb/3.2.sql new file mode 100644 index 0000000000000..e1eaf10cded62 --- /dev/null +++ b/sql/core/src/test/resources/ssb/3.2.sql @@ -0,0 +1,10 @@ +select c_city, s_city, d_year, sum(lo_revenue) as revenue + from customer, lineorder, supplier, date + where lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_orderdate = d_datekey + and c_nation = 'UNITED STATES' + and s_nation = 'UNITED STATES' + and d_year >= 1992 and d_year <= 1997 + group by c_city, s_city, d_year + order by d_year asc, revenue desc diff --git a/sql/core/src/test/resources/ssb/3.3.sql b/sql/core/src/test/resources/ssb/3.3.sql new file mode 100644 index 0000000000000..f2cb44f15a761 --- /dev/null +++ b/sql/core/src/test/resources/ssb/3.3.sql @@ -0,0 +1,12 @@ +select c_city, s_city, d_year, sum(lo_revenue) as revenue + from customer, lineorder, supplier, date + where lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_orderdate = d_datekey + and c_nation = 'UNITED KINGDOM' + and (c_city='UNITED KI1' or c_city='UNITED KI5') + and (s_city='UNITED KI1' or s_city='UNITED KI5') + and s_nation = 'UNITED KINGDOM' + and d_year >= 1992 and d_year <= 1997 + group by c_city, s_city, d_year + order by d_year asc, revenue desc diff --git a/sql/core/src/test/resources/ssb/3.4.sql b/sql/core/src/test/resources/ssb/3.4.sql new file mode 100644 index 0000000000000..936f2464a55f1 --- /dev/null +++ b/sql/core/src/test/resources/ssb/3.4.sql @@ -0,0 +1,12 @@ +select c_city, s_city, d_year, sum(lo_revenue) as revenue + from customer, lineorder, supplier, date + where lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_orderdate = d_datekey + and c_nation = 'UNITED KINGDOM' + and (c_city='UNITED KI1' or c_city='UNITED KI5') + and (s_city='UNITED KI1' or s_city='UNITED KI5') + and s_nation = 'UNITED KINGDOM' + and d_yearmonth = 'Dec1997' + group by c_city, s_city, d_year + order by d_year asc, revenue desc diff --git a/sql/core/src/test/resources/ssb/4.1.sql b/sql/core/src/test/resources/ssb/4.1.sql new file mode 100644 index 0000000000000..af19ecdf58c1f --- /dev/null +++ b/sql/core/src/test/resources/ssb/4.1.sql @@ -0,0 +1,11 @@ +select d_year, c_nation, sum(lo_revenue-lo_supplycost) as profit1 + from date, customer, supplier, part, lineorder + where lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_partkey = p_partkey + and lo_orderdate = d_datekey + and c_region = 'AMERICA' + and s_region = 'AMERICA' + and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2') + group by d_year, c_nation + order by d_year, c_nation diff --git a/sql/core/src/test/resources/ssb/4.2.sql b/sql/core/src/test/resources/ssb/4.2.sql new file mode 100644 index 0000000000000..2ffe6e2897522 --- /dev/null +++ b/sql/core/src/test/resources/ssb/4.2.sql @@ -0,0 +1,12 @@ +select d_year, s_nation, p_category, sum(lo_revenue-lo_supplycost) as profit1 + from date, customer, supplier, part, lineorder + where lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_partkey = p_partkey + and lo_orderdate = d_datekey + and c_region = 'AMERICA' + and s_region = 'AMERICA' + and (d_year = 1997 or d_year = 1998) + and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2') + group by d_year, s_nation, p_category + order by d_year, s_nation, p_category diff --git a/sql/core/src/test/resources/ssb/4.3.sql b/sql/core/src/test/resources/ssb/4.3.sql new file mode 100644 index 0000000000000..e24e2cc78c914 --- /dev/null +++ b/sql/core/src/test/resources/ssb/4.3.sql @@ -0,0 +1,12 @@ +select d_year, s_city, p_brand1, sum(lo_revenue-lo_supplycost) as profit1 + from date, customer, supplier, part, lineorder + where lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_partkey = p_partkey + and lo_orderdate = d_datekey + and c_region = 'AMERICA' + and s_nation = 'UNITED STATES' + and (d_year = 1997 or d_year = 1998) + and p_category = 'MFGR#14' + group by d_year, s_city, p_brand1 + order by d_year, s_city, p_brand1 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SSBQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SSBQuerySuite.scala new file mode 100644 index 0000000000000..9a0c61b3304c5 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/SSBQuerySuite.scala @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.util.resourceToString + +/** + * This test suite ensures all the Star Schema Benchmark queries can be successfully analyzed, + * optimized and compiled without hitting the max iteration threshold. + */ +class SSBQuerySuite extends BenchmarkQueryTest { + + override def beforeAll { + super.beforeAll + + sql( + """ + |CREATE TABLE `part` (`p_partkey` INT, `p_name` STRING, `p_mfgr` STRING, + |`p_category` STRING, `p_brand1` STRING, `p_color` STRING, `p_type` STRING, `p_size` INT, + |`p_container` STRING) + |USING parquet + """.stripMargin) + + sql( + """ + |CREATE TABLE `supplier` (`s_suppkey` INT, `s_name` STRING, `s_address` STRING, + |`s_city` STRING, `s_nation` STRING, `s_region` STRING, `s_phone` STRING) + |USING parquet + """.stripMargin) + + sql( + """ + |CREATE TABLE `customer` (`c_custkey` INT, `c_name` STRING, `c_address` STRING, + |`c_city` STRING, `c_nation` STRING, `c_region` STRING, `c_phone` STRING, + |`c_mktsegment` STRING) + |USING parquet + """.stripMargin) + + sql( + """ + |CREATE TABLE `date` (`d_datekey` INT, `d_date` STRING, `d_dayofweek` STRING, + |`d_month` STRING, `d_year` INT, `d_yearmonthnum` INT, `d_yearmonth` STRING, + |`d_daynuminweek` INT, `d_daynuminmonth` INT, `d_daynuminyear` INT, `d_monthnuminyear` INT, + |`d_weeknuminyear` INT, `d_sellingseason` STRING, `d_lastdayinweekfl` STRING, + |`d_lastdayinmonthfl` STRING, `d_holidayfl` STRING, `d_weekdayfl` STRING) + |USING parquet + """.stripMargin) + + sql( + """ + |CREATE TABLE `lineorder` (`lo_orderkey` INT, `lo_linenumber` INT, `lo_custkey` INT, + |`lo_partkey` INT, `lo_suppkey` INT, `lo_orderdate` INT, `lo_orderpriority` STRING, + |`lo_shippriority` STRING, `lo_quantity` INT, `lo_extendedprice` INT, + |`lo_ordertotalprice` INT, `lo_discount` INT, `lo_revenue` INT, `lo_supplycost` INT, + |`lo_tax` INT, `lo_commitdate` INT, `lo_shipmode` STRING) + |USING parquet + """.stripMargin) + } + + val ssbQueries = Seq( + "1.1", "1.2", "1.3", "2.1", "2.2", "2.3", "3.1", "3.2", "3.3", "3.4", "4.1", "4.2", "4.3") + + ssbQueries.foreach { name => + val queryString = resourceToString(s"ssb/$name.sql", + classLoader = Thread.currentThread.getContextClassLoader) + test(name) { + // check the plans can be properly generated + val plan = sql(queryString).queryExecution.executedPlan + checkGeneratedCode(plan) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala index 69ac92eaed7dc..e3e700529bba7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala @@ -17,9 +17,7 @@ package org.apache.spark.sql -import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.util.resourceToString -import org.apache.spark.util.Utils /** * This test suite ensures all the TPC-H queries can be successfully analyzed, optimized