Skip to content

Commit 190c57b

Browse files
maropu authored and dongjoon-hyun committed
[SPARK-34795][SPARK-35192][SPARK-35293][SPARK-35327][SQL][TESTS][3.0] Adds a new job in GitHub Actions to check the output of TPC-DS queries
### What changes were proposed in this pull request? This PR proposes to add a new job in GitHub Actions to check the output of TPC-DS queries. NOTE: To generate TPC-DS table data in GA jobs, this PR includes generator code implemented in #32243 and #32460. This is the backport PR of #31886. ### Why are the changes needed? There are some cases where we noticed runtime-related bugs after merging commits (e.g., SPARK-33822). Therefore, I think it is worth adding a new job in GitHub Actions to check the query output of TPC-DS (sf=1). ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The new test was added. Closes #32479 from maropu/TPCDSQueryTestSuite-Branch3.0. Authored-by: Takeshi Yamamuro <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 2831c62 commit 190c57b

File tree

141 files changed

+18164
-126
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

141 files changed

+18164
-126
lines changed

.github/workflows/build_and_test.yml

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,3 +385,67 @@ jobs:
385385
# It uses Maven's 'install' intentionally, see https://github.com/apache/spark/pull/26414.
386386
./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install
387387
rm -rf ~/.m2/repository/org/apache/spark
388+
389+
tpcds-1g:
390+
name: Run TPC-DS queries with SF=1
391+
runs-on: ubuntu-20.04
392+
env:
393+
SPARK_LOCAL_IP: localhost
394+
steps:
395+
- name: Checkout Spark repository
396+
uses: actions/checkout@v2
397+
- name: Cache Scala, SBT and Maven
398+
uses: actions/cache@v2
399+
with:
400+
path: |
401+
build/apache-maven-*
402+
build/scala-*
403+
build/*.jar
404+
~/.sbt
405+
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
406+
restore-keys: |
407+
build-
408+
- name: Cache Ivy local repository
409+
uses: actions/cache@v2
410+
with:
411+
path: ~/.ivy2/cache
412+
key: tpcds-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
413+
restore-keys: |
414+
tpcds-ivy-
415+
- name: Install Java 8
416+
uses: actions/setup-java@v1
417+
with:
418+
java-version: 8
419+
- name: Cache TPC-DS generated data
420+
id: cache-tpcds-sf-1
421+
uses: actions/cache@v2
422+
with:
423+
path: ./tpcds-sf-1
424+
key: tpcds-${{ hashFiles('.github/workflows/build_and_test.yml', 'sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala') }}
425+
- name: Checkout tpcds-kit repository
426+
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
427+
uses: actions/checkout@v2
428+
with:
429+
repository: databricks/tpcds-kit
430+
path: ./tpcds-kit
431+
- name: Build tpcds-kit
432+
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
433+
run: cd tpcds-kit/tools && make OS=LINUX
434+
- name: Generate TPC-DS (SF=1) table data
435+
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
436+
run: build/sbt "sql/test:runMain org.apache.spark.sql.GenTPCDSData --dsdgenDir `pwd`/tpcds-kit/tools --location `pwd`/tpcds-sf-1 --scaleFactor 1 --numPartitions 1 --overwrite"
437+
- name: Run TPC-DS queries
438+
run: |
439+
SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
440+
- name: Upload test results to report
441+
if: always()
442+
uses: actions/upload-artifact@v2
443+
with:
444+
name: test-results-tpcds--8-hadoop3.2-hive2.3
445+
path: "**/target/test-reports/*.xml"
446+
- name: Upload unit tests log files
447+
if: failure()
448+
uses: actions/upload-artifact@v2
449+
with:
450+
name: unit-tests-log-tpcds--8-hadoop3.2-hive2.3
451+
path: "**/target/unit-tests.log"
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
-- Automatically generated by TPCDSQueryTestSuite
2+
3+
-- !query schema
4+
struct<c_customer_id:string>
5+
-- !query output
6+
AAAAAAAAAAABAAAA
7+
AAAAAAAAAAAHBAAA
8+
AAAAAAAAAAAMAAAA
9+
AAAAAAAAAAAOAAAA
10+
AAAAAAAAAABCBAAA
11+
AAAAAAAAAABEAAAA
12+
AAAAAAAAAABFAAAA
13+
AAAAAAAAAACFBAAA
14+
AAAAAAAAAACFBAAA
15+
AAAAAAAAAADBBAAA
16+
AAAAAAAAAADOAAAA
17+
AAAAAAAAAADPAAAA
18+
AAAAAAAAAAEABAAA
19+
AAAAAAAAAAEEAAAA
20+
AAAAAAAAAAEGBAAA
21+
AAAAAAAAAAENAAAA
22+
AAAAAAAAAAFCBAAA
23+
AAAAAAAAAAFEBAAA
24+
AAAAAAAAAAFGAAAA
25+
AAAAAAAAAAFLAAAA
26+
AAAAAAAAAAFPAAAA
27+
AAAAAAAAAAGCAAAA
28+
AAAAAAAAAAGEAAAA
29+
AAAAAAAAAAGIBAAA
30+
AAAAAAAAAAGOAAAA
31+
AAAAAAAAAAHABAAA
32+
AAAAAAAAAAHGBAAA
33+
AAAAAAAAAAHHAAAA
34+
AAAAAAAAAAHMAAAA
35+
AAAAAAAAAAHPAAAA
36+
AAAAAAAAAAHPAAAA
37+
AAAAAAAAAAHPAAAA
38+
AAAAAAAAAAJAAAAA
39+
AAAAAAAAAAJEBAAA
40+
AAAAAAAAAAJMAAAA
41+
AAAAAAAAAAJPAAAA
42+
AAAAAAAAAAKBBAAA
43+
AAAAAAAAAAKGBAAA
44+
AAAAAAAAAAKHBAAA
45+
AAAAAAAAAAKLAAAA
46+
AAAAAAAAAALCAAAA
47+
AAAAAAAAAALJAAAA
48+
AAAAAAAAAALJAAAA
49+
AAAAAAAAAAMABAAA
50+
AAAAAAAAAAMGAAAA
51+
AAAAAAAAAAMLAAAA
52+
AAAAAAAAAAMMAAAA
53+
AAAAAAAAAANHBAAA
54+
AAAAAAAAAANMAAAA
55+
AAAAAAAAAAOBBAAA
56+
AAAAAAAAAAPDAAAA
57+
AAAAAAAAAAPKAAAA
58+
AAAAAAAAAAPLAAAA
59+
AAAAAAAAABANAAAA
60+
AAAAAAAAABCCBAAA
61+
AAAAAAAAABCGAAAA
62+
AAAAAAAAABDABAAA
63+
AAAAAAAAABDBAAAA
64+
AAAAAAAAABDEAAAA
65+
AAAAAAAAABDEBAAA
66+
AAAAAAAAABDEBAAA
67+
AAAAAAAAABDFBAAA
68+
AAAAAAAAABDOAAAA
69+
AAAAAAAAABDOAAAA
70+
AAAAAAAAABEBBAAA
71+
AAAAAAAAABEDAAAA
72+
AAAAAAAAABEEAAAA
73+
AAAAAAAAABEEBAAA
74+
AAAAAAAAABEIBAAA
75+
AAAAAAAAABEOAAAA
76+
AAAAAAAAABFFBAAA
77+
AAAAAAAAABFHAAAA
78+
AAAAAAAAABFNAAAA
79+
AAAAAAAAABFOAAAA
80+
AAAAAAAAABGAAAAA
81+
AAAAAAAAABHDBAAA
82+
AAAAAAAAABHGAAAA
83+
AAAAAAAAABHGBAAA
84+
AAAAAAAAABHLAAAA
85+
AAAAAAAAABIAAAAA
86+
AAAAAAAAABIBAAAA
87+
AAAAAAAAABIDBAAA
88+
AAAAAAAAABIEBAAA
89+
AAAAAAAAABKLAAAA
90+
AAAAAAAAABKNAAAA
91+
AAAAAAAAABKNAAAA
92+
AAAAAAAAABLJAAAA
93+
AAAAAAAAABLNAAAA
94+
AAAAAAAAABMAAAAA
95+
AAAAAAAAABMEBAAA
96+
AAAAAAAAABMPAAAA
97+
AAAAAAAAABNABAAA
98+
AAAAAAAAABNBAAAA
99+
AAAAAAAAABNEAAAA
100+
AAAAAAAAABNEAAAA
101+
AAAAAAAAABNGAAAA
102+
AAAAAAAAABNNAAAA
103+
AAAAAAAAABOEAAAA
104+
AAAAAAAAABOGBAAA
105+
AAAAAAAAABPABAAA
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
-- Automatically generated by TPCDSQueryTestSuite
2+
3+
-- !query schema
4+
struct<cd_gender:string,cd_marital_status:string,cd_education_status:string,cnt1:bigint,cd_purchase_estimate:int,cnt2:bigint,cd_credit_rating:string,cnt3:bigint,cd_dep_count:int,cnt4:bigint,cd_dep_employed_count:int,cnt5:bigint,cd_dep_college_count:int,cnt6:bigint>
5+
-- !query output
6+
F W 4 yr Degree 1 4000 1 Low Risk 1 4 1 6 1 4 1
7+
M D 4 yr Degree 1 1500 1 Low Risk 1 3 1 4 1 2 1
8+
M S College 1 4500 1 High Risk 1 3 1 4 1 3 1
9+
M S Primary 1 9500 1 Low Risk 1 3 1 0 1 6 1
10+
M S Secondary 1 3000 1 High Risk 1 1 1 1 1 4 1
11+
M U 4 yr Degree 1 2000 1 Low Risk 1 3 1 1 1 3 1
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
-- Automatically generated by TPCDSQueryTestSuite
2+
3+
-- !query schema
4+
struct<customer_preferred_cust_flag:string>
5+
-- !query output
6+
NULL
7+
NULL
8+
NULL
9+
NULL
10+
N
11+
N
12+
N
13+
N
14+
N
15+
N
16+
N
17+
N
18+
N
19+
N
20+
N
21+
N
22+
N
23+
N
24+
N
25+
N
26+
N
27+
N
28+
N
29+
N
30+
N
31+
N
32+
N
33+
N
34+
N
35+
N
36+
N
37+
N
38+
N
39+
N
40+
N
41+
N
42+
N
43+
N
44+
N
45+
N
46+
N
47+
N
48+
N
49+
N
50+
N
51+
N
52+
N
53+
N
54+
N
55+
Y
56+
Y
57+
Y
58+
Y
59+
Y
60+
Y
61+
Y
62+
Y
63+
Y
64+
Y
65+
Y
66+
Y
67+
Y
68+
Y
69+
Y
70+
Y
71+
Y
72+
Y
73+
Y
74+
Y
75+
Y
76+
Y
77+
Y
78+
Y
79+
Y
80+
Y
81+
Y
82+
Y
83+
Y
84+
Y
85+
Y
86+
Y
87+
Y
88+
Y
89+
Y
90+
Y
91+
Y
92+
Y
93+
Y
94+
Y
95+
Y
96+
Y
97+
Y
98+
Y
99+
Y

0 commit comments

Comments
 (0)