Skip to content

Commit 89b98d4

Browse files
committed
Split TPC-DS build in GitHub Actions
1 parent 3565c3a commit 89b98d4

File tree

2 files changed

+51
-22
lines changed

2 files changed

+51
-22
lines changed

.github/workflows/build_and_test.yml

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -676,9 +676,26 @@ jobs:
676676
- name: Generate TPC-DS (SF=1) table data
677677
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
678678
run: build/sbt "sql/test:runMain org.apache.spark.sql.GenTPCDSData --dsdgenDir `pwd`/tpcds-kit/tools --location `pwd`/tpcds-sf-1 --scaleFactor 1 --numPartitions 1 --overwrite"
679-
- name: Run TPC-DS queries
679+
- name: Run TPC-DS queries (Sort merge join)
680680
run: |
681681
SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
682+
env:
683+
SPARK_TPCDS_JOIN_CONF: |
684+
spark.sql.autoBroadcastJoinThreshold=-1
685+
spark.sql.join.preferSortMergeJoin=true
686+
- name: Run TPC-DS queries (Broadcast hash join)
687+
run: |
688+
SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
689+
env:
690+
SPARK_TPCDS_JOIN_CONF: |
691+
spark.sql.autoBroadcastJoinThreshold=10485760
692+
- name: Run TPC-DS queries (Shuffled hash join)
693+
run: |
694+
SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
695+
env:
696+
SPARK_TPCDS_JOIN_CONF: |
697+
spark.sql.autoBroadcastJoinThreshold=-1
698+
spark.sql.join.forceApplyShuffledHashJoin=true
682699
- name: Upload test results to report
683700
if: always()
684701
uses: actions/upload-artifact@v2
@@ -695,7 +712,7 @@ jobs:
695712
docker-integration-tests:
696713
needs: configure-jobs
697714
if: needs.configure-jobs.outputs.type == 'regular'
698-
name: Run docker integration tests
715+
name: Run Docker integration tests
699716
runs-on: ubuntu-20.04
700717
env:
701718
HADOOP_PROFILE: ${{ needs.configure-jobs.outputs.hadoop }}

sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala

Lines changed: 32 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,8 @@ package org.apache.spark.sql
2020
import java.io.File
2121
import java.nio.file.{Files, Paths}
2222

23+
import scala.collection.JavaConverters._
24+
2325
import org.apache.spark.{SparkConf, SparkContext}
2426
import org.apache.spark.sql.catalyst.util.{fileToString, resourceToString, stringToFile}
2527
import org.apache.spark.sql.internal.SQLConf
@@ -100,9 +102,9 @@ class TPCDSQueryTestSuite extends QueryTest with TPCDSBase with SQLQueryTestHelp
100102
private def runQuery(
101103
query: String,
102104
goldenFile: File,
103-
conf: Seq[(String, String)],
104-
needSort: Boolean): Unit = {
105-
withSQLConf(conf: _*) {
105+
conf: Map[String, String]): Unit = {
106+
val shouldSortResults = sortMergeJoinConf != conf // Sort for other joins
107+
withSQLConf(conf.toSeq: _*) {
106108
try {
107109
val (schema, output) = handleExceptions(getNormalizedResult(spark, query))
108110
val queryString = query.trim
@@ -139,7 +141,7 @@ class TPCDSQueryTestSuite extends QueryTest with TPCDSBase with SQLQueryTestHelp
139141
assertResult(expectedSchema, s"Schema did not match\n$queryString") {
140142
schema
141143
}
142-
if (needSort) {
144+
if (shouldSortResults) {
143145
val expectSorted = expectedOutput.split("\n").sorted.map(_.trim)
144146
.mkString("\n").replaceAll("\\s+$", "")
145147
val outputSorted = output.sorted.map(_.trim).mkString("\n").replaceAll("\\s+$", "")
@@ -171,22 +173,36 @@ class TPCDSQueryTestSuite extends QueryTest with TPCDSBase with SQLQueryTestHelp
171173
SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1",
172174
"spark.sql.join.forceApplyShuffledHashJoin" -> "true")
173175

174-
val joinConfSet: Set[Map[String, String]] =
175-
Set(sortMergeJoinConf, broadcastHashJoinConf, shuffledHashJoinConf);
176+
val allJoinConfCombinations = Seq(
177+
sortMergeJoinConf, broadcastHashJoinConf, shuffledHashJoinConf)
178+
179+
val joinConfs: Seq[Map[String, String]] = if (regenerateGoldenFiles) {
180+
require(
181+
!sys.env.contains("SPARK_TPCDS_JOIN_CONF"),
182+
"'SPARK_TPCDS_JOIN_CONF' cannot be set together with 'SPARK_GENERATE_GOLDEN_FILES'")
183+
Seq(sortMergeJoinConf)
184+
} else {
185+
sys.env.get("SPARK_TPCDS_JOIN_CONF").map { s =>
186+
val p = new java.util.Properties()
187+
p.load(new java.io.StringReader(s))
188+
Seq(p.asScala.toMap)
189+
}.getOrElse(allJoinConfCombinations)
190+
}
191+
192+
assert(joinConfs.nonEmpty)
193+
joinConfs.foreach(conf => require(
194+
allJoinConfCombinations.contains(conf),
195+
s"Join configurations [$conf] should be one of $allJoinConfCombinations"))
176196

177197
if (tpcdsDataPath.nonEmpty) {
178198
tpcdsQueries.foreach { name =>
179199
val queryString = resourceToString(s"tpcds/$name.sql",
180200
classLoader = Thread.currentThread().getContextClassLoader)
181201
test(name) {
182202
val goldenFile = new File(s"$baseResourcePath/v1_4", s"$name.sql.out")
183-
System.gc() // Workaround for GitHub Actions memory limitation, see also SPARK-37368
184-
runQuery(queryString, goldenFile, joinConfSet.head.toSeq, false)
185-
if (!regenerateGoldenFiles) {
186-
joinConfSet.tail.foreach { conf =>
187-
System.gc() // SPARK-37368
188-
runQuery(queryString, goldenFile, conf.toSeq, true)
189-
}
203+
joinConfs.foreach { conf =>
204+
System.gc() // Workaround for GitHub Actions memory limitation, see also SPARK-37368
205+
runQuery(queryString, goldenFile, conf)
190206
}
191207
}
192208
}
@@ -196,13 +212,9 @@ class TPCDSQueryTestSuite extends QueryTest with TPCDSBase with SQLQueryTestHelp
196212
classLoader = Thread.currentThread().getContextClassLoader)
197213
test(s"$name-v2.7") {
198214
val goldenFile = new File(s"$baseResourcePath/v2_7", s"$name.sql.out")
199-
System.gc() // SPARK-37368
200-
runQuery(queryString, goldenFile, joinConfSet.head.toSeq, false)
201-
if (!regenerateGoldenFiles) {
202-
joinConfSet.tail.foreach { conf =>
203-
System.gc() // SPARK-37368
204-
runQuery(queryString, goldenFile, conf.toSeq, true)
205-
}
215+
joinConfs.foreach { conf =>
216+
System.gc() // SPARK-37368
217+
runQuery(queryString, goldenFile, conf)
206218
}
207219
}
208220
}

0 commit comments

Comments
 (0)