@@ -65,8 +65,8 @@ object TPCDSQueryBenchmark {
6565 " please modify the value of dataLocation to point to your local TPCDS data" )
6666 val tableSizes = setupTables(dataLocation)
6767 queries.foreach { name =>
68- val queryString = fileToString( new File ( Thread .currentThread().getContextClassLoader
69- .getResource( s " tpcds/ $name .sql " ).getFile) )
68+ val queryString = resourceToString( s " tpcds/ $name .sql " , " UTF-8 " ,
69+ Thread .currentThread().getContextClassLoader )
7070
7171 // This is an indirect hack to estimate the size of each query's input by traversing the
7272 // logical plan and adding up the sizes of all tables that appear in the plan. Note that this
@@ -100,6 +100,14 @@ object TPCDSQueryBenchmark {
 
   def main(args: Array[String]): Unit = {
 
+    if (args.length < 1) {
+      // scalastyle:off println
+      println(
+        "Usage: spark-submit --class <this class> --jars <spark sql test jar> <data location>")
+      // scalastyle:on println
+      System.exit(1)
+    }
+
     // List of all TPC-DS queries
     val tpcdsQueries = Seq(
       "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11",
@@ -117,7 +125,7 @@ object TPCDSQueryBenchmark {
     // https://github.com/databricks/spark-sql-perf/blob/master/README.md to generate the TPCDS data
     // locally (preferably with a scale factor of 5 for benchmarking). Thereafter, the value of
     // dataLocation below needs to be set to the location where the generated data is stored.
-    val dataLocation = ""
+    val dataLocation = args(0)
 
     tpcdsAll(dataLocation, queries = tpcdsQueries)
   }
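
For context, a minimal sketch of what the switch to resource-based loading amounts to, assuming the TPCDS query files are packaged on the classpath under tpcds/<name>.sql. The object and helper names below are hypothetical and not part of the patch; resourceToString in the diff is Spark's own utility, and this sketch only illustrates why streaming a classpath resource works even when the file sits inside a jar, where getResource(...).getFile does not yield a usable path.

// Hypothetical sketch: load a bundled .sql query from the classpath as a string.
import java.io.InputStream
import scala.io.Source

object ResourceLoadingSketch {
  def loadResourceAsString(path: String, encoding: String = "UTF-8"): String = {
    val classLoader = Thread.currentThread().getContextClassLoader
    // getResourceAsStream works whether the resource is a plain file or packed in a jar,
    // unlike getResource(...).getFile, which only works for files on the filesystem.
    val stream: InputStream = classLoader.getResourceAsStream(path)
    require(stream != null, s"resource not found on classpath: $path")
    try Source.fromInputStream(stream, encoding).mkString
    finally stream.close()
  }

  def main(args: Array[String]): Unit = {
    // Example: read the q1 benchmark query, assuming tpcds/q1.sql is on the classpath.
    println(loadResourceAsString("tpcds/q1.sql").take(200))
  }
}

With the second half of the patch, the data location is no longer hard-coded: the benchmark is launched as shown in the usage message, passing the generated TPCDS data directory as the first argument.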