
Commit e90fa00

Address comments

1 parent 8ffba61 commit e90fa00

1 file changed

sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceWriteBenchmark.scala

Lines changed: 37 additions & 33 deletions
@@ -55,44 +55,43 @@ object DataSourceWriteBenchmark {
     }
   }
 
-  def writeInt(table: String, format: String, benchmark: Benchmark): Unit = {
-    spark.sql(s"create table $table(c1 INT, c2 STRING) using $format")
-    benchmark.addCase("Output Single Int Column") { _ =>
-      spark.sql(s"INSERT overwrite table $table select cast(id as INT) as " +
-        s"c1, cast(id as STRING) as c2 from $tempTable")
+  def writeNumeric(table: String, format: String, benchmark: Benchmark, dataType: String): Unit = {
+    spark.sql(s"create table $table(id $dataType) using $format")
+    benchmark.addCase(s"Output Single $dataType Column") { _ =>
+      spark.sql(s"INSERT OVERWRITE TABLE $table SELECT CAST(id AS $dataType) AS c1 FROM $tempTable")
     }
   }
 
   def writeIntString(table: String, format: String, benchmark: Benchmark): Unit = {
-    spark.sql(s"create table $table(c1 INT, c2 STRING) using $format")
+    spark.sql(s"CREATE TABLE $table(c1 INT, c2 STRING) USING $format")
     benchmark.addCase("Output Int and String Column") { _ =>
-      spark.sql(s"INSERT overwrite table $table select cast(id as INT) as " +
-        s"c1, cast(id as STRING) as c2 from $tempTable")
+      spark.sql(s"INSERT OVERWRITE TABLE $table SELECT CAST(id AS INT) AS " +
+        s"c1, CAST(id AS STRING) AS c2 FROM $tempTable")
     }
   }
 
   def writePartition(table: String, format: String, benchmark: Benchmark): Unit = {
-    spark.sql(s"create table $table(p INT, id INT) using $format PARTITIONED BY (p)")
+    spark.sql(s"CREATE TABLE $table(p INT, id INT) USING $format PARTITIONED BY (p)")
     benchmark.addCase("Output Partitions") { _ =>
-      spark.sql(s"INSERT overwrite table $table select cast(id as INT) as id," +
-        s" cast(id % 2 as INT) as p from $tempTable")
+      spark.sql(s"INSERT OVERWRITE TABLE $table SELECT CAST(id AS INT) AS id," +
+        s" CAST(id % 2 AS INT) AS p FROM $tempTable")
     }
   }
 
   def writeBucket(table: String, format: String, benchmark: Benchmark): Unit = {
-    spark.sql(s"create table $table(c1 INT, c2 INT) using $format CLUSTERED BY (c2) INTO 2 BUCKETS")
+    spark.sql(s"CREATE TABLE $table(c1 INT, c2 INT) USING $format CLUSTERED BY (c2) INTO 2 BUCKETS")
     benchmark.addCase("Output Buckets") { _ =>
-      spark.sql(s"INSERT overwrite table $table select cast(id as INT) as " +
-        s"c1, cast(id as INT) as c2 from $tempTable")
+      spark.sql(s"INSERT OVERWRITE TABLE $table SELECT CAST(id AS INT) AS " +
+        s"c1, CAST(id AS INT) AS c2 FROM $tempTable")
     }
   }
 
   def main(args: Array[String]): Unit = {
     val tableInt = "tableInt"
+    val tableDouble = "tableDouble"
     val tableIntString = "tableIntString"
     val tablePartition = "tablePartition"
     val tableBucket = "tableBucket"
-    // If the
     val formats: Seq[String] = if (args.isEmpty) {
       Seq("Parquet", "ORC", "JSON", "CSV")
     } else {
@@ -102,38 +101,43 @@ object DataSourceWriteBenchmark {
     Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz
     Parquet writer benchmark:                Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
     ------------------------------------------------------------------------------------------------
-    Output Single Int Column                      6054 / 6070          2.6         384.9       1.0X
-    Output Int and String Column                  5784 / 5800          2.7         367.8       1.0X
-    Output Partitions                             3891 / 3904          4.0         247.4       1.6X
-    Output Buckets                                5446 / 5729          2.9         346.2       1.1X
+    Output Single Int Column                      1815 / 1932          8.7         115.4       1.0X
+    Output Single Double Column                   1877 / 1878          8.4         119.3       1.0X
+    Output Int and String Column                  6265 / 6543          2.5         398.3       0.3X
+    Output Partitions                             4067 / 4457          3.9         258.6       0.4X
+    Output Buckets                                5608 / 5820          2.8         356.6       0.3X
 
     ORC writer benchmark:                    Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
     ------------------------------------------------------------------------------------------------
-    Output Single Int Column                      5734 / 5823          2.7         364.6       1.0X
-    Output Int and String Column                  5802 / 5839          2.7         368.9       1.0X
-    Output Partitions                             3384 / 3671          4.6         215.1       1.7X
-    Output Buckets                                4950 / 4988          3.2         314.7       1.2X
+    Output Single Int Column                      1201 / 1239         13.1          76.3       1.0X
+    Output Single Double Column                   1542 / 1600         10.2          98.0       0.8X
+    Output Int and String Column                  6495 / 6580          2.4         412.9       0.2X
+    Output Partitions                             3648 / 3842          4.3         231.9       0.3X
+    Output Buckets                                5022 / 5145          3.1         319.3       0.2X
 
     JSON writer benchmark:                   Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
    ------------------------------------------------------------------------------------------------
-    Output Single Int Column                      5576 / 5594          2.8         354.5       1.0X
-    Output Int and String Column                  5550 / 5620          2.8         352.9       1.0X
-    Output Partitions                             3727 / 4100          4.2         237.0       1.5X
-    Output Buckets                                5316 / 5852          3.0         338.0       1.0X
+    Output Single Int Column                      1988 / 2093          7.9         126.4       1.0X
+    Output Single Double Column                   2854 / 2911          5.5         181.4       0.7X
+    Output Int and String Column                  6467 / 6653          2.4         411.1       0.3X
+    Output Partitions                             4548 / 5055          3.5         289.1       0.4X
+    Output Buckets                                5664 / 5765          2.8         360.1       0.4X
 
     CSV writer benchmark:                    Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
     ------------------------------------------------------------------------------------------------
-    Output Single Int Column                      7064 / 8714          2.2         449.1       1.0X
-    Output Int and String Column                  7114 / 7663          2.2         452.3       1.0X
-    Output Partitions                             5771 / 6228          2.7         366.9       1.2X
-    Output Buckets                                7414 / 7479          2.1         471.3       1.0X
+    Output Single Int Column                      3025 / 3190          5.2         192.3       1.0X
+    Output Single Double Column                   3575 / 3634          4.4         227.3       0.8X
+    Output Int and String Column                  7313 / 7399          2.2         464.9       0.4X
+    Output Partitions                             5105 / 5190          3.1         324.6       0.6X
+    Output Buckets                                6986 / 6992          2.3         444.1       0.4X
 
     */
     withTempTable(tempTable) {
       spark.range(numRows).createOrReplaceTempView(tempTable)
       formats.foreach { format =>
-        withTable(tableInt, tableIntString, tablePartition, tableBucket) {
+        withTable(tableInt, tableDouble, tableIntString, tablePartition, tableBucket) {
           val benchmark = new Benchmark(s"$format writer benchmark", numRows)
-          writeInt(tableInt, format, benchmark)
+          writeNumeric(tableInt, format, benchmark, "Int")
+          writeNumeric(tableDouble, format, benchmark, "Double")
           writeIntString(tableIntString, format, benchmark)
           writePartition(tablePartition, format, benchmark)
           writeBucket(tableBucket, format, benchmark)
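
The core of this change is that the old writeInt is folded into a type-parameterized writeNumeric, so adding a benchmark case for another numeric type becomes a one-line call rather than a new method. Below is a minimal, self-contained sketch of that pattern outside the benchmark harness; the SparkSession setup, row count, table names, and the extra BIGINT call are illustrative assumptions, not part of this commit.

// Minimal sketch of the writeNumeric pattern from this commit. The
// SparkSession config, tempTable name, and the BIGINT call are assumptions.
import org.apache.spark.sql.SparkSession

object WriteNumericSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("WriteNumericSketch")
      .getOrCreate()

    val tempTable = "temp"
    spark.range(1024 * 1024).createOrReplaceTempView(tempTable)

    // Mirrors the patched method, minus the Benchmark wrapper: the data type
    // is threaded through both the DDL and the INSERT's CAST.
    def writeNumeric(table: String, format: String, dataType: String): Unit = {
      spark.sql(s"create table $table(id $dataType) using $format")
      spark.sql(s"INSERT OVERWRITE TABLE $table SELECT CAST(id AS $dataType) AS c1 FROM $tempTable")
    }

    writeNumeric("tableInt", "Parquet", "Int")
    writeNumeric("tableDouble", "Parquet", "Double")
    writeNumeric("tableLong", "Parquet", "Bigint") // hypothetical third type

    spark.stop()
  }
}

Passing the type through both the schema and the CAST keeps the generated data in sync with the table definition, which is what lets one method back both the Int and Double cases in main.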

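The derived columns in the result tables follow from the best times: Spark's Benchmark reports Relative as the first case's best time divided by each case's best time. A quick cross-check against the new Parquet rows above; the row count here is an assumption chosen to match the printed 8.7 M/s rate:

// Reproducing the table's derived columns from the best times.
// numRows is assumed; it is consistent with 8.7 M/s at 1815 ms.
object BenchmarkMathSketch {
  def main(args: Array[String]): Unit = {
    val numRows = 15 * 1024 * 1024
    val baselineMs = 1815.0 // Output Single Int Column, best time
    val caseMs = 6265.0     // Output Int and String Column, best time
    println(f"Rate:     ${numRows / baselineMs / 1000}%.1f M/s") // ~8.7
    println(f"Per Row:  ${baselineMs * 1e6 / numRows}%.1f ns")   // ~115.4
    println(f"Relative: ${baselineMs / caseMs}%.1f X")           // ~0.3
  }
}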