From 58e2285f12801daa46e5ea3a6d8e8b9e7d4fc3ee Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 16 Sep 2019 22:36:32 +0500 Subject: [PATCH 1/7] Add benchmarks for make_date() --- .../benchmark/MakeDateTimeBenchmark.scala | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala new file mode 100644 index 0000000000000..97a34a535e272 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.benchmark + +import org.apache.spark.benchmark.Benchmark +import org.apache.spark.sql.internal.SQLConf + +/** + * Synthetic benchmark for the make_date() and make_timestamp() functions. + * To run this benchmark: + * {{{ + * 1. without sbt: + * bin/spark-submit --class <this class> --jars <spark core test jar> <spark sql test jar> + * 2. build/sbt "sql/test:runMain <this class>" + * 3. 
generate result: + * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>" + * Results will be written to "benchmarks/MakeDateTimeBenchmark-results.txt". + * }}} + */ +object MakeDateTimeBenchmark extends SqlBasedBenchmark { + private def doBenchmark(cardinality: Long, exprs: String*): Unit = { + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") { + spark + .range(0, cardinality, 1, 1) + .selectExpr(exprs: _*) + .write + .format("noop") + .save() + } + } + + private def run( + benchmark: Benchmark, + cardinality: Long, + name: String, + exprs: String*): Unit = { + benchmark.addCase(name, numIters = 3) { _ => + doBenchmark(cardinality, exprs: _*) + } + } + + private def benchmarkMakeDate(cardinality: Long): Unit = { + val benchmark = new Benchmark(s"make_date()", cardinality, output = output) + val ymdExprs = Seq("(1900 + (id % 200))", "((id % 12) + 1)", "((id % 27) + 1)") + + run(benchmark, cardinality, "prepare make_date()", ymdExprs: _*) + val foldableExpr = "make_date(2019, 9, 16)" + run(benchmark, cardinality, foldableExpr, foldableExpr) + run( + benchmark, + cardinality, + "make_date(1900..2099, 1..12, 1..28)", + "make_date" + ymdExprs.mkString("(", ",", ")")) + + benchmark.run() + } + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + val N = 100000000L + + benchmarkMakeDate(N) + } +} From 2ff11bbc67da3aeb06c37cdb1ea5c8a0708d168f Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 17 Sep 2019 10:21:27 +0500 Subject: [PATCH 2/7] Add benchmarks for make_timestamp() --- .../benchmark/MakeDateTimeBenchmark.scala | 65 +++++++++++++------ 1 file changed, 46 insertions(+), 19 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala index 97a34a535e272..0447e75190426 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala 
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala @@ -33,7 +33,9 @@ import org.apache.spark.sql.internal.SQLConf * }}} */ object MakeDateTimeBenchmark extends SqlBasedBenchmark { - private def doBenchmark(cardinality: Long, exprs: String*): Unit = { + private val cardinality: Long = 100000000L + + private def doBenchmark(exprs: String*): Unit = { withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") { spark .range(0, cardinality, 1, 1) @@ -44,35 +46,60 @@ object MakeDateTimeBenchmark extends SqlBasedBenchmark { } } - private def run( - benchmark: Benchmark, - cardinality: Long, - name: String, - exprs: String*): Unit = { - benchmark.addCase(name, numIters = 3) { _ => - doBenchmark(cardinality, exprs: _*) - } + private def run(benchmark: Benchmark, name: String, exprs: String*): Unit = { + benchmark.addCase(name, numIters = 3) { _ => doBenchmark(exprs: _*) } } - private def benchmarkMakeDate(cardinality: Long): Unit = { + private val ymdExprs = Seq("(1900 + (id % 200))", "((id % 12) + 1)", "((id % 27) + 1)") + + private def benchmarkMakeDate(): Unit = { val benchmark = new Benchmark(s"make_date()", cardinality, output = output) - val ymdExprs = Seq("(1900 + (id % 200))", "((id % 12) + 1)", "((id % 27) + 1)") + val args = ymdExprs - run(benchmark, cardinality, "prepare make_date()", ymdExprs: _*) + run(benchmark, "prepare make_date()", args: _*) val foldableExpr = "make_date(2019, 9, 16)" - run(benchmark, cardinality, foldableExpr, foldableExpr) + run(benchmark, foldableExpr, foldableExpr) run( benchmark, - cardinality, - "make_date(1900..2099, 1..12, 1..28)", - "make_date" + ymdExprs.mkString("(", ",", ")")) + "make_date(*, *, *)", + "make_date" + args.mkString("(", ",", ")")) benchmark.run() } - override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { - val N = 100000000L + private def benchmarkMakeTimestamp(): Unit = { + val benchmark = new Benchmark(s"make_date()", cardinality, output = output) + 
val hmsExprs = Seq("id % 24", "id % 60", "cast(id % 60000000 as decimal(8, 6))") + val args = ymdExprs ++ hmsExprs + + run( + benchmark, + "prepare make_timestamp()", + args: _*) + var foldableExpr = "make_timestamp(2019, 1, 2, 3, 4, 50.123456)" + run(benchmark, foldableExpr, foldableExpr) + foldableExpr = "make_timestamp(2019, 1, 2, 3, 4, 60.000000)" + run(benchmark, foldableExpr, foldableExpr) + foldableExpr = "make_timestamp(2019, 12, 31, 23, 59, 60.00)" + run(benchmark, foldableExpr, foldableExpr) + run( + benchmark, + "make_timestamp(2019, 1, 2, *, *, *)", + s"make_timestamp(2019, 1, 2, ${hmsExprs.mkString(",")})") + run( + benchmark, + "make_timestamp(*, *, *, 3, 4, 50.123456)", + s"make_timestamp(${ymdExprs.mkString(",")}, 3, 4, 50.123456)") + run( + benchmark, + "make_timestamp(*, *, *, *, *, *)", + s"make_timestamp" + args.mkString("(", ", ", ")")) - benchmarkMakeDate(N) + benchmark.run() + } + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + benchmarkMakeDate() + benchmarkMakeTimestamp() } } From ff6d47dc160bae6222503d8937ca75791e784827 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 17 Sep 2019 11:51:58 +0500 Subject: [PATCH 3/7] Additional benchmarks for make_timestamp() --- .../benchmark/MakeDateTimeBenchmark.scala | 48 +++++++++++-------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala index 0447e75190426..6e9f40975f219 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala @@ -33,9 +33,8 @@ import org.apache.spark.sql.internal.SQLConf * }}} */ object MakeDateTimeBenchmark extends SqlBasedBenchmark { - private val cardinality: Long = 100000000L - private def doBenchmark(exprs: 
String*): Unit = { + private def doBenchmark(cardinality: Long, exprs: String*): Unit = { withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") { spark .range(0, cardinality, 1, 1) @@ -46,52 +45,63 @@ object MakeDateTimeBenchmark extends SqlBasedBenchmark { } } - private def run(benchmark: Benchmark, name: String, exprs: String*): Unit = { - benchmark.addCase(name, numIters = 3) { _ => doBenchmark(exprs: _*) } + private def run(benchmark: Benchmark, cardinality: Long, name: String, exprs: String*): Unit = { + benchmark.addCase(name, numIters = 3) { _ => doBenchmark(cardinality, exprs: _*) } } - private val ymdExprs = Seq("(1900 + (id % 200))", "((id % 12) + 1)", "((id % 27) + 1)") + private val ymdExprs = Seq("(2000 + (id % 30))", "((id % 12) + 1)", "((id % 27) + 1)") - private def benchmarkMakeDate(): Unit = { + private def benchmarkMakeDate(cardinality: Long): Unit = { val benchmark = new Benchmark(s"make_date()", cardinality, output = output) val args = ymdExprs - run(benchmark, "prepare make_date()", args: _*) + run(benchmark, cardinality, "prepare make_date()", args: _*) val foldableExpr = "make_date(2019, 9, 16)" - run(benchmark, foldableExpr, foldableExpr) + run(benchmark, cardinality, foldableExpr, foldableExpr) run( benchmark, + cardinality, "make_date(*, *, *)", "make_date" + args.mkString("(", ",", ")")) benchmark.run() } - private def benchmarkMakeTimestamp(): Unit = { + private def benchmarkMakeTimestamp(cardinality: Long): Unit = { val benchmark = new Benchmark(s"make_date()", cardinality, output = output) - val hmsExprs = Seq("id % 24", "id % 60", "cast(id % 60000000 as decimal(8, 6))") + val hmExprs = Seq("id % 24", "id % 60") + val hmsExprs = hmExprs ++ Seq("cast((id % 60000000) / 1000000.0 as decimal(8, 6))") val args = ymdExprs ++ hmsExprs run( benchmark, + cardinality, "prepare make_timestamp()", args: _*) var foldableExpr = "make_timestamp(2019, 1, 2, 3, 4, 50.123456)" - run(benchmark, foldableExpr, foldableExpr) + run(benchmark, 
cardinality, foldableExpr, foldableExpr) foldableExpr = "make_timestamp(2019, 1, 2, 3, 4, 60.000000)" - run(benchmark, foldableExpr, foldableExpr) + run(benchmark, cardinality, foldableExpr, foldableExpr) foldableExpr = "make_timestamp(2019, 12, 31, 23, 59, 60.00)" - run(benchmark, foldableExpr, foldableExpr) - run( - benchmark, - "make_timestamp(2019, 1, 2, *, *, *)", - s"make_timestamp(2019, 1, 2, ${hmsExprs.mkString(",")})") + run(benchmark, cardinality, foldableExpr, foldableExpr) run( benchmark, + cardinality, "make_timestamp(*, *, *, 3, 4, 50.123456)", s"make_timestamp(${ymdExprs.mkString(",")}, 3, 4, 50.123456)") run( benchmark, + cardinality, + "make_timestamp(*, *, *, *, *, 60.0)", + s"make_timestamp(" + (ymdExprs ++ hmExprs).mkString(", ") + ", 60.0)") + run( + benchmark, + cardinality, + "make_timestamp(2019, 1, 2, *, *, *)", + s"make_timestamp(2019, 1, 2, ${hmsExprs.mkString(",")})") + run( + benchmark, + cardinality, "make_timestamp(*, *, *, *, *, *)", s"make_timestamp" + args.mkString("(", ", ", ")")) @@ -99,7 +109,7 @@ object MakeDateTimeBenchmark extends SqlBasedBenchmark { } override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { - benchmarkMakeDate() - benchmarkMakeTimestamp() + benchmarkMakeDate(100000000L) + benchmarkMakeTimestamp(1000000L) } } From 4b5878cfa2ed1da94495a58f33f5980743df4c7e Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 17 Sep 2019 12:46:42 +0500 Subject: [PATCH 4/7] Add benchmark for make_timestamp(*, *, *, *, *, 0) --- .../sql/execution/benchmark/MakeDateTimeBenchmark.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala index 6e9f40975f219..58be621084498 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala @@ -89,6 +89,11 @@ object MakeDateTimeBenchmark extends SqlBasedBenchmark { cardinality, "make_timestamp(*, *, *, 3, 4, 50.123456)", s"make_timestamp(${ymdExprs.mkString(",")}, 3, 4, 50.123456)") + run( + benchmark, + cardinality, + "make_timestamp(*, *, *, *, *, 0)", + s"make_timestamp(" + (ymdExprs ++ hmExprs).mkString(", ") + ", 0)") run( benchmark, cardinality, From 8f20dc88f9e0bee023007fced71438180edbdd79 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 17 Sep 2019 12:47:13 +0500 Subject: [PATCH 5/7] Generate results --- .../MakeDateTimeBenchmark-results.txt | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 sql/core/benchmarks/MakeDateTimeBenchmark-results.txt diff --git a/sql/core/benchmarks/MakeDateTimeBenchmark-results.txt b/sql/core/benchmarks/MakeDateTimeBenchmark-results.txt new file mode 100644 index 0000000000000..c7da9b62aaf00 --- /dev/null +++ b/sql/core/benchmarks/MakeDateTimeBenchmark-results.txt @@ -0,0 +1,22 @@ +Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.6 +Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz +make_date(): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +prepare make_date() 2149 2289 196 46.5 21.5 1.0X +make_date(2019, 9, 16) 1829 1868 58 54.7 18.3 1.2X +make_date(*, *, *) 3180 3339 139 31.4 31.8 0.7X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.6 +Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz +make_date(): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +prepare make_timestamp() 2950 3025 96 0.3 2950.3 1.0X +make_timestamp(2019, 1, 2, 3, 4, 50.123456) 45 47 1 22.1 45.2 65.3X 
+make_timestamp(2019, 1, 2, 3, 4, 60.000000) 42 42 1 24.0 41.7 70.8X +make_timestamp(2019, 12, 31, 23, 59, 60.00) 41 42 1 24.2 41.3 71.4X +make_timestamp(*, *, *, 3, 4, 50.123456) 252 256 7 4.0 251.5 11.7X +make_timestamp(*, *, *, *, *, 0) 225 227 3 4.5 224.6 13.1X +make_timestamp(*, *, *, *, *, 60.0) 230 233 2 4.3 230.4 12.8X +make_timestamp(2019, 1, 2, *, *, *) 3078 3118 35 0.3 3078.5 1.0X +make_timestamp(*, *, *, *, *, *) 3092 3109 17 0.3 3092.4 1.0X + From 7669d245cef4efa22f71b887e388b8e0930e5d00 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 17 Sep 2019 13:06:11 +0500 Subject: [PATCH 6/7] Fix title of the make_timestamp benchmark --- sql/core/benchmarks/MakeDateTimeBenchmark-results.txt | 2 +- .../spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/benchmarks/MakeDateTimeBenchmark-results.txt b/sql/core/benchmarks/MakeDateTimeBenchmark-results.txt index c7da9b62aaf00..a3c89d643e912 100644 --- a/sql/core/benchmarks/MakeDateTimeBenchmark-results.txt +++ b/sql/core/benchmarks/MakeDateTimeBenchmark-results.txt @@ -8,7 +8,7 @@ make_date(*, *, *) 3180 3339 1 Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.6 Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz -make_date(): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +make_timestamp(): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ prepare make_timestamp() 2950 3025 96 0.3 2950.3 1.0X make_timestamp(2019, 1, 2, 3, 4, 50.123456) 45 47 1 22.1 45.2 65.3X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala index 58be621084498..82c09b6e7b96f 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala @@ -68,7 +68,7 @@ object MakeDateTimeBenchmark extends SqlBasedBenchmark { } private def benchmarkMakeTimestamp(cardinality: Long): Unit = { - val benchmark = new Benchmark(s"make_date()", cardinality, output = output) + val benchmark = new Benchmark(s"make_timestamp()", cardinality, output = output) val hmExprs = Seq("id % 24", "id % 60") val hmsExprs = hmExprs ++ Seq("cast((id % 60000000) / 1000000.0 as decimal(8, 6))") val args = ymdExprs ++ hmsExprs From cc8061fd7b09c6d55632d39282c5a562ede1f009 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 17 Sep 2019 13:07:51 +0500 Subject: [PATCH 7/7] Remove s --- .../spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala index 82c09b6e7b96f..7f7908544693f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala @@ -52,7 +52,7 @@ object MakeDateTimeBenchmark extends SqlBasedBenchmark { private val ymdExprs = Seq("(2000 + (id % 30))", "((id % 12) + 1)", "((id % 27) + 1)") private def benchmarkMakeDate(cardinality: Long): Unit = { - val benchmark = new Benchmark(s"make_date()", cardinality, output = output) + val benchmark = new Benchmark("make_date()", cardinality, output = output) val args = ymdExprs run(benchmark, cardinality, "prepare make_date()", args: _*) @@ -68,7 +68,7 @@ object MakeDateTimeBenchmark extends SqlBasedBenchmark { } private def benchmarkMakeTimestamp(cardinality: Long): Unit = { - val benchmark = new 
Benchmark(s"make_timestamp()", cardinality, output = output) + val benchmark = new Benchmark("make_timestamp()", cardinality, output = output) val hmExprs = Seq("id % 24", "id % 60") val hmsExprs = hmExprs ++ Seq("cast((id % 60000000) / 1000000.0 as decimal(8, 6))") val args = ymdExprs ++ hmsExprs