From 267d0b5c732f6cb12022f586ced6041337d90971 Mon Sep 17 00:00:00 2001 From: yucai Date: Fri, 21 Sep 2018 18:44:41 +0800 Subject: [PATCH 1/3] [SPARK-25486][TEST] Refactor SortBenchmark to use main method --- sql/core/benchmarks/SortBenchmark-results.txt | 17 +++++++++ .../execution/benchmark/SortBenchmark.scala | 36 ++++++++----------- 2 files changed, 31 insertions(+), 22 deletions(-) create mode 100644 sql/core/benchmarks/SortBenchmark-results.txt diff --git a/sql/core/benchmarks/SortBenchmark-results.txt b/sql/core/benchmarks/SortBenchmark-results.txt new file mode 100644 index 000000000000..88f3dbebf6f3 --- /dev/null +++ b/sql/core/benchmarks/SortBenchmark-results.txt @@ -0,0 +1,17 @@ +================================================================================================ +sort benchmark +================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +radix sort 25000000: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +reference TimSort key prefix array 11765 / 11772 2.1 470.6 1.0X +reference Arrays.sort 2128 / 2134 11.7 85.1 5.5X +radix sort one byte 91 / 99 275.5 3.6 129.7X +radix sort two bytes 169 / 180 148.2 6.7 69.8X +radix sort eight bytes 665 / 668 37.6 26.6 17.7X +radix sort key prefix array 1058 / 1060 23.6 42.3 11.1X + + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SortBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SortBenchmark.scala index 17619ec5fadc..559020165e8e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SortBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SortBenchmark.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.benchmark import 
java.util.{Arrays, Comparator} -import org.apache.spark.benchmark.Benchmark +import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} import org.apache.spark.unsafe.array.LongArray import org.apache.spark.unsafe.memory.MemoryBlock import org.apache.spark.util.collection.Sorter @@ -28,12 +28,13 @@ import org.apache.spark.util.random.XORShiftRandom /** * Benchmark to measure performance for aggregate primitives. - * To run this: - * build/sbt "sql/test-only *benchmark.SortBenchmark" - * - * Benchmarks in this file are skipped in normal builds. + * To run this benchmark: + * 1. without sbt: bin/spark-submit --class <this class> <spark sql test jar> + * 2. build/sbt "sql/test:runMain <this class>" + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>" + * Results will be written to "benchmarks/<this class>-results.txt". */ -class SortBenchmark extends BenchmarkWithCodegen { +object SortBenchmark extends BenchmarkBase { private def referenceKeyPrefixSort(buf: LongArray, lo: Int, hi: Int, refCmp: PrefixComparator) { val sortBuffer = new LongArray(MemoryBlock.fromLongArray(new Array[Long](buf.size().toInt))) @@ -54,10 +55,10 @@ class SortBenchmark extends BenchmarkWithCodegen { new LongArray(MemoryBlock.fromLongArray(extended))) } - ignore("sort") { + def sortBenchmark(): Unit = { val size = 25000000 val rand = new XORShiftRandom(123) - val benchmark = new Benchmark("radix sort " + size, size) + val benchmark = new Benchmark("radix sort " + size, size, output = output) benchmark.addTimerCase("reference TimSort key prefix array") { timer => val array = Array.tabulate[Long](size * 2) { i => rand.nextLong } val buf = new LongArray(MemoryBlock.fromLongArray(array)) @@ -114,20 +115,11 @@ class SortBenchmark extends BenchmarkWithCodegen { timer.stopTiming() } benchmark.run() + } - /* - Running benchmark: radix sort 25000000 - Java HotSpot(TM) 64-Bit Server VM 1.8.0_66-b17 on Linux 3.13.0-44-generic - Intel(R) Core(TM) i7-4600U CPU @ 2.10GHz - - radix sort 25000000: Best/Avg Time(ms) Rate(M/s) Per 
Row(ns) Relative - ------------------------------------------------------------------------------------------- - reference TimSort key prefix array 15546 / 15859 1.6 621.9 1.0X - reference Arrays.sort 2416 / 2446 10.3 96.6 6.4X - radix sort one byte 133 / 137 188.4 5.3 117.2X - radix sort two bytes 255 / 258 98.2 10.2 61.1X - radix sort eight bytes 991 / 997 25.2 39.6 15.7X - radix sort key prefix array 1540 / 1563 16.2 61.6 10.1X - */ + override def benchmark(): Unit = { + runBenchmark("radix sort") { + sortBenchmark() + } } } From 3943a7f7b9cfa8f389c765ef4870323c4b40ab05 Mon Sep 17 00:00:00 2001 From: yucai Date: Fri, 21 Sep 2018 23:56:37 +0800 Subject: [PATCH 2/3] new tests --- sql/core/benchmarks/SortBenchmark-results.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sql/core/benchmarks/SortBenchmark-results.txt b/sql/core/benchmarks/SortBenchmark-results.txt index 88f3dbebf6f3..0d00a0c89d02 100644 --- a/sql/core/benchmarks/SortBenchmark-results.txt +++ b/sql/core/benchmarks/SortBenchmark-results.txt @@ -1,5 +1,5 @@ ================================================================================================ -sort benchmark +radix sort ================================================================================================ Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 @@ -7,11 +7,11 @@ Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz radix sort 25000000: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -reference TimSort key prefix array 11765 / 11772 2.1 470.6 1.0X -reference Arrays.sort 2128 / 2134 11.7 85.1 5.5X -radix sort one byte 91 / 99 275.5 3.6 129.7X -radix sort two bytes 169 / 180 148.2 6.7 69.8X -radix sort eight bytes 665 / 668 37.6 26.6 17.7X -radix sort key prefix array 1058 / 1060 23.6 42.3 11.1X +reference TimSort key prefix array 11770 / 11960 2.1 470.8 1.0X +reference Arrays.sort 2106 / 
2128 11.9 84.3 5.6X +radix sort one byte 93 / 100 269.7 3.7 126.9X +radix sort two bytes 171 / 179 146.0 6.9 68.7X +radix sort eight bytes 659 / 664 37.9 26.4 17.9X +radix sort key prefix array 1024 / 1053 24.4 41.0 11.5X From be2d1c0e1b224386b2d3a5c43b6f2b1638604607 Mon Sep 17 00:00:00 2001 From: yucai Date: Sat, 22 Sep 2018 00:15:47 +0800 Subject: [PATCH 3/3] update comments --- .../sql/execution/benchmark/SortBenchmark.scala | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SortBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SortBenchmark.scala index 559020165e8e..958a06440214 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SortBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SortBenchmark.scala @@ -28,11 +28,13 @@ import org.apache.spark.util.random.XORShiftRandom /** * Benchmark to measure performance for aggregate primitives. - * To run this benchmark: - * 1. without sbt: bin/spark-submit --class <this class> <spark sql test jar> - * 2. build/sbt "sql/test:runMain <this class>" - * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>" - * Results will be written to "benchmarks/<this class>-results.txt". + * {{{ + * To run this benchmark: + * 1. without sbt: bin/spark-submit --class <this class> <spark sql test jar> + * 2. build/sbt "sql/test:runMain <this class>" + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>" + * Results will be written to "benchmarks/<this class>-results.txt". + * }}} */ object SortBenchmark extends BenchmarkBase {