From f77c9cec96798ac9219bcfa837316eb96d3a5329 Mon Sep 17 00:00:00 2001
From: Yifei Huang
Date: Wed, 27 Feb 2019 13:31:51 -0800
Subject: [PATCH 1/5] add stdev

---
 .../org/apache/spark/benchmark/Benchmark.scala | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala
index df1ed28477908..acfc4306d7349 100644
--- a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala
+++ b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala
@@ -111,16 +111,17 @@ private[spark] class Benchmark(
     // The results are going to be processor specific so it is useful to include that.
     out.println(Benchmark.getJVMOSInfo())
     out.println(Benchmark.getProcessorName())
-    out.printf("%-40s %16s %12s %13s %10s\n", name + ":", "Best/Avg Time(ms)", "Rate(M/s)",
-      "Per Row(ns)", "Relative")
+    out.printf("%-40s %16s %12s %13s %10s %16s\n", name + ":", "Best/Avg Time(ms)", "Rate(M/s)",
+      "Per Row(ns)", "Relative", "Stdev (ms)")
     out.println("-" * 96)
     results.zip(benchmarks).foreach { case (result, benchmark) =>
-      out.printf("%-40s %16s %12s %13s %10s\n",
+      out.printf("%-40s %16s %12s %13s %10s %16s\n",
         benchmark.name,
         "%5.0f / %4.0f" format (result.bestMs, result.avgMs),
         "%10.1f" format result.bestRate,
         "%6.1f" format (1000 / result.bestRate),
-        "%3.1fX" format (firstBest / result.bestMs))
+        "%3.1fX" format (firstBest / result.bestMs),
+        "%5.0f" format result.stdevMs)
     }
     out.println
     // scalastyle:on
@@ -158,7 +159,8 @@ private[spark] class Benchmark(
     // scalastyle:on
     val best = runTimes.min
     val avg = runTimes.sum / runTimes.size
-    Result(avg / 1000000.0, num / (best / 1000.0), best / 1000000.0)
+    val stdev = math.sqrt(runTimes.map(time => math.pow(time - avg, 2)).sum / runTimes.size)
+    Result(avg / 1000000.0, num / (best / 1000.0), best / 1000000.0, stdev / 1000000.0)
   }
 }
 
@@ -191,7 +193,7 @@ private[spark] object Benchmark {
   }
 
   case class Case(name: String, fn: Timer => Unit, numIters: Int)
-  case class Result(avgMs: Double, bestRate: Double, bestMs: Double)
+  case class Result(avgMs: Double, bestRate: Double, bestMs: Double, stdevMs: Double)
 
   /**
    * This should return a user helpful processor information. Getting at this depends on the OS.

From 85e04874845ab9de2005e9c4e91cc21511f162c8 Mon Sep 17 00:00:00 2001
From: Yifei Huang
Date: Wed, 27 Feb 2019 14:28:15 -0800
Subject: [PATCH 2/5] fix stdev text

---
 .../test/scala/org/apache/spark/benchmark/Benchmark.scala | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala
index acfc4306d7349..e2703d74e5fb7 100644
--- a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala
+++ b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala
@@ -111,11 +111,11 @@ private[spark] class Benchmark(
     // The results are going to be processor specific so it is useful to include that.
     out.println(Benchmark.getJVMOSInfo())
     out.println(Benchmark.getProcessorName())
-    out.printf("%-40s %16s %12s %13s %10s %16s\n", name + ":", "Best/Avg Time(ms)", "Rate(M/s)",
+    out.printf("%-40s %16s %12s %13s %10s %13s\n", name + ":", "Best/Avg Time(ms)", "Rate(M/s)",
       "Per Row(ns)", "Relative", "Stdev (ms)")
-    out.println("-" * 96)
+    out.println("-" * 110)
     results.zip(benchmarks).foreach { case (result, benchmark) =>
-      out.printf("%-40s %16s %12s %13s %10s %16s\n",
+      out.printf("%-40s %16s %12s %13s %10s %13s\n",
         benchmark.name,
         "%5.0f / %4.0f" format (result.bestMs, result.avgMs),
         "%10.1f" format result.bestRate,

From 8c6374575fdab57b7166918ac6f60c95677ad40f Mon Sep 17 00:00:00 2001
From: Yifei Huang
Date: Thu, 28 Feb 2019 13:30:10 -0800
Subject: [PATCH 3/5] format

---
 .../org/apache/spark/benchmark/Benchmark.scala | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala
index e2703d74e5fb7..917610515405a 100644
--- a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala
+++ b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala
@@ -111,17 +111,18 @@ private[spark] class Benchmark(
     // The results are going to be processor specific so it is useful to include that.
     out.println(Benchmark.getJVMOSInfo())
     out.println(Benchmark.getProcessorName())
-    out.printf("%-40s %16s %12s %13s %10s %13s\n", name + ":", "Best/Avg Time(ms)", "Rate(M/s)",
-      "Per Row(ns)", "Relative", "Stdev (ms)")
-    out.println("-" * 110)
+    out.printf("%-40s %14s %14s %11s %12s %13s %10s\n", name + ":", "Best Time(ms)", "Avg Time(ms)", "Stdev(ms)", "Rate(M/s)",
+      "Per Row(ns)", "Relative")
+    out.println("-" * 120)
     results.zip(benchmarks).foreach { case (result, benchmark) =>
-      out.printf("%-40s %16s %12s %13s %10s %13s\n",
+      out.printf("%-40s %14s %14s %11s %12s %13s %10s\n",
         benchmark.name,
-        "%5.0f / %4.0f" format (result.bestMs, result.avgMs),
+        "%5.0f" format result.bestMs,
+        "%4.0f" format result.avgMs,
+        "%5.0f" format result.stdevMs,
         "%10.1f" format result.bestRate,
         "%6.1f" format (1000 / result.bestRate),
-        "%3.1fX" format (firstBest / result.bestMs),
-        "%5.0f" format result.stdevMs)
+        "%3.1fX" format (firstBest / result.bestMs))
     }
     out.println
     // scalastyle:on

From ef0066d1ec88ce348d5b7a103aa90af92b240956 Mon Sep 17 00:00:00 2001
From: Yifei Huang
Date: Thu, 28 Feb 2019 13:33:34 -0800
Subject: [PATCH 4/5] use times and sample stdev

---
 .../src/test/scala/org/apache/spark/benchmark/Benchmark.scala | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala
index 917610515405a..662561c9805b2 100644
--- a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala
+++ b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala
@@ -160,7 +160,9 @@ private[spark] class Benchmark(
     // scalastyle:on
     val best = runTimes.min
     val avg = runTimes.sum / runTimes.size
-    val stdev = math.sqrt(runTimes.map(time => math.pow(time - avg, 2)).sum / runTimes.size)
+    val stdev = if (runTimes.size > 1) {
+      math.sqrt(runTimes.map(time => (time - avg) * (time - avg)).sum / (runTimes.size - 1))
+    } else 0
     Result(avg / 1000000.0, num / (best / 1000.0), best / 1000000.0, stdev / 1000000.0)
   }
 }

From 47990f47113b016005e6f2e9f858d9d96ec9cbf9 Mon Sep 17 00:00:00 2001
From: Yifei Huang
Date: Thu, 28 Feb 2019 13:41:06 -0800
Subject: [PATCH 5/5] add assert for at least one iteration

---
 core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala | 1 +
 1 file changed, 1 insertion(+)

diff --git a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala
index 662561c9805b2..73f9d0e2bc0e1 100644
--- a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala
+++ b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala
@@ -158,6 +158,7 @@ private[spark] class Benchmark(
     // scalastyle:off
     println(s" Stopped after $i iterations, ${NANOSECONDS.toMillis(runTimes.sum)} ms")
     // scalastyle:on
+    assert(runTimes.nonEmpty)
     val best = runTimes.min
     val avg = runTimes.sum / runTimes.size
     val stdev = if (runTimes.size > 1) {
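
Note on the stdev arithmetic: the computation introduced in patch 4/5 can be exercised in isolation. The sketch below is not part of the patch series; the object name and the nanosecond run times are made up for illustration, and only the best/avg/stdev math mirrors the patched measure logic (sample standard deviation with an n - 1 divisor, guarded so a single iteration yields 0, then converted from nanoseconds to milliseconds as Result stores them).

// Standalone sketch (hypothetical values), not part of the patches.
object StdevSketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical per-iteration run times in nanoseconds.
    val runTimes: Seq[Long] = Seq(105000000L, 98000000L, 110000000L)
    val best = runTimes.min
    val avg = runTimes.sum / runTimes.size
    // Sample standard deviation (divide by n - 1); 0 when only one iteration ran.
    val stdev = if (runTimes.size > 1) {
      math.sqrt(runTimes.map(time => (time - avg) * (time - avg)).sum / (runTimes.size - 1))
    } else 0
    // Nanoseconds to milliseconds, matching what the Result case class stores.
    println(f"best ${best / 1000000.0}%.1f ms, avg ${avg / 1000000.0}%.1f ms, stdev ${stdev / 1000000.0}%.1f ms")
  }
}

Dividing by runTimes.size - 1 rather than runTimes.size reports the sample standard deviation, which is the appropriate estimate when a benchmark runs only a handful of iterations; the assert added in patch 5/5 then guarantees that runTimes.min and the averages never operate on an empty sequence.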