Skip to content

Commit c596154

Browse files
committed
[SPARK-15115][SQL] Reorganize whole stage codegen benchmark suites
## What changes were proposed in this pull request? We currently have a single suite that is very large, making it difficult to maintain and play with specific primitives. This patch reorganizes the file by creating multiple benchmark suites in a single package. Most of the changes are straightforward moves of code. On top of moving the code, I did: 1. Use SparkSession instead of SQLContext. 2. Turned most benchmark scenarios into their own test cases, rather than having multiple scenarios in a single test case, which takes forever to run. ## How was this patch tested? This is a test only change. Author: Reynold Xin <[email protected]> Closes #12891 from rxin/SPARK-15115. (cherry picked from commit 6274a52) Signed-off-by: Reynold Xin <[email protected]>
1 parent 54d90bd commit c596154

File tree

8 files changed

+603
-422
lines changed

8 files changed

+603
-422
lines changed

core/src/main/java/org/apache/spark/util/collection/unsafe/sort/RecordPointerAndKeyPrefix.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
package org.apache.spark.util.collection.unsafe.sort;
1919

20-
final class RecordPointerAndKeyPrefix {
20+
public final class RecordPointerAndKeyPrefix {
2121
/**
2222
* A pointer to a record; see {@link org.apache.spark.memory.TaskMemoryManager} for a
2323
* description of how these addresses are encoded.

core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSortDataFormat.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@
2929
* Within each long[] buffer, position {@code 2 * i} holds a pointer to the record at
3030
* index {@code i}, while position {@code 2 * i + 1} in the array holds an 8-byte key prefix.
3131
*/
32-
final class UnsafeSortDataFormat extends SortDataFormat<RecordPointerAndKeyPrefix, LongArray> {
32+
public final class UnsafeSortDataFormat
33+
extends SortDataFormat<RecordPointerAndKeyPrefix, LongArray> {
3334

3435
public static final UnsafeSortDataFormat INSTANCE = new UnsafeSortDataFormat();
3536

core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala

Lines changed: 0 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ import org.apache.spark.SparkFunSuite
2626
import org.apache.spark.internal.Logging
2727
import org.apache.spark.unsafe.array.LongArray
2828
import org.apache.spark.unsafe.memory.MemoryBlock
29-
import org.apache.spark.util.Benchmark
3029
import org.apache.spark.util.collection.Sorter
3130
import org.apache.spark.util.random.XORShiftRandom
3231

@@ -184,81 +183,4 @@ class RadixSortSuite extends SparkFunSuite with Logging {
184183
assert(res1.view == res2.view)
185184
}
186185
}
187-
188-
ignore("microbenchmarks") {
189-
val size = 25000000
190-
val rand = new XORShiftRandom(123)
191-
val benchmark = new Benchmark("radix sort " + size, size)
192-
benchmark.addTimerCase("reference TimSort key prefix array") { timer =>
193-
val array = Array.tabulate[Long](size * 2) { i => rand.nextLong }
194-
val buf = new LongArray(MemoryBlock.fromLongArray(array))
195-
timer.startTiming()
196-
referenceKeyPrefixSort(buf, 0, size, PrefixComparators.BINARY)
197-
timer.stopTiming()
198-
}
199-
benchmark.addTimerCase("reference Arrays.sort") { timer =>
200-
val ref = Array.tabulate[Long](size) { i => rand.nextLong }
201-
timer.startTiming()
202-
Arrays.sort(ref)
203-
timer.stopTiming()
204-
}
205-
benchmark.addTimerCase("radix sort one byte") { timer =>
206-
val array = new Array[Long](size * 2)
207-
var i = 0
208-
while (i < size) {
209-
array(i) = rand.nextLong & 0xff
210-
i += 1
211-
}
212-
val buf = new LongArray(MemoryBlock.fromLongArray(array))
213-
timer.startTiming()
214-
RadixSort.sort(buf, size, 0, 7, false, false)
215-
timer.stopTiming()
216-
}
217-
benchmark.addTimerCase("radix sort two bytes") { timer =>
218-
val array = new Array[Long](size * 2)
219-
var i = 0
220-
while (i < size) {
221-
array(i) = rand.nextLong & 0xffff
222-
i += 1
223-
}
224-
val buf = new LongArray(MemoryBlock.fromLongArray(array))
225-
timer.startTiming()
226-
RadixSort.sort(buf, size, 0, 7, false, false)
227-
timer.stopTiming()
228-
}
229-
benchmark.addTimerCase("radix sort eight bytes") { timer =>
230-
val array = new Array[Long](size * 2)
231-
var i = 0
232-
while (i < size) {
233-
array(i) = rand.nextLong
234-
i += 1
235-
}
236-
val buf = new LongArray(MemoryBlock.fromLongArray(array))
237-
timer.startTiming()
238-
RadixSort.sort(buf, size, 0, 7, false, false)
239-
timer.stopTiming()
240-
}
241-
benchmark.addTimerCase("radix sort key prefix array") { timer =>
242-
val (_, buf2) = generateKeyPrefixTestData(size, rand.nextLong)
243-
timer.startTiming()
244-
RadixSort.sortKeyPrefixArray(buf2, size, 0, 7, false, false)
245-
timer.stopTiming()
246-
}
247-
benchmark.run()
248-
249-
/**
250-
Running benchmark: radix sort 25000000
251-
Java HotSpot(TM) 64-Bit Server VM 1.8.0_66-b17 on Linux 3.13.0-44-generic
252-
Intel(R) Core(TM) i7-4600U CPU @ 2.10GHz
253-
254-
radix sort 25000000: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
255-
-------------------------------------------------------------------------------------------
256-
reference TimSort key prefix array 15546 / 15859 1.6 621.9 1.0X
257-
reference Arrays.sort 2416 / 2446 10.3 96.6 6.4X
258-
radix sort one byte 133 / 137 188.4 5.3 117.2X
259-
radix sort two bytes 255 / 258 98.2 10.2 61.1X
260-
radix sort eight bytes 991 / 997 25.2 39.6 15.7X
261-
radix sort key prefix array 1540 / 1563 16.2 61.6 10.1X
262-
*/
263-
}
264186
}

0 commit comments

Comments
 (0)