Skip to content

Commit 1204561

Browse files
committed
Add benchmarks for partial sorting functions
1 parent 0cc1337 commit 1204561

File tree

6 files changed

+371
-74
lines changed

6 files changed

+371
-74
lines changed

benchmarks/bench-qsort-common.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#ifndef AVX512_BENCH_COMMON
2+
#define AVX512_BENCH_COMMON
3+
4+
#include <benchmark/benchmark.h>
5+
#include "rand_array.h"
6+
#include "cpuinfo.h"
7+
#include "avx512-16bit-qsort.hpp"
8+
#include "avx512-32bit-qsort.hpp"
9+
#include "avx512-64bit-qsort.hpp"
10+
11+
#endif

benchmarks/bench_partial_qsort.hpp

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
#include "bench-qsort-common.h"
2+
3+
template <typename T>
4+
static void avx512_partial_qsort(benchmark::State& state) {
5+
if (!cpu_has_avx512bw()) {
6+
state.SkipWithMessage("Requires AVX512 BW ISA");
7+
}
8+
if ((sizeof(T) == 2) && (!cpu_has_avx512_vbmi2())) {
9+
state.SkipWithMessage("Requires AVX512 VBMI2 ISA");
10+
}
11+
// Perform setup here
12+
size_t ARRSIZE = state.range(0);
13+
std::vector<T> arr;
14+
std::vector<T> arr_bkp;
15+
16+
/* Initialize elements */
17+
arr = get_uniform_rand_array<T>(ARRSIZE);
18+
arr_bkp = arr;
19+
20+
/* Choose random index to sort up until */
21+
int k = get_uniform_rand_array<int64_t>(1, ARRSIZE, 1).front();
22+
23+
/* call avx512_partial_qsort */
24+
for (auto _ : state) {
25+
avx512_partial_qsort<T>(arr.data(), k, ARRSIZE);
26+
27+
state.PauseTiming();
28+
arr = arr_bkp;
29+
state.ResumeTiming();
30+
}
31+
}
32+
33+
template <typename T>
34+
static void stdpartialsort(benchmark::State& state) {
35+
// Perform setup here
36+
size_t ARRSIZE = state.range(0);
37+
std::vector<T> arr;
38+
std::vector<T> arr_bkp;
39+
40+
/* Initialize elements */
41+
arr = get_uniform_rand_array<T>(ARRSIZE);
42+
arr_bkp = arr;
43+
44+
/* Choose random index to sort up until */
45+
int k = get_uniform_rand_array<int64_t>(1, ARRSIZE, 1).front();
46+
47+
/* call std::partial_sort */
48+
for (auto _ : state) {
49+
std::partial_sort(arr.begin(), arr.begin() + k, arr.end());
50+
51+
state.PauseTiming();
52+
arr = arr_bkp;
53+
state.ResumeTiming();
54+
}
55+
}
56+
57+
// Register the function as a benchmark
58+
BENCHMARK(avx512_partial_qsort<float>)->Arg(10000)->Arg(1000000);
59+
BENCHMARK(stdpartialsort<float>)->Arg(10000)->Arg(1000000);
60+
BENCHMARK(avx512_partial_qsort<uint32_t>)->Arg(10000)->Arg(1000000);
61+
BENCHMARK(stdpartialsort<uint32_t>)->Arg(10000)->Arg(1000000);
62+
BENCHMARK(avx512_partial_qsort<int32_t>)->Arg(10000)->Arg(1000000);
63+
BENCHMARK(stdpartialsort<int32_t>)->Arg(10000)->Arg(1000000);
64+
65+
BENCHMARK(avx512_partial_qsort<double>)->Arg(10000)->Arg(1000000);
66+
BENCHMARK(stdpartialsort<double>)->Arg(10000)->Arg(1000000);
67+
BENCHMARK(avx512_partial_qsort<uint64_t>)->Arg(10000)->Arg(1000000);
68+
BENCHMARK(stdpartialsort<uint64_t>)->Arg(10000)->Arg(1000000);
69+
BENCHMARK(avx512_partial_qsort<int64_t>)->Arg(10000)->Arg(1000000);
70+
BENCHMARK(stdpartialsort<int64_t>)->Arg(10000)->Arg(10000000);
71+
72+
//BENCHMARK(avx512_partial_qsort<float16>)->Arg(10000)->Arg(1000000);
73+
BENCHMARK(avx512_partial_qsort<uint16_t>)->Arg(10000)->Arg(1000000);
74+
BENCHMARK(stdpartialsort<uint16_t>)->Arg(10000)->Arg(1000000);
75+
BENCHMARK(avx512_partial_qsort<int16_t>)->Arg(10000)->Arg(1000000);
76+
BENCHMARK(stdpartialsort<int16_t>)->Arg(10000)->Arg(10000000);

benchmarks/bench_qselect.hpp

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
#include "bench-qsort-common.h"
2+
3+
template <typename T>
4+
static void avx512_qselect(benchmark::State& state) {
5+
if (!cpu_has_avx512bw()) {
6+
state.SkipWithMessage("Requires AVX512 BW ISA");
7+
}
8+
if ((sizeof(T) == 2) && (!cpu_has_avx512_vbmi2())) {
9+
state.SkipWithMessage("Requires AVX512 VBMI2 ISA");
10+
}
11+
// Perform setup here
12+
size_t ARRSIZE = state.range(0);
13+
std::vector<T> arr;
14+
std::vector<T> arr_bkp;
15+
16+
/* Initialize elements */
17+
arr = get_uniform_rand_array<T>(ARRSIZE);
18+
arr_bkp = arr;
19+
20+
/* Choose random index to make sorted */
21+
int k = get_uniform_rand_array<int64_t>(1, ARRSIZE, 1).front();
22+
23+
/* call avx512 quickselect */
24+
for (auto _ : state) {
25+
avx512_qselect<T>(arr.data(), k, ARRSIZE);
26+
27+
state.PauseTiming();
28+
arr = arr_bkp;
29+
state.ResumeTiming();
30+
}
31+
}
32+
33+
template <typename T>
34+
static void stdnthelement(benchmark::State& state) {
35+
// Perform setup here
36+
size_t ARRSIZE = state.range(0);
37+
std::vector<T> arr;
38+
std::vector<T> arr_bkp;
39+
40+
/* Initialize elements */
41+
arr = get_uniform_rand_array<T>(ARRSIZE);
42+
arr_bkp = arr;
43+
44+
/* Choose random index to make sorted */
45+
int k = get_uniform_rand_array<int64_t>(1, ARRSIZE, 1).front();
46+
47+
/* call std::nth_element */
48+
for (auto _ : state) {
49+
std::nth_element(arr.begin(), arr.begin() + k, arr.end());
50+
51+
state.PauseTiming();
52+
arr = arr_bkp;
53+
state.ResumeTiming();
54+
}
55+
}
56+
57+
// Register the function as a benchmark
58+
BENCHMARK(avx512_qselect<float>)->Arg(10000)->Arg(1000000);
59+
BENCHMARK(stdnthelement<float>)->Arg(10000)->Arg(1000000);
60+
BENCHMARK(avx512_qselect<uint32_t>)->Arg(10000)->Arg(1000000);
61+
BENCHMARK(stdnthelement<uint32_t>)->Arg(10000)->Arg(1000000);
62+
BENCHMARK(avx512_qselect<int32_t>)->Arg(10000)->Arg(1000000);
63+
BENCHMARK(stdnthelement<int32_t>)->Arg(10000)->Arg(1000000);
64+
65+
BENCHMARK(avx512_qselect<double>)->Arg(10000)->Arg(1000000);
66+
BENCHMARK(stdnthelement<double>)->Arg(10000)->Arg(1000000);
67+
BENCHMARK(avx512_qselect<uint64_t>)->Arg(10000)->Arg(1000000);
68+
BENCHMARK(stdnthelement<uint64_t>)->Arg(10000)->Arg(1000000);
69+
BENCHMARK(avx512_qselect<int64_t>)->Arg(10000)->Arg(1000000);
70+
BENCHMARK(stdnthelement<int64_t>)->Arg(10000)->Arg(10000000);
71+
72+
//BENCHMARK(avx512_qselect<float16>)->Arg(10000)->Arg(1000000);
73+
BENCHMARK(avx512_qselect<uint16_t>)->Arg(10000)->Arg(1000000);
74+
BENCHMARK(stdnthelement<uint16_t>)->Arg(10000)->Arg(1000000);
75+
BENCHMARK(avx512_qselect<int16_t>)->Arg(10000)->Arg(1000000);
76+
BENCHMARK(stdnthelement<int16_t>)->Arg(10000)->Arg(10000000);

benchmarks/bench_qsort.cpp

Lines changed: 3 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,73 +1,3 @@
1-
#include <benchmark/benchmark.h>
2-
#include "rand_array.h"
3-
#include "cpuinfo.h"
4-
#include "avx512-16bit-qsort.hpp"
5-
#include "avx512-32bit-qsort.hpp"
6-
#include "avx512-64bit-qsort.hpp"
7-
8-
template <typename T>
9-
static void avx512_qsort(benchmark::State& state) {
10-
if (!cpu_has_avx512bw()) {
11-
state.SkipWithMessage("Requires AVX512 BW ISA");
12-
}
13-
if ((sizeof(T) == 2) && (!cpu_has_avx512_vbmi2())) {
14-
state.SkipWithMessage("Requires AVX512 VBMI2 ISA");
15-
}
16-
// Perform setup here
17-
size_t ARRSIZE = state.range(0);
18-
std::vector<T> arr;
19-
std::vector<T> arr_bkp;
20-
21-
/* Initialize elements is reverse order */
22-
arr = get_uniform_rand_array<T>(ARRSIZE);
23-
arr_bkp = arr;
24-
25-
/* call avx512 quicksort */
26-
for (auto _ : state) {
27-
avx512_qsort<T>(arr.data(), ARRSIZE);
28-
state.PauseTiming();
29-
arr = arr_bkp;
30-
state.ResumeTiming();
31-
}
32-
}
33-
34-
template <typename T>
35-
static void stdsort(benchmark::State& state) {
36-
// Perform setup here
37-
size_t ARRSIZE = state.range(0);
38-
std::vector<T> arr;
39-
std::vector<T> arr_bkp;
40-
41-
/* Initialize elements is reverse order */
42-
arr = get_uniform_rand_array<T>(ARRSIZE);
43-
arr_bkp = arr;
44-
45-
/* call avx512 quicksort */
46-
for (auto _ : state) {
47-
std::sort(arr.begin(), arr.end());
48-
state.PauseTiming();
49-
arr = arr_bkp;
50-
state.ResumeTiming();
51-
}
52-
}
53-
54-
// Register the function as a benchmark
55-
BENCHMARK(avx512_qsort<float>)->Arg(10000)->Arg(1000000);
56-
BENCHMARK(stdsort<float>)->Arg(10000)->Arg(1000000);
57-
BENCHMARK(avx512_qsort<uint32_t>)->Arg(10000)->Arg(1000000);
58-
BENCHMARK(stdsort<uint32_t>)->Arg(10000)->Arg(1000000);
59-
BENCHMARK(avx512_qsort<int32_t>)->Arg(10000)->Arg(1000000);
60-
BENCHMARK(stdsort<int32_t>)->Arg(10000)->Arg(1000000);
61-
62-
BENCHMARK(avx512_qsort<double>)->Arg(10000)->Arg(1000000);
63-
BENCHMARK(stdsort<double>)->Arg(10000)->Arg(1000000);
64-
BENCHMARK(avx512_qsort<uint64_t>)->Arg(10000)->Arg(1000000);
65-
BENCHMARK(stdsort<uint64_t>)->Arg(10000)->Arg(1000000);
66-
BENCHMARK(avx512_qsort<int64_t>)->Arg(10000)->Arg(1000000);
67-
BENCHMARK(stdsort<int64_t>)->Arg(10000)->Arg(10000000);
68-
69-
//BENCHMARK(avx512_qsort<float16>)->Arg(10000)->Arg(1000000);
70-
BENCHMARK(avx512_qsort<uint16_t>)->Arg(10000)->Arg(1000000);
71-
BENCHMARK(stdsort<uint16_t>)->Arg(10000)->Arg(1000000);
72-
BENCHMARK(avx512_qsort<int16_t>)->Arg(10000)->Arg(1000000);
73-
BENCHMARK(stdsort<int16_t>)->Arg(10000)->Arg(10000000);
1+
#include "bench_qsort.hpp"
2+
#include "bench_qselect.hpp"
3+
#include "bench_partial_qsort.hpp"

benchmarks/bench_qsort.hpp

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#include "bench-qsort-common.h"
2+
3+
template <typename T>
4+
static void avx512_qsort(benchmark::State& state) {
5+
if (!cpu_has_avx512bw()) {
6+
state.SkipWithMessage("Requires AVX512 BW ISA");
7+
}
8+
if ((sizeof(T) == 2) && (!cpu_has_avx512_vbmi2())) {
9+
state.SkipWithMessage("Requires AVX512 VBMI2 ISA");
10+
}
11+
// Perform setup here
12+
size_t ARRSIZE = state.range(0);
13+
std::vector<T> arr;
14+
std::vector<T> arr_bkp;
15+
16+
/* Initialize elements */
17+
arr = get_uniform_rand_array<T>(ARRSIZE);
18+
arr_bkp = arr;
19+
20+
/* call avx512 quicksort */
21+
for (auto _ : state) {
22+
avx512_qsort<T>(arr.data(), ARRSIZE);
23+
state.PauseTiming();
24+
arr = arr_bkp;
25+
state.ResumeTiming();
26+
}
27+
}
28+
29+
template <typename T>
30+
static void stdsort(benchmark::State& state) {
31+
// Perform setup here
32+
size_t ARRSIZE = state.range(0);
33+
std::vector<T> arr;
34+
std::vector<T> arr_bkp;
35+
36+
/* Initialize elements */
37+
arr = get_uniform_rand_array<T>(ARRSIZE);
38+
arr_bkp = arr;
39+
40+
/* call std::sort */
41+
for (auto _ : state) {
42+
std::sort(arr.begin(), arr.end());
43+
state.PauseTiming();
44+
arr = arr_bkp;
45+
state.ResumeTiming();
46+
}
47+
}
48+
49+
// Register the function as a benchmark
50+
BENCHMARK(avx512_qsort<float>)->Arg(10000)->Arg(1000000);
51+
BENCHMARK(stdsort<float>)->Arg(10000)->Arg(1000000);
52+
BENCHMARK(avx512_qsort<uint32_t>)->Arg(10000)->Arg(1000000);
53+
BENCHMARK(stdsort<uint32_t>)->Arg(10000)->Arg(1000000);
54+
BENCHMARK(avx512_qsort<int32_t>)->Arg(10000)->Arg(1000000);
55+
BENCHMARK(stdsort<int32_t>)->Arg(10000)->Arg(1000000);
56+
57+
BENCHMARK(avx512_qsort<double>)->Arg(10000)->Arg(1000000);
58+
BENCHMARK(stdsort<double>)->Arg(10000)->Arg(1000000);
59+
BENCHMARK(avx512_qsort<uint64_t>)->Arg(10000)->Arg(1000000);
60+
BENCHMARK(stdsort<uint64_t>)->Arg(10000)->Arg(1000000);
61+
BENCHMARK(avx512_qsort<int64_t>)->Arg(10000)->Arg(1000000);
62+
BENCHMARK(stdsort<int64_t>)->Arg(10000)->Arg(10000000);
63+
64+
//BENCHMARK(avx512_qsort<float16>)->Arg(10000)->Arg(1000000);
65+
BENCHMARK(avx512_qsort<uint16_t>)->Arg(10000)->Arg(1000000);
66+
BENCHMARK(stdsort<uint16_t>)->Arg(10000)->Arg(1000000);
67+
BENCHMARK(avx512_qsort<int16_t>)->Arg(10000)->Arg(1000000);
68+
BENCHMARK(stdsort<int16_t>)->Arg(10000)->Arg(10000000);

0 commit comments

Comments
 (0)