|  | 
|  | 1 | +#include "bench-qsort-common.h" | 
|  | 2 | + | 
|  | 3 | +template <typename T> | 
|  | 4 | +static void avx512_partial_qsort(benchmark::State& state) { | 
|  | 5 | +    if (!cpu_has_avx512bw()) { | 
|  | 6 | +        state.SkipWithMessage("Requires AVX512 BW ISA"); | 
|  | 7 | +    } | 
|  | 8 | +    if ((sizeof(T) == 2) && (!cpu_has_avx512_vbmi2())) { | 
|  | 9 | +        state.SkipWithMessage("Requires AVX512 VBMI2 ISA"); | 
|  | 10 | +    } | 
|  | 11 | +    // Perform setup here | 
|  | 12 | +    size_t ARRSIZE = state.range(0); | 
|  | 13 | +    std::vector<T> arr; | 
|  | 14 | +    std::vector<T> arr_bkp; | 
|  | 15 | + | 
|  | 16 | +    /* Initialize elements */ | 
|  | 17 | +    arr = get_uniform_rand_array<T>(ARRSIZE); | 
|  | 18 | +    arr_bkp = arr; | 
|  | 19 | + | 
|  | 20 | +    /* Choose random index to sort up until */ | 
|  | 21 | +    int k = get_uniform_rand_array<int64_t>(1, ARRSIZE, 1).front(); | 
|  | 22 | + | 
|  | 23 | +    /* call avx512_partial_qsort */ | 
|  | 24 | +    for (auto _ : state) { | 
|  | 25 | +        avx512_partial_qsort<T>(arr.data(), k, ARRSIZE); | 
|  | 26 | + | 
|  | 27 | +        state.PauseTiming(); | 
|  | 28 | +        arr = arr_bkp; | 
|  | 29 | +        state.ResumeTiming(); | 
|  | 30 | +    } | 
|  | 31 | +} | 
|  | 32 | + | 
|  | 33 | +template <typename T> | 
|  | 34 | +static void stdpartialsort(benchmark::State& state) { | 
|  | 35 | +    // Perform setup here | 
|  | 36 | +    size_t ARRSIZE = state.range(0); | 
|  | 37 | +    std::vector<T> arr; | 
|  | 38 | +    std::vector<T> arr_bkp; | 
|  | 39 | + | 
|  | 40 | +    /* Initialize elements */ | 
|  | 41 | +    arr = get_uniform_rand_array<T>(ARRSIZE); | 
|  | 42 | +    arr_bkp = arr; | 
|  | 43 | + | 
|  | 44 | +    /* Choose random index to sort up until */ | 
|  | 45 | +    int k = get_uniform_rand_array<int64_t>(1, ARRSIZE, 1).front(); | 
|  | 46 | + | 
|  | 47 | +    /* call std::partial_sort */ | 
|  | 48 | +    for (auto _ : state) { | 
|  | 49 | +        std::partial_sort(arr.begin(), arr.begin() + k, arr.end()); | 
|  | 50 | + | 
|  | 51 | +        state.PauseTiming(); | 
|  | 52 | +        arr = arr_bkp; | 
|  | 53 | +        state.ResumeTiming(); | 
|  | 54 | +    } | 
|  | 55 | +} | 
|  | 56 | + | 
|  | 57 | +// Register the function as a benchmark | 
|  | 58 | +BENCHMARK(avx512_partial_qsort<float>)->Arg(10000)->Arg(1000000); | 
|  | 59 | +BENCHMARK(stdpartialsort<float>)->Arg(10000)->Arg(1000000); | 
|  | 60 | +BENCHMARK(avx512_partial_qsort<uint32_t>)->Arg(10000)->Arg(1000000); | 
|  | 61 | +BENCHMARK(stdpartialsort<uint32_t>)->Arg(10000)->Arg(1000000); | 
|  | 62 | +BENCHMARK(avx512_partial_qsort<int32_t>)->Arg(10000)->Arg(1000000); | 
|  | 63 | +BENCHMARK(stdpartialsort<int32_t>)->Arg(10000)->Arg(1000000); | 
|  | 64 | + | 
|  | 65 | +BENCHMARK(avx512_partial_qsort<double>)->Arg(10000)->Arg(1000000); | 
|  | 66 | +BENCHMARK(stdpartialsort<double>)->Arg(10000)->Arg(1000000); | 
|  | 67 | +BENCHMARK(avx512_partial_qsort<uint64_t>)->Arg(10000)->Arg(1000000); | 
|  | 68 | +BENCHMARK(stdpartialsort<uint64_t>)->Arg(10000)->Arg(1000000); | 
|  | 69 | +BENCHMARK(avx512_partial_qsort<int64_t>)->Arg(10000)->Arg(1000000); | 
|  | 70 | +BENCHMARK(stdpartialsort<int64_t>)->Arg(10000)->Arg(10000000); | 
|  | 71 | + | 
|  | 72 | +//BENCHMARK(avx512_partial_qsort<float16>)->Arg(10000)->Arg(1000000); | 
|  | 73 | +BENCHMARK(avx512_partial_qsort<uint16_t>)->Arg(10000)->Arg(1000000); | 
|  | 74 | +BENCHMARK(stdpartialsort<uint16_t>)->Arg(10000)->Arg(1000000); | 
|  | 75 | +BENCHMARK(avx512_partial_qsort<int16_t>)->Arg(10000)->Arg(1000000); | 
|  | 76 | +BENCHMARK(stdpartialsort<int16_t>)->Arg(10000)->Arg(10000000); | 
0 commit comments