Skip to content

Commit c9588ca

Browse files
author
Raghuveer Devulapalli
committed
Split highway and x86-simd-sort dispatch to separate files
1 parent 190e80e commit c9588ca

File tree

9 files changed

+102
-64
lines changed

9 files changed

+102
-64
lines changed

numpy/_core/meson.build

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -764,23 +764,29 @@ endforeach
764764
# -----------------------------------
765765
foreach gen_mtargets : [
766766
[
767-
'simd_argsort.dispatch.h',
768-
'src/npysort/simd_argsort.dispatch.cpp',
767+
'x86_simd_argsort.dispatch.h',
768+
'src/npysort/x86_simd_argsort.dispatch.cpp',
769769
[AVX512_SKX]
770770
],
771771
[
772-
'simd_qsort.dispatch.h',
773-
'src/npysort/simd_qsort.dispatch.cpp',
772+
'x86_simd_qsort.dispatch.h',
773+
'src/npysort/x86_simd_qsort.dispatch.cpp',
774774
[
775775
AVX512_SKX, AVX2,
776-
ASIMD,
777776
]
778777
],
779778
[
780-
'simd_qsort_16bit.dispatch.h',
781-
'src/npysort/simd_qsort_16bit.dispatch.cpp',
779+
'x86_simd_qsort_16bit.dispatch.h',
780+
'src/npysort/x86_simd_qsort_16bit.dispatch.cpp',
782781
[AVX512_SPR, AVX512_ICL]
783782
],
783+
[
784+
'highway_qsort.dispatch.h',
785+
'src/npysort/highway_qsort.dispatch.cpp',
786+
[
787+
ASIMD,
788+
]
789+
],
784790
]
785791
mtargets = mod_features.multi_targets(
786792
gen_mtargets[0], multiarray_gen_headers + gen_mtargets[1],
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/*@targets
2+
* $maxopt $keep_baseline
3+
* asimd
4+
*/
5+
// policy $keep_baseline is used to avoid skipping the build of asimd
6+
// when it's part of baseline features (--cpu-baseline), since
7+
// 'baseline' option isn't specified within targets.
8+
9+
#include "highway_qsort.hpp"
10+
#ifndef __CYGWIN__
11+
12+
#if NPY_HAVE_ASIMD
13+
#define VQSORT_ONLY_STATIC 1
14+
#include "hwy/contrib/sort/vqsort-inl.h"
15+
#endif
16+
17+
namespace np { namespace qsort_simd {
18+
19+
#if NPY_HAVE_ASIMD
20+
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(int32_t *arr, intptr_t size)
21+
{
22+
hwy::HWY_NAMESPACE::VQSortStatic(arr, size, hwy::SortAscending());
23+
}
24+
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(uint32_t *arr, intptr_t size)
25+
{
26+
hwy::HWY_NAMESPACE::VQSortStatic(arr, size, hwy::SortAscending());
27+
}
28+
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(int64_t *arr, intptr_t size)
29+
{
30+
hwy::HWY_NAMESPACE::VQSortStatic(arr, size, hwy::SortAscending());
31+
}
32+
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(uint64_t *arr, intptr_t size)
33+
{
34+
hwy::HWY_NAMESPACE::VQSortStatic(arr, size, hwy::SortAscending());
35+
}
36+
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(float *arr, intptr_t size)
37+
{
38+
hwy::HWY_NAMESPACE::VQSortStatic(arr, size, hwy::SortAscending());
39+
}
40+
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(double *arr, intptr_t size)
41+
{
42+
hwy::HWY_NAMESPACE::VQSortStatic(arr, size, hwy::SortAscending());
43+
}
44+
#endif // NPY_HAVE_ASIMD
45+
46+
}} // namespace np::qsort_simd
47+
48+
#endif // __CYGWIN__
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#ifndef NUMPY_SRC_COMMON_NPYSORT_HWY_SIMD_QSORT_HPP
2+
#define NUMPY_SRC_COMMON_NPYSORT_HWY_SIMD_QSORT_HPP
3+
4+
#include "common.hpp"
5+
6+
namespace np { namespace qsort_simd {
7+
8+
#ifndef NPY_DISABLE_OPTIMIZATION
9+
#include "highway_qsort.dispatch.h"
10+
#endif
11+
NPY_CPU_DISPATCH_DECLARE(template <typename T> void QSort, (T *arr, npy_intp size))
12+
NPY_CPU_DISPATCH_DECLARE(template <typename T> void QSelect, (T* arr, npy_intp num, npy_intp kth))
13+
14+
} } // np::qsort_simd
15+
16+
#endif // NUMPY_SRC_COMMON_NPYSORT_HWY_SIMD_QSORT_HPP

numpy/_core/src/npysort/quicksort.cpp

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,13 @@
5454
#include "npysort_common.h"
5555
#include "npysort_heapsort.h"
5656
#include "numpy_tag.h"
57-
#include "simd_qsort.hpp"
57+
#include "x86_simd_qsort.hpp"
58+
#include "highway_qsort.hpp"
5859

5960
#include <cstdlib>
6061
#include <utility>
6162

6263
#define NOT_USED NPY_UNUSED(unused)
63-
#define DISABLE_HIGHWAY_OPTIMIZATION defined(__arm__)
6464

6565
/*
6666
* pushing largest partition has upper bound of log2(n) space
@@ -81,18 +81,23 @@ inline bool quicksort_dispatch(T *start, npy_intp num)
8181
void (*dispfunc)(TF*, intptr_t) = nullptr;
8282
if (sizeof(T) == sizeof(uint16_t)) {
8383
#ifndef NPY_DISABLE_OPTIMIZATION
84-
#include "simd_qsort_16bit.dispatch.h"
84+
#include "x86_simd_qsort_16bit.dispatch.h"
8585
#endif
8686
NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::qsort_simd::template QSort, <TF>);
8787
}
88-
#if !DISABLE_HIGHWAY_OPTIMIZATION
8988
else if (sizeof(T) == sizeof(uint32_t) || sizeof(T) == sizeof(uint64_t)) {
9089
#ifndef NPY_DISABLE_OPTIMIZATION
91-
#include "simd_qsort.dispatch.h"
90+
#include "x86_simd_qsort.dispatch.h"
9291
#endif
9392
NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::qsort_simd::template QSort, <TF>);
93+
// If not dispatched for AVX-512 or AVX2, fall back to the Highway dispatch:
94+
if (dispfunc == nullptr) {
95+
#ifndef NPY_DISABLE_OPTIMIZATION
96+
#include "highway_qsort.dispatch.h"
97+
#endif
98+
NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::qsort_simd::template QSort, <TF>);
99+
}
94100
}
95-
#endif
96101
if (dispfunc) {
97102
(*dispfunc)(reinterpret_cast<TF*>(start), static_cast<intptr_t>(num));
98103
return true;
@@ -109,7 +114,7 @@ inline bool aquicksort_dispatch(T *start, npy_intp* arg, npy_intp num)
109114
using TF = typename np::meta::FixedWidth<T>::Type;
110115
void (*dispfunc)(TF*, npy_intp*, npy_intp) = nullptr;
111116
#ifndef NPY_DISABLE_OPTIMIZATION
112-
#include "simd_argsort.dispatch.h"
117+
#include "x86_simd_argsort.dispatch.h"
113118
#endif
114119
NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::qsort_simd::template ArgQSort, <TF>);
115120
if (dispfunc) {

numpy/_core/src/npysort/selection.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
#include <array>
2626
#include <cstdlib>
2727
#include <utility>
28-
#include "simd_qsort.hpp"
28+
#include "x86_simd_qsort.hpp"
2929

3030
#define NOT_USED NPY_UNUSED(unused)
3131
#define DISABLE_HIGHWAY_OPTIMIZATION (defined(__arm__) || defined(__aarch64__))
@@ -45,14 +45,14 @@ inline bool quickselect_dispatch(T* v, npy_intp num, npy_intp kth)
4545
void (*dispfunc)(TF*, npy_intp, npy_intp) = nullptr;
4646
if constexpr (sizeof(T) == sizeof(uint16_t)) {
4747
#ifndef NPY_DISABLE_OPTIMIZATION
48-
#include "simd_qsort_16bit.dispatch.h"
48+
#include "x86_simd_qsort_16bit.dispatch.h"
4949
#endif
5050
NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::qsort_simd::template QSelect, <TF>);
5151
}
5252
#if !DISABLE_HIGHWAY_OPTIMIZATION
5353
else if constexpr (sizeof(T) == sizeof(uint32_t) || sizeof(T) == sizeof(uint64_t)) {
5454
#ifndef NPY_DISABLE_OPTIMIZATION
55-
#include "simd_qsort.dispatch.h"
55+
#include "x86_simd_qsort.dispatch.h"
5656
#endif
5757
NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::qsort_simd::template QSelect, <TF>);
5858
}
@@ -79,7 +79,7 @@ inline bool argquickselect_dispatch(T* v, npy_intp* arg, npy_intp num, npy_intp
7979
(sizeof(T) == sizeof(uint32_t) || sizeof(T) == sizeof(uint64_t))) {
8080
using TF = typename np::meta::FixedWidth<T>::Type;
8181
#ifndef NPY_DISABLE_OPTIMIZATION
82-
#include "simd_argsort.dispatch.h"
82+
#include "x86_simd_argsort.dispatch.h"
8383
#endif
8484
void (*dispfunc)(TF*, npy_intp*, npy_intp, npy_intp) = nullptr;
8585
NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::qsort_simd::template ArgQSelect, <TF>);

numpy/_core/src/npysort/simd_argsort.dispatch.cpp renamed to numpy/_core/src/npysort/x86_simd_argsort.dispatch.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
// when it's part of baseline features (--cpu-baseline), since
77
// 'baseline' option isn't specified within targets.
88

9-
#include "simd_qsort.hpp"
9+
#include "x86_simd_qsort.hpp"
1010
#ifndef __CYGWIN__
1111

1212
#if defined(NPY_HAVE_AVX512_SKX)

numpy/_core/src/npysort/simd_qsort.dispatch.cpp renamed to numpy/_core/src/npysort/x86_simd_qsort.dispatch.cpp

Lines changed: 1 addition & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,21 @@
11
/*@targets
22
* $maxopt $keep_baseline
33
* avx512_skx avx2
4-
* asimd
54
*/
65
// policy $keep_baseline is used to avoid skipping the build of avx512_skx
76
// when it's part of baseline features (--cpu-baseline), since
87
// 'baseline' option isn't specified within targets.
98

10-
#include "simd_qsort.hpp"
9+
#include "x86_simd_qsort.hpp"
1110
#ifndef __CYGWIN__
1211

13-
#define USE_HIGHWAY defined(__aarch64__)
14-
1512
#if defined(NPY_HAVE_AVX512_SKX)
1613
#include "x86-simd-sort/src/avx512-32bit-qsort.hpp"
1714
#include "x86-simd-sort/src/avx512-64bit-qsort.hpp"
1815
#include "x86-simd-sort/src/avx512-64bit-argsort.hpp"
1916
#elif defined(NPY_HAVE_AVX2)
2017
#include "x86-simd-sort/src/avx2-32bit-qsort.hpp"
2118
#include "x86-simd-sort/src/avx2-64bit-qsort.hpp"
22-
#elif USE_HIGHWAY
23-
#define VQSORT_ONLY_STATIC 1
24-
#include "hwy/contrib/sort/vqsort-inl.h"
2519
#endif
2620

2721
namespace np { namespace qsort_simd {
@@ -123,31 +117,6 @@ template<> void NPY_CPU_DISPATCH_CURFX(QSort)(double *arr, npy_intp num)
123117
avx2_qsort(arr, num, true);
124118
#endif
125119
}
126-
#elif USE_HIGHWAY
127-
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(int32_t *arr, intptr_t size)
128-
{
129-
hwy::HWY_NAMESPACE::VQSortStatic(arr, size, hwy::SortAscending());
130-
}
131-
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(uint32_t *arr, intptr_t size)
132-
{
133-
hwy::HWY_NAMESPACE::VQSortStatic(arr, size, hwy::SortAscending());
134-
}
135-
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(int64_t *arr, intptr_t size)
136-
{
137-
hwy::HWY_NAMESPACE::VQSortStatic(arr, size, hwy::SortAscending());
138-
}
139-
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(uint64_t *arr, intptr_t size)
140-
{
141-
hwy::HWY_NAMESPACE::VQSortStatic(arr, size, hwy::SortAscending());
142-
}
143-
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(float *arr, intptr_t size)
144-
{
145-
hwy::HWY_NAMESPACE::VQSortStatic(arr, size, hwy::SortAscending());
146-
}
147-
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(double *arr, intptr_t size)
148-
{
149-
hwy::HWY_NAMESPACE::VQSortStatic(arr, size, hwy::SortAscending());
150-
}
151120
#endif // NPY_HAVE_AVX512_SKX || NPY_HAVE_AVX2
152121

153122
}} // namespace np::qsort_simd
Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,28 @@
1-
#ifndef NUMPY_SRC_COMMON_NPYSORT_SIMD_QSORT_HPP
2-
#define NUMPY_SRC_COMMON_NPYSORT_SIMD_QSORT_HPP
1+
#ifndef NUMPY_SRC_COMMON_NPYSORT_X86_SIMD_QSORT_HPP
2+
#define NUMPY_SRC_COMMON_NPYSORT_X86_SIMD_QSORT_HPP
33

44
#include "common.hpp"
55

6-
#define DISABLE_HIGHWAY_OPTIMIZATION defined(__arm__)
7-
86
namespace np { namespace qsort_simd {
97

10-
#if !DISABLE_HIGHWAY_OPTIMIZATION
118
#ifndef NPY_DISABLE_OPTIMIZATION
12-
#include "simd_qsort.dispatch.h"
9+
#include "x86_simd_qsort.dispatch.h"
1310
#endif
1411
NPY_CPU_DISPATCH_DECLARE(template <typename T> void QSort, (T *arr, npy_intp size))
1512
NPY_CPU_DISPATCH_DECLARE(template <typename T> void QSelect, (T* arr, npy_intp num, npy_intp kth))
16-
#endif // DISABLE_HIGHWAY_OPTIMIZATION
1713

1814
#ifndef NPY_DISABLE_OPTIMIZATION
19-
#include "simd_argsort.dispatch.h"
15+
#include "x86_simd_argsort.dispatch.h"
2016
#endif
2117
NPY_CPU_DISPATCH_DECLARE(template <typename T> void ArgQSort, (T *arr, npy_intp* arg, npy_intp size))
2218
NPY_CPU_DISPATCH_DECLARE(template <typename T> void ArgQSelect, (T *arr, npy_intp* arg, npy_intp kth, npy_intp size))
2319

2420
#ifndef NPY_DISABLE_OPTIMIZATION
25-
#include "simd_qsort_16bit.dispatch.h"
21+
#include "x86_simd_qsort_16bit.dispatch.h"
2622
#endif
2723
NPY_CPU_DISPATCH_DECLARE(template <typename T> void QSort, (T *arr, npy_intp size))
2824
NPY_CPU_DISPATCH_DECLARE(template <typename T> void QSelect, (T* arr, npy_intp num, npy_intp kth))
2925

3026
} } // np::qsort_simd
3127

32-
#undef DISABLE_HIGHWAY_OPTIMIZATION
33-
34-
#endif // NUMPY_SRC_COMMON_NPYSORT_SIMD_QSORT_HPP
28+
#endif // NUMPY_SRC_COMMON_NPYSORT_X86_SIMD_QSORT_HPP

numpy/_core/src/npysort/simd_qsort_16bit.dispatch.cpp renamed to numpy/_core/src/npysort/x86_simd_qsort_16bit.dispatch.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
// when it's part of baseline features (--cpu-baseline), since
66
// 'baseline' option isn't specified within targets.
77

8-
#include "simd_qsort.hpp"
8+
#include "x86_simd_qsort.hpp"
99
#ifndef __CYGWIN__
1010

1111
#if defined(NPY_HAVE_AVX512_SPR)

0 commit comments

Comments
 (0)