Skip to content

Commit 1b73813

Browse files
author
Raghuveer Devulapalli
committed
Add inline keyword for template specializations
1 parent d9c9737 commit 1b73813

File tree

4 files changed

+35
-29
lines changed

4 files changed

+35
-29
lines changed

src/avx512-16bit-qsort.hpp

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,8 @@ struct zmm_vector<uint16_t> {
465465
};
466466

467467
template <>
468-
bool comparison_func<zmm_vector<float16>>(const uint16_t &a, const uint16_t &b)
468+
X86_SIMD_SORT_INLINE_ONLY bool
469+
comparison_func<zmm_vector<float16>>(const uint16_t &a, const uint16_t &b)
469470
{
470471
uint16_t signa = a & 0x8000, signb = b & 0x8000;
471472
uint16_t expa = a & 0x7c00, expb = b & 0x7c00;
@@ -493,8 +494,8 @@ bool comparison_func<zmm_vector<float16>>(const uint16_t &a, const uint16_t &b)
493494
}
494495

495496
template <>
496-
arrsize_t replace_nan_with_inf<zmm_vector<float16>>(uint16_t *arr,
497-
arrsize_t arrsize)
497+
X86_SIMD_SORT_INLINE_ONLY arrsize_t
498+
replace_nan_with_inf<zmm_vector<float16>>(uint16_t *arr, arrsize_t arrsize)
498499
{
499500
arrsize_t nan_count = 0;
500501
__mmask16 loadmask = 0xFFFF;
@@ -513,13 +514,13 @@ arrsize_t replace_nan_with_inf<zmm_vector<float16>>(uint16_t *arr,
513514
}
514515

515516
template <>
516-
bool is_a_nan<uint16_t>(uint16_t elem)
517+
X86_SIMD_SORT_INLINE_ONLY bool is_a_nan<uint16_t>(uint16_t elem)
517518
{
518519
return ((elem & 0x7c00u) == 0x7c00u) && ((elem & 0x03ffu) != 0);
519520
}
520521

521-
X86_SIMD_SORT_INLINE
522-
void avx512_qsort_fp16(uint16_t *arr, arrsize_t arrsize, bool hasnan = false)
522+
X86_SIMD_SORT_INLINE void
523+
avx512_qsort_fp16(uint16_t *arr, arrsize_t arrsize, bool hasnan = false)
523524
{
524525
if (arrsize > 1) {
525526
arrsize_t nan_count = 0;
@@ -533,11 +534,10 @@ void avx512_qsort_fp16(uint16_t *arr, arrsize_t arrsize, bool hasnan = false)
533534
}
534535
}
535536

536-
X86_SIMD_SORT_INLINE
537-
void avx512_qselect_fp16(uint16_t *arr,
538-
arrsize_t k,
539-
arrsize_t arrsize,
540-
bool hasnan = false)
537+
X86_SIMD_SORT_INLINE void avx512_qselect_fp16(uint16_t *arr,
538+
arrsize_t k,
539+
arrsize_t arrsize,
540+
bool hasnan = false)
541541
{
542542
arrsize_t indx_last_elem = arrsize - 1;
543543
if (UNLIKELY(hasnan)) {
@@ -549,11 +549,10 @@ void avx512_qselect_fp16(uint16_t *arr,
549549
}
550550
}
551551

552-
X86_SIMD_SORT_INLINE
553-
void avx512_partial_qsort_fp16(uint16_t *arr,
554-
arrsize_t k,
555-
arrsize_t arrsize,
556-
bool hasnan = false)
552+
X86_SIMD_SORT_INLINE void avx512_partial_qsort_fp16(uint16_t *arr,
553+
arrsize_t k,
554+
arrsize_t arrsize,
555+
bool hasnan = false)
557556
{
558557
avx512_qselect_fp16(arr, k - 1, arrsize, hasnan);
559558
avx512_qsort_fp16(arr, k - 1);

src/avx512-64bit-argsort.hpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -657,9 +657,8 @@ avx512_argsort(T *arr, arrsize_t *arg, arrsize_t arrsize, bool hasnan = false)
657657
}
658658

659659
template <typename T>
660-
X86_SIMD_SORT_INLINE std::vector<arrsize_t> avx512_argsort(T *arr,
661-
arrsize_t arrsize,
662-
bool hasnan = false)
660+
X86_SIMD_SORT_INLINE std::vector<arrsize_t>
661+
avx512_argsort(T *arr, arrsize_t arrsize, bool hasnan = false)
663662
{
664663
std::vector<arrsize_t> indices(arrsize);
665664
std::iota(indices.begin(), indices.end(), 0);
@@ -669,8 +668,11 @@ X86_SIMD_SORT_INLINE std::vector<arrsize_t> avx512_argsort(T *arr,
669668

670669
/* argselect methods for 32-bit and 64-bit dtypes */
671670
template <typename T>
672-
X86_SIMD_SORT_INLINE void
673-
avx512_argselect(T *arr, arrsize_t *arg, arrsize_t k, arrsize_t arrsize, bool hasnan = false)
671+
X86_SIMD_SORT_INLINE void avx512_argselect(T *arr,
672+
arrsize_t *arg,
673+
arrsize_t k,
674+
arrsize_t arrsize,
675+
bool hasnan = false)
674676
{
675677
using vectype = typename std::conditional<sizeof(T) == sizeof(int32_t),
676678
ymm_vector<T>,

src/avx512fp16-16bit-qsort.hpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -160,13 +160,14 @@ struct zmm_vector<_Float16> {
160160
};
161161

162162
template <>
163-
bool is_a_nan<_Float16>(_Float16 elem)
163+
X86_SIMD_SORT_INLINE_ONLY bool is_a_nan<_Float16>(_Float16 elem)
164164
{
165165
return elem != elem;
166166
}
167167

168168
template <>
169-
void replace_inf_with_nan(_Float16 *arr, arrsize_t size, arrsize_t nan_count)
169+
X86_SIMD_SORT_INLINE_ONLY void
170+
replace_inf_with_nan(_Float16 *arr, arrsize_t size, arrsize_t nan_count)
170171
{
171172
Fp16Bits val;
172173
val.i_ = 0x7c01;
@@ -177,7 +178,8 @@ void replace_inf_with_nan(_Float16 *arr, arrsize_t size, arrsize_t nan_count)
177178
}
178179
/* Specialized template function for _Float16 qsort */
179180
template <>
180-
void avx512_qsort(_Float16 *arr, arrsize_t arrsize, bool hasnan)
181+
X86_SIMD_SORT_INLINE_ONLY [[maybe_unused]] void
182+
avx512_qsort(_Float16 *arr, arrsize_t arrsize, bool hasnan)
181183
{
182184
if (arrsize > 1) {
183185
arrsize_t nan_count = 0;
@@ -192,7 +194,8 @@ void avx512_qsort(_Float16 *arr, arrsize_t arrsize, bool hasnan)
192194
}
193195

194196
template <>
195-
void avx512_qselect(_Float16 *arr, arrsize_t k, arrsize_t arrsize, bool hasnan)
197+
X86_SIMD_SORT_INLINE_ONLY [[maybe_unused]] void
198+
avx512_qselect(_Float16 *arr, arrsize_t k, arrsize_t arrsize, bool hasnan)
196199
{
197200
arrsize_t indx_last_elem = arrsize - 1;
198201
if (UNLIKELY(hasnan)) {
@@ -204,10 +207,8 @@ void avx512_qselect(_Float16 *arr, arrsize_t k, arrsize_t arrsize, bool hasnan)
204207
}
205208
}
206209
template <>
207-
void avx512_partial_qsort(_Float16 *arr,
208-
arrsize_t k,
209-
arrsize_t arrsize,
210-
bool hasnan)
210+
X86_SIMD_SORT_INLINE_ONLY [[maybe_unused]] void
211+
avx512_partial_qsort(_Float16 *arr, arrsize_t k, arrsize_t arrsize, bool hasnan)
211212
{
212213
avx512_qselect(arr, k - 1, arrsize, hasnan);
213214
avx512_qsort(arr, k - 1, hasnan);

src/xss-common-includes.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838

3939
/* Compiler-specific macros */
4040
#ifdef _MSC_VER
41+
#define X86_SIMD_SORT_INLINE_ONLY inline
4142
#define X86_SIMD_SORT_INLINE static inline
4243
#define X86_SIMD_SORT_FINLINE static __forceinline
4344
#define LIKELY(x) (x)
@@ -47,14 +48,17 @@
4748
* Force inline in cygwin to work around a compiler bug. See
4849
* https://github.com/numpy/numpy/pull/22315#issuecomment-1267757584
4950
*/
51+
#define X86_SIMD_SORT_INLINE_ONLY inline
5052
#define X86_SIMD_SORT_INLINE static __attribute__((always_inline))
5153
#define X86_SIMD_SORT_FINLINE static __attribute__((always_inline))
5254
#elif defined(__GNUC__)
55+
#define X86_SIMD_SORT_INLINE_ONLY inline
5356
#define X86_SIMD_SORT_INLINE static inline
5457
#define X86_SIMD_SORT_FINLINE static inline __attribute__((always_inline))
5558
#define LIKELY(x) __builtin_expect((x), 1)
5659
#define UNLIKELY(x) __builtin_expect((x), 0)
5760
#else
61+
#define X86_SIMD_SORT_INLINE_ONLY
5862
#define X86_SIMD_SORT_INLINE static
5963
#define X86_SIMD_SORT_FINLINE static
6064
#define LIKELY(x) (x)

0 commit comments

Comments
 (0)