Skip to content

Commit e4b110a

Browse files
authored
[Headers][X86] Allow FMA3/FMA4 vector intrinsics to be used in constexpr (llvm#154558)
Now that llvm#152455 is done, we can make all the vector fma intrinsics that wrap __builtin_elementwise_fma to be constexpr Fixes llvm#154555
1 parent 810ea69 commit e4b110a

File tree

4 files changed

+102
-64
lines changed

4 files changed

+102
-64
lines changed

clang/lib/Headers/fma4intrin.h

Lines changed: 42 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,22 @@
2020
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(128)))
2121
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(256)))
2222

23-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
24-
_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
25-
{
23+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
24+
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
25+
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
26+
#else
27+
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
28+
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
29+
#endif
30+
31+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
32+
_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) {
2633
return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B,
2734
(__v4sf)__C);
2835
}
2936

30-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
31-
_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
32-
{
37+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
38+
_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) {
3339
return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B,
3440
(__v2df)__C);
3541
}
@@ -46,16 +52,14 @@ _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
4652
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
4753
}
4854

49-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
50-
_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
51-
{
55+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
56+
_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C) {
5257
return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B,
5358
-(__v4sf)__C);
5459
}
5560

56-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
57-
_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
58-
{
61+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
62+
_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) {
5963
return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B,
6064
-(__v2df)__C);
6165
}
@@ -72,16 +76,14 @@ _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
7276
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
7377
}
7478

75-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
76-
_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
77-
{
79+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
80+
_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) {
7881
return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B,
7982
(__v4sf)__C);
8083
}
8184

82-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
83-
_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
84-
{
85+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
86+
_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) {
8587
return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B,
8688
(__v2df)__C);
8789
}
@@ -98,16 +100,14 @@ _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
98100
return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
99101
}
100102

101-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
102-
_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
103-
{
103+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
104+
_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) {
104105
return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B,
105106
-(__v4sf)__C);
106107
}
107108

108-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
109-
_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
110-
{
109+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
110+
_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) {
111111
return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B,
112112
-(__v2df)__C);
113113
}
@@ -148,58 +148,50 @@ _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
148148
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
149149
}
150150

151-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
152-
_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)
153-
{
151+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
152+
_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) {
154153
return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B,
155154
(__v8sf)__C);
156155
}
157156

158-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
159-
_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)
160-
{
157+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
158+
_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) {
161159
return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B,
162160
(__v4df)__C);
163161
}
164162

165-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
166-
_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
167-
{
163+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
164+
_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C) {
168165
return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B,
169166
-(__v8sf)__C);
170167
}
171168

172-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
173-
_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
174-
{
169+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
170+
_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) {
175171
return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B,
176172
-(__v4df)__C);
177173
}
178174

179-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
180-
_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
181-
{
175+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
176+
_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) {
182177
return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B,
183178
(__v8sf)__C);
184179
}
185180

186-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
187-
_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
188-
{
181+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
182+
_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) {
189183
return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B,
190184
(__v4df)__C);
191185
}
192186

193-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
194-
_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
195-
{
187+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
188+
_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) {
196189
return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B,
197190
-(__v8sf)__C);
198191
}
199192

200-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
201-
_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
202-
{
193+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
194+
_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) {
203195
return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B,
204196
-(__v4df)__C);
205197
}
@@ -230,5 +222,7 @@ _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
230222

231223
#undef __DEFAULT_FN_ATTRS128
232224
#undef __DEFAULT_FN_ATTRS256
225+
#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
226+
#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
233227

234228
#endif /* __FMA4INTRIN_H */

clang/lib/Headers/fmaintrin.h

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,14 @@
1818
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128)))
1919
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256)))
2020

21+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
22+
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
23+
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
24+
#else
25+
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
26+
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
27+
#endif
28+
2129
/// Computes a multiply-add of 128-bit vectors of [4 x float].
2230
/// For each element, computes <c> (__A * __B) + __C </c>.
2331
///
@@ -32,7 +40,7 @@
3240
/// \param __C
3341
/// A 128-bit vector of [4 x float] containing the addend.
3442
/// \returns A 128-bit vector of [4 x float] containing the result.
35-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
43+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
3644
_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
3745
{
3846
return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B,
@@ -53,7 +61,7 @@ _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
5361
/// \param __C
5462
/// A 128-bit vector of [2 x double] containing the addend.
5563
/// \returns A 128-bit [2 x double] vector containing the result.
56-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
64+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
5765
_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
5866
{
5967
return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B,
@@ -132,7 +140,7 @@ _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
132140
/// \param __C
133141
/// A 128-bit vector of [4 x float] containing the subtrahend.
134142
/// \returns A 128-bit vector of [4 x float] containing the result.
135-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
143+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
136144
_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
137145
{
138146
return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B,
@@ -153,7 +161,7 @@ _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
153161
/// \param __C
154162
/// A 128-bit vector of [2 x double] containing the addend.
155163
/// \returns A 128-bit vector of [2 x double] containing the result.
156-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
164+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
157165
_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
158166
{
159167
return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B,
@@ -232,7 +240,7 @@ _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
232240
/// \param __C
233241
/// A 128-bit vector of [4 x float] containing the addend.
234242
/// \returns A 128-bit [4 x float] vector containing the result.
235-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
243+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
236244
_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
237245
{
238246
return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B,
@@ -253,7 +261,7 @@ _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
253261
/// \param __C
254262
/// A 128-bit vector of [2 x double] containing the addend.
255263
/// \returns A 128-bit vector of [2 x double] containing the result.
256-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
264+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
257265
_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
258266
{
259267
return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B,
@@ -332,7 +340,7 @@ _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
332340
/// \param __C
333341
/// A 128-bit vector of [4 x float] containing the subtrahend.
334342
/// \returns A 128-bit vector of [4 x float] containing the result.
335-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
343+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
336344
_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
337345
{
338346
return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B,
@@ -353,7 +361,7 @@ _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
353361
/// \param __C
354362
/// A 128-bit vector of [2 x double] containing the subtrahend.
355363
/// \returns A 128-bit vector of [2 x double] containing the result.
356-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
364+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
357365
_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
358366
{
359367
return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B,
@@ -536,7 +544,7 @@ _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
536544
/// \param __C
537545
/// A 256-bit vector of [8 x float] containing the addend.
538546
/// \returns A 256-bit vector of [8 x float] containing the result.
539-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
547+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
540548
_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
541549
{
542550
return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B,
@@ -557,7 +565,7 @@ _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
557565
/// \param __C
558566
/// A 256-bit vector of [4 x double] containing the addend.
559567
/// \returns A 256-bit vector of [4 x double] containing the result.
560-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
568+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
561569
_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
562570
{
563571
return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B,
@@ -578,7 +586,7 @@ _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
578586
/// \param __C
579587
/// A 256-bit vector of [8 x float] containing the subtrahend.
580588
/// \returns A 256-bit vector of [8 x float] containing the result.
581-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
589+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
582590
_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
583591
{
584592
return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B,
@@ -599,7 +607,7 @@ _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
599607
/// \param __C
600608
/// A 256-bit vector of [4 x double] containing the subtrahend.
601609
/// \returns A 256-bit vector of [4 x double] containing the result.
602-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
610+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
603611
_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
604612
{
605613
return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B,
@@ -620,7 +628,7 @@ _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
620628
/// \param __C
621629
/// A 256-bit vector of [8 x float] containing the addend.
622630
/// \returns A 256-bit vector of [8 x float] containing the result.
623-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
631+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
624632
_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
625633
{
626634
return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B,
@@ -641,7 +649,7 @@ _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
641649
/// \param __C
642650
/// A 256-bit vector of [4 x double] containing the addend.
643651
/// \returns A 256-bit vector of [4 x double] containing the result.
644-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
652+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
645653
_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
646654
{
647655
return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B,
@@ -662,7 +670,7 @@ _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
662670
/// \param __C
663671
/// A 256-bit vector of [8 x float] containing the subtrahend.
664672
/// \returns A 256-bit vector of [8 x float] containing the result.
665-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
673+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
666674
_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
667675
{
668676
return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B,
@@ -683,7 +691,7 @@ _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
683691
/// \param __C
684692
/// A 256-bit vector of [4 x double] containing the subtrahend.
685693
/// \returns A 256-bit vector of [4 x double] containing the result.
686-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
694+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
687695
_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
688696
{
689697
return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B,
@@ -808,5 +816,7 @@ _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
808816

809817
#undef __DEFAULT_FN_ATTRS128
810818
#undef __DEFAULT_FN_ATTRS256
819+
#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
820+
#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
811821

812822
#endif /* __FMAINTRIN_H */

0 commit comments

Comments
 (0)