Skip to content

Commit 63c95b4

Browse files
committed
refactoring and add test code
1 parent 69bf326 commit 63c95b4

File tree

2 files changed

+48
-47
lines changed

2 files changed

+48
-47
lines changed

clang/lib/Headers/avx512fintrin.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3166,7 +3166,7 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
31663166

31673167
#define _mm512_extractf64x4_pd(A, I) \
31683168
((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
3169-
(__v4df)_mm256_undefined_pd(), \
3169+
(__v4df)_mm256_setzero_pd(), \
31703170
(__mmask8)-1))
31713171

31723172
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
@@ -3181,7 +3181,7 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
31813181

31823182
#define _mm512_extractf32x4_ps(A, I) \
31833183
((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
3184-
(__v4sf)_mm_undefined_ps(), \
3184+
(__v4sf)_mm_setzero_ps(), \
31853185
(__mmask8)-1))
31863186

31873187
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
@@ -7107,7 +7107,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
71077107

71087108
#define _mm512_extracti32x4_epi32(A, imm) \
71097109
((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
7110-
(__v4si)_mm_undefined_si128(), \
7110+
(__v4si)_mm_setzero_si128(), \
71117111
(__mmask8)-1))
71127112

71137113
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
@@ -7122,7 +7122,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
71227122

71237123
#define _mm512_extracti64x4_epi64(A, imm) \
71247124
((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
7125-
(__v4di)_mm256_undefined_si256(), \
7125+
(__v4di)_mm256_setzero_si256(), \
71267126
(__mmask8)-1))
71277127

71287128
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \

clang/test/CodeGen/X86/avx512f-builtins.c

Lines changed: 44 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -2452,46 +2452,46 @@ __m256d test_mm512_extractf64x4_pd(__m512d a)
24522452
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
24532453
return _mm512_extractf64x4_pd(a, 1);
24542454
}
2455-
TEST_CONSTEXPR(match_m256d(_mm512_extractf64x4_pd(((__m512d){
2456-
0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0
2457-
}), 1),
2458-
4.0, 5.0, 6.0, 7.0));
2459-
2455+
TEST_CONSTEXPR(match_m256d(_mm512_extractf64x4_pd(((__m512d)
2456+
{0.0,1.0,2.0,3.0, 4.0,5.0,6.0,7.0}),1),
2457+
4.0, 5.0, 6.0, 7.0));
24602458

24612459
__m256d test_mm512_mask_extractf64x4_pd(__m256d __W,__mmask8 __U,__m512d __A){
24622460
// CHECK-LABEL: test_mm512_mask_extractf64x4_pd
24632461
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
24642462
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
24652463
return _mm512_mask_extractf64x4_pd( __W, __U, __A, 1);
24662464
}
2467-
TEST_CONSTEXPR(match_m256d(_mm512_mask_extractf64x4_pd(
2468-
(__m256d){100.0,101.0,102.0,103.0}, // W(merge)
2469-
(__mmask8)0x5, // 0101b
2470-
(__m512d){0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0},
2471-
1),
2472-
4.0, 101.0, 6.0, 103.0));
2465+
TEST_CONSTEXPR(match_m256d(
2466+
_mm512_mask_extractf64x4_pd(
2467+
((__m256d){100.0,101.0,102.0,103.0}), // W (merge)
2468+
(__mmask8)0x5,
2469+
((__m512d){0.0,1.0,2.0,3.0, 4.0,5.0,6.0,7.0}),
2470+
1),
2471+
4.0, 101.0, 6.0, 103.0));
24732472

24742473
__m256d test_mm512_maskz_extractf64x4_pd(__mmask8 __U,__m512d __A){
24752474
// CHECK-LABEL: test_mm512_maskz_extractf64x4_pd
24762475
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
24772476
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
24782477
return _mm512_maskz_extractf64x4_pd( __U, __A, 1);
24792478
}
2480-
TEST_CONSTEXPR(match_m256d(_mm512_maskz_extractf64x4_pd(
2481-
(__mmask8)0x3,
2482-
(__m512d){0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0},
2483-
1),
2484-
4.0, 5.0, 0.0, 0.0));
2479+
TEST_CONSTEXPR(match_m256d(
2480+
_mm512_maskz_extractf64x4_pd(
2481+
(__mmask8)0x3,
2482+
((__m512d){0.0,1.0,2.0,3.0, 4.0,5.0,6.0,7.0}),
2483+
1),
2484+
4.0, 5.0, 0.0, 0.0));
24852485

24862486
__m128 test_mm512_extractf32x4_ps(__m512 a)
24872487
{
24882488
// CHECK-LABEL: test_mm512_extractf32x4_ps
24892489
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
24902490
return _mm512_extractf32x4_ps(a, 1);
24912491
}
2492-
TEST_CONSTEXPR(match_m128(_mm512_extractf32x4_ps(((__m512){
2493-
0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15
2494-
}), 1),
2492+
TEST_CONSTEXPR(match_m128(_mm512_extractf32x4_ps(
2493+
((__m512){0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}),
2494+
1),
24952495
4.0f, 5.0f, 6.0f, 7.0f));
24962496

24972497
__m128 test_mm512_mask_extractf32x4_ps(__m128 __W, __mmask8 __U,__m512 __A){
@@ -2501,9 +2501,9 @@ __m128 test_mm512_mask_extractf32x4_ps(__m128 __W, __mmask8 __U,__m512 __A){
25012501
return _mm512_mask_extractf32x4_ps( __W, __U, __A, 1);
25022502
}
25032503
TEST_CONSTEXPR(match_m128(_mm512_mask_extractf32x4_ps(
2504-
(__m128){100,101,102,103}, // W(merge)
2505-
(__mmask8)0x5, // 0101b
2506-
(__m512){0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15},
2504+
((__m128){100,101,102,103}),
2505+
(__mmask8)0x5,
2506+
((__m512){0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}),
25072507
1),
25082508
4.0f, 101.0f, 6.0f, 103.0f));
25092509

@@ -2515,7 +2515,7 @@ __m128 test_mm512_maskz_extractf32x4_ps( __mmask8 __U,__m512 __A){
25152515
}
25162516
TEST_CONSTEXPR(match_m128(_mm512_maskz_extractf32x4_ps(
25172517
(__mmask8)0x3,
2518-
(__m512){0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15},
2518+
((__m512){0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}),
25192519
1),
25202520
4.0f, 5.0f, 0.0f, 0.0f));
25212521

@@ -7078,10 +7078,11 @@ __m128i test_mm512_extracti32x4_epi32(__m512i __A) {
70787078
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
70797079
return _mm512_extracti32x4_epi32(__A, 3);
70807080
}
7081-
TEST_CONSTEXPR(match_m128i(_mm512_extracti32x4_epi32(((__m512i){
7082-
0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15
7083-
}), 3),
7084-
12, 13, 14, 15));
7081+
TEST_CONSTEXPR(match_m128i(_mm512_extracti32x4_epi32(((__m512i)(__v16si)
7082+
{0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}), 3),
7083+
0x0000000D0000000CULL, // (13<<32)|12
7084+
0x0000000F0000000EULL
7085+
));
70857086

70867087
__m128i test_mm512_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m512i __A) {
70877088
// CHECK-LABEL: test_mm512_mask_extracti32x4_epi32
@@ -7090,14 +7091,16 @@ __m128i test_mm512_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m512i __
70907091
return _mm512_mask_extracti32x4_epi32(__W, __U, __A, 3);
70917092
}
70927093
TEST_CONSTEXPR(match_m128i(_mm512_mask_extracti32x4_epi32(
7093-
(__m128i){100,101,102,103}, // merge=W
7094+
((__m128i)(__v4si){100,101,102,103}), // merge=W
70947095
(__mmask8)0x5, // 0101b
7095-
(__m512i){
7096+
((__m512i)(__v16si){
70967097
0,1,2,3, 4,5,6,7,
70977098
8,9,10,11, 12,13,14,15
7098-
},
7099+
}),
70997100
3),
7100-
12, 101, 14, 103));
7101+
0x000000650000000CULL, // (101<<32)|12
7102+
0x000000670000000EULL // (103<<32)|14
7103+
));
71017104

71027105
__m128i test_mm512_maskz_extracti32x4_epi32(__mmask8 __U, __m512i __A) {
71037106
// CHECK-LABEL: test_mm512_maskz_extracti32x4_epi32
@@ -7107,21 +7110,19 @@ __m128i test_mm512_maskz_extracti32x4_epi32(__mmask8 __U, __m512i __A) {
71077110
}
71087111
TEST_CONSTEXPR(match_m128i(_mm512_maskz_extracti32x4_epi32(
71097112
(__mmask8)0x3,
7110-
(__m512i){
7111-
0,1,2,3, 4,5,6,7,
7112-
8,9,10,11, 12,13,14,15
7113-
},
7113+
((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),
71147114
3),
7115-
12, 13, 0, 0));
7115+
0x0000000D0000000CULL, // (13<<32)|12
7116+
0x0000000000000000ULL
7117+
));
71167118

71177119
__m256i test_mm512_extracti64x4_epi64(__m512i __A) {
71187120
// CHECK-LABEL: test_mm512_extracti64x4_epi64
71197121
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
71207122
return _mm512_extracti64x4_epi64(__A, 1);
71217123
}
7122-
TEST_CONSTEXPR(match_m256i(_mm512_extracti64x4_epi64(((__m512i){
7123-
0ULL,1ULL,2ULL,3ULL, 4ULL,5ULL,6ULL,7ULL
7124-
}), 1),
7124+
TEST_CONSTEXPR(match_m256i(
7125+
_mm512_extracti64x4_epi64(((__m512i)(__v8di){0,1,2,3,4,5,6,7}), 1),
71257126
4ULL, 5ULL, 6ULL, 7ULL));
71267127

71277128
__m256i test_mm512_mask_extracti64x4_epi64(__m256i __W, __mmask8 __U, __m512i __A) {
@@ -7130,10 +7131,10 @@ __m256i test_mm512_mask_extracti64x4_epi64(__m256i __W, __mmask8 __U, __m512i __
71307131
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
71317132
return _mm512_mask_extracti64x4_epi64(__W, __U, __A, 1);
71327133
}
7133-
TEST_CONSTEXPR(match_m256i_64(_mm512_mask_extracti64x4_epi64(
7134-
(__m256i){100ULL,101ULL,102ULL,103ULL},
7134+
TEST_CONSTEXPR(match_m256i(_mm512_mask_extracti64x4_epi64(
7135+
((__m256i)(__v4di){100ULL,101ULL,102ULL,103ULL}), // W
71357136
(__mmask8)0x5,
7136-
(__m512i){0ULL,1ULL,2ULL,3ULL, 4ULL,5ULL,6ULL,7ULL},
7137+
(((__m512i)(__v8di){0ULL,1ULL,2ULL,3ULL, 4ULL,5ULL,6ULL,7ULL})),
71377138
1),
71387139
4ULL, 101ULL, 6ULL, 103ULL));
71397140

@@ -7145,7 +7146,7 @@ __m256i test_mm512_maskz_extracti64x4_epi64(__mmask8 __U, __m512i __A) {
71457146
}
71467147
TEST_CONSTEXPR(match_m256i(_mm512_maskz_extracti64x4_epi64(
71477148
(__mmask8)0x3,
7148-
(__m512i){0ULL,1ULL,2ULL,3ULL, 4ULL,5ULL,6ULL,7ULL},
7149+
(((__m512i)(__v8di){0ULL,1ULL,2ULL,3ULL, 4ULL,5ULL,6ULL,7ULL})),
71497150
1),
71507151
4ULL, 5ULL, 0ULL, 0ULL));
71517152

0 commit comments

Comments
 (0)