+/*===---- avx10_2niintrin.h - AVX10.2 new instruction intrinsics -----------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx10_2niintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifdef __SSE2__
+
+#ifndef __AVX10_2NIINTRIN_H
+#define __AVX10_2NIINTRIN_H
+
+/* VMPSADBW */
+#define _mm_mask_mpsadbw_epu8(W, U, A, B, imm) \
+  ((__m128i)__builtin_ia32_selectw_128( \
+      (__mmask8)(U), (__v8hi)_mm_mpsadbw_epu8((A), (B), (imm)), \
+      (__v8hi)(__m128i)(W)))
+
+#define _mm_maskz_mpsadbw_epu8(U, A, B, imm) \
+  ((__m128i)__builtin_ia32_selectw_128( \
+      (__mmask8)(U), (__v8hi)_mm_mpsadbw_epu8((A), (B), (imm)), \
+      (__v8hi)_mm_setzero_si128()))
+
+#define _mm256_mask_mpsadbw_epu8(W, U, A, B, imm) \
+  ((__m256i)__builtin_ia32_selectw_256( \
+      (__mmask16)(U), (__v16hi)_mm256_mpsadbw_epu8((A), (B), (imm)), \
+      (__v16hi)(__m256i)(W)))
+
+#define _mm256_maskz_mpsadbw_epu8(U, A, B, imm) \
+  ((__m256i)__builtin_ia32_selectw_256( \
+      (__mmask16)(U), (__v16hi)_mm256_mpsadbw_epu8((A), (B), (imm)), \
+      (__v16hi)_mm256_setzero_si256()))
+
+/* YMM Rounding */
+#define _mm256_add_round_pd(A, B, R) \
+  ((__m256d)__builtin_ia32_vaddpd256_round((__v4df)(__m256d)(A), \
+                                           (__v4df)(__m256d)(B), (int)(R)))
+
+#define _mm256_mask_add_round_pd(W, U, A, B, R) \
+  ((__m256d)__builtin_ia32_selectpd_256( \
+      (__mmask8)(U), (__v4df)_mm256_add_round_pd((A), (B), (R)), \
+      (__v4df)(__m256d)(W)))
+
+#define _mm256_maskz_add_round_pd(U, A, B, R) \
+  ((__m256d)__builtin_ia32_selectpd_256( \
+      (__mmask8)(U), (__v4df)_mm256_add_round_pd((A), (B), (R)), \
+      (__v4df)_mm256_setzero_pd()))
+
+#define _mm256_add_round_ph(A, B, R) \
+  ((__m256h)__builtin_ia32_vaddph256_round((__v16hf)(__m256h)(A), \
+                                           (__v16hf)(__m256h)(B), (int)(R)))
+
+#define _mm256_mask_add_round_ph(W, U, A, B, R) \
+  ((__m256h)__builtin_ia32_selectph_256( \
+      (__mmask16)(U), (__v16hf)_mm256_add_round_ph((A), (B), (R)), \
+      (__v16hf)(__m256h)(W)))
+
+#define _mm256_maskz_add_round_ph(U, A, B, R) \
+  ((__m256h)__builtin_ia32_selectph_256( \
+      (__mmask16)(U), (__v16hf)_mm256_add_round_ph((A), (B), (R)), \
+      (__v16hf)_mm256_setzero_ph()))
+
+#define _mm256_add_round_ps(A, B, R) \
+  ((__m256)__builtin_ia32_vaddps256_round((__v8sf)(__m256)(A), \
+                                          (__v8sf)(__m256)(B), (int)(R)))
+
+#define _mm256_mask_add_round_ps(W, U, A, B, R) \
+  ((__m256)__builtin_ia32_selectps_256( \
+      (__mmask8)(U), (__v8sf)_mm256_add_round_ps((A), (B), (R)), \
+      (__v8sf)(__m256)(W)))
+
+#define _mm256_maskz_add_round_ps(U, A, B, R) \
+  ((__m256)__builtin_ia32_selectps_256( \
+      (__mmask8)(U), (__v8sf)_mm256_add_round_ps((A), (B), (R)), \
+      (__v8sf)_mm256_setzero_ps()))
+
+#endif /* __AVX10_2NIINTRIN_H */
+#endif /* __SSE2__ */
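
For context, here is a minimal, hypothetical usage sketch of the masked VMPSADBW wrappers added above. The helper names are invented for illustration and are not part of the commit, and the sketch assumes a compiler and target with the corresponding AVX10.2 256-bit feature enabled (e.g. -mavx10.2-256). Merge-masking keeps the destination word from the source operand where a mask bit is clear; zero-masking writes zero there.

#include <immintrin.h>

/* Hypothetical helpers, not part of the commit. The immediate selects the
 * block offsets exactly as for the unmasked _mm_mpsadbw_epu8. */
static inline __m128i masked_mpsadbw(__m128i src, __mmask8 k, __m128i a,
                                     __m128i b) {
  /* Words whose mask bit is 0 keep the corresponding word from src. */
  return _mm_mask_mpsadbw_epu8(src, k, a, b, 0);
}

static inline __m128i zeroed_mpsadbw(__mmask8 k, __m128i a, __m128i b) {
  /* Words whose mask bit is 0 are set to zero. */
  return _mm_maskz_mpsadbw_epu8(k, a, b, 0);
}

Note that the header implements the masked forms purely by wrapping the existing unmasked intrinsic in the generic __builtin_ia32_selectw_128/256 blend builtins, so no new builtin is needed for masking.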
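
Similarly, a hedged sketch of the YMM rounding-control additions: the rounding argument is expected to be a compile-time constant, such as a combination of the _MM_FROUND_* constants that <immintrin.h> already provides, and the function names below are illustrative only.

#include <immintrin.h>

/* Hypothetical helper, not part of the commit: add with round-toward-zero and
 * suppressed exceptions for this one operation, without modifying MXCSR. */
static inline __m256d add_pd_toward_zero(__m256d a, __m256d b) {
  return _mm256_add_round_pd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

/* Merge-masked variant: lanes with a clear bit in k keep the value from src. */
static inline __m256d add_pd_masked_nearest(__m256d src, __mmask8 k, __m256d a,
                                            __m256d b) {
  return _mm256_mask_add_round_pd(
      src, k, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}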