From ee0b13bf42d7d6d85a27accaf3f48018b88cca42 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 15 Apr 2023 17:53:39 -0700 Subject: [PATCH 1/5] Updating XML doc comments for hardware intrinsics to include VEX and EVEX forms --- .../X86/Aes.PlatformNotSupported.cs | 20 +- .../src/System/Runtime/Intrinsics/X86/Aes.cs | 18 +- .../X86/Avx.PlatformNotSupported.cs | 1025 +++++----- .../src/System/Runtime/Intrinsics/X86/Avx.cs | 1032 ++++++----- .../X86/Avx2.PlatformNotSupported.cs | 1167 +++++++----- .../src/System/Runtime/Intrinsics/X86/Avx2.cs | 1166 +++++++----- .../System/Runtime/Intrinsics/X86/Avx512BW.cs | 1 - .../System/Runtime/Intrinsics/X86/Avx512DQ.cs | 1 - .../X86/Avx512F.PlatformNotSupported.cs | 310 ++-- .../System/Runtime/Intrinsics/X86/Avx512F.cs | 311 ++-- .../X86/Bmi1.PlatformNotSupported.cs | 28 +- .../src/System/Runtime/Intrinsics/X86/Bmi1.cs | 28 +- .../X86/Bmi2.PlatformNotSupported.cs | 20 +- .../src/System/Runtime/Intrinsics/X86/Bmi2.cs | 20 +- .../X86/Fma.PlatformNotSupported.cs | 96 +- .../src/System/Runtime/Intrinsics/X86/Fma.cs | 96 +- .../X86/Lzcnt.PlatformNotSupported.cs | 4 +- .../System/Runtime/Intrinsics/X86/Lzcnt.cs | 4 +- .../X86/Pclmulqdq.PlatformNotSupported.cs | 6 +- .../Runtime/Intrinsics/X86/Pclmulqdq.cs | 6 +- .../X86/Popcnt.PlatformNotSupported.cs | 4 +- .../System/Runtime/Intrinsics/X86/Popcnt.cs | 4 +- .../X86/Sse.PlatformNotSupported.cs | 549 +++--- .../src/System/Runtime/Intrinsics/X86/Sse.cs | 537 +++--- .../X86/Sse2.PlatformNotSupported.cs | 1640 ++++++++++------ .../src/System/Runtime/Intrinsics/X86/Sse2.cs | 1642 +++++++++++------ .../X86/Sse3.PlatformNotSupported.cs | 74 +- .../src/System/Runtime/Intrinsics/X86/Sse3.cs | 74 +- .../X86/Sse41.PlatformNotSupported.cs | 702 ++++--- .../System/Runtime/Intrinsics/X86/Sse41.cs | 702 ++++--- .../X86/Sse42.PlatformNotSupported.cs | 11 +- .../System/Runtime/Intrinsics/X86/Sse42.cs | 11 +- .../X86/Ssse3.PlatformNotSupported.cs | 96 +- .../System/Runtime/Intrinsics/X86/Ssse3.cs | 96 +- 34 files changed, 6854 insertions(+), 4647 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Aes.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Aes.PlatformNotSupported.cs index 72da97c9c4b77d..b4a56e0b9d60a7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Aes.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Aes.PlatformNotSupported.cs @@ -27,40 +27,44 @@ internal X64() { } /// /// __m128i _mm_aesdec_si128 (__m128i a, __m128i RoundKey) - /// AESDEC xmm, xmm/m128 + /// AESDEC xmm1, xmm2/m128 + /// VAESDEC xmm1, xmm2, xmm3/m128 /// public static Vector128 Decrypt(Vector128 value, Vector128 roundKey) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_aesdeclast_si128 (__m128i a, __m128i RoundKey) - /// AESDECLAST xmm, xmm/m128 + /// AESDECLAST xmm1, xmm2/m128 + /// VAESDECLAST xmm1, xmm2, xmm3/m128 /// public static Vector128 DecryptLast(Vector128 value, Vector128 roundKey) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_aesenc_si128 (__m128i a, __m128i RoundKey) - /// AESENC xmm, xmm/m128 + /// AESENC xmm1, xmm2/m128 + /// VAESENC xmm1, xmm2, xmm3/m128 /// public static Vector128 Encrypt(Vector128 value, Vector128 roundKey) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_aesenclast_si128 (__m128i a, __m128i RoundKey) - /// AESENCLAST xmm, xmm/m128 + /// AESENCLAST xmm1, xmm2/m128 + /// 
VAESENCLAST xmm1, xmm2, xmm3/m128 /// public static Vector128 EncryptLast(Vector128 value, Vector128 roundKey) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_aesimc_si128 (__m128i a) - /// AESIMC xmm, xmm/m128 + /// AESIMC xmm1, xmm2/m128 + /// VAESIMC xmm1, xmm2/m128 /// public static Vector128 InverseMixColumns(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_aeskeygenassist_si128 (__m128i a, const int imm8) - /// AESKEYGENASSIST xmm, xmm/m128, imm8 + /// AESKEYGENASSIST xmm1, xmm2/m128, imm8 + /// VAESKEYGENASSIST xmm1, xmm2/m128, imm8 /// public static Vector128 KeygenAssist(Vector128 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } - } - } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Aes.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Aes.cs index 77008b678c92db..1424a66dc83521 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Aes.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Aes.cs @@ -27,37 +27,43 @@ internal X64() { } /// /// __m128i _mm_aesdec_si128 (__m128i a, __m128i RoundKey) - /// AESDEC xmm, xmm/m128 + /// AESDEC xmm1, xmm2/m128 + /// VAESDEC xmm1, xmm2, xmm3/m128 /// public static Vector128 Decrypt(Vector128 value, Vector128 roundKey) => Decrypt(value, roundKey); /// /// __m128i _mm_aesdeclast_si128 (__m128i a, __m128i RoundKey) - /// AESDECLAST xmm, xmm/m128 + /// AESDECLAST xmm1, xmm2/m128 + /// VAESDECLAST xmm1, xmm2, xmm3/m128 /// public static Vector128 DecryptLast(Vector128 value, Vector128 roundKey) => DecryptLast(value, roundKey); /// /// __m128i _mm_aesenc_si128 (__m128i a, __m128i RoundKey) - /// AESENC xmm, xmm/m128 + /// AESENC xmm1, xmm2/m128 + /// VAESENC xmm1, xmm2, xmm3/m128 /// public static Vector128 Encrypt(Vector128 value, Vector128 roundKey) => Encrypt(value, roundKey); /// /// __m128i _mm_aesenclast_si128 (__m128i a, __m128i RoundKey) - /// AESENCLAST xmm, xmm/m128 + /// AESENCLAST xmm1, xmm2/m128 + /// VAESENCLAST xmm1, xmm2, xmm3/m128 /// public static Vector128 EncryptLast(Vector128 value, Vector128 roundKey) => EncryptLast(value, roundKey); /// /// __m128i _mm_aesimc_si128 (__m128i a) - /// AESIMC xmm, xmm/m128 + /// AESIMC xmm1, xmm2/m128 + /// VAESIMC xmm1, xmm2/m128 /// public static Vector128 InverseMixColumns(Vector128 value) => InverseMixColumns(value); /// /// __m128i _mm_aeskeygenassist_si128 (__m128i a, const int imm8) - /// AESKEYGENASSIST xmm, xmm/m128, imm8 + /// AESKEYGENASSIST xmm1, xmm2/m128, imm8 + /// VAESKEYGENASSIST xmm1, xmm2/m128, imm8 /// public static Vector128 KeygenAssist(Vector128 value, [ConstantExpected] byte control) => KeygenAssist(value, control); } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.PlatformNotSupported.cs index 715cebdfe192fc..0f2b983cfd2745 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.PlatformNotSupported.cs @@ -27,347 +27,359 @@ internal X64() { } /// /// __m256 _mm256_add_ps (__m256 a, __m256 b) - /// VADDPS ymm, ymm, ymm/m256 + /// VADDPS ymm1, ymm2, ymm3/m256 + /// VADDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Add(Vector256 left, Vector256 right) { throw new 
PlatformNotSupportedException(); } /// /// __m256d _mm256_add_pd (__m256d a, __m256d b) - /// VADDPD ymm, ymm, ymm/m256 + /// VADDPD ymm1, ymm2, ymm3/m256 + /// VADDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Add(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_addsub_ps (__m256 a, __m256 b) - /// VADDSUBPS ymm, ymm, ymm/m256 + /// VADDSUBPS ymm1, ymm2, ymm3/m256 /// public static Vector256 AddSubtract(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_addsub_pd (__m256d a, __m256d b) - /// VADDSUBPD ymm, ymm, ymm/m256 + /// VADDSUBPD ymm1, ymm2, ymm3/m256 /// public static Vector256 AddSubtract(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_and_ps (__m256 a, __m256 b) - /// VANDPS ymm, ymm, ymm/m256 + /// VANDPS ymm1, ymm2, ymm3/m256 + /// VANDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 And(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_and_pd (__m256d a, __m256d b) - /// VANDPD ymm, ymm, ymm/m256 + /// VANDPD ymm1, ymm2, ymm3/m256 + /// VANDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 And(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_andnot_ps (__m256 a, __m256 b) - /// VANDNPS ymm, ymm, ymm/m256 + /// VANDNPS ymm1, ymm2, ymm3/m256 + /// VANDNPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 AndNot(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_andnot_pd (__m256d a, __m256d b) - /// VANDNPD ymm, ymm, ymm/m256 + /// VANDNPD ymm1, ymm2, ymm3/m256 + /// VANDNPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 AndNot(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_blend_ps (__m256 a, __m256 b, const int imm8) - /// VBLENDPS ymm, ymm, ymm/m256, imm8 + /// VBLENDPS ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Blend(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_blend_pd (__m256d a, __m256d b, const int imm8) - /// VBLENDPD ymm, ymm, ymm/m256, imm8 + /// VBLENDPD ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Blend(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_blendv_ps (__m256 a, __m256 b, __m256 mask) - /// VBLENDVPS ymm, ymm, ymm/m256, ymm + /// VBLENDVPS ymm1, ymm2, ymm3/m256, ymm4 /// public static Vector256 BlendVariable(Vector256 left, Vector256 right, Vector256 mask) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_blendv_pd (__m256d a, __m256d b, __m256d mask) - /// VBLENDVPD ymm, ymm, ymm/m256, ymm + /// VBLENDVPD ymm1, ymm2, ymm3/m256, ymm4 /// public static Vector256 BlendVariable(Vector256 left, Vector256 right, Vector256 mask) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_broadcast_ss (float const * mem_addr) - /// VBROADCASTSS xmm, m32 + /// VBROADCASTSS xmm1, m32 + /// VBROADCASTSS xmm1 {k1}{z}, m32 /// public static unsafe Vector128 BroadcastScalarToVector128(float* source) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_broadcast_ss (float const * mem_addr) - /// VBROADCASTSS ymm, m32 + /// VBROADCASTSS ymm1, m32 + /// VBROADCASTSS ymm1 
{k1}{z}, m32 /// public static unsafe Vector256 BroadcastScalarToVector256(float* source) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_broadcast_sd (double const * mem_addr) - /// VBROADCASTSD ymm, m64 + /// VBROADCASTSD ymm1, m64 + /// VBROADCASTSD ymm1 {k1}{z}, m64 /// public static unsafe Vector256 BroadcastScalarToVector256(double* source) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_broadcast_ps (__m128 const * mem_addr) - /// VBROADCASTF128, ymm, m128 + /// VBROADCASTF128 ymm1, m128 + /// VBROADCASTF32x4 ymm1 {k1}{z}, m128 /// public static unsafe Vector256 BroadcastVector128ToVector256(float* address) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_broadcast_pd (__m128d const * mem_addr) - /// VBROADCASTF128, ymm, m128 + /// VBROADCASTF128 ymm1, m128 + /// VBROADCASTF64x2 ymm1 {k1}{z}, m128 /// public static unsafe Vector256 BroadcastVector128ToVector256(double* address) { throw new PlatformNotSupportedException(); } /// - /// __m256 _mm256_ceil_ps (__m256 a) - /// VROUNDPS ymm, ymm/m256, imm8(10) + /// __m256 _mm256_ceil_ps (__m256 a) + /// VROUNDPS ymm1, ymm2/m256, imm8(10) /// public static Vector256 Ceiling(Vector256 value) { throw new PlatformNotSupportedException(); } /// - /// __m256d _mm256_ceil_pd (__m256d a) - /// VROUNDPD ymm, ymm/m256, imm8(10) + /// __m256d _mm256_ceil_pd (__m256d a) + /// VROUNDPD ymm1, ymm2/m256, imm8(10) /// public static Vector256 Ceiling(Vector256 value) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_cmp_ps (__m128 a, __m128 b, const int imm8) - /// VCMPPS xmm, xmm, xmm/m128, imm8 + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 Compare(Vector128 left, Vector128 right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_cmp_pd (__m128d a, __m128d b, const int imm8) - /// VCMPPD xmm, xmm, xmm/m128, imm8 - /// - public static Vector128 Compare(Vector128 left, Vector128 right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) { throw new PlatformNotSupportedException(); } - /// /// __m256 _mm256_cmp_ps (__m256 a, __m256 b, const int imm8) - /// VCMPPS ymm, ymm, ymm/m256, imm8 + /// VCMPPS ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Compare(Vector256 left, Vector256 right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) { throw new PlatformNotSupportedException(); } - /// - /// __m256d _mm256_cmp_pd (__m256d a, __m256d b, const int imm8) - /// VCMPPD ymm, ymm, ymm/m256, imm8 - /// - public static Vector256 Compare(Vector256 left, Vector256 right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) { throw new PlatformNotSupportedException(); } - /// /// __m256 _mm256_cmpeq_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(0) + /// VCMPPS ymm1, ymm2/m256, imm8(0) /// The above native signature does not exist. We provide this additional overload for completeness. /// public static Vector256 CompareEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// - /// __m256d _mm256_cmpeq_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(0) - /// The above native signature does not exist. We provide this additional overload for completeness. 
- /// - public static Vector256 CompareEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// __m256 _mm256_cmpgt_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(14) + /// VCMPPS ymm1, ymm2/m256, imm8(14) /// The above native signature does not exist. We provide this additional overload for completeness. /// public static Vector256 CompareGreaterThan(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// - /// __m256d _mm256_cmpgt_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(14) - /// The above native signature does not exist. We provide this additional overload for completeness. - /// - public static Vector256 CompareGreaterThan(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// __m256 _mm256_cmpge_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(13) + /// VCMPPS ymm1, ymm2/m256, imm8(13) /// The above native signature does not exist. We provide this additional overload for completeness. /// public static Vector256 CompareGreaterThanOrEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// - /// __m256d _mm256_cmpge_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(13) + /// __m256 _mm256_cmplt_ps (__m256 a, __m256 b) + /// VCMPPS ymm1, ymm2/m256, imm8(1) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareGreaterThanOrEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - + public static Vector256 CompareLessThan(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// - /// __m256 _mm256_cmplt_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(1) + /// __m256 _mm256_cmple_ps (__m256 a, __m256 b) + /// VCMPPS ymm1, ymm2/m256, imm8(2) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareLessThan(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + public static Vector256 CompareLessThanOrEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// - /// __m256d _mm256_cmplt_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(1) + /// __m256 _mm256_cmpneq_ps (__m256 a, __m256 b) + /// VCMPPS ymm1, ymm2/m256, imm8(4) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareLessThan(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - + public static Vector256 CompareNotEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// - /// __m256 _mm256_cmple_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(2) + /// __m256 _mm256_cmpngt_ps (__m256 a, __m256 b) + /// VCMPPS ymm1, ymm2/m256, imm8(10) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareLessThanOrEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + public static Vector256 CompareNotGreaterThan(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// - /// __m256d _mm256_cmple_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(2) + /// __m256 _mm256_cmpnge_ps (__m256 a, __m256 b) + /// VCMPPS ymm1, ymm2/m256, imm8(9) /// The above native signature does not exist. 
We provide this additional overload for completeness. /// - public static Vector256 CompareNotGreaterThanOrEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - + public static Vector256 CompareNotGreaterThanOrEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// - /// __m256 _mm256_cmpnlt_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(5) + /// __m256 _mm256_cmpnlt_ps (__m256 a, __m256 b) + /// VCMPPS ymm1, ymm2/m256, imm8(5) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareNotLessThan(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + public static Vector256 CompareNotLessThan(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// - /// __m256d _mm256_cmpnlt_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(5) + /// __m256 _mm256_cmpnle_ps (__m256 a, __m256 b) + /// VCMPPS ymm1, ymm2/m256, imm8(6) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareNotLessThan(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - + public static Vector256 CompareNotLessThanOrEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// - /// __m256 _mm256_cmpnle_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(6) + /// __m256 _mm256_cmpord_ps (__m256 a, __m256 b) + /// VCMPPS ymm1, ymm2/m256, imm8(7) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareNotLessThanOrEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + public static Vector256 CompareOrdered(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// - /// __m256d _mm256_cmpnle_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(6) + /// __m256 _mm256_cmpunord_ps (__m256 a, __m256 b) + /// VCMPPS ymm1, ymm2/m256, imm8(3) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareNotLessThanOrEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - + public static Vector256 CompareUnordered(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// - /// __m256 _mm256_cmpord_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(7) + /// __m128d _mm_cmp_pd (__m128d a, __m128d b, const int imm8) + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8 + /// + public static Vector128 Compare(Vector128 left, Vector128 right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) { throw new PlatformNotSupportedException(); } + /// + /// __m256d _mm256_cmp_pd (__m256d a, __m256d b, const int imm8) + /// VCMPPD ymm1, ymm2, ymm3/m256, imm8 + /// + public static Vector256 Compare(Vector256 left, Vector256 right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) { throw new PlatformNotSupportedException(); } + /// + /// __m256d _mm256_cmpeq_pd (__m256d a, __m256d b) + /// VCMPPD ymm1, ymm2/m256, imm8(0) /// The above native signature does not exist. We provide this additional overload for completeness. 
/// - public static Vector256 CompareNotGreaterThanOrEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + public static Vector256 CompareEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// - /// __m256d _mm256_cmpnge_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(9) + /// __m256d _mm256_cmpgt_pd (__m256d a, __m256d b) + /// VCMPPD ymm1, ymm2/m256, imm8(14) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareNotGreaterThanOrEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - + public static Vector256 CompareGreaterThan(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// - /// __m256 _mm256_cmpnlt_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(5) + /// __m256d _mm256_cmpge_pd (__m256d a, __m256d b) + /// VCMPPD ymm1, ymm2/m256, imm8(13) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareNotLessThan(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + public static Vector256 CompareGreaterThanOrEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// - /// __m256d _mm256_cmpnlt_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(5) + /// __m256d _mm256_cmplt_pd (__m256d a, __m256d b) + /// VCMPPD ymm1, ymm2/m256, imm8(1) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareNotLessThan(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - + public static Vector256 CompareLessThan(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// - /// __m256 _mm256_cmpnle_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(6) + /// __m256d _mm256_cmple_pd (__m256d a, __m256d b) + /// VCMPPD ymm1, ymm2/m256, imm8(2) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareNotLessThanOrEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + public static Vector256 CompareLessThanOrEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// - /// __m256d _mm256_cmpnle_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(6) + /// __m256d _mm256_cmpneq_pd (__m256d a, __m256d b) + /// VCMPPD ymm1, ymm2/m256, imm8(4) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareNotLessThanOrEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - + public static Vector256 CompareNotEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// - /// __m256 _mm256_cmpord_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(7) + /// __m256d _mm256_cmpngt_pd (__m256d a, __m256d b) + /// VCMPPD ymm1, ymm2/m256, imm8(10) /// The above native signature does not exist. We provide this additional overload for completeness. 
/// - public static Vector256 CompareOrdered(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + public static Vector256 CompareNotGreaterThan(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// - /// __m256d _mm256_cmpord_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(7) + /// __m256d _mm256_cmpnge_pd (__m256d a, __m256d b) + /// VCMPPD ymm1, ymm2/m256, imm8(9) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareOrdered(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - + public static Vector256 CompareNotGreaterThanOrEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_cmp_sd (__m128d a, __m128d b, const int imm8) - /// VCMPSS xmm, xmm, xmm/m32, imm8 + /// __m256d _mm256_cmpnlt_pd (__m256d a, __m256d b) + /// VCMPPD ymm1, ymm2/m256, imm8(5) + /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector128 CompareScalar(Vector128 left, Vector128 right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) { throw new PlatformNotSupportedException(); } + public static Vector256 CompareNotLessThan(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cmp_ss (__m128 a, __m128 b, const int imm8) - /// VCMPSD xmm, xmm, xmm/m64, imm8 + /// __m256d _mm256_cmpnle_pd (__m256d a, __m256d b) + /// VCMPPD ymm1, ymm2/m256, imm8(6) + /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector128 CompareScalar(Vector128 left, Vector128 right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) { throw new PlatformNotSupportedException(); } - + public static Vector256 CompareNotLessThanOrEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// - /// __m256 _mm256_cmpunord_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(3) + /// __m256d _mm256_cmpord_pd (__m256d a, __m256d b) + /// VCMPPD ymm1, ymm2/m256, imm8(7) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareUnordered(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + public static Vector256 CompareOrdered(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_cmpunord_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(3) + /// VCMPPD ymm1, ymm2/m256, imm8(3) /// The above native signature does not exist. We provide this additional overload for completeness. 
/// public static Vector256 CompareUnordered(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + /// + /// __m128 _mm_cmp_ss (__m128 a, __m128 b, const int imm8) + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8 + /// + public static Vector128 CompareScalar(Vector128 left, Vector128 right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) { throw new PlatformNotSupportedException(); } + /// + /// __m128d _mm_cmp_sd (__m128d a, __m128d b, const int imm8) + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8 + /// + public static Vector128 CompareScalar(Vector128 left, Vector128 right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) { throw new PlatformNotSupportedException(); } + /// /// __m128i _mm256_cvtpd_epi32 (__m256d a) - /// VCVTPD2DQ xmm, ymm/m256 + /// VCVTPD2DQ xmm1, ymm2/m256 + /// VCVTPD2DQ xmm1 {k1}{z}, ymm2/m256/m64bcst /// public static Vector128 ConvertToVector128Int32(Vector256 value) { throw new PlatformNotSupportedException(); } + /// /// __m128 _mm256_cvtpd_ps (__m256d a) - /// VCVTPD2PS xmm, ymm/m256 + /// VCVTPD2PS xmm1, ymm2/m256 + /// VCVTPD2PS xmm1 {k1}{z}, ymm2/m256/m64bcst /// public static Vector128 ConvertToVector128Single(Vector256 value) { throw new PlatformNotSupportedException(); } + /// - /// __m256i _mm256_cvtps_epi32 (__m256 a) - /// VCVTPS2DQ ymm, ymm/m256 - /// - public static Vector256 ConvertToVector256Int32(Vector256 value) { throw new PlatformNotSupportedException(); } - /// - /// __m256 _mm256_cvtepi32_ps (__m256i a) - /// VCVTDQ2PS ymm, ymm/m256 + /// __m256d _mm256_cvtepi32_pd (__m128i a) + /// VCVTDQ2PD ymm1, xmm2/m128 + /// VCVTDQ2PD ymm1 {k1}{z}, xmm2/m128/m32bcst /// - public static Vector256 ConvertToVector256Single(Vector256 value) { throw new PlatformNotSupportedException(); } + public static Vector256 ConvertToVector256Double(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_cvtps_pd (__m128 a) - /// VCVTPS2PD ymm, xmm/m128 + /// VCVTPS2PD ymm1, xmm2/m128 + /// VCVTPS2PD ymm1 {k1}{z}, xmm2/m128/m32bcst /// public static Vector256 ConvertToVector256Double(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m256d _mm256_cvtepi32_pd (__m128i a) - /// VCVTDQ2PD ymm, xmm/m128 + /// __m256i _mm256_cvtps_epi32 (__m256 a) + /// VCVTPS2DQ ymm1, ymm2/m256 + /// VCVTPS2DQ ymm1 {k1}{z}, ymm2/m256/m32bcst /// - public static Vector256 ConvertToVector256Double(Vector128 value) { throw new PlatformNotSupportedException(); } - + public static Vector256 ConvertToVector256Int32(Vector256 value) { throw new PlatformNotSupportedException(); } /// - /// __m128i _mm256_cvttpd_epi32 (__m256d a) - /// VCVTTPD2DQ xmm, ymm/m256 + /// __m256 _mm256_cvtepi32_ps (__m256i a) + /// VCVTDQ2PS ymm1, ymm2/m256 + /// VCVTDQ2PS ymm1 {k1}{z}, ymm2/m256/m32bcst /// - public static Vector128 ConvertToVector128Int32WithTruncation(Vector256 value) { throw new PlatformNotSupportedException(); } + public static Vector256 ConvertToVector256Single(Vector256 value) { throw new PlatformNotSupportedException(); } + /// /// __m256i _mm256_cvttps_epi32 (__m256 a) - /// VCVTTPS2DQ ymm, ymm/m256 + /// VCVTTPS2DQ ymm1, ymm2/m256 + /// VCVTTPS2DQ ymm1 {k1}{z}, ymm2/m256/m32bcst /// public static Vector256 ConvertToVector256Int32WithTruncation(Vector256 value) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm256_cvttpd_epi32 (__m256d a) + /// VCVTTPD2DQ xmm1, ymm2/m256 + /// VCVTTPD2DQ xmm1 {k1}{z}, 
ymm2/m256/m64bcst + /// + public static Vector128 ConvertToVector128Int32WithTruncation(Vector256 value) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_div_ps (__m256 a, __m256 b) - /// VDIVPS ymm, ymm, ymm/m256 + /// VDIVPS ymm1, ymm2, ymm3/m256 + /// VDIVPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Divide(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_div_pd (__m256d a, __m256d b) - /// VDIVPD ymm, ymm, ymm/m256 + /// VDIVPD ymm1, ymm2, ymm3/m256 + /// VDIVPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Divide(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } @@ -379,1011 +391,1056 @@ internal X64() { } /// /// __m256 _mm256_moveldup_ps (__m256 a) - /// VMOVSLDUP ymm, ymm/m256 + /// VMOVSLDUP ymm1, ymm2/m256 + /// VMOVSLDUP ymm1 {k1}{z}, ymm2/m256 /// public static Vector256 DuplicateEvenIndexed(Vector256 value) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_movedup_pd (__m256d a) - /// VMOVDDUP ymm, ymm/m256 + /// VMOVDDUP ymm1, ymm2/m256 + /// VMOVDDUP ymm1 {k1}{z}, ymm2/m256 /// public static Vector256 DuplicateEvenIndexed(Vector256 value) { throw new PlatformNotSupportedException(); } - /// /// __m256 _mm256_movehdup_ps (__m256 a) - /// VMOVSHDUP ymm, ymm/m256 + /// VMOVSHDUP ymm1, ymm2/m256 + /// VMOVSHDUP ymm1 {k1}{z}, ymm2/m256 /// public static Vector256 DuplicateOddIndexed(Vector256 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8) - /// VEXTRACTF128 xmm/m128, ymm, imm8 + /// VEXTRACTF128 xmm1/m128, ymm2, imm8 + /// VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8) - /// VEXTRACTF128 xmm/m128, ymm, imm8 + /// VEXTRACTF128 xmm1/m128, ymm2, imm8 + /// VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8) - /// VEXTRACTF128 xmm/m128, ymm, imm8 + /// VEXTRACTF128 xmm1/m128, ymm2, imm8 + /// VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8) - /// VEXTRACTF128 xmm/m128, ymm, imm8 + /// VEXTRACTF128 xmm1/m128, ymm2, imm8 + /// VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8) - /// VEXTRACTF128 xmm/m128, ymm, imm8 + /// VEXTRACTF128 xmm1/m128, ymm2, imm8 + /// VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8) - /// VEXTRACTF128 xmm/m128, ymm, imm8 + /// VEXTRACTF128 xmm1/m128, ymm2, imm8 + /// VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) { throw new 
PlatformNotSupportedException(); } - /// /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8) - /// VEXTRACTF128 xmm/m128, ymm, imm8 + /// VEXTRACTF128 xmm1/m128, ymm2, imm8 + /// VEXTRACTF64x2 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8) - /// VEXTRACTF128 xmm/m128, ymm, imm8 + /// VEXTRACTF128 xmm1/m128, ymm2, imm8 + /// VEXTRACTF64x2 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m128 _mm256_extractf128_ps (__m256 a, const int imm8) - /// VEXTRACTF128 xmm/m128, ymm, imm8 + /// VEXTRACTF128 xmm1/m128, ymm2, imm8 + /// VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m128d _mm256_extractf128_pd (__m256d a, const int imm8) - /// VEXTRACTF128 xmm/m128, ymm, imm8 + /// VEXTRACTF128 xmm1/m128, ymm2, imm8 + /// VEXTRACTF64x2 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_floor_ps (__m256 a) - /// VROUNDPS ymm, ymm/m256, imm8(9) + /// VROUNDPS ymm1, ymm2/m256, imm8(9) /// public static Vector256 Floor(Vector256 value) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_floor_pd (__m256d a) - /// VROUNDPS ymm, ymm/m256, imm8(9) + /// VROUNDPD ymm1, ymm2/m256, imm8(9) /// public static Vector256 Floor(Vector256 value) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_hadd_ps (__m256 a, __m256 b) - /// VHADDPS ymm, ymm, ymm/m256 + /// VHADDPS ymm1, ymm2, ymm3/m256 /// public static Vector256 HorizontalAdd(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_hadd_pd (__m256d a, __m256d b) - /// VHADDPD ymm, ymm, ymm/m256 + /// VHADDPD ymm1, ymm2, ymm3/m256 /// public static Vector256 HorizontalAdd(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_hsub_ps (__m256 a, __m256 b) - /// VHSUBPS ymm, ymm, ymm/m256 + /// VHSUBPS ymm1, ymm2, ymm3/m256 /// public static Vector256 HorizontalSubtract(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_hsub_pd (__m256d a, __m256d b) - /// VHSUBPD ymm, ymm, ymm/m256 + /// VHSUBPD ymm1, ymm2, ymm3/m256 /// public static Vector256 HorizontalSubtract(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8) - /// VINSERTF128 ymm, ymm, xmm/m128, imm8 + /// VINSERTF128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8) - /// VINSERTF128 ymm, ymm, xmm/m128, imm8 + /// VINSERTF128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) { throw new 
PlatformNotSupportedException(); } - /// /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8) - /// VINSERTF128 ymm, ymm, xmm/m128, imm8 + /// VINSERTF128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8) - /// VINSERTF128 ymm, ymm, xmm/m128, imm8 + /// VINSERTF128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8) - /// VINSERTF128 ymm, ymm, xmm/m128, imm8 + /// VINSERTF128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8) - /// VINSERTF128 ymm, ymm, xmm/m128, imm8 + /// VINSERTF128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8) - /// VINSERTF128 ymm, ymm, xmm/m128, imm8 + /// VINSERTF128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTF64x2 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8) - /// VINSERTF128 ymm, ymm, xmm/m128, imm8 + /// VINSERTF128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTF64x2 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m256 _mm256_insertf128_ps (__m256 a, __m128 b, int imm8) - /// VINSERTF128 ymm, ymm, xmm/m128, imm8 + /// VINSERTF128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m256d _mm256_insertf128_pd (__m256d a, __m128d b, int imm8) - /// VINSERTF128 ymm, ymm, xmm/m128, imm8 + /// VINSERTF128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTF64x2 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// - /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) - /// VMOVDQU ymm, m256 - /// - public static unsafe Vector256 LoadVector256(sbyte* address) { throw new PlatformNotSupportedException(); } - /// - /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) - /// VMOVDQU ymm, m256 - /// - public static unsafe Vector256 LoadVector256(byte* address) { throw new PlatformNotSupportedException(); } - /// - /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) - /// VMOVDQU ymm, m256 - /// - public static unsafe 
Vector256 LoadVector256(short* address) { throw new PlatformNotSupportedException(); } - /// - /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) - /// VMOVDQU ymm, m256 - /// - public static unsafe Vector256 LoadVector256(ushort* address) { throw new PlatformNotSupportedException(); } - /// - /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) - /// VMOVDQU ymm, m256 - /// - public static unsafe Vector256 LoadVector256(int* address) { throw new PlatformNotSupportedException(); } - /// - /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) - /// VMOVDQU ymm, m256 - /// - public static unsafe Vector256 LoadVector256(uint* address) { throw new PlatformNotSupportedException(); } - /// - /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) - /// VMOVDQU ymm, m256 - /// - public static unsafe Vector256 LoadVector256(long* address) { throw new PlatformNotSupportedException(); } - /// - /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) - /// VMOVDQU ymm, m256 - /// - public static unsafe Vector256 LoadVector256(ulong* address) { throw new PlatformNotSupportedException(); } - /// - /// __m256 _mm256_loadu_ps (float const * mem_addr) - /// VMOVUPS ymm, ymm/m256 - /// - public static unsafe Vector256 LoadVector256(float* address) { throw new PlatformNotSupportedException(); } - /// - /// __m256d _mm256_loadu_pd (double const * mem_addr) - /// VMOVUPD ymm, ymm/m256 - /// - public static unsafe Vector256 LoadVector256(double* address) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_load_si256 (__m256i const * mem_addr) - /// VMOVDQA ymm, m256 + /// VMOVDQA ymm1, m256 + /// VMOVDQA32 ymm1 {k1}{z}, m256 /// public static unsafe Vector256 LoadAlignedVector256(sbyte* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_load_si256 (__m256i const * mem_addr) - /// VMOVDQA ymm, m256 + /// VMOVDQA ymm1, m256 + /// VMOVDQA32 ymm1 {k1}{z}, m256 /// public static unsafe Vector256 LoadAlignedVector256(byte* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_load_si256 (__m256i const * mem_addr) - /// VMOVDQA ymm, m256 + /// VMOVDQA ymm1, m256 + /// VMOVDQA32 ymm1 {k1}{z}, m256 /// public static unsafe Vector256 LoadAlignedVector256(short* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_load_si256 (__m256i const * mem_addr) - /// VMOVDQA ymm, m256 + /// VMOVDQA ymm1, m256 + /// VMOVDQA32 ymm1 {k1}{z}, m256 /// public static unsafe Vector256 LoadAlignedVector256(ushort* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_load_si256 (__m256i const * mem_addr) - /// VMOVDQA ymm, m256 + /// VMOVDQA ymm1, m256 + /// VMOVDQA32 ymm1 {k1}{z}, m256 /// public static unsafe Vector256 LoadAlignedVector256(int* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_load_si256 (__m256i const * mem_addr) - /// VMOVDQA ymm, m256 + /// VMOVDQA ymm1, m256 + /// VMOVDQA32 ymm1 {k1}{z}, m256 /// public static unsafe Vector256 LoadAlignedVector256(uint* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_load_si256 (__m256i const * mem_addr) - /// VMOVDQA ymm, m256 + /// VMOVDQA ymm1, m256 + /// VMOVDQA64 ymm1 {k1}{z}, m256 /// public static unsafe Vector256 LoadAlignedVector256(long* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_load_si256 (__m256i const * mem_addr) - /// VMOVDQA ymm, m256 + /// VMOVDQA ymm1, m256 + /// VMOVDQA64 ymm1 {k1}{z}, m256 /// public static unsafe Vector256 
LoadAlignedVector256(ulong* address) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_load_ps (float const * mem_addr) - /// VMOVAPS ymm, ymm/m256 + /// VMOVAPS ymm1, m256 + /// VMOVAPS ymm1 {k1}{z}, m256 /// public static unsafe Vector256 LoadAlignedVector256(float* address) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_load_pd (double const * mem_addr) - /// VMOVAPD ymm, ymm/m256 + /// VMOVAPD ymm1, m256 + /// VMOVAPD ymm1 {k1}{z}, m256 /// public static unsafe Vector256 LoadAlignedVector256(double* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr) - /// VLDDQU ymm, m256 + /// VLDDQU ymm1, m256 /// public static unsafe Vector256 LoadDquVector256(sbyte* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr) - /// VLDDQU ymm, m256 + /// VLDDQU ymm1, m256 /// public static unsafe Vector256 LoadDquVector256(byte* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr) - /// VLDDQU ymm, m256 + /// VLDDQU ymm1, m256 /// public static unsafe Vector256 LoadDquVector256(short* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr) - /// VLDDQU ymm, m256 + /// VLDDQU ymm1, m256 /// public static unsafe Vector256 LoadDquVector256(ushort* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr) - /// VLDDQU ymm, m256 + /// VLDDQU ymm1, m256 /// public static unsafe Vector256 LoadDquVector256(int* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr) - /// VLDDQU ymm, m256 + /// VLDDQU ymm1, m256 /// public static unsafe Vector256 LoadDquVector256(uint* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr) - /// VLDDQU ymm, m256 + /// VLDDQU ymm1, m256 /// public static unsafe Vector256 LoadDquVector256(long* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr) - /// VLDDQU ymm, m256 + /// VLDDQU ymm1, m256 /// public static unsafe Vector256 LoadDquVector256(ulong* address) { throw new PlatformNotSupportedException(); } + /// + /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) + /// VMOVDQU ymm1, m256 + /// VMOVDQU8 ymm1 {k1}{z}, m256 + /// + public static unsafe Vector256 LoadVector256(sbyte* address) { throw new PlatformNotSupportedException(); } + /// + /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) + /// VMOVDQU ymm1, m256 + /// VMOVDQU8 ymm1 {k1}{z}, m256 + /// + public static unsafe Vector256 LoadVector256(byte* address) { throw new PlatformNotSupportedException(); } + /// + /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) + /// VMOVDQU ymm1, m256 + /// VMOVDQU16 ymm1 {k1}{z}, m256 + /// + public static unsafe Vector256 LoadVector256(short* address) { throw new PlatformNotSupportedException(); } + /// + /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) + /// VMOVDQU ymm1, m256 + /// VMOVDQU16 ymm1 {k1}{z}, m256 + /// + public static unsafe Vector256 LoadVector256(ushort* address) { throw new PlatformNotSupportedException(); } + /// + /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) + /// VMOVDQU ymm1, m256 + /// VMOVDQU32 ymm1 {k1}{z}, m256 + /// + public static unsafe Vector256 LoadVector256(int* address) 
{ throw new PlatformNotSupportedException(); } + /// + /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) + /// VMOVDQU ymm1, m256 + /// VMOVDQU32 ymm1 {k1}{z}, m256 + /// + public static unsafe Vector256 LoadVector256(uint* address) { throw new PlatformNotSupportedException(); } + /// + /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) + /// VMOVDQU ymm1, m256 + /// VMOVDQU64 ymm1 {k1}{z}, m256 + /// + public static unsafe Vector256 LoadVector256(long* address) { throw new PlatformNotSupportedException(); } + /// + /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) + /// VMOVDQU ymm1, m256 + /// VMOVDQU64 ymm1 {k1}{z}, m256 + /// + public static unsafe Vector256 LoadVector256(ulong* address) { throw new PlatformNotSupportedException(); } + /// + /// __m256 _mm256_loadu_ps (float const * mem_addr) + /// VMOVUPS ymm1, m256 + /// VMOVUPS ymm1 {k1}{z}, m256 + /// + public static unsafe Vector256 LoadVector256(float* address) { throw new PlatformNotSupportedException(); } + /// + /// __m256d _mm256_loadu_pd (double const * mem_addr) + /// VMOVUPD ymm1, m256 + /// VMOVUPD ymm1 {k1}{z}, m256 + /// + public static unsafe Vector256 LoadVector256(double* address) { throw new PlatformNotSupportedException(); } + /// /// __m128 _mm_maskload_ps (float const * mem_addr, __m128i mask) - /// VMASKMOVPS xmm, xmm, m128 + /// VMASKMOVPS xmm1, xmm2, m128 /// public static unsafe Vector128 MaskLoad(float* address, Vector128 mask) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_maskload_pd (double const * mem_addr, __m128i mask) - /// VMASKMOVPD xmm, xmm, m128 + /// VMASKMOVPD xmm1, xmm2, m128 /// public static unsafe Vector128 MaskLoad(double* address, Vector128 mask) { throw new PlatformNotSupportedException(); } - /// /// __m256 _mm256_maskload_ps (float const * mem_addr, __m256i mask) - /// VMASKMOVPS ymm, ymm, m256 + /// VMASKMOVPS ymm1, ymm2, m256 /// public static unsafe Vector256 MaskLoad(float* address, Vector256 mask) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_maskload_pd (double const * mem_addr, __m256i mask) - /// VMASKMOVPD ymm, ymm, m256 + /// VMASKMOVPD ymm1, ymm2, m256 /// public static unsafe Vector256 MaskLoad(double* address, Vector256 mask) { throw new PlatformNotSupportedException(); } /// /// void _mm_maskstore_ps (float * mem_addr, __m128i mask, __m128 a) - /// VMASKMOVPS m128, xmm, xmm + /// VMASKMOVPS m128, xmm1, xmm2 /// public static unsafe void MaskStore(float* address, Vector128 mask, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_maskstore_pd (double * mem_addr, __m128i mask, __m128d a) - /// VMASKMOVPD m128, xmm, xmm + /// VMASKMOVPD m128, xmm1, xmm2 /// public static unsafe void MaskStore(double* address, Vector128 mask, Vector128 source) { throw new PlatformNotSupportedException(); } - /// /// void _mm256_maskstore_ps (float * mem_addr, __m256i mask, __m256 a) - /// VMASKMOVPS m256, ymm, ymm + /// VMASKMOVPS m256, ymm1, ymm2 /// public static unsafe void MaskStore(float* address, Vector256 mask, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_maskstore_pd (double * mem_addr, __m256i mask, __m256d a) - /// VMASKMOVPD m256, ymm, ymm + /// VMASKMOVPD m256, ymm1, ymm2 /// public static unsafe void MaskStore(double* address, Vector256 mask, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_max_ps (__m256 a, __m256 b) - /// VMAXPS ymm, ymm, ymm/m256 + /// VMAXPS ymm1, ymm2, ymm3/m256 + /// VMAXPS ymm1 
{k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Max(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_max_pd (__m256d a, __m256d b) - /// VMAXPD ymm, ymm, ymm/m256 + /// VMAXPD ymm1, ymm2, ymm3/m256 + /// VMAXPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Max(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_min_ps (__m256 a, __m256 b) - /// VMINPS ymm, ymm, ymm/m256 + /// VMINPS ymm1, ymm2, ymm3/m256 + /// VMINPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Min(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_min_pd (__m256d a, __m256d b) - /// VMINPD ymm, ymm, ymm/m256 + /// VMINPD ymm1, ymm2, ymm3/m256 + /// VMINPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Min(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// int _mm256_movemask_ps (__m256 a) - /// VMOVMSKPS reg, ymm + /// VMOVMSKPS r32, ymm1 /// public static int MoveMask(Vector256 value) { throw new PlatformNotSupportedException(); } /// /// int _mm256_movemask_pd (__m256d a) - /// VMOVMSKPD reg, ymm + /// VMOVMSKPD r32, ymm1 /// public static int MoveMask(Vector256 value) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_mul_ps (__m256 a, __m256 b) - /// VMULPS ymm, ymm, ymm/m256 + /// VMULPS ymm1, ymm2, ymm3/m256 + /// VMULPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Multiply(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_mul_pd (__m256d a, __m256d b) - /// VMULPD ymm, ymm, ymm/m256 + /// VMULPD ymm1, ymm2, ymm3/m256 + /// VMULPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Multiply(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_or_ps (__m256 a, __m256 b) - /// VORPS ymm, ymm, ymm/m256 + /// VORPS ymm1, ymm2, ymm3/m256 + /// VORPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Or(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_or_pd (__m256d a, __m256d b) - /// VORPD ymm, ymm, ymm/m256 + /// VORPD ymm1, ymm2, ymm3/m256 + /// VORPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Or(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_permute_ps (__m128 a, int imm8) - /// VPERMILPS xmm, xmm, imm8 + /// VPERMILPS xmm1, xmm2/m128, imm8 + /// VPERMILPS xmm1 {k1}{z}, xmm2/m128/m32bcst, imm8 /// public static Vector128 Permute(Vector128 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_permute_pd (__m128d a, int imm8) - /// VPERMILPD xmm, xmm, imm8 + /// VPERMILPD xmm1, xmm2/m128, imm8 + /// VPERMILPD xmm1 {k1}{z}, xmm2/m128/m64bcst, imm8 /// public static Vector128 Permute(Vector128 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_permute_ps (__m256 a, int imm8) - /// VPERMILPS ymm, ymm, imm8 + /// VPERMILPS ymm1, ymm2/m256, imm8 + /// VPERMILPS ymm1 {k1}{z}, ymm2/m256/m32bcst, imm8 /// public static Vector256 Permute(Vector256 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_permute_pd (__m256d a, int imm8) - /// VPERMILPD ymm, ymm, imm8 + /// VPERMILPD ymm1, ymm2/m256, imm8 + /// 
VPERMILPD ymm1 {k1}{z}, ymm2/m256/m64bcst, imm8 /// public static Vector256 Permute(Vector256 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8) - /// VPERM2F128 ymm, ymm, ymm/m256, imm8 + /// VPERM2F128 ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8) - /// VPERM2F128 ymm, ymm, ymm/m256, imm8 + /// VPERM2F128 ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8) - /// VPERM2F128 ymm, ymm, ymm/m256, imm8 + /// VPERM2F128 ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8) - /// VPERM2F128 ymm, ymm, ymm/m256, imm8 + /// VPERM2F128 ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8) - /// VPERM2F128 ymm, ymm, ymm/m256, imm8 + /// VPERM2F128 ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8) - /// VPERM2F128 ymm, ymm, ymm/m256, imm8 + /// VPERM2F128 ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8) - /// VPERM2F128 ymm, ymm, ymm/m256, imm8 + /// VPERM2F128 ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8) - /// VPERM2F128 ymm, ymm, ymm/m256, imm8 + /// VPERM2F128 ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_permute2f128_ps (__m256 a, __m256 b, int imm8) - /// VPERM2F128 ymm, ymm, ymm/m256, imm8 + /// VPERM2F128 ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } - /// /// __m256d _mm256_permute2f128_pd (__m256d a, __m256d b, int imm8) - /// VPERM2F128 ymm, ymm, ymm/m256, imm8 + /// VPERM2F128 ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_permutevar_ps (__m128 a, __m128i b) - /// VPERMILPS xmm, xmm, xmm/m128 + /// VPERMILPS xmm1, xmm2, xmm3/m128 + /// VPERMILPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static 
Vector128 PermuteVar(Vector128 left, Vector128 control) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_permutevar_pd (__m128d a, __m128i b) - /// VPERMILPD xmm, xmm, xmm/m128 + /// VPERMILPD xmm1, xmm2, xmm3/m128 + /// VPERMILPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 PermuteVar(Vector128 left, Vector128 control) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_permutevar_ps (__m256 a, __m256i b) - /// VPERMILPS ymm, ymm, ymm/m256 + /// VPERMILPS ymm1, ymm2, ymm3/m256 + /// VPERMILPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 PermuteVar(Vector256 left, Vector256 control) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_permutevar_pd (__m256d a, __m256i b) - /// VPERMILPD ymm, ymm, ymm/m256 + /// VPERMILPD ymm1, ymm2, ymm3/m256 + /// VPERMILPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 PermuteVar(Vector256 left, Vector256 control) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_rcp_ps (__m256 a) - /// VRCPPS ymm, ymm/m256 + /// VRCPPS ymm1, ymm2/m256 /// public static Vector256 Reciprocal(Vector256 value) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_rsqrt_ps (__m256 a) - /// VRSQRTPS ymm, ymm/m256 + /// VRSQRTPS ymm1, ymm2/m256 /// public static Vector256 ReciprocalSqrt(Vector256 value) { throw new PlatformNotSupportedException(); } /// - /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) - /// VROUNDPS ymm, ymm/m256, imm8(8) - /// - public static Vector256 RoundToNearestInteger(Vector256 value) { throw new PlatformNotSupportedException(); } - /// - /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC) - /// VROUNDPS ymm, ymm/m256, imm8(9) + /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_CUR_DIRECTION) + /// VROUNDPS ymm1, ymm2/m256, imm8(4) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector256 RoundToNegativeInfinity(Vector256 value) { throw new PlatformNotSupportedException(); } + public static Vector256 RoundCurrentDirection(Vector256 value) { throw new PlatformNotSupportedException(); } /// - /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC) - /// VROUNDPS ymm, ymm/m256, imm8(10) + /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_CUR_DIRECTION) + /// VROUNDPD ymm1, ymm2/m256, imm8(4) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector256 RoundToPositiveInfinity(Vector256 value) { throw new PlatformNotSupportedException(); } + public static Vector256 RoundCurrentDirection(Vector256 value) { throw new PlatformNotSupportedException(); } + /// - /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) - /// VROUNDPS ymm, ymm/m256, imm8(11) + /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) + /// VROUNDPS ymm1, ymm2/m256, imm8(8) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. 
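As a usage sketch (illustrative only, not part of this patch; values are arbitrary): each Round* overload bakes the rounding mode into the VROUNDPS/VROUNDPD immediate, so a caller selects the mode by method name rather than by passing _MM_FROUND_* flags.

    // Assumes: using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86;
    Vector256<float> v = Vector256.Create(1.5f, -1.5f, 2.5f, -2.5f, 0.7f, -0.7f, 3.2f, -3.2f);
    if (Avx.IsSupported)
    {
        Vector256<float> nearest = Avx.RoundToNearestInteger(v);   // VROUNDPS ..., imm8(8): 2, -2, 2, -2, 1, -1, 3, -3 (ties to even)
        Vector256<float> down    = Avx.RoundToNegativeInfinity(v); // imm8(9)
        Vector256<float> up      = Avx.RoundToPositiveInfinity(v); // imm8(10)
        Vector256<float> trunc   = Avx.RoundToZero(v);             // imm8(11)
    }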
/// - public static Vector256 RoundToZero(Vector256 value) { throw new PlatformNotSupportedException(); } + public static Vector256 RoundToNearestInteger(Vector256 value) { throw new PlatformNotSupportedException(); } /// - /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_CUR_DIRECTION) - /// VROUNDPS ymm, ymm/m256, imm8(4) + /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) + /// VROUNDPD ymm1, ymm2/m256, imm8(8) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector256 RoundCurrentDirection(Vector256 value) { throw new PlatformNotSupportedException(); } + public static Vector256 RoundToNearestInteger(Vector256 value) { throw new PlatformNotSupportedException(); } /// - /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) - /// VROUNDPD ymm, ymm/m256, imm8(8) + /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC) + /// VROUNDPS ymm1, ymm2/m256, imm8(9) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector256 RoundToNearestInteger(Vector256 value) { throw new PlatformNotSupportedException(); } + public static Vector256 RoundToNegativeInfinity(Vector256 value) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC) - /// VROUNDPD ymm, ymm/m256, imm8(9) + /// VROUNDPD ymm1, ymm2/m256, imm8(9) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// public static Vector256 RoundToNegativeInfinity(Vector256 value) { throw new PlatformNotSupportedException(); } + + /// + /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC) + /// VROUNDPS ymm1, ymm2/m256, imm8(10) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. + /// + public static Vector256 RoundToPositiveInfinity(Vector256 value) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC) - /// VROUNDPD ymm, ymm/m256, imm8(10) + /// VROUNDPD ymm1, ymm2/m256, imm8(10) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// public static Vector256 RoundToPositiveInfinity(Vector256 value) { throw new PlatformNotSupportedException(); } + /// - /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) - /// VROUNDPD ymm, ymm/m256, imm8(11) + /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) + /// VROUNDPS ymm1, ymm2/m256, imm8(11) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector256 RoundToZero(Vector256 value) { throw new PlatformNotSupportedException(); } + public static Vector256 RoundToZero(Vector256 value) { throw new PlatformNotSupportedException(); } /// - /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_CUR_DIRECTION) - /// VROUNDPD ymm, ymm/m256, imm8(4) + /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) + /// VROUNDPD ymm1, ymm2/m256, imm8(11) + /// The above native signature does not exist. 
We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector256 RoundCurrentDirection(Vector256 value) { throw new PlatformNotSupportedException(); } + public static Vector256 RoundToZero(Vector256 value) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_shuffle_ps (__m256 a, __m256 b, const int imm8) - /// VSHUFPS ymm, ymm, ymm/m256, imm8 + /// VSHUFPS ymm1, ymm2, ymm3/m256, imm8 + /// VSHUFPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst, imm8 /// public static Vector256 Shuffle(Vector256 value, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_shuffle_pd (__m256d a, __m256d b, const int imm8) - /// VSHUFPD ymm, ymm, ymm/m256, imm8 + /// VSHUFPD ymm1, ymm2, ymm3/m256, imm8 + /// VSHUFPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst, imm8 /// public static Vector256 Shuffle(Vector256 value, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_sqrt_ps (__m256 a) - /// VSQRTPS ymm, ymm/m256 + /// VSQRTPS ymm1, ymm2/m256 + /// VSQRTPS ymm1 {k1}{z}, ymm2/m256/m32bcst /// public static Vector256 Sqrt(Vector256 value) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_sqrt_pd (__m256d a) - /// VSQRTPD ymm, ymm/m256 + /// VSQRTPD ymm1, ymm2/m256 + /// VSQRTPD ymm1 {k1}{z}, ymm2/m256/m64bcst /// public static Vector256 Sqrt(Vector256 value) { throw new PlatformNotSupportedException(); } + /// + /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) + /// VMOVDQU m256, ymm1 + /// VMOVDQU8 m256 {k1}{z}, ymm1 + /// + public static unsafe void Store(sbyte* address, Vector256 source) { throw new PlatformNotSupportedException(); } + /// + /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) + /// VMOVDQU m256, ymm1 + /// VMOVDQU8 m256 {k1}{z}, ymm1 + /// + public static unsafe void Store(byte* address, Vector256 source) { throw new PlatformNotSupportedException(); } + /// + /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) + /// VMOVDQU m256, ymm1 + /// VMOVDQU16 m256 {k1}{z}, ymm1 + /// + public static unsafe void Store(short* address, Vector256 source) { throw new PlatformNotSupportedException(); } + /// + /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) + /// VMOVDQU m256, ymm1 + /// VMOVDQU16 m256 {k1}{z}, ymm1 + /// + public static unsafe void Store(ushort* address, Vector256 source) { throw new PlatformNotSupportedException(); } + /// + /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) + /// VMOVDQU m256, ymm1 + /// VMOVDQU32 m256 {k1}{z}, ymm1 + /// + public static unsafe void Store(int* address, Vector256 source) { throw new PlatformNotSupportedException(); } + /// + /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) + /// VMOVDQU m256, ymm1 + /// VMOVDQU32 m256 {k1}{z}, ymm1 + /// + public static unsafe void Store(uint* address, Vector256 source) { throw new PlatformNotSupportedException(); } + /// + /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) + /// VMOVDQU m256, ymm1 + /// VMOVDQU64 m256 {k1}{z}, ymm1 + /// + public static unsafe void Store(long* address, Vector256 source) { throw new PlatformNotSupportedException(); } + /// + /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) + /// VMOVDQU m256, ymm1 + /// VMOVDQU64 m256 {k1}{z}, ymm1 + /// + public static unsafe void Store(ulong* address, Vector256 source) { throw new PlatformNotSupportedException(); } + /// + /// void _mm256_storeu_ps (float * 
mem_addr, __m256 a) + /// VMOVUPS m256, ymm1 + /// VMOVUPS m256 {k1}{z}, ymm1 + /// + public static unsafe void Store(float* address, Vector256 source) { throw new PlatformNotSupportedException(); } + /// + /// void _mm256_storeu_pd (double * mem_addr, __m256d a) + /// VMOVUPD m256, ymm1 + /// VMOVUPD m256 {k1}{z}, ymm1 + /// + public static unsafe void Store(double* address, Vector256 source) { throw new PlatformNotSupportedException(); } + /// /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQA m256, ymm + /// VMOVDQA m256, ymm1 + /// VMOVDQA32 m256 {k1}{z}, ymm1 /// public static unsafe void StoreAligned(sbyte* address, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQA m256, ymm + /// VMOVDQA m256, ymm1 + /// VMOVDQA32 m256 {k1}{z}, ymm1 /// public static unsafe void StoreAligned(byte* address, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQA m256, ymm + /// VMOVDQA m256, ymm1 + /// VMOVDQA32 m256 {k1}{z}, ymm1 /// public static unsafe void StoreAligned(short* address, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQA m256, ymm + /// VMOVDQA m256, ymm1 + /// VMOVDQA32 m256 {k1}{z}, ymm1 /// public static unsafe void StoreAligned(ushort* address, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQA m256, ymm + /// VMOVDQA m256, ymm1 + /// VMOVDQA32 m256 {k1}{z}, ymm1 /// public static unsafe void StoreAligned(int* address, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQA m256, ymm + /// VMOVDQA m256, ymm1 + /// VMOVDQA32 m256 {k1}{z}, ymm1 /// public static unsafe void StoreAligned(uint* address, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQA m256, ymm + /// VMOVDQA m256, ymm1 + /// VMOVDQA64 m256 {k1}{z}, ymm1 /// public static unsafe void StoreAligned(long* address, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQA m256, ymm + /// VMOVDQA m256, ymm1 + /// VMOVDQA64 m256 {k1}{z}, ymm1 /// public static unsafe void StoreAligned(ulong* address, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_store_ps (float * mem_addr, __m256 a) - /// VMOVAPS m256, ymm + /// VMOVAPS m256, ymm1 + /// VMOVAPS m256 {k1}{z}, ymm1 /// public static unsafe void StoreAligned(float* address, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_store_pd (double * mem_addr, __m256d a) - /// VMOVAPD m256, ymm + /// VMOVAPD m256, ymm1 + /// VMOVAPD m256 {k1}{z}, ymm1 /// public static unsafe void StoreAligned(double* address, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a) - /// VMOVNTDQ m256, ymm + /// VMOVNTDQ m256, ymm1 /// public static unsafe void StoreAlignedNonTemporal(sbyte* address, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a) - /// VMOVNTDQ m256, ymm + /// VMOVNTDQ m256, ymm1 /// public static unsafe void 
StoreAlignedNonTemporal(byte* address, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a) - /// VMOVNTDQ m256, ymm + /// VMOVNTDQ m256, ymm1 /// public static unsafe void StoreAlignedNonTemporal(short* address, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a) - /// VMOVNTDQ m256, ymm + /// VMOVNTDQ m256, ymm1 /// public static unsafe void StoreAlignedNonTemporal(ushort* address, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a) - /// VMOVNTDQ m256, ymm + /// VMOVNTDQ m256, ymm1 /// public static unsafe void StoreAlignedNonTemporal(int* address, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a) - /// VMOVNTDQ m256, ymm + /// VMOVNTDQ m256, ymm1 /// public static unsafe void StoreAlignedNonTemporal(uint* address, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a) - /// VMOVNTDQ m256, ymm + /// VMOVNTDQ m256, ymm1 /// public static unsafe void StoreAlignedNonTemporal(long* address, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a) - /// VMOVNTDQ m256, ymm + /// VMOVNTDQ m256, ymm1 /// public static unsafe void StoreAlignedNonTemporal(ulong* address, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_stream_ps (float * mem_addr, __m256 a) - /// MOVNTPS m256, ymm + /// VMOVNTPS m256, ymm1 /// public static unsafe void StoreAlignedNonTemporal(float* address, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_stream_pd (double * mem_addr, __m256d a) - /// MOVNTPD m256, ymm + /// VMOVNTPD m256, ymm1 /// public static unsafe void StoreAlignedNonTemporal(double* address, Vector256 source) { throw new PlatformNotSupportedException(); } - /// - /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQU m256, ymm - /// - public static unsafe void Store(sbyte* address, Vector256 source) { throw new PlatformNotSupportedException(); } - /// - /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQU m256, ymm - /// - public static unsafe void Store(byte* address, Vector256 source) { throw new PlatformNotSupportedException(); } - /// - /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQU m256, ymm - /// - public static unsafe void Store(short* address, Vector256 source) { throw new PlatformNotSupportedException(); } - /// - /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQU m256, ymm - /// - public static unsafe void Store(ushort* address, Vector256 source) { throw new PlatformNotSupportedException(); } - /// - /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQU m256, ymm - /// - public static unsafe void Store(int* address, Vector256 source) { throw new PlatformNotSupportedException(); } - /// - /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQU m256, ymm - /// - public static unsafe void Store(uint* address, Vector256 source) { throw new PlatformNotSupportedException(); } - /// - /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQU m256, ymm - /// - public static unsafe void Store(long* address, Vector256 source) { throw new 
PlatformNotSupportedException(); } - /// - /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQU m256, ymm - /// - public static unsafe void Store(ulong* address, Vector256 source) { throw new PlatformNotSupportedException(); } - /// - /// void _mm256_storeu_ps (float * mem_addr, __m256 a) - /// MOVUPS m256, ymm - /// - public static unsafe void Store(float* address, Vector256 source) { throw new PlatformNotSupportedException(); } - /// - /// void _mm256_storeu_pd (double * mem_addr, __m256d a) - /// MOVUPD m256, ymm - /// - public static unsafe void Store(double* address, Vector256 source) { throw new PlatformNotSupportedException(); } - /// /// __m256 _mm256_sub_ps (__m256 a, __m256 b) - /// VSUBPS ymm, ymm, ymm/m256 + /// VSUBPS ymm1, ymm2, ymm3/m256 + /// VSUBPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Subtract(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_sub_pd (__m256d a, __m256d b) - /// VSUBPD ymm, ymm, ymm/m256 + /// VSUBPD ymm1, ymm2, ymm3/m256 + /// VSUBPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Subtract(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testc_ps (__m128 a, __m128 b) - /// VTESTPS xmm, xmm/m128 + /// VTESTPS xmm1, xmm2/m128 ; CF=1 /// public static bool TestC(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testc_pd (__m128d a, __m128d b) - /// VTESTPD xmm, xmm/m128 + /// VTESTPD xmm1, xmm2/m128 ; CF=1 /// public static bool TestC(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; CF=1 /// public static bool TestC(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; CF=1 /// public static bool TestC(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; CF=1 /// public static bool TestC(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; CF=1 /// public static bool TestC(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; CF=1 /// public static bool TestC(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; CF=1 /// public static bool TestC(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; CF=1 /// public static bool TestC(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; CF=1 /// public static bool TestC(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testc_ps (__m256 a, __m256 
b) - /// VTESTPS ymm, ymm/m256 + /// VTESTPS ymm1, ymm2/m256 ; CF=1 /// public static bool TestC(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testc_pd (__m256d a, __m256d b) - /// VTESTPS ymm, ymm/m256 + /// VTESTPD ymm1, ymm2/m256 ; CF=1 /// public static bool TestC(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testnzc_ps (__m128 a, __m128 b) - /// VTESTPS xmm, xmm/m128 + /// VTESTPS xmm1, xmm2/m128 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testnzc_pd (__m128d a, __m128d b) - /// VTESTPD xmm, xmm/m128 + /// VTESTPD xmm1, xmm2/m128 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testnzc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testnzc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testnzc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testnzc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testnzc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testnzc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testnzc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testnzc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testnzc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testnzc_ps (__m256 a, __m256 b) - /// VTESTPS ymm, ymm/m256 + /// VTESTPS ymm1, ymm2/m256 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testnzc_pd (__m256d a, __m256d b) - /// VTESTPD ymm, ymm/m256 + /// VTESTPD ymm1, ymm2/m256 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testz_ps (__m128 a, __m128 b) - /// VTESTPS xmm, xmm/m128 + /// VTESTPS xmm1, xmm2/m128 ; ZF=1 /// public static bool TestZ(Vector128 left, Vector128 right) { throw new 
PlatformNotSupportedException(); } /// /// int _mm_testz_pd (__m128d a, __m128d b) - /// VTESTPD xmm, xmm/m128 + /// VTESTPD xmm1, xmm2/m128 ; ZF=1 /// public static bool TestZ(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testz_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=1 /// public static bool TestZ(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testz_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=1 /// public static bool TestZ(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testz_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=1 /// public static bool TestZ(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testz_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=1 /// public static bool TestZ(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testz_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=1 /// public static bool TestZ(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testz_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=1 /// public static bool TestZ(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testz_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=1 /// public static bool TestZ(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testz_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=1 /// public static bool TestZ(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testz_ps (__m256 a, __m256 b) - /// VTESTPS ymm, ymm/m256 + /// VTESTPS ymm1, ymm2/m256 ; ZF=1 /// public static bool TestZ(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm256_testz_pd (__m256d a, __m256d b) - /// VTESTPD ymm, ymm/m256 + /// VTESTPD ymm1, ymm2/m256 ; ZF=1 /// public static bool TestZ(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_unpackhi_ps (__m256 a, __m256 b) - /// VUNPCKHPS ymm, ymm, ymm/m256 + /// VUNPCKHPS ymm1, ymm2, ymm3/m256 + /// VUNPCKHPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 UnpackHigh(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_unpackhi_pd (__m256d a, __m256d b) - /// VUNPCKHPD ymm, ymm, ymm/m256 + /// VUNPCKHPD ymm1, ymm2, ymm3/m256 + /// VUNPCKHPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 UnpackHigh(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_unpacklo_ps (__m256 a, __m256 b) - /// VUNPCKLPS ymm, ymm, ymm/m256 + /// VUNPCKLPS ymm1, ymm2, ymm3/m256 + /// VUNPCKLPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 UnpackLow(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_unpacklo_pd (__m256d a, __m256d b) - /// VUNPCKLPD ymm, ymm, ymm/m256 + /// VUNPCKLPD ymm1, 
ymm2, ymm3/m256 + /// VUNPCKLPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 UnpackLow(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_xor_ps (__m256 a, __m256 b) - /// VXORPS ymm, ymm, ymm/m256 + /// VXORPS ymm1, ymm2, ymm3/m256 + /// VXORPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Xor(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_xor_pd (__m256d a, __m256d b) - /// VXORPS ymm, ymm, ymm/m256 + /// VXORPD ymm1, ymm2, ymm3/m256 + /// VXORPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Xor(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.cs index 0d13a2bf8607ef..7a65fb5f7b1e1e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.cs @@ -28,1363 +28,1419 @@ internal X64() { } /// /// __m256 _mm256_add_ps (__m256 a, __m256 b) - /// VADDPS ymm, ymm, ymm/m256 + /// VADDPS ymm1, ymm2, ymm3/m256 + /// VADDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Add(Vector256 left, Vector256 right) => Add(left, right); /// /// __m256d _mm256_add_pd (__m256d a, __m256d b) - /// VADDPD ymm, ymm, ymm/m256 + /// VADDPD ymm1, ymm2, ymm3/m256 + /// VADDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Add(Vector256 left, Vector256 right) => Add(left, right); /// /// __m256 _mm256_addsub_ps (__m256 a, __m256 b) - /// VADDSUBPS ymm, ymm, ymm/m256 + /// VADDSUBPS ymm1, ymm2, ymm3/m256 /// public static Vector256 AddSubtract(Vector256 left, Vector256 right) => AddSubtract(left, right); /// /// __m256d _mm256_addsub_pd (__m256d a, __m256d b) - /// VADDSUBPD ymm, ymm, ymm/m256 + /// VADDSUBPD ymm1, ymm2, ymm3/m256 /// public static Vector256 AddSubtract(Vector256 left, Vector256 right) => AddSubtract(left, right); /// /// __m256 _mm256_and_ps (__m256 a, __m256 b) - /// VANDPS ymm, ymm, ymm/m256 + /// VANDPS ymm1, ymm2, ymm3/m256 + /// VANDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 And(Vector256 left, Vector256 right) => And(left, right); /// /// __m256d _mm256_and_pd (__m256d a, __m256d b) - /// VANDPD ymm, ymm, ymm/m256 + /// VANDPD ymm1, ymm2, ymm3/m256 + /// VANDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 And(Vector256 left, Vector256 right) => And(left, right); /// /// __m256 _mm256_andnot_ps (__m256 a, __m256 b) - /// VANDNPS ymm, ymm, ymm/m256 + /// VANDNPS ymm1, ymm2, ymm3/m256 + /// VANDNPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 AndNot(Vector256 left, Vector256 right) => AndNot(left, right); /// /// __m256d _mm256_andnot_pd (__m256d a, __m256d b) - /// VANDNPD ymm, ymm, ymm/m256 + /// VANDNPD ymm1, ymm2, ymm3/m256 + /// VANDNPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 AndNot(Vector256 left, Vector256 right) => AndNot(left, right); /// /// __m256 _mm256_blend_ps (__m256 a, __m256 b, const int imm8) - /// VBLENDPS ymm, ymm, ymm/m256, imm8 + /// VBLENDPS ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Blend(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Blend(left, right, control); /// /// __m256d _mm256_blend_pd (__m256d a, __m256d b, const int imm8) - /// 
VBLENDPD ymm, ymm, ymm/m256, imm8 + /// VBLENDPD ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Blend(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Blend(left, right, control); /// /// __m256 _mm256_blendv_ps (__m256 a, __m256 b, __m256 mask) - /// VBLENDVPS ymm, ymm, ymm/m256, ymm + /// VBLENDVPS ymm1, ymm2, ymm3/m256, ymm4 /// public static Vector256 BlendVariable(Vector256 left, Vector256 right, Vector256 mask) => BlendVariable(left, right, mask); /// /// __m256d _mm256_blendv_pd (__m256d a, __m256d b, __m256d mask) - /// VBLENDVPD ymm, ymm, ymm/m256, ymm + /// VBLENDVPD ymm1, ymm2, ymm3/m256, ymm4 /// public static Vector256 BlendVariable(Vector256 left, Vector256 right, Vector256 mask) => BlendVariable(left, right, mask); /// /// __m128 _mm_broadcast_ss (float const * mem_addr) - /// VBROADCASTSS xmm, m32 + /// VBROADCASTSS xmm1, m32 + /// VBROADCASTSS xmm1 {k1}{z}, m32 /// public static unsafe Vector128 BroadcastScalarToVector128(float* source) => BroadcastScalarToVector128(source); /// /// __m256 _mm256_broadcast_ss (float const * mem_addr) - /// VBROADCASTSS ymm, m32 + /// VBROADCASTSS ymm1, m32 + /// VBROADCASTSS ymm1 {k1}{z}, m32 /// public static unsafe Vector256 BroadcastScalarToVector256(float* source) => BroadcastScalarToVector256(source); /// /// __m256d _mm256_broadcast_sd (double const * mem_addr) - /// VBROADCASTSD ymm, m64 + /// VBROADCASTSD ymm1, m64 + /// VBROADCASTSD ymm1 {k1}{z}, m64 /// public static unsafe Vector256 BroadcastScalarToVector256(double* source) => BroadcastScalarToVector256(source); /// /// __m256 _mm256_broadcast_ps (__m128 const * mem_addr) - /// VBROADCASTF128, ymm, m128 + /// VBROADCASTF128 ymm1, m128 + /// VBROADCASTF32x4 ymm1 {k1}{z}, m128 /// public static unsafe Vector256 BroadcastVector128ToVector256(float* address) => BroadcastVector128ToVector256(address); /// /// __m256d _mm256_broadcast_pd (__m128d const * mem_addr) - /// VBROADCASTF128, ymm, m128 + /// VBROADCASTF128 ymm1, m128 + /// VBROADCASTF64x2 ymm1 {k1}{z}, m128 /// public static unsafe Vector256 BroadcastVector128ToVector256(double* address) => BroadcastVector128ToVector256(address); /// - /// __m256 _mm256_ceil_ps (__m256 a) - /// VROUNDPS ymm, ymm/m256, imm8(10) + /// __m256 _mm256_ceil_ps (__m256 a) + /// VROUNDPS ymm1, ymm2/m256, imm8(10) /// public static Vector256 Ceiling(Vector256 value) => Ceiling(value); /// - /// __m256d _mm256_ceil_pd (__m256d a) - /// VROUNDPD ymm, ymm/m256, imm8(10) + /// __m256d _mm256_ceil_pd (__m256d a) + /// VROUNDPD ymm1, ymm2/m256, imm8(10) /// public static Vector256 Ceiling(Vector256 value) => Ceiling(value); /// /// __m128 _mm_cmp_ps (__m128 a, __m128 b, const int imm8) - /// VCMPPS xmm, xmm, xmm/m128, imm8 + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 Compare(Vector128 left, Vector128 right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) => Compare(left, right, mode); /// - /// __m128d _mm_cmp_pd (__m128d a, __m128d b, const int imm8) - /// VCMPPD xmm, xmm, xmm/m128, imm8 - /// - public static Vector128 Compare(Vector128 left, Vector128 right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) => Compare(left, right, mode); - /// /// __m256 _mm256_cmp_ps (__m256 a, __m256 b, const int imm8) - /// VCMPPS ymm, ymm, ymm/m256, imm8 + /// VCMPPS ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Compare(Vector256 left, Vector256 right, [ConstantExpected(Max = 
FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) => Compare(left, right, mode); - /// - /// __m256d _mm256_cmp_pd (__m256d a, __m256d b, const int imm8) - /// VCMPPD ymm, ymm, ymm/m256, imm8 - /// - public static Vector256 Compare(Vector256 left, Vector256 right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) => Compare(left, right, mode); - /// /// __m256 _mm256_cmpeq_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(0) + /// VCMPPS ymm1, ymm2/m256, imm8(0) /// The above native signature does not exist. We provide this additional overload for completeness. /// public static Vector256 CompareEqual(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedEqualNonSignaling); - /// - /// __m256d _mm256_cmpeq_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(0) - /// The above native signature does not exist. We provide this additional overload for completeness. - /// - public static Vector256 CompareEqual(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedEqualNonSignaling); - /// /// __m256 _mm256_cmpgt_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(14) + /// VCMPPS ymm1, ymm2/m256, imm8(14) /// The above native signature does not exist. We provide this additional overload for completeness. /// public static Vector256 CompareGreaterThan(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedGreaterThanSignaling); - /// - /// __m256d _mm256_cmpgt_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(14) - /// The above native signature does not exist. We provide this additional overload for completeness. - /// - public static Vector256 CompareGreaterThan(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedGreaterThanSignaling); - /// /// __m256 _mm256_cmpge_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(13) + /// VCMPPS ymm1, ymm2/m256, imm8(13) /// The above native signature does not exist. We provide this additional overload for completeness. /// public static Vector256 CompareGreaterThanOrEqual(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedGreaterThanOrEqualSignaling); /// - /// __m256d _mm256_cmpge_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(13) + /// __m256 _mm256_cmplt_ps (__m256 a, __m256 b) + /// VCMPPS ymm1, ymm2/m256, imm8(1) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareGreaterThanOrEqual(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedGreaterThanOrEqualSignaling); - + public static Vector256 CompareLessThan(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedLessThanSignaling); /// - /// __m256 _mm256_cmplt_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(1) + /// __m256 _mm256_cmple_ps (__m256 a, __m256 b) + /// VCMPPS ymm1, ymm2/m256, imm8(2) /// The above native signature does not exist. We provide this additional overload for completeness. 
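A hedged usage sketch (illustrative, not part of this patch; variable names are assumed): each named CompareXxx helper above is Compare with the FloatComparisonMode shown, and every element of the result is an all-ones or all-zeros bit pattern, which composes directly with MoveMask and BlendVariable.

    Vector256<float> a = Vector256.Create(1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f);
    Vector256<float> b = Vector256.Create(8f, 7f, 6f, 5f, 4f, 3f, 2f, 1f);
    Vector256<float> mask = Avx.CompareGreaterThan(a, b);  // same as Avx.Compare(a, b, FloatComparisonMode.OrderedGreaterThanSignaling)
    Vector256<float> max  = Avx.BlendVariable(b, a, mask); // picks a[i] where a[i] > b[i], else b[i]
    int bits = Avx.MoveMask(mask);                         // 0b1111_0000 for these inputs (lane 0 is the low bit)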
/// - public static Vector256 CompareLessThan(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedLessThanSignaling); + public static Vector256 CompareLessThanOrEqual(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedLessThanOrEqualSignaling); /// - /// __m256d _mm256_cmplt_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(1) + /// __m256 _mm256_cmpneq_ps (__m256 a, __m256 b) + /// VCMPPS ymm1, ymm2/m256, imm8(4) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareLessThan(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedLessThanSignaling); - + public static Vector256 CompareNotEqual(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNotEqualNonSignaling); /// - /// __m256 _mm256_cmple_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(2) + /// __m256 _mm256_cmpngt_ps (__m256 a, __m256 b) + /// VCMPPS ymm1, ymm2/m256, imm8(10) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareLessThanOrEqual(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedLessThanOrEqualSignaling); + public static Vector256 CompareNotGreaterThan(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNotGreaterThanSignaling); /// - /// __m256d _mm256_cmple_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(2) + /// __m256 _mm256_cmpnge_ps (__m256 a, __m256 b) + /// VCMPPS ymm1, ymm2/m256, imm8(9) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareLessThanOrEqual(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedLessThanOrEqualSignaling); - + public static Vector256 CompareNotGreaterThanOrEqual(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNotGreaterThanOrEqualSignaling); /// - /// __m256 _mm256_cmpneq_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(4) + /// __m256 _mm256_cmpnlt_ps (__m256 a, __m256 b) + /// VCMPPS ymm1, ymm2/m256, imm8(5) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareNotEqual(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNotEqualNonSignaling); + public static Vector256 CompareNotLessThan(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNotLessThanSignaling); /// - /// __m256d _mm256_cmpneq_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(4) + /// __m256 _mm256_cmpnle_ps (__m256 a, __m256 b) + /// VCMPPS ymm1, ymm2/m256, imm8(6) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareNotEqual(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNotEqualNonSignaling); - + public static Vector256 CompareNotLessThanOrEqual(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNotLessThanOrEqualSignaling); /// - /// __m256 _mm256_cmpngt_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(10) + /// __m256 _mm256_cmpord_ps (__m256 a, __m256 b) + /// VCMPPS ymm1, ymm2/m256, imm8(7) /// The above native signature does not exist. 
We provide this additional overload for completeness. /// - public static Vector256 CompareNotGreaterThan(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNotGreaterThanSignaling); + public static Vector256 CompareOrdered(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedNonSignaling); /// - /// __m256d _mm256_cmpngt_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(10) + /// __m256 _mm256_cmpunord_ps (__m256 a, __m256 b) + /// VCMPPS ymm1, ymm2/m256, imm8(3) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareNotGreaterThan(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNotGreaterThanSignaling); + public static Vector256 CompareUnordered(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNonSignaling); /// - /// __m256 _mm256_cmpnge_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(9) + /// __m128d _mm_cmp_pd (__m128d a, __m128d b, const int imm8) + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8 + /// + public static Vector128 Compare(Vector128 left, Vector128 right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) => Compare(left, right, mode); + /// + /// __m256d _mm256_cmp_pd (__m256d a, __m256d b, const int imm8) + /// VCMPPD ymm1, ymm2, ymm3/m256, imm8 + /// + public static Vector256 Compare(Vector256 left, Vector256 right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) => Compare(left, right, mode); + /// + /// __m256d _mm256_cmpeq_pd (__m256d a, __m256d b) + /// VCMPPD ymm1, ymm2/m256, imm8(0) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareNotGreaterThanOrEqual(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNotGreaterThanOrEqualSignaling); + public static Vector256 CompareEqual(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedEqualNonSignaling); /// - /// __m256d _mm256_cmpnge_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(9) + /// __m256d _mm256_cmpgt_pd (__m256d a, __m256d b) + /// VCMPPD ymm1, ymm2/m256, imm8(14) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareNotGreaterThanOrEqual(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNotGreaterThanOrEqualSignaling); - + public static Vector256 CompareGreaterThan(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedGreaterThanSignaling); /// - /// __m256 _mm256_cmpnlt_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(5) + /// __m256d _mm256_cmpge_pd (__m256d a, __m256d b) + /// VCMPPD ymm1, ymm2/m256, imm8(13) /// The above native signature does not exist. We provide this additional overload for completeness. 
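The Ordered/Unordered pair above differs only on NaN lanes: CompareOrdered sets a lane when both inputs are numeric, CompareUnordered when either input is NaN, so the two results are exact complements. A small sketch (illustrative values, not from this patch):

    Vector256<float> x = Vector256.Create(1f, float.NaN, 3f, 4f, 5f, 6f, 7f, 8f);
    Vector256<float> y = Vector256.Create(1f, 2f, float.NaN, 4f, 5f, 6f, 7f, 8f);
    int ordered   = Avx.MoveMask(Avx.CompareOrdered(x, y));   // 0b1111_1001: lanes 1 and 2 see a NaN
    int unordered = Avx.MoveMask(Avx.CompareUnordered(x, y)); // 0b0000_0110: the complement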
/// - public static Vector256 CompareNotLessThan(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNotLessThanSignaling); + public static Vector256 CompareGreaterThanOrEqual(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedGreaterThanOrEqualSignaling); /// - /// __m256d _mm256_cmpnlt_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(5) + /// __m256d _mm256_cmplt_pd (__m256d a, __m256d b) + /// VCMPPD ymm1, ymm2/m256, imm8(1) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareNotLessThan(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNotLessThanSignaling); - + public static Vector256 CompareLessThan(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedLessThanSignaling); /// - /// __m256 _mm256_cmpnle_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(6) + /// __m256d _mm256_cmple_pd (__m256d a, __m256d b) + /// VCMPPD ymm1, ymm2/m256, imm8(2) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareNotLessThanOrEqual(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNotLessThanOrEqualSignaling); + public static Vector256 CompareLessThanOrEqual(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedLessThanOrEqualSignaling); /// - /// __m256d _mm256_cmpnle_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(6) + /// __m256d _mm256_cmpneq_pd (__m256d a, __m256d b) + /// VCMPPD ymm1, ymm2/m256, imm8(4) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareNotLessThanOrEqual(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNotLessThanOrEqualSignaling); - + public static Vector256 CompareNotEqual(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNotEqualNonSignaling); /// - /// __m256 _mm256_cmpord_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(7) + /// __m256d _mm256_cmpngt_pd (__m256d a, __m256d b) + /// VCMPPD ymm1, ymm2/m256, imm8(10) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareOrdered(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedNonSignaling); + public static Vector256 CompareNotGreaterThan(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNotGreaterThanSignaling); /// - /// __m256d _mm256_cmpord_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(7) + /// __m256d _mm256_cmpnge_pd (__m256d a, __m256d b) + /// VCMPPD ymm1, ymm2/m256, imm8(9) /// The above native signature does not exist. We provide this additional overload for completeness. 
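The Not* modes are the unordered complements of their ordered counterparts, not simple negated predicates: a NaN lane makes CompareNotGreaterThanOrEqual true while the ordered CompareLessThan stays false. Sketch under assumed values (not from this patch):

    Vector256<double> p = Vector256.Create(double.NaN, 1.0, 2.0, 3.0);
    Vector256<double> q = Vector256.Create(0.0, 2.0, 2.0, 1.0);
    int lt  = Avx.MoveMask(Avx.CompareLessThan(p, q));              // 0b0010: only lane 1 (1.0 < 2.0)
    int nge = Avx.MoveMask(Avx.CompareNotGreaterThanOrEqual(p, q)); // 0b0011: the NaN lane is set too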
/// - public static Vector256 CompareOrdered(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedNonSignaling); - + public static Vector256 CompareNotGreaterThanOrEqual(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNotGreaterThanOrEqualSignaling); /// - /// __m128d _mm_cmp_sd (__m128d a, __m128d b, const int imm8) - /// VCMPSS xmm, xmm, xmm/m32, imm8 + /// __m256d _mm256_cmpnlt_pd (__m256d a, __m256d b) + /// VCMPPD ymm1, ymm2/m256, imm8(5) + /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector128 CompareScalar(Vector128 left, Vector128 right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) => CompareScalar(left, right, mode); + public static Vector256 CompareNotLessThan(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNotLessThanSignaling); /// - /// __m128 _mm_cmp_ss (__m128 a, __m128 b, const int imm8) - /// VCMPSD xmm, xmm, xmm/m64, imm8 + /// __m256d _mm256_cmpnle_pd (__m256d a, __m256d b) + /// VCMPPD ymm1, ymm2/m256, imm8(6) + /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector128 CompareScalar(Vector128 left, Vector128 right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) => CompareScalar(left, right, mode); - + public static Vector256 CompareNotLessThanOrEqual(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNotLessThanOrEqualSignaling); /// - /// __m256 _mm256_cmpunord_ps (__m256 a, __m256 b) - /// CMPPS ymm, ymm/m256, imm8(3) + /// __m256d _mm256_cmpord_pd (__m256d a, __m256d b) + /// VCMPPD ymm1, ymm2/m256, imm8(7) /// The above native signature does not exist. We provide this additional overload for completeness. /// - public static Vector256 CompareUnordered(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNonSignaling); + public static Vector256 CompareOrdered(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedNonSignaling); /// /// __m256d _mm256_cmpunord_pd (__m256d a, __m256d b) - /// CMPPD ymm, ymm/m256, imm8(3) + /// VCMPPD ymm1, ymm2/m256, imm8(3) /// The above native signature does not exist. We provide this additional overload for completeness. 
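CompareScalar (VCMPSS/VCMPSD, next hunk) compares only the lowest element; the upper elements of the result are copied from the first operand. Sketch under those assumptions (illustrative, not from this patch):

    Vector128<double> s = Vector128.Create(1.0, 10.0);
    Vector128<double> t = Vector128.Create(2.0, 20.0);
    Vector128<double> r = Avx.CompareScalar(s, t, FloatComparisonMode.OrderedLessThanSignaling);
    // r[0] has all bits set (1.0 < 2.0, so it reads back as NaN); r[1] is 10.0, carried over from s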
/// public static Vector256 CompareUnordered(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNonSignaling); + /// + /// __m128 _mm_cmp_ss (__m128 a, __m128 b, const int imm8) + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8 + /// + public static Vector128 CompareScalar(Vector128 left, Vector128 right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) => CompareScalar(left, right, mode); + /// + /// __m128d _mm_cmp_sd (__m128d a, __m128d b, const int imm8) + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8 + /// + public static Vector128 CompareScalar(Vector128 left, Vector128 right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) => CompareScalar(left, right, mode); + /// /// __m128i _mm256_cvtpd_epi32 (__m256d a) - /// VCVTPD2DQ xmm, ymm/m256 + /// VCVTPD2DQ xmm1, ymm2/m256 + /// VCVTPD2DQ xmm1 {k1}{z}, ymm2/m256/m64bcst /// public static Vector128 ConvertToVector128Int32(Vector256 value) => ConvertToVector128Int32(value); + /// /// __m128 _mm256_cvtpd_ps (__m256d a) - /// VCVTPD2PS xmm, ymm/m256 + /// VCVTPD2PS xmm1, ymm2/m256 + /// VCVTPD2PS xmm1 {k1}{z}, ymm2/m256/m64bcst /// public static Vector128 ConvertToVector128Single(Vector256 value) => ConvertToVector128Single(value); + /// - /// __m256i _mm256_cvtps_epi32 (__m256 a) - /// VCVTPS2DQ ymm, ymm/m256 - /// - public static Vector256 ConvertToVector256Int32(Vector256 value) => ConvertToVector256Int32(value); - /// - /// __m256 _mm256_cvtepi32_ps (__m256i a) - /// VCVTDQ2PS ymm, ymm/m256 + /// __m256d _mm256_cvtepi32_pd (__m128i a) + /// VCVTDQ2PD ymm1, xmm2/m128 + /// VCVTDQ2PD ymm1 {k1}{z}, xmm2/m128/m32bcst /// - public static Vector256 ConvertToVector256Single(Vector256 value) => ConvertToVector256Single(value); + public static Vector256 ConvertToVector256Double(Vector128 value) => ConvertToVector256Double(value); /// /// __m256d _mm256_cvtps_pd (__m128 a) - /// VCVTPS2PD ymm, xmm/m128 + /// VCVTPS2PD ymm1, xmm2/m128 + /// VCVTPS2PD ymm1 {k1}{z}, xmm2/m128/m32bcst /// public static Vector256 ConvertToVector256Double(Vector128 value) => ConvertToVector256Double(value); /// - /// __m256d _mm256_cvtepi32_pd (__m128i a) - /// VCVTDQ2PD ymm, xmm/m128 + /// __m256i _mm256_cvtps_epi32 (__m256 a) + /// VCVTPS2DQ ymm1, ymm2/m256 + /// VCVTPS2DQ ymm1 {k1}{z}, ymm2/m256/m32bcst /// - public static Vector256 ConvertToVector256Double(Vector128 value) => ConvertToVector256Double(value); - + public static Vector256 ConvertToVector256Int32(Vector256 value) => ConvertToVector256Int32(value); /// - /// __m128i _mm256_cvttpd_epi32 (__m256d a) - /// VCVTTPD2DQ xmm, ymm/m256 + /// __m256 _mm256_cvtepi32_ps (__m256i a) + /// VCVTDQ2PS ymm1, ymm2/m256 + /// VCVTDQ2PS ymm1 {k1}{z}, ymm2/m256/m32bcst /// - public static Vector128 ConvertToVector128Int32WithTruncation(Vector256 value) => ConvertToVector128Int32WithTruncation(value); + public static Vector256 ConvertToVector256Single(Vector256 value) => ConvertToVector256Single(value); + /// /// __m256i _mm256_cvttps_epi32 (__m256 a) - /// VCVTTPS2DQ ymm, ymm/m256 + /// VCVTTPS2DQ ymm1, ymm2/m256 + /// VCVTTPS2DQ ymm1 {k1}{z}, ymm2/m256/m32bcst /// public static Vector256 ConvertToVector256Int32WithTruncation(Vector256 value) => ConvertToVector256Int32WithTruncation(value); + /// + /// __m128i _mm256_cvttpd_epi32 (__m256d a) + /// VCVTTPD2DQ xmm1, ymm2/m256 + /// VCVTTPD2DQ xmm1 {k1}{z}, ymm2/m256/m64bcst + /// + public static Vector128 ConvertToVector128Int32WithTruncation(Vector256 value) => 
ConvertToVector128Int32WithTruncation(value); /// /// __m256 _mm256_div_ps (__m256 a, __m256 b) - /// VDIVPS ymm, ymm, ymm/m256 + /// VDIVPS ymm1, ymm2, ymm3/m256 + /// VDIVPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Divide(Vector256 left, Vector256 right) => Divide(left, right); /// /// __m256d _mm256_div_pd (__m256d a, __m256d b) - /// VDIVPD ymm, ymm, ymm/m256 + /// VDIVPD ymm1, ymm2, ymm3/m256 + /// VDIVPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Divide(Vector256 left, Vector256 right) => Divide(left, right); /// /// __m256 _mm256_dp_ps (__m256 a, __m256 b, const int imm8) - /// VDPPS ymm, ymm, ymm/m256, imm8 + /// VDPPS ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 DotProduct(Vector256 left, Vector256 right, [ConstantExpected] byte control) => DotProduct(left, right, control); /// /// __m256 _mm256_moveldup_ps (__m256 a) - /// VMOVSLDUP ymm, ymm/m256 + /// VMOVSLDUP ymm1, ymm2/m256 + /// VMOVSLDUP ymm1 {k1}{z}, ymm2/m256 /// public static Vector256 DuplicateEvenIndexed(Vector256 value) => DuplicateEvenIndexed(value); /// /// __m256d _mm256_movedup_pd (__m256d a) - /// VMOVDDUP ymm, ymm/m256 + /// VMOVDDUP ymm1, ymm2/m256 + /// VMOVDDUP ymm1 {k1}{z}, ymm2/m256 /// public static Vector256 DuplicateEvenIndexed(Vector256 value) => DuplicateEvenIndexed(value); - /// /// __m256 _mm256_movehdup_ps (__m256 a) - /// VMOVSHDUP ymm, ymm/m256 + /// VMOVSHDUP ymm1, ymm2/m256 + /// VMOVSHDUP ymm1 {k1}{z}, ymm2/m256 /// public static Vector256 DuplicateOddIndexed(Vector256 value) => DuplicateOddIndexed(value); /// /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8) - /// VEXTRACTF128 xmm/m128, ymm, imm8 + /// VEXTRACTF128 xmm1/m128, ymm2, imm8 + /// VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) => ExtractVector128(value, index); - /// /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8) - /// VEXTRACTF128 xmm/m128, ymm, imm8 + /// VEXTRACTF128 xmm1/m128, ymm2, imm8 + /// VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) => ExtractVector128(value, index); - /// /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8) - /// VEXTRACTF128 xmm/m128, ymm, imm8 + /// VEXTRACTF128 xmm1/m128, ymm2, imm8 + /// VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) => ExtractVector128(value, index); - /// /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8) - /// VEXTRACTF128 xmm/m128, ymm, imm8 + /// VEXTRACTF128 xmm1/m128, ymm2, imm8 + /// VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) => ExtractVector128(value, index); - /// /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8) - /// VEXTRACTF128 xmm/m128, ymm, imm8 + /// VEXTRACTF128 xmm1/m128, ymm2, imm8 + /// VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) => ExtractVector128(value, index); - /// /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8) - /// VEXTRACTF128 xmm/m128, ymm, imm8 + /// VEXTRACTF128 xmm1/m128, ymm2, imm8 + /// VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) => ExtractVector128(value, 
index); - /// /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8) - /// VEXTRACTF128 xmm/m128, ymm, imm8 + /// VEXTRACTF128 xmm1/m128, ymm2, imm8 + /// VEXTRACTF64x2 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) => ExtractVector128(value, index); - /// /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8) - /// VEXTRACTF128 xmm/m128, ymm, imm8 + /// VEXTRACTF128 xmm1/m128, ymm2, imm8 + /// VEXTRACTF64x2 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) => ExtractVector128(value, index); - /// /// __m128 _mm256_extractf128_ps (__m256 a, const int imm8) - /// VEXTRACTF128 xmm/m128, ymm, imm8 + /// VEXTRACTF128 xmm1/m128, ymm2, imm8 + /// VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) => ExtractVector128(value, index); - /// /// __m128d _mm256_extractf128_pd (__m256d a, const int imm8) - /// VEXTRACTF128 xmm/m128, ymm, imm8 + /// VEXTRACTF128 xmm1/m128, ymm2, imm8 + /// VEXTRACTF64x2 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) => ExtractVector128(value, index); /// - /// __m256 _mm256_floor_ps (__m256 a) - /// VROUNDPS ymm, ymm/m256, imm8(9) + /// __m256 _mm256_floor_ps (__m256 a) + /// VROUNDPS ymm1, ymm2/m256, imm8(9) /// public static Vector256 Floor(Vector256 value) => Floor(value); /// - /// __m256d _mm256_floor_pd (__m256d a) - /// VROUNDPS ymm, ymm/m256, imm8(9) + /// __m256d _mm256_floor_pd (__m256d a) + /// VROUNDPD ymm1, ymm2/m256, imm8(9) /// public static Vector256 Floor(Vector256 value) => Floor(value); /// /// __m256 _mm256_hadd_ps (__m256 a, __m256 b) - /// VHADDPS ymm, ymm, ymm/m256 + /// VHADDPS ymm1, ymm2, ymm3/m256 /// public static Vector256 HorizontalAdd(Vector256 left, Vector256 right) => HorizontalAdd(left, right); /// /// __m256d _mm256_hadd_pd (__m256d a, __m256d b) - /// VHADDPD ymm, ymm, ymm/m256 + /// VHADDPD ymm1, ymm2, ymm3/m256 /// public static Vector256 HorizontalAdd(Vector256 left, Vector256 right) => HorizontalAdd(left, right); /// /// __m256 _mm256_hsub_ps (__m256 a, __m256 b) - /// VHSUBPS ymm, ymm, ymm/m256 + /// VHSUBPS ymm1, ymm2, ymm3/m256 /// public static Vector256 HorizontalSubtract(Vector256 left, Vector256 right) => HorizontalSubtract(left, right); /// /// __m256d _mm256_hsub_pd (__m256d a, __m256d b) - /// VHSUBPD ymm, ymm, ymm/m256 + /// VHSUBPD ymm1, ymm2, ymm3/m256 /// public static Vector256 HorizontalSubtract(Vector256 left, Vector256 right) => HorizontalSubtract(left, right); /// /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8) - /// VINSERTF128 ymm, ymm, xmm/m128, imm8 + /// VINSERTF128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) => InsertVector128(value, data, index); - /// /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8) - /// VINSERTF128 ymm, ymm, xmm/m128, imm8 + /// VINSERTF128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) => InsertVector128(value, data, index); - /// /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8) - /// VINSERTF128 ymm, ymm, xmm/m128,
imm8 + /// VINSERTF128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) => InsertVector128(value, data, index); - /// /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8) - /// VINSERTF128 ymm, ymm, xmm/m128, imm8 + /// VINSERTF128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) => InsertVector128(value, data, index); - /// /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8) - /// VINSERTF128 ymm, ymm, xmm/m128, imm8 + /// VINSERTF128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) => InsertVector128(value, data, index); - /// /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8) - /// VINSERTF128 ymm, ymm, xmm/m128, imm8 + /// VINSERTF128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) => InsertVector128(value, data, index); - /// /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8) - /// VINSERTF128 ymm, ymm, xmm/m128, imm8 + /// VINSERTF128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTF64x2 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) => InsertVector128(value, data, index); - /// /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8) - /// VINSERTF128 ymm, ymm, xmm/m128, imm8 + /// VINSERTF128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTF64x2 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) => InsertVector128(value, data, index); - /// /// __m256 _mm256_insertf128_ps (__m256 a, __m128 b, int imm8) - /// VINSERTF128 ymm, ymm, xmm/m128, imm8 + /// VINSERTF128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) => InsertVector128(value, data, index); - /// /// __m256d _mm256_insertf128_pd (__m256d a, __m128d b, int imm8) - /// VINSERTF128 ymm, ymm, xmm/m128, imm8 + /// VINSERTF128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTF64x2 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) => InsertVector128(value, data, index); - /// - /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) - /// VMOVDQU ymm, m256 - /// - public static unsafe Vector256 LoadVector256(sbyte* address) => LoadVector256(address); - /// - /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) - /// VMOVDQU ymm, m256 - /// - public static unsafe Vector256 LoadVector256(byte* address) => LoadVector256(address); - /// - /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) - /// VMOVDQU ymm, m256 - /// - public static unsafe Vector256 LoadVector256(short* address) => LoadVector256(address); - /// - /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) - /// VMOVDQU ymm, m256 - /// - public static unsafe Vector256 LoadVector256(ushort* address) => 
LoadVector256(address); - /// - /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) - /// VMOVDQU ymm, m256 - /// - public static unsafe Vector256 LoadVector256(int* address) => LoadVector256(address); - /// - /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) - /// VMOVDQU ymm, m256 - /// - public static unsafe Vector256 LoadVector256(uint* address) => LoadVector256(address); - /// - /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) - /// VMOVDQU ymm, m256 - /// - public static unsafe Vector256 LoadVector256(long* address) => LoadVector256(address); - /// - /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) - /// VMOVDQU ymm, m256 - /// - public static unsafe Vector256 LoadVector256(ulong* address) => LoadVector256(address); - /// - /// __m256 _mm256_loadu_ps (float const * mem_addr) - /// VMOVUPS ymm, ymm/m256 - /// - public static unsafe Vector256 LoadVector256(float* address) => LoadVector256(address); - /// - /// __m256d _mm256_loadu_pd (double const * mem_addr) - /// VMOVUPD ymm, ymm/m256 - /// - public static unsafe Vector256 LoadVector256(double* address) => LoadVector256(address); - /// /// __m256i _mm256_load_si256 (__m256i const * mem_addr) - /// VMOVDQA ymm, m256 + /// VMOVDQA ymm1, m256 + /// VMOVDQA32 ymm1 {k1}{z}, m256 /// public static unsafe Vector256 LoadAlignedVector256(sbyte* address) => LoadAlignedVector256(address); /// /// __m256i _mm256_load_si256 (__m256i const * mem_addr) - /// VMOVDQA ymm, m256 + /// VMOVDQA ymm1, m256 + /// VMOVDQA32 ymm1 {k1}{z}, m256 /// public static unsafe Vector256 LoadAlignedVector256(byte* address) => LoadAlignedVector256(address); /// /// __m256i _mm256_load_si256 (__m256i const * mem_addr) - /// VMOVDQA ymm, m256 + /// VMOVDQA ymm1, m256 + /// VMOVDQA32 ymm1 {k1}{z}, m256 /// public static unsafe Vector256 LoadAlignedVector256(short* address) => LoadAlignedVector256(address); /// /// __m256i _mm256_load_si256 (__m256i const * mem_addr) - /// VMOVDQA ymm, m256 + /// VMOVDQA ymm1, m256 + /// VMOVDQA32 ymm1 {k1}{z}, m256 /// public static unsafe Vector256 LoadAlignedVector256(ushort* address) => LoadAlignedVector256(address); /// /// __m256i _mm256_load_si256 (__m256i const * mem_addr) - /// VMOVDQA ymm, m256 + /// VMOVDQA ymm1, m256 + /// VMOVDQA32 ymm1 {k1}{z}, m256 /// public static unsafe Vector256 LoadAlignedVector256(int* address) => LoadAlignedVector256(address); /// /// __m256i _mm256_load_si256 (__m256i const * mem_addr) - /// VMOVDQA ymm, m256 + /// VMOVDQA ymm1, m256 + /// VMOVDQA32 ymm1 {k1}{z}, m256 /// public static unsafe Vector256 LoadAlignedVector256(uint* address) => LoadAlignedVector256(address); /// /// __m256i _mm256_load_si256 (__m256i const * mem_addr) - /// VMOVDQA ymm, m256 + /// VMOVDQA ymm1, m256 + /// VMOVDQA64 ymm1 {k1}{z}, m256 /// public static unsafe Vector256 LoadAlignedVector256(long* address) => LoadAlignedVector256(address); /// /// __m256i _mm256_load_si256 (__m256i const * mem_addr) - /// VMOVDQA ymm, m256 + /// VMOVDQA ymm1, m256 + /// VMOVDQA64 ymm1 {k1}{z}, m256 /// public static unsafe Vector256 LoadAlignedVector256(ulong* address) => LoadAlignedVector256(address); /// /// __m256 _mm256_load_ps (float const * mem_addr) - /// VMOVAPS ymm, ymm/m256 + /// VMOVAPS ymm1, m256 + /// VMOVAPS ymm1 {k1}{z}, m256 /// public static unsafe Vector256 LoadAlignedVector256(float* address) => LoadAlignedVector256(address); /// /// __m256d _mm256_load_pd (double const * mem_addr) - /// VMOVAPD ymm, ymm/m256 + /// VMOVAPD ymm1, m256 + /// VMOVAPD ymm1 {k1}{z}, m256 /// public static 
unsafe Vector256 LoadAlignedVector256(double* address) => LoadAlignedVector256(address); /// /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr) - /// VLDDQU ymm, m256 + /// VLDDQU ymm1, m256 /// public static unsafe Vector256 LoadDquVector256(sbyte* address) => LoadDquVector256(address); /// /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr) - /// VLDDQU ymm, m256 + /// VLDDQU ymm1, m256 /// public static unsafe Vector256 LoadDquVector256(byte* address) => LoadDquVector256(address); /// /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr) - /// VLDDQU ymm, m256 + /// VLDDQU ymm1, m256 /// public static unsafe Vector256 LoadDquVector256(short* address) => LoadDquVector256(address); /// /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr) - /// VLDDQU ymm, m256 + /// VLDDQU ymm1, m256 /// public static unsafe Vector256 LoadDquVector256(ushort* address) => LoadDquVector256(address); /// /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr) - /// VLDDQU ymm, m256 + /// VLDDQU ymm1, m256 /// public static unsafe Vector256 LoadDquVector256(int* address) => LoadDquVector256(address); /// /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr) - /// VLDDQU ymm, m256 + /// VLDDQU ymm1, m256 /// public static unsafe Vector256 LoadDquVector256(uint* address) => LoadDquVector256(address); /// /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr) - /// VLDDQU ymm, m256 + /// VLDDQU ymm1, m256 /// public static unsafe Vector256 LoadDquVector256(long* address) => LoadDquVector256(address); /// /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr) - /// VLDDQU ymm, m256 + /// VLDDQU ymm1, m256 /// public static unsafe Vector256 LoadDquVector256(ulong* address) => LoadDquVector256(address); + /// + /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) + /// VMOVDQU ymm1, m256 + /// VMOVDQU8 ymm1 {k1}{z}, m256 + /// + public static unsafe Vector256 LoadVector256(sbyte* address) => LoadVector256(address); + /// + /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) + /// VMOVDQU ymm1, m256 + /// VMOVDQU8 ymm1 {k1}{z}, m256 + /// + public static unsafe Vector256 LoadVector256(byte* address) => LoadVector256(address); + /// + /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) + /// VMOVDQU ymm1, m256 + /// VMOVDQU16 ymm1 {k1}{z}, m256 + /// + public static unsafe Vector256 LoadVector256(short* address) => LoadVector256(address); + /// + /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) + /// VMOVDQU ymm1, m256 + /// VMOVDQU16 ymm1 {k1}{z}, m256 + /// + public static unsafe Vector256 LoadVector256(ushort* address) => LoadVector256(address); + /// + /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) + /// VMOVDQU ymm1, m256 + /// VMOVDQU32 ymm1 {k1}{z}, m256 + /// + public static unsafe Vector256 LoadVector256(int* address) => LoadVector256(address); + /// + /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) + /// VMOVDQU ymm1, m256 + /// VMOVDQU32 ymm1 {k1}{z}, m256 + /// + public static unsafe Vector256 LoadVector256(uint* address) => LoadVector256(address); + /// + /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) + /// VMOVDQU ymm1, m256 + /// VMOVDQU64 ymm1 {k1}{z}, m256 + /// + public static unsafe Vector256 LoadVector256(long* address) => LoadVector256(address); + /// + /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr) + /// VMOVDQU ymm1, m256 + /// VMOVDQU64 ymm1 {k1}{z}, m256 + /// + public static unsafe Vector256 LoadVector256(ulong* address) => LoadVector256(address); + /// + /// __m256 _mm256_loadu_ps (float 
const * mem_addr) + /// VMOVUPS ymm1, m256 + /// VMOVUPS ymm1 {k1}{z}, m256 + /// + public static unsafe Vector256 LoadVector256(float* address) => LoadVector256(address); + /// + /// __m256d _mm256_loadu_pd (double const * mem_addr) + /// VMOVUPD ymm1, m256 + /// VMOVUPD ymm1 {k1}{z}, m256 + /// + public static unsafe Vector256 LoadVector256(double* address) => LoadVector256(address); + /// /// __m128 _mm_maskload_ps (float const * mem_addr, __m128i mask) - /// VMASKMOVPS xmm, xmm, m128 + /// VMASKMOVPS xmm1, xmm2, m128 /// public static unsafe Vector128 MaskLoad(float* address, Vector128 mask) => MaskLoad(address, mask); /// /// __m128d _mm_maskload_pd (double const * mem_addr, __m128i mask) - /// VMASKMOVPD xmm, xmm, m128 + /// VMASKMOVPD xmm1, xmm2, m128 /// public static unsafe Vector128 MaskLoad(double* address, Vector128 mask) => MaskLoad(address, mask); - /// /// __m256 _mm256_maskload_ps (float const * mem_addr, __m256i mask) - /// VMASKMOVPS ymm, ymm, m256 + /// VMASKMOVPS ymm1, ymm2, m256 /// public static unsafe Vector256 MaskLoad(float* address, Vector256 mask) => MaskLoad(address, mask); /// /// __m256d _mm256_maskload_pd (double const * mem_addr, __m256i mask) - /// VMASKMOVPD ymm, ymm, m256 + /// VMASKMOVPD ymm1, ymm2, m256 /// public static unsafe Vector256 MaskLoad(double* address, Vector256 mask) => MaskLoad(address, mask); /// /// void _mm_maskstore_ps (float * mem_addr, __m128i mask, __m128 a) - /// VMASKMOVPS m128, xmm, xmm + /// VMASKMOVPS m128, xmm1, xmm2 /// public static unsafe void MaskStore(float* address, Vector128 mask, Vector128 source) => MaskStore(address, mask, source); /// /// void _mm_maskstore_pd (double * mem_addr, __m128i mask, __m128d a) - /// VMASKMOVPD m128, xmm, xmm + /// VMASKMOVPD m128, xmm1, xmm2 /// public static unsafe void MaskStore(double* address, Vector128 mask, Vector128 source) => MaskStore(address, mask, source); - /// /// void _mm256_maskstore_ps (float * mem_addr, __m256i mask, __m256 a) - /// VMASKMOVPS m256, ymm, ymm + /// VMASKMOVPS m256, ymm1, ymm2 /// public static unsafe void MaskStore(float* address, Vector256 mask, Vector256 source) => MaskStore(address, mask, source); /// /// void _mm256_maskstore_pd (double * mem_addr, __m256i mask, __m256d a) - /// VMASKMOVPD m256, ymm, ymm + /// VMASKMOVPD m256, ymm1, ymm2 /// public static unsafe void MaskStore(double* address, Vector256 mask, Vector256 source) => MaskStore(address, mask, source); /// /// __m256 _mm256_max_ps (__m256 a, __m256 b) - /// VMAXPS ymm, ymm, ymm/m256 + /// VMAXPS ymm1, ymm2, ymm3/m256 + /// VMAXPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Max(Vector256 left, Vector256 right) => Max(left, right); /// /// __m256d _mm256_max_pd (__m256d a, __m256d b) - /// VMAXPD ymm, ymm, ymm/m256 + /// VMAXPD ymm1, ymm2, ymm3/m256 + /// VMAXPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Max(Vector256 left, Vector256 right) => Max(left, right); /// /// __m256 _mm256_min_ps (__m256 a, __m256 b) - /// VMINPS ymm, ymm, ymm/m256 + /// VMINPS ymm1, ymm2, ymm3/m256 + /// VMINPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Min(Vector256 left, Vector256 right) => Min(left, right); /// /// __m256d _mm256_min_pd (__m256d a, __m256d b) - /// VMINPD ymm, ymm, ymm/m256 + /// VMINPD ymm1, ymm2, ymm3/m256 + /// VMINPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Min(Vector256 left, Vector256 right) => Min(left, right); /// /// int _mm256_movemask_ps (__m256 a) - /// VMOVMSKPS reg, ymm + /// VMOVMSKPS r32, 
ymm1 /// public static int MoveMask(Vector256 value) => MoveMask(value); /// /// int _mm256_movemask_pd (__m256d a) - /// VMOVMSKPD reg, ymm + /// VMOVMSKPD r32, ymm1 /// public static int MoveMask(Vector256 value) => MoveMask(value); /// /// __m256 _mm256_mul_ps (__m256 a, __m256 b) - /// VMULPS ymm, ymm, ymm/m256 + /// VMULPS ymm1, ymm2, ymm3/m256 + /// VMULPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Multiply(Vector256 left, Vector256 right) => Multiply(left, right); /// /// __m256d _mm256_mul_pd (__m256d a, __m256d b) - /// VMULPD ymm, ymm, ymm/m256 + /// VMULPD ymm1, ymm2, ymm3/m256 + /// VMULPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Multiply(Vector256 left, Vector256 right) => Multiply(left, right); /// /// __m256 _mm256_or_ps (__m256 a, __m256 b) - /// VORPS ymm, ymm, ymm/m256 + /// VORPS ymm1, ymm2, ymm3/m256 + /// VORPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Or(Vector256 left, Vector256 right) => Or(left, right); /// /// __m256d _mm256_or_pd (__m256d a, __m256d b) - /// VORPD ymm, ymm, ymm/m256 + /// VORPD ymm1, ymm2, ymm3/m256 + /// VORPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Or(Vector256 left, Vector256 right) => Or(left, right); /// /// __m128 _mm_permute_ps (__m128 a, int imm8) - /// VPERMILPS xmm, xmm, imm8 + /// VPERMILPS xmm1, xmm2/m128, imm8 + /// VPERMILPS xmm1 {k1}{z}, xmm2/m128/m32bcst, imm8 /// public static Vector128 Permute(Vector128 value, [ConstantExpected] byte control) => Permute(value, control); /// /// __m128d _mm_permute_pd (__m128d a, int imm8) - /// VPERMILPD xmm, xmm, imm8 + /// VPERMILPD xmm1, xmm2/m128, imm8 + /// VPERMILPD xmm1 {k1}{z}, xmm2/m128/m64bcst, imm8 /// public static Vector128 Permute(Vector128 value, [ConstantExpected] byte control) => Permute(value, control); - /// /// __m256 _mm256_permute_ps (__m256 a, int imm8) - /// VPERMILPS ymm, ymm, imm8 + /// VPERMILPS ymm1, ymm2/m256, imm8 + /// VPERMILPS ymm1 {k1}{z}, ymm2/m256/m32bcst, imm8 /// public static Vector256 Permute(Vector256 value, [ConstantExpected] byte control) => Permute(value, control); /// /// __m256d _mm256_permute_pd (__m256d a, int imm8) - /// VPERMILPD ymm, ymm, imm8 + /// VPERMILPD ymm1, ymm2/m256, imm8 + /// VPERMILPD ymm1 {k1}{z}, ymm2/m256/m64bcst, imm8 /// public static Vector256 Permute(Vector256 value, [ConstantExpected] byte control) => Permute(value, control); /// /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8) - /// VPERM2F128 ymm, ymm, ymm/m256, imm8 + /// VPERM2F128 ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Permute2x128(left, right, control); - /// /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8) - /// VPERM2F128 ymm, ymm, ymm/m256, imm8 + /// VPERM2F128 ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Permute2x128(left, right, control); - /// /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8) - /// VPERM2F128 ymm, ymm, ymm/m256, imm8 + /// VPERM2F128 ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Permute2x128(left, right, control); - /// /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8) - /// VPERM2F128 ymm, ymm, ymm/m256, imm8 + /// VPERM2F128 ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 
Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Permute2x128(left, right, control); - /// /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8) - /// VPERM2F128 ymm, ymm, ymm/m256, imm8 + /// VPERM2F128 ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Permute2x128(left, right, control); - /// /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8) - /// VPERM2F128 ymm, ymm, ymm/m256, imm8 + /// VPERM2F128 ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Permute2x128(left, right, control); - /// /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8) - /// VPERM2F128 ymm, ymm, ymm/m256, imm8 + /// VPERM2F128 ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Permute2x128(left, right, control); - /// /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8) - /// VPERM2F128 ymm, ymm, ymm/m256, imm8 + /// VPERM2F128 ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Permute2x128(left, right, control); /// /// __m256 _mm256_permute2f128_ps (__m256 a, __m256 b, int imm8) - /// VPERM2F128 ymm, ymm, ymm/m256, imm8 + /// VPERM2F128 ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Permute2x128(left, right, control); - /// /// __m256d _mm256_permute2f128_pd (__m256d a, __m256d b, int imm8) - /// VPERM2F128 ymm, ymm, ymm/m256, imm8 + /// VPERM2F128 ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Permute2x128(left, right, control); /// /// __m128 _mm_permutevar_ps (__m128 a, __m128i b) - /// VPERMILPS xmm, xmm, xmm/m128 + /// VPERMILPS xmm1, xmm2, xmm3/m128 + /// VPERMILPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 PermuteVar(Vector128 left, Vector128 control) => PermuteVar(left, control); /// /// __m128d _mm_permutevar_pd (__m128d a, __m128i b) - /// VPERMILPD xmm, xmm, xmm/m128 + /// VPERMILPD xmm1, xmm2, xmm3/m128 + /// VPERMILPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 PermuteVar(Vector128 left, Vector128 control) => PermuteVar(left, control); /// /// __m256 _mm256_permutevar_ps (__m256 a, __m256i b) - /// VPERMILPS ymm, ymm, ymm/m256 + /// VPERMILPS ymm1, ymm2, ymm3/m256 + /// VPERMILPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 PermuteVar(Vector256 left, Vector256 control) => PermuteVar(left, control); /// /// __m256d _mm256_permutevar_pd (__m256d a, __m256i b) - /// VPERMILPD ymm, ymm, ymm/m256 + /// VPERMILPD ymm1, ymm2, ymm3/m256 + /// VPERMILPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 PermuteVar(Vector256 left, Vector256 control) => PermuteVar(left, control); /// /// __m256 _mm256_rcp_ps (__m256 a) - /// VRCPPS ymm, ymm/m256 + /// VRCPPS ymm1, ymm2/m256 /// public static Vector256 Reciprocal(Vector256 value) => Reciprocal(value); /// /// __m256 _mm256_rsqrt_ps (__m256 a) - /// VRSQRTPS ymm, ymm/m256 + /// VRSQRTPS ymm1, ymm2/m256 /// public static Vector256 ReciprocalSqrt(Vector256 value) => ReciprocalSqrt(value); /// - /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_NEAREST_INT | 
_MM_FROUND_NO_EXC) - /// VROUNDPS ymm, ymm/m256, imm8(8) - /// - public static Vector256 RoundToNearestInteger(Vector256 value) => RoundToNearestInteger(value); - /// - /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC) - /// VROUNDPS ymm, ymm/m256, imm8(9) + /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_CUR_DIRECTION) + /// VROUNDPS ymm1, ymm2/m256, imm8(4) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector256 RoundToNegativeInfinity(Vector256 value) => RoundToNegativeInfinity(value); + public static Vector256 RoundCurrentDirection(Vector256 value) => RoundCurrentDirection(value); /// - /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC) - /// VROUNDPS ymm, ymm/m256, imm8(10) + /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_CUR_DIRECTION) + /// VROUNDPD ymm1, ymm2/m256, imm8(4) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector256 RoundToPositiveInfinity(Vector256 value) => RoundToPositiveInfinity(value); + public static Vector256 RoundCurrentDirection(Vector256 value) => RoundCurrentDirection(value); + /// - /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) - /// VROUNDPS ymm, ymm/m256, imm8(11) + /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) + /// VROUNDPS ymm1, ymm2/m256, imm8(8) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector256 RoundToZero(Vector256 value) => RoundToZero(value); + public static Vector256 RoundToNearestInteger(Vector256 value) => RoundToNearestInteger(value); /// - /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_CUR_DIRECTION) - /// VROUNDPS ymm, ymm/m256, imm8(4) + /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) + /// VROUNDPD ymm1, ymm2/m256, imm8(8) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector256 RoundCurrentDirection(Vector256 value) => RoundCurrentDirection(value); + public static Vector256 RoundToNearestInteger(Vector256 value) => RoundToNearestInteger(value); /// - /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) - /// VROUNDPD ymm, ymm/m256, imm8(8) + /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC) + /// VROUNDPS ymm1, ymm2/m256, imm8(9) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector256 RoundToNearestInteger(Vector256 value) => RoundToNearestInteger(value); + public static Vector256 RoundToNegativeInfinity(Vector256 value) => RoundToNegativeInfinity(value); /// /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC) - /// VROUNDPD ymm, ymm/m256, imm8(9) + /// VROUNDPD ymm1, ymm2/m256, imm8(9) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// public static Vector256 RoundToNegativeInfinity(Vector256 value) => RoundToNegativeInfinity(value);
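As a quick reference for the rounding overloads above, a minimal sketch of how each helper maps onto a VROUNDPS immediate (illustrative values only, assuming AVX support):

using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

Vector256<float> v = Vector256.Create(1.5f, -1.5f, 2.5f, -2.5f, 0.5f, -0.5f, 1.1f, -1.1f);
Vector256<float> nearest = Avx.RoundToNearestInteger(v);   // imm8(8):  1.5f -> 2f, 2.5f -> 2f (ties to even)
Vector256<float> down    = Avx.RoundToNegativeInfinity(v); // imm8(9):  1.5f -> 1f, -1.5f -> -2f
Vector256<float> up      = Avx.RoundToPositiveInfinity(v); // imm8(10): 1.5f -> 2f, -1.5f -> -1f
Vector256<float> zero    = Avx.RoundToZero(v);             // imm8(11): 1.5f -> 1f, -1.5f -> -1f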
+ + /// + /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC) + /// VROUNDPS ymm1, ymm2/m256, imm8(10) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. + /// + public static Vector256 RoundToPositiveInfinity(Vector256 value) => RoundToPositiveInfinity(value); /// /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC) - /// VROUNDPD ymm, ymm/m256, imm8(10) + /// VROUNDPD ymm1, ymm2/m256, imm8(10) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// public static Vector256 RoundToPositiveInfinity(Vector256 value) => RoundToPositiveInfinity(value); + /// - /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) - /// VROUNDPD ymm, ymm/m256, imm8(11) + /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) + /// VROUNDPS ymm1, ymm2/m256, imm8(11) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector256 RoundToZero(Vector256 value) => RoundToZero(value); + public static Vector256 RoundToZero(Vector256 value) => RoundToZero(value); /// - /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_CUR_DIRECTION) - /// VROUNDPD ymm, ymm/m256, imm8(4) + /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) + /// VROUNDPD ymm1, ymm2/m256, imm8(11) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
/// - public static Vector256 RoundCurrentDirection(Vector256 value) => RoundCurrentDirection(value); + public static Vector256 RoundToZero(Vector256 value) => RoundToZero(value); /// /// __m256 _mm256_shuffle_ps (__m256 a, __m256 b, const int imm8) - /// VSHUFPS ymm, ymm, ymm/m256, imm8 + /// VSHUFPS ymm1, ymm2, ymm3/m256, imm8 + /// VSHUFPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst, imm8 /// public static Vector256 Shuffle(Vector256 value, Vector256 right, [ConstantExpected] byte control) => Shuffle(value, right, control); /// /// __m256d _mm256_shuffle_pd (__m256d a, __m256d b, const int imm8) - /// VSHUFPD ymm, ymm, ymm/m256, imm8 + /// VSHUFPD ymm1, ymm2, ymm3/m256, imm8 + /// VSHUFPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst, imm8 /// public static Vector256 Shuffle(Vector256 value, Vector256 right, [ConstantExpected] byte control) => Shuffle(value, right, control); /// /// __m256 _mm256_sqrt_ps (__m256 a) - /// VSQRTPS ymm, ymm/m256 + /// VSQRTPS ymm1, ymm2/m256 + /// VSQRTPS ymm1 {k1}{z}, ymm2/m256/m32bcst /// public static Vector256 Sqrt(Vector256 value) => Sqrt(value); /// /// __m256d _mm256_sqrt_pd (__m256d a) - /// VSQRTPD ymm, ymm/m256 + /// VSQRTPD ymm1, ymm2/m256 + /// VSQRTPD ymm1 {k1}{z}, ymm2/m256/m64bcst /// public static Vector256 Sqrt(Vector256 value) => Sqrt(value); + /// + /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) + /// VMOVDQU m256, ymm1 + /// VMOVDQU8 m256 {k1}{z}, ymm1 + /// + public static unsafe void Store(sbyte* address, Vector256 source) => Store(address, source); + /// + /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) + /// VMOVDQU m256, ymm1 + /// VMOVDQU8 m256 {k1}{z}, ymm1 + /// + public static unsafe void Store(byte* address, Vector256 source) => Store(address, source); + /// + /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) + /// VMOVDQU m256, ymm1 + /// VMOVDQU16 m256 {k1}{z}, ymm1 + /// + public static unsafe void Store(short* address, Vector256 source) => Store(address, source); + /// + /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) + /// VMOVDQU m256, ymm1 + /// VMOVDQU16 m256 {k1}{z}, ymm1 + /// + public static unsafe void Store(ushort* address, Vector256 source) => Store(address, source); + /// + /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) + /// VMOVDQU m256, ymm1 + /// VMOVDQU32 m256 {k1}{z}, ymm1 + /// + public static unsafe void Store(int* address, Vector256 source) => Store(address, source); + /// + /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) + /// VMOVDQU m256, ymm1 + /// VMOVDQU32 m256 {k1}{z}, ymm1 + /// + public static unsafe void Store(uint* address, Vector256 source) => Store(address, source); + /// + /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) + /// VMOVDQU m256, ymm1 + /// VMOVDQU64 m256 {k1}{z}, ymm1 + /// + public static unsafe void Store(long* address, Vector256 source) => Store(address, source); + /// + /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) + /// VMOVDQU m256, ymm1 + /// VMOVDQU64 m256 {k1}{z}, ymm1 + /// + public static unsafe void Store(ulong* address, Vector256 source) => Store(address, source); + /// + /// void _mm256_storeu_ps (float * mem_addr, __m256 a) + /// VMOVUPS m256, ymm1 + /// VMOVUPS m256 {k1}{z}, ymm1 + /// + public static unsafe void Store(float* address, Vector256 source) => Store(address, source); + /// + /// void _mm256_storeu_pd (double * mem_addr, __m256d a) + /// VMOVUPD m256, ymm1 + /// VMOVUPD m256 {k1}{z}, ymm1 + /// + public static unsafe void Store(double* address, Vector256 source) 
=> Store(address, source); + /// /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQA m256, ymm + /// VMOVDQA m256, ymm1 + /// VMOVDQA32 m256 {k1}{z}, ymm1 /// public static unsafe void StoreAligned(sbyte* address, Vector256 source) => StoreAligned(address, source); /// /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQA m256, ymm + /// VMOVDQA m256, ymm1 + /// VMOVDQA32 m256 {k1}{z}, ymm1 /// public static unsafe void StoreAligned(byte* address, Vector256 source) => StoreAligned(address, source); /// /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQA m256, ymm + /// VMOVDQA m256, ymm1 + /// VMOVDQA32 m256 {k1}{z}, ymm1 /// public static unsafe void StoreAligned(short* address, Vector256 source) => StoreAligned(address, source); /// /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQA m256, ymm + /// VMOVDQA m256, ymm1 + /// VMOVDQA32 m256 {k1}{z}, ymm1 /// public static unsafe void StoreAligned(ushort* address, Vector256 source) => StoreAligned(address, source); /// /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQA m256, ymm + /// VMOVDQA m256, ymm1 + /// VMOVDQA32 m256 {k1}{z}, ymm1 /// public static unsafe void StoreAligned(int* address, Vector256 source) => StoreAligned(address, source); /// /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQA m256, ymm + /// VMOVDQA m256, ymm1 + /// VMOVDQA32 m256 {k1}{z}, ymm1 /// public static unsafe void StoreAligned(uint* address, Vector256 source) => StoreAligned(address, source); /// /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQA m256, ymm + /// VMOVDQA m256, ymm1 + /// VMOVDQA64 m256 {k1}{z}, ymm1 /// public static unsafe void StoreAligned(long* address, Vector256 source) => StoreAligned(address, source); /// /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQA m256, ymm + /// VMOVDQA m256, ymm1 + /// VMOVDQA64 m256 {k1}{z}, ymm1 /// public static unsafe void StoreAligned(ulong* address, Vector256 source) => StoreAligned(address, source); /// /// void _mm256_store_ps (float * mem_addr, __m256 a) - /// VMOVAPS m256, ymm + /// VMOVAPS m256, ymm1 + /// VMOVAPS m256 {k1}{z}, ymm1 /// public static unsafe void StoreAligned(float* address, Vector256 source) => StoreAligned(address, source); /// /// void _mm256_store_pd (double * mem_addr, __m256d a) - /// VMOVAPD m256, ymm + /// VMOVAPD m256, ymm1 + /// VMOVAPD m256 {k1}{z}, ymm1 /// public static unsafe void StoreAligned(double* address, Vector256 source) => StoreAligned(address, source); /// /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a) - /// VMOVNTDQ m256, ymm + /// VMOVNTDQ m256, ymm1 /// public static unsafe void StoreAlignedNonTemporal(sbyte* address, Vector256 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a) - /// VMOVNTDQ m256, ymm + /// VMOVNTDQ m256, ymm1 /// public static unsafe void StoreAlignedNonTemporal(byte* address, Vector256 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a) - /// VMOVNTDQ m256, ymm + /// VMOVNTDQ m256, ymm1 /// public static unsafe void StoreAlignedNonTemporal(short* address, Vector256 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a) - /// VMOVNTDQ m256, ymm + /// VMOVNTDQ m256, ymm1 /// public static unsafe void StoreAlignedNonTemporal(ushort* address, Vector256 source) => 
StoreAlignedNonTemporal(address, source); /// /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a) - /// VMOVNTDQ m256, ymm + /// VMOVNTDQ m256, ymm1 /// public static unsafe void StoreAlignedNonTemporal(int* address, Vector256 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a) - /// VMOVNTDQ m256, ymm + /// VMOVNTDQ m256, ymm1 /// public static unsafe void StoreAlignedNonTemporal(uint* address, Vector256 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a) - /// VMOVNTDQ m256, ymm + /// VMOVNTDQ m256, ymm1 /// public static unsafe void StoreAlignedNonTemporal(long* address, Vector256 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a) - /// VMOVNTDQ m256, ymm + /// VMOVNTDQ m256, ymm1 /// public static unsafe void StoreAlignedNonTemporal(ulong* address, Vector256 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm256_stream_ps (float * mem_addr, __m256 a) - /// MOVNTPS m256, ymm + /// VMOVNTPS m256, ymm1 /// public static unsafe void StoreAlignedNonTemporal(float* address, Vector256 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm256_stream_pd (double * mem_addr, __m256d a) - /// MOVNTPD m256, ymm + /// VMOVNTPD m256, ymm1 /// public static unsafe void StoreAlignedNonTemporal(double* address, Vector256 source) => StoreAlignedNonTemporal(address, source); - /// - /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQU m256, ymm - /// - public static unsafe void Store(sbyte* address, Vector256 source) => Store(address, source); - /// - /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQU m256, ymm - /// - public static unsafe void Store(byte* address, Vector256 source) => Store(address, source); - /// - /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQU m256, ymm - /// - public static unsafe void Store(short* address, Vector256 source) => Store(address, source); - /// - /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQU m256, ymm - /// - public static unsafe void Store(ushort* address, Vector256 source) => Store(address, source); - /// - /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQU m256, ymm - /// - public static unsafe void Store(int* address, Vector256 source) => Store(address, source); - /// - /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQU m256, ymm - /// - public static unsafe void Store(uint* address, Vector256 source) => Store(address, source); - /// - /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQU m256, ymm - /// - public static unsafe void Store(long* address, Vector256 source) => Store(address, source); - /// - /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) - /// MOVDQU m256, ymm - /// - public static unsafe void Store(ulong* address, Vector256 source) => Store(address, source); - /// - /// void _mm256_storeu_ps (float * mem_addr, __m256 a) - /// MOVUPS m256, ymm - /// - public static unsafe void Store(float* address, Vector256 source) => Store(address, source); - /// - /// void _mm256_storeu_pd (double * mem_addr, __m256d a) - /// MOVUPD m256, ymm - /// - public static unsafe void Store(double* address, Vector256 source) => Store(address, source); - /// /// __m256 _mm256_sub_ps (__m256 a, __m256 b) - /// VSUBPS ymm, ymm, ymm/m256 + /// VSUBPS ymm1, ymm2, 
ymm3/m256 + /// VSUBPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Subtract(Vector256 left, Vector256 right) => Subtract(left, right); /// /// __m256d _mm256_sub_pd (__m256d a, __m256d b) - /// VSUBPD ymm, ymm, ymm/m256 + /// VSUBPD ymm1, ymm2, ymm3/m256 + /// VSUBPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Subtract(Vector256 left, Vector256 right) => Subtract(left, right); /// /// int _mm_testc_ps (__m128 a, __m128 b) - /// VTESTPS xmm, xmm/m128 + /// VTESTPS xmm1, xmm2/m128 ; CF=1 /// public static bool TestC(Vector128 left, Vector128 right) => TestC(left, right); /// /// int _mm_testc_pd (__m128d a, __m128d b) - /// VTESTPD xmm, xmm/m128 + /// VTESTPD xmm1, xmm2/m128 ; CF=1 /// public static bool TestC(Vector128 left, Vector128 right) => TestC(left, right); - /// /// int _mm256_testc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; CF=1 /// public static bool TestC(Vector256 left, Vector256 right) => TestC(left, right); - /// /// int _mm256_testc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; CF=1 /// public static bool TestC(Vector256 left, Vector256 right) => TestC(left, right); - /// /// int _mm256_testc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; CF=1 /// public static bool TestC(Vector256 left, Vector256 right) => TestC(left, right); - /// /// int _mm256_testc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; CF=1 /// public static bool TestC(Vector256 left, Vector256 right) => TestC(left, right); - /// /// int _mm256_testc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; CF=1 /// public static bool TestC(Vector256 left, Vector256 right) => TestC(left, right); - /// /// int _mm256_testc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; CF=1 /// public static bool TestC(Vector256 left, Vector256 right) => TestC(left, right); - /// /// int _mm256_testc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; CF=1 /// public static bool TestC(Vector256 left, Vector256 right) => TestC(left, right); - /// /// int _mm256_testc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; CF=1 /// public static bool TestC(Vector256 left, Vector256 right) => TestC(left, right); - /// /// int _mm256_testc_ps (__m256 a, __m256 b) - /// VTESTPS ymm, ymm/m256 + /// VTESTPS ymm1, ymm2/m256 ; CF=1 /// public static bool TestC(Vector256 left, Vector256 right) => TestC(left, right); - /// /// int _mm256_testc_pd (__m256d a, __m256d b) - /// VTESTPS ymm, ymm/m256 + /// VTESTPD ymm1, ymm2/m256 ; CF=1 /// public static bool TestC(Vector256 left, Vector256 right) => TestC(left, right); /// /// int _mm_testnzc_ps (__m128 a, __m128 b) - /// VTESTPS xmm, xmm/m128 + /// VTESTPS xmm1, xmm2/m128 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector128 left, Vector128 right) => TestNotZAndNotC(left, right); /// /// int _mm_testnzc_pd (__m128d a, __m128d b) - /// VTESTPD xmm, xmm/m128 + /// VTESTPD xmm1, xmm2/m128 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector128 left, Vector128 right) => TestNotZAndNotC(left, right); - /// /// int _mm256_testnzc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector256 left, Vector256 right) => TestNotZAndNotC(left, right);
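The `; CF=1`, `; ZF=1`, and `; ZF=0 && CF=0` annotations above record which EFLAGS output each Test* helper surfaces. A minimal sketch of the resulting subset/intersection idioms (illustrative values only, assuming AVX support):

using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

Vector256<int> value = Vector256.Create(-1);                  // all bits set
Vector256<int> mask  = Vector256.Create(-1, 0, -1, 0, -1, 0, -1, 0);

bool containsMask = Avx.TestC(value, mask);           // CF=1: (~value & mask) == 0, i.e. every bit of mask is set in value
bool disjoint     = Avx.TestZ(value, mask);           // ZF=1: (value & mask) == 0, i.e. no bits in common
bool partial      = Avx.TestNotZAndNotC(value, mask); // ZF=0 && CF=0: some overlap, but mask is not fully contained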
- /// /// int _mm256_testnzc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector256 left, Vector256 right) => TestNotZAndNotC(left, right); - /// /// int _mm256_testnzc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector256 left, Vector256 right) => TestNotZAndNotC(left, right); - /// /// int _mm256_testnzc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector256 left, Vector256 right) => TestNotZAndNotC(left, right); - /// /// int _mm256_testnzc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector256 left, Vector256 right) => TestNotZAndNotC(left, right); - /// /// int _mm256_testnzc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector256 left, Vector256 right) => TestNotZAndNotC(left, right); - /// /// int _mm256_testnzc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector256 left, Vector256 right) => TestNotZAndNotC(left, right); - /// /// int _mm256_testnzc_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector256 left, Vector256 right) => TestNotZAndNotC(left, right); - /// /// int _mm256_testnzc_ps (__m256 a, __m256 b) - /// VTESTPS ymm, ymm/m256 + /// VTESTPS ymm1, ymm2/m256 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector256 left, Vector256 right) => TestNotZAndNotC(left, right); - /// /// int _mm256_testnzc_pd (__m256d a, __m256d b) - /// VTESTPD ymm, ymm/m256 + /// VTESTPD ymm1, ymm2/m256 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector256 left, Vector256 right) => TestNotZAndNotC(left, right); /// /// int _mm_testz_ps (__m128 a, __m128 b) - /// VTESTPS xmm, xmm/m128 + /// VTESTPS xmm1, xmm2/m128 ; ZF=1 /// public static bool TestZ(Vector128 left, Vector128 right) => TestZ(left, right); /// /// int _mm_testz_pd (__m128d a, __m128d b) - /// VTESTPD xmm, xmm/m128 + /// VTESTPD xmm1, xmm2/m128 ; ZF=1 /// public static bool TestZ(Vector128 left, Vector128 right) => TestZ(left, right); - /// /// int _mm256_testz_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=1 /// public static bool TestZ(Vector256 left, Vector256 right) => TestZ(left, right); - /// /// int _mm256_testz_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=1 /// public static bool TestZ(Vector256 left, Vector256 right) => TestZ(left, right); - /// /// int _mm256_testz_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=1 /// public static bool TestZ(Vector256 left, Vector256 right) => TestZ(left, right); - /// /// int _mm256_testz_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=1 /// public static bool TestZ(Vector256 left, Vector256 right) => TestZ(left, right); - /// /// int _mm256_testz_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=1 /// public static bool TestZ(Vector256 left, Vector256 right) => TestZ(left, right); - /// /// int _mm256_testz_si256 (__m256i a,
__m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=1 /// public static bool TestZ(Vector256 left, Vector256 right) => TestZ(left, right); - /// /// int _mm256_testz_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=1 /// public static bool TestZ(Vector256 left, Vector256 right) => TestZ(left, right); - /// /// int _mm256_testz_si256 (__m256i a, __m256i b) - /// VPTEST ymm, ymm/m256 + /// VPTEST ymm1, ymm2/m256 ; ZF=1 /// public static bool TestZ(Vector256 left, Vector256 right) => TestZ(left, right); - /// /// int _mm256_testz_ps (__m256 a, __m256 b) - /// VTESTPS ymm, ymm/m256 + /// VTESTPS ymm1, ymm2/m256 ; ZF=1 /// public static bool TestZ(Vector256 left, Vector256 right) => TestZ(left, right); - /// /// int _mm256_testz_pd (__m256d a, __m256d b) - /// VTESTPD ymm, ymm/m256 + /// VTESTPD ymm1, ymm2/m256 ; ZF=1 /// public static bool TestZ(Vector256 left, Vector256 right) => TestZ(left, right); /// /// __m256 _mm256_unpackhi_ps (__m256 a, __m256 b) - /// VUNPCKHPS ymm, ymm, ymm/m256 + /// VUNPCKHPS ymm1, ymm2, ymm3/m256 + /// VUNPCKHPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 UnpackHigh(Vector256 left, Vector256 right) => UnpackHigh(left, right); /// /// __m256d _mm256_unpackhi_pd (__m256d a, __m256d b) - /// VUNPCKHPD ymm, ymm, ymm/m256 + /// VUNPCKHPD ymm1, ymm2, ymm3/m256 + /// VUNPCKHPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 UnpackHigh(Vector256 left, Vector256 right) => UnpackHigh(left, right); /// /// __m256 _mm256_unpacklo_ps (__m256 a, __m256 b) - /// VUNPCKLPS ymm, ymm, ymm/m256 + /// VUNPCKLPS ymm1, ymm2, ymm3/m256 + /// VUNPCKLPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 UnpackLow(Vector256 left, Vector256 right) => UnpackLow(left, right); /// /// __m256d _mm256_unpacklo_pd (__m256d a, __m256d b) - /// VUNPCKLPD ymm, ymm, ymm/m256 + /// VUNPCKLPD ymm1, ymm2, ymm3/m256 + /// VUNPCKLPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 UnpackLow(Vector256 left, Vector256 right) => UnpackLow(left, right); /// /// __m256 _mm256_xor_ps (__m256 a, __m256 b) - /// VXORPS ymm, ymm, ymm/m256 + /// VXORPS ymm1, ymm2, ymm3/m256 + /// VXORPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Xor(Vector256 left, Vector256 right) => Xor(left, right); /// /// __m256d _mm256_xor_pd (__m256d a, __m256d b) - /// VXORPS ymm, ymm, ymm/m256 + /// VXORPD ymm1, ymm2, ymm3/m256 + /// VXORPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Xor(Vector256 left, Vector256 right) => Xor(left, right); } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.PlatformNotSupported.cs index 9e8a0f8e017166..f85e14928be0e0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.PlatformNotSupported.cs @@ -27,2171 +27,2368 @@ internal X64() { } /// /// __m256i _mm256_abs_epi8 (__m256i a) - /// VPABSB ymm, ymm/m256 + /// VPABSB ymm1, ymm2/m256 + /// VPABSB ymm1 {k1}{z}, ymm2/m256 /// public static Vector256 Abs(Vector256 value) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_abs_epi16 (__m256i a) - /// VPABSW ymm, ymm/m256 + /// VPABSW ymm1, ymm2/m256 + /// VPABSW ymm1 {k1}{z}, ymm2/m256 /// public static Vector256 Abs(Vector256 value) { 
throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_abs_epi32 (__m256i a) - /// VPABSD ymm, ymm/m256 + /// VPABSD ymm1, ymm2/m256 + /// VPABSD ymm1 {k1}{z}, ymm2/m256/m32bcst /// public static Vector256 Abs(Vector256 value) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_add_epi8 (__m256i a, __m256i b) - /// VPADDB ymm, ymm, ymm/m256 + /// VPADDB ymm1, ymm2, ymm3/m256 + /// VPADDB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Add(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_add_epi8 (__m256i a, __m256i b) - /// VPADDB ymm, ymm, ymm/m256 + /// VPADDB ymm1, ymm2, ymm3/m256 + /// VPADDB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Add(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_add_epi16 (__m256i a, __m256i b) - /// VPADDW ymm, ymm, ymm/m256 + /// VPADDW ymm1, ymm2, ymm3/m256 + /// VPADDW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Add(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_add_epi16 (__m256i a, __m256i b) - /// VPADDW ymm, ymm, ymm/m256 + /// VPADDW ymm1, ymm2, ymm3/m256 + /// VPADDW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Add(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_add_epi32 (__m256i a, __m256i b) - /// VPADDD ymm, ymm, ymm/m256 + /// VPADDD ymm1, ymm2, ymm3/m256 + /// VPADDD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Add(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_add_epi32 (__m256i a, __m256i b) - /// VPADDD ymm, ymm, ymm/m256 + /// VPADDD ymm1, ymm2, ymm3/m256 + /// VPADDD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Add(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_add_epi64 (__m256i a, __m256i b) - /// VPADDQ ymm, ymm, ymm/m256 + /// VPADDQ ymm1, ymm2, ymm3/m256 + /// VPADDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Add(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_add_epi64 (__m256i a, __m256i b) - /// VPADDQ ymm, ymm, ymm/m256 + /// VPADDQ ymm1, ymm2, ymm3/m256 + /// VPADDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Add(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_adds_epi8 (__m256i a, __m256i b) - /// VPADDSB ymm, ymm, ymm/m256 + /// VPADDSB ymm1, ymm2, ymm3/m256 + /// VPADDSB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 AddSaturate(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_adds_epu8 (__m256i a, __m256i b) - /// VPADDUSB ymm, ymm, ymm/m256 + /// VPADDUSB ymm1, ymm2, ymm3/m256 + /// VPADDUSB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 AddSaturate(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_adds_epi16 (__m256i a, __m256i b) - /// VPADDSW ymm, ymm, ymm/m256 + /// VPADDSW ymm1, ymm2, ymm3/m256 + /// VPADDSW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 AddSaturate(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_adds_epu16 (__m256i a, __m256i b) - /// VPADDUSW ymm, ymm, ymm/m256 + /// VPADDUSW ymm1, ymm2, ymm3/m256 + /// VPADDUSW 
ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 AddSaturate(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count) - /// VPALIGNR ymm, ymm, ymm/m256, imm8 + /// VPALIGNR ymm1, ymm2, ymm3/m256, imm8 + /// VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8 /// public static Vector256 AlignRight(Vector256 left, Vector256 right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count) - /// VPALIGNR ymm, ymm, ymm/m256, imm8 + /// VPALIGNR ymm1, ymm2, ymm3/m256, imm8 + /// VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8 /// public static Vector256 AlignRight(Vector256 left, Vector256 right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count) - /// VPALIGNR ymm, ymm, ymm/m256, imm8 + /// VPALIGNR ymm1, ymm2, ymm3/m256, imm8 + /// VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8 /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors. /// public static Vector256 AlignRight(Vector256 left, Vector256 right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count) - /// VPALIGNR ymm, ymm, ymm/m256, imm8 + /// VPALIGNR ymm1, ymm2, ymm3/m256, imm8 + /// VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8 /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors. /// public static Vector256 AlignRight(Vector256 left, Vector256 right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count) - /// VPALIGNR ymm, ymm, ymm/m256, imm8 + /// VPALIGNR ymm1, ymm2, ymm3/m256, imm8 + /// VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8 /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors. /// public static Vector256 AlignRight(Vector256 left, Vector256 right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count) - /// VPALIGNR ymm, ymm, ymm/m256, imm8 + /// VPALIGNR ymm1, ymm2, ymm3/m256, imm8 + /// VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8 /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors. /// public static Vector256 AlignRight(Vector256 left, Vector256 right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count) - /// VPALIGNR ymm, ymm, ymm/m256, imm8 + /// VPALIGNR ymm1, ymm2, ymm3/m256, imm8 + /// VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8 /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors. /// public static Vector256 AlignRight(Vector256 left, Vector256 right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count) - /// VPALIGNR ymm, ymm, ymm/m256, imm8 + /// VPALIGNR ymm1, ymm2, ymm3/m256, imm8 + /// VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8 /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors. 
/// public static Vector256 AlignRight(Vector256 left, Vector256 right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_and_si256 (__m256i a, __m256i b) - /// VPAND ymm, ymm, ymm/m256 + /// VPAND ymm1, ymm2, ymm3/m256 /// public static Vector256 And(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_and_si256 (__m256i a, __m256i b) - /// VPAND ymm, ymm, ymm/m256 + /// VPAND ymm1, ymm2, ymm3/m256 /// public static Vector256 And(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_and_si256 (__m256i a, __m256i b) - /// VPAND ymm, ymm, ymm/m256 + /// VPAND ymm1, ymm2, ymm3/m256 /// public static Vector256 And(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_and_si256 (__m256i a, __m256i b) - /// VPAND ymm, ymm, ymm/m256 + /// VPAND ymm1, ymm2, ymm3/m256 /// public static Vector256 And(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_and_si256 (__m256i a, __m256i b) - /// VPAND ymm, ymm, ymm/m256 + /// VPAND ymm1, ymm2, ymm3/m256 + /// VPANDD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 And(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_and_si256 (__m256i a, __m256i b) - /// VPAND ymm, ymm, ymm/m256 + /// VPAND ymm1, ymm2, ymm3/m256 + /// VPANDD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 And(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_and_si256 (__m256i a, __m256i b) - /// VPAND ymm, ymm, ymm/m256 + /// VPAND ymm1, ymm2, ymm3/m256 + /// VPANDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 And(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_and_si256 (__m256i a, __m256i b) - /// VPAND ymm, ymm, ymm/m256 + /// VPAND ymm1, ymm2, ymm3/m256 + /// VPANDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 And(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b) - /// VPANDN ymm, ymm, ymm/m256 + /// VPANDN ymm1, ymm2, ymm3/m256 /// public static Vector256 AndNot(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b) - /// VPANDN ymm, ymm, ymm/m256 + /// VPANDN ymm1, ymm2, ymm3/m256 /// public static Vector256 AndNot(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b) - /// VPANDN ymm, ymm, ymm/m256 + /// VPANDN ymm1, ymm2, ymm3/m256 /// public static Vector256 AndNot(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b) - /// VPANDN ymm, ymm, ymm/m256 + /// VPANDN ymm1, ymm2, ymm3/m256 /// public static Vector256 AndNot(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b) - /// VPANDN ymm, ymm, ymm/m256 + /// VPANDN ymm1, ymm2, ymm3/m256 + /// VPANDND ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 AndNot(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b) - /// VPANDN ymm, 
ymm, ymm/m256 + /// VPANDN ymm1, ymm2, ymm3/m256 + /// VPANDND ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 AndNot(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b) - /// VPANDN ymm, ymm, ymm/m256 + /// VPANDN ymm1, ymm2, ymm3/m256 + /// VPANDNQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 AndNot(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b) - /// VPANDN ymm, ymm, ymm/m256 + /// VPANDN ymm1, ymm2, ymm3/m256 + /// VPANDNQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 AndNot(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_avg_epu8 (__m256i a, __m256i b) - /// VPAVGB ymm, ymm, ymm/m256 + /// VPAVGB ymm1, ymm2, ymm3/m256 + /// VPAVGB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Average(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_avg_epu16 (__m256i a, __m256i b) - /// VPAVGW ymm, ymm, ymm/m256 + /// VPAVGW ymm1, ymm2, ymm3/m256 + /// VPAVGW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Average(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_blend_epi32 (__m128i a, __m128i b, const int imm8) - /// VPBLENDD xmm, xmm, xmm/m128, imm8 + /// VPBLENDD xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 Blend(Vector128 left, Vector128 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_blend_epi32 (__m128i a, __m128i b, const int imm8) - /// VPBLENDD xmm, xmm, xmm/m128, imm8 + /// VPBLENDD xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 Blend(Vector128 left, Vector128 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_blend_epi16 (__m256i a, __m256i b, const int imm8) - /// VPBLENDW ymm, ymm, ymm/m256, imm8 + /// VPBLENDW ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Blend(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_blend_epi16 (__m256i a, __m256i b, const int imm8) - /// VPBLENDW ymm, ymm, ymm/m256, imm8 + /// VPBLENDW ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Blend(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_blend_epi32 (__m256i a, __m256i b, const int imm8) - /// VPBLENDD ymm, ymm, ymm/m256, imm8 + /// VPBLENDD ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Blend(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_blend_epi32 (__m256i a, __m256i b, const int imm8) - /// VPBLENDD ymm, ymm, ymm/m256, imm8 + /// VPBLENDD ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Blend(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask) - /// VPBLENDVB ymm, ymm, ymm/m256, ymm + /// VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4 /// public static Vector256 BlendVariable(Vector256 left, Vector256 right, Vector256 mask) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask) - ///
VPBLENDVB ymm, ymm, ymm/m256, ymm + /// VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4 /// public static Vector256 BlendVariable(Vector256 left, Vector256 right, Vector256 mask) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask) - /// VPBLENDVB ymm, ymm, ymm/m256, ymm + /// VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4 /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements. /// public static Vector256 BlendVariable(Vector256 left, Vector256 right, Vector256 mask) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask) - /// VPBLENDVB ymm, ymm, ymm/m256, ymm + /// VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4 /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements. /// public static Vector256 BlendVariable(Vector256 left, Vector256 right, Vector256 mask) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask) - /// VPBLENDVB ymm, ymm, ymm/m256, ymm + /// VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4 /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements. /// public static Vector256 BlendVariable(Vector256 left, Vector256 right, Vector256 mask) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask) - /// VPBLENDVB ymm, ymm, ymm/m256, ymm + /// VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4 /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements. /// public static Vector256 BlendVariable(Vector256 left, Vector256 right, Vector256 mask) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask) - /// VPBLENDVB ymm, ymm, ymm/m256, ymm + /// VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4 /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements. /// public static Vector256 BlendVariable(Vector256 left, Vector256 right, Vector256 mask) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask) - /// VPBLENDVB ymm, ymm, ymm/m256, ymm + /// VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4 /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements. 
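The byte-mask requirement called out above is easiest to satisfy with a comparison result, which sets every byte of a selected element. A sketch (illustrative only, not part of this diff; the Max helper is hypothetical):

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    // CompareGreaterThan yields all-ones int elements, so all four mask bytes
    // per element are set, exactly as VPBLENDVB expects.
    static Vector256<int> Max(Vector256<int> left, Vector256<int> right)
        => Avx2.BlendVariable(left, right, Avx2.CompareGreaterThan(right, left));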
/// public static Vector256 BlendVariable(Vector256 left, Vector256 right, Vector256 mask) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_broadcastb_epi8 (__m128i a) - /// VPBROADCASTB xmm, xmm + /// VPBROADCASTB xmm1, xmm2/m8 + /// VPBROADCASTB xmm1 {k1}{z}, xmm2/m8 /// public static Vector128 BroadcastScalarToVector128(Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_broadcastb_epi8 (__m128i a) - /// VPBROADCASTB xmm, xmm + /// VPBROADCASTB xmm1, xmm2/m8 + /// VPBROADCASTB xmm1 {k1}{z}, xmm2/m8 /// public static Vector128 BroadcastScalarToVector128(Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_broadcastw_epi16 (__m128i a) - /// VPBROADCASTW xmm, xmm + /// VPBROADCASTW xmm1, xmm2/m16 + /// VPBROADCASTW xmm1 {k1}{z}, xmm2/m16 /// public static Vector128 BroadcastScalarToVector128(Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_broadcastw_epi16 (__m128i a) - /// VPBROADCASTW xmm, xmm + /// VPBROADCASTW xmm1, xmm2/m16 + /// VPBROADCASTW xmm1 {k1}{z}, xmm2/m16 /// public static Vector128 BroadcastScalarToVector128(Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_broadcastd_epi32 (__m128i a) - /// VPBROADCASTD xmm, xmm + /// VPBROADCASTD xmm1, xmm2/m32 + /// VPBROADCASTD xmm1 {k1}{z}, xmm2/m32 /// public static Vector128 BroadcastScalarToVector128(Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_broadcastd_epi32 (__m128i a) - /// VPBROADCASTD xmm, xmm + /// VPBROADCASTD xmm1, xmm2/m32 + /// VPBROADCASTD xmm1 {k1}{z}, xmm2/m32 /// public static Vector128 BroadcastScalarToVector128(Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_broadcastq_epi64 (__m128i a) - /// VPBROADCASTQ xmm, xmm + /// VPBROADCASTQ xmm1, xmm2/m64 + /// VPBROADCASTQ xmm1 {k1}{z}, xmm2/m64 /// public static Vector128 BroadcastScalarToVector128(Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_broadcastq_epi64 (__m128i a) - /// VPBROADCASTQ xmm, xmm + /// VPBROADCASTQ xmm1, xmm2/m64 + /// VPBROADCASTQ xmm1 {k1}{z}, xmm2/m64 /// public static Vector128 BroadcastScalarToVector128(Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m128 _mm_broadcastss_ps (__m128 a) - /// VBROADCASTSS xmm, xmm + /// VBROADCASTSS xmm1, xmm2/m32 + /// VBROADCASTSS xmm1 {k1}{z}, xmm2/m32 /// public static Vector128 BroadcastScalarToVector128(Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m128d _mm_broadcastsd_pd (__m128d a) - /// VMOVDDUP xmm, xmm + /// VMOVDDUP xmm1, xmm2/m64 /// public static Vector128 BroadcastScalarToVector128(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_broadcastb_epi8 (__m128i a) - /// VPBROADCASTB xmm, m8 + /// VPBROADCASTB xmm1, m8 + /// VPBROADCASTB xmm1 {k1}{z}, m8 /// The above native signature does not directly correspond to the managed signature. - /// We provide this additional overload for the lack of pointers to managed. /// public static unsafe Vector128 BroadcastScalarToVector128(byte* source) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_broadcastb_epi8 (__m128i a) - /// VPBROADCASTB xmm, m8 + /// VPBROADCASTB xmm1, m8 + /// VPBROADCASTB xmm1 {k1}{z}, m8 /// The above native signature does not directly correspond to the managed signature.
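As a usage sketch for the pointer-based broadcast overloads (illustrative only, not part of this diff; hypothetical helper, assumes Avx2.IsSupported):

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    // Replicates a single byte from memory into all 16 elements (VPBROADCASTB xmm1, m8).
    static unsafe Vector128<byte> SplatByte(byte* source)
        => Avx2.BroadcastScalarToVector128(source);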
/// public static unsafe Vector128 BroadcastScalarToVector128(sbyte* source) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_broadcastw_epi16 (__m128i a) - /// VPBROADCASTW xmm, m16 + /// VPBROADCASTW xmm1, m16 + /// VPBROADCASTW xmm1 {k1}{z}, m16 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector128 BroadcastScalarToVector128(short* source) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_broadcastw_epi16 (__m128i a) - /// VPBROADCASTW xmm, m16 + /// VPBROADCASTW xmm1, m16 + /// VPBROADCASTW xmm1 {k1}{z}, m16 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector128 BroadcastScalarToVector128(ushort* source) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_broadcastd_epi32 (__m128i a) - /// VPBROADCASTD xmm, m32 + /// VPBROADCASTD xmm1, m32 + /// VPBROADCASTD xmm1 {k1}{z}, m32 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector128 BroadcastScalarToVector128(int* source) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_broadcastd_epi32 (__m128i a) - /// VPBROADCASTD xmm, m32 + /// VPBROADCASTD xmm1, m32 + /// VPBROADCASTD xmm1 {k1}{z}, m32 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector128 BroadcastScalarToVector128(uint* source) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_broadcastq_epi64 (__m128i a) - /// VPBROADCASTQ xmm, m64 + /// VPBROADCASTQ xmm1, m64 + /// VPBROADCASTQ xmm1 {k1}{z}, m64 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector128 BroadcastScalarToVector128(long* source) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_broadcastq_epi64 (__m128i a) - /// VPBROADCASTQ xmm, m64 + /// VPBROADCASTQ xmm1, m64 + /// VPBROADCASTQ xmm1 {k1}{z}, m64 /// The above native signature does not directly correspond to the managed signature. 
/// public static unsafe Vector128 BroadcastScalarToVector128(ulong* source) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_broadcastb_epi8 (__m128i a) - /// VPBROADCASTB ymm, xmm + /// VPBROADCASTB ymm1, xmm2/m8 + /// VPBROADCASTB ymm1 {k1}{z}, xmm2/m8 /// public static Vector256 BroadcastScalarToVector256(Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_broadcastb_epi8 (__m128i a) - /// VPBROADCASTB ymm, xmm + /// VPBROADCASTB ymm1, xmm2/m8 + /// VPBROADCASTB ymm1 {k1}{z}, xmm2/m8 /// public static Vector256 BroadcastScalarToVector256(Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_broadcastw_epi16 (__m128i a) - /// VPBROADCASTW ymm, xmm + /// VPBROADCASTW ymm1, xmm2/m16 + /// VPBROADCASTW ymm1 {k1}{z}, xmm2/m16 /// public static Vector256 BroadcastScalarToVector256(Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_broadcastw_epi16 (__m128i a) - /// VPBROADCASTW ymm, xmm + /// VPBROADCASTW ymm1, xmm2/m16 + /// VPBROADCASTW ymm1 {k1}{z}, xmm2/m16 /// public static Vector256 BroadcastScalarToVector256(Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_broadcastd_epi32 (__m128i a) - /// VPBROADCASTD ymm, xmm + /// VPBROADCASTD ymm1, xmm2/m32 + /// VPBROADCASTD ymm1 {k1}{z}, xmm2/m32 /// public static Vector256 BroadcastScalarToVector256(Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_broadcastd_epi32 (__m128i a) - /// VPBROADCASTD ymm, xmm + /// VPBROADCASTD ymm1, xmm2/m32 + /// VPBROADCASTD ymm1 {k1}{z}, xmm2/m32 /// public static Vector256 BroadcastScalarToVector256(Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_broadcastq_epi64 (__m128i a) - /// VPBROADCASTQ ymm, xmm + /// VPBROADCASTQ ymm1, xmm2/m64 + /// VPBROADCASTQ ymm1 {k1}{z}, xmm2/m64 /// public static Vector256 BroadcastScalarToVector256(Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_broadcastq_epi64 (__m128i a) - /// VPBROADCASTQ ymm, xmm + /// VPBROADCASTQ ymm1, xmm2/m64 + /// VPBROADCASTQ ymm1 {k1}{z}, xmm2/m64 /// public static Vector256 BroadcastScalarToVector256(Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m256 _mm256_broadcastss_ps (__m128 a) - /// VBROADCASTSS ymm, xmm + /// VBROADCASTSS ymm1, xmm2/m32 + /// VBROADCASTSS ymm1 {k1}{z}, xmm2/m32 /// public static Vector256 BroadcastScalarToVector256(Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m256d _mm256_broadcastsd_pd (__m128d a) - /// VBROADCASTSD ymm, xmm + /// VBROADCASTSD ymm1, xmm2/m64 + /// VBROADCASTSD ymm1 {k1}{z}, xmm2/m64 /// public static Vector256 BroadcastScalarToVector256(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_broadcastb_epi8 (__m128i a) - /// VPBROADCASTB ymm, m8 + /// VPBROADCASTB ymm1, m8 + /// VPBROADCASTB ymm1 {k1}{z}, m8 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastScalarToVector256(byte* source) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_broadcastb_epi8 (__m128i a) - /// VPBROADCASTB ymm, m8 + /// VPBROADCASTB ymm1, m8 + /// VPBROADCASTB ymm1 {k1}{z}, m8 /// The above native signature does not directly correspond to the managed signature. 
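The register-to-ymm broadcasts replicate element 0 of the source across the full 256-bit result; a sketch (illustrative, helper name hypothetical):

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    // Replicates value[0] across all eight float elements (VBROADCASTSS ymm1, xmm2).
    static Vector256<float> SplatFloat256(Vector128<float> value)
        => Avx2.BroadcastScalarToVector256(value);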
/// public static unsafe Vector256 BroadcastScalarToVector256(sbyte* source) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_broadcastw_epi16 (__m128i a) - /// VPBROADCASTW ymm, m16 + /// VPBROADCASTW ymm1, m16 + /// VPBROADCASTW ymm1 {k1}{z}, m16 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastScalarToVector256(short* source) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_broadcastw_epi16 (__m128i a) - /// VPBROADCASTW ymm, m16 + /// VPBROADCASTW ymm1, m16 + /// VPBROADCASTW ymm1 {k1}{z}, m16 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastScalarToVector256(ushort* source) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_broadcastd_epi32 (__m128i a) - /// VPBROADCASTD ymm, m32 + /// VPBROADCASTD ymm1, m32 + /// VPBROADCASTD ymm1 {k1}{z}, m32 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastScalarToVector256(int* source) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_broadcastd_epi32 (__m128i a) - /// VPBROADCASTD ymm, m32 + /// VPBROADCASTD ymm1, m32 + /// VPBROADCASTD ymm1 {k1}{z}, m32 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastScalarToVector256(uint* source) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_broadcastq_epi64 (__m128i a) - /// VPBROADCASTQ ymm, m64 + /// VPBROADCASTQ ymm1, m64 + /// VPBROADCASTQ ymm1 {k1}{z}, m64 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastScalarToVector256(long* source) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_broadcastq_epi64 (__m128i a) - /// VPBROADCASTQ ymm, m64 + /// VPBROADCASTQ ymm1, m64 + /// VPBROADCASTQ ymm1 {k1}{z}, m64 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastScalarToVector256(ulong* source) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_broadcastsi128_si256 (__m128i a) - /// VBROADCASTI128 ymm, m128 + /// VBROADCASTI128 ymm1, m128 + /// VBROADCASTI32x4 ymm1 {k1}{z}, m128 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastVector128ToVector256(sbyte* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_broadcastsi128_si256 (__m128i a) - /// VBROADCASTI128 ymm, m128 + /// VBROADCASTI128 ymm1, m128 + /// VBROADCASTI32x4 ymm1 {k1}{z}, m128 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastVector128ToVector256(byte* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_broadcastsi128_si256 (__m128i a) - /// VBROADCASTI128 ymm, m128 + /// VBROADCASTI128 ymm1, m128 + /// VBROADCASTI32x4 ymm1 {k1}{z}, m128 /// The above native signature does not directly correspond to the managed signature. 
/// public static unsafe Vector256 BroadcastVector128ToVector256(short* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_broadcastsi128_si256 (__m128i a) - /// VBROADCASTI128 ymm, m128 + /// VBROADCASTI128 ymm1, m128 + /// VBROADCASTI32x4 ymm1 {k1}{z}, m128 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastVector128ToVector256(ushort* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_broadcastsi128_si256 (__m128i a) - /// VBROADCASTI128 ymm, m128 + /// VBROADCASTI128 ymm1, m128 + /// VBROADCASTI32x4 ymm1 {k1}{z}, m128 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastVector128ToVector256(int* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_broadcastsi128_si256 (__m128i a) - /// VBROADCASTI128 ymm, m128 + /// VBROADCASTI128 ymm1, m128 + /// VBROADCASTI32x4 ymm1 {k1}{z}, m128 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastVector128ToVector256(uint* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_broadcastsi128_si256 (__m128i a) - /// VBROADCASTI128 ymm, m128 + /// VBROADCASTI128 ymm1, m128 + /// VBROADCASTI64x2 ymm1 {k1}{z}, m128 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastVector128ToVector256(long* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_broadcastsi128_si256 (__m128i a) - /// VBROADCASTI128 ymm, m128 + /// VBROADCASTI128 ymm1, m128 + /// VBROADCASTI64x2 ymm1 {k1}{z}, m128 /// The above native signature does not directly correspond to the managed signature. 
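BroadcastVector128ToVector256 repeats one 16-byte load in both lanes, which is a common way to build per-lane shuffle control vectors. A sketch (illustrative only; helper name hypothetical):

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    // Loads 16 bytes once and mirrors them into both 128-bit lanes
    // (VBROADCASTI128 ymm1, m128).
    static unsafe Vector256<byte> RepeatLanePattern(byte* address)
        => Avx2.BroadcastVector128ToVector256(address);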
/// public static unsafe Vector256 BroadcastVector128ToVector256(ulong* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cmpeq_epi8 (__m256i a, __m256i b) - /// VPCMPEQB ymm, ymm, ymm/m256 + /// VPCMPEQB ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cmpeq_epi8 (__m256i a, __m256i b) - /// VPCMPEQB ymm, ymm, ymm/m256 + /// VPCMPEQB ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cmpeq_epi16 (__m256i a, __m256i b) - /// VPCMPEQW ymm, ymm, ymm/m256 + /// VPCMPEQW ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cmpeq_epi16 (__m256i a, __m256i b) - /// VPCMPEQW ymm, ymm, ymm/m256 + /// VPCMPEQW ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cmpeq_epi32 (__m256i a, __m256i b) - /// VPCMPEQD ymm, ymm, ymm/m256 + /// VPCMPEQD ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cmpeq_epi32 (__m256i a, __m256i b) - /// VPCMPEQD ymm, ymm, ymm/m256 + /// VPCMPEQD ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cmpeq_epi64 (__m256i a, __m256i b) - /// VPCMPEQQ ymm, ymm, ymm/m256 + /// VPCMPEQQ ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cmpeq_epi64 (__m256i a, __m256i b) - /// VPCMPEQQ ymm, ymm, ymm/m256 + /// VPCMPEQQ ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareEqual(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cmpgt_epi8 (__m256i a, __m256i b) - /// VPCMPGTB ymm, ymm, ymm/m256 + /// VPCMPGTB ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareGreaterThan(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cmpgt_epi16 (__m256i a, __m256i b) - /// VPCMPGTW ymm, ymm, ymm/m256 + /// VPCMPGTW ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareGreaterThan(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cmpgt_epi32 (__m256i a, __m256i b) - /// VPCMPGTD ymm, ymm, ymm/m256 + /// VPCMPGTD ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareGreaterThan(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cmpgt_epi64 (__m256i a, __m256i b) - /// VPCMPGTQ ymm, ymm, ymm/m256 + /// VPCMPGTQ ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareGreaterThan(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// int _mm256_cvtsi256_si32 (__m256i a) - /// MOVD reg/m32, xmm + /// VMOVD r/m32, xmm1 /// public static int ConvertToInt32(Vector256 value) { throw new PlatformNotSupportedException(); } /// /// int _mm256_cvtsi256_si32 (__m256i a) - /// MOVD reg/m32, xmm + /// VMOVD r/m32, xmm1 /// public static uint ConvertToUInt32(Vector256 value) { throw new PlatformNotSupportedException(); } /// /// __m256i
_mm256_cvtepi8_epi16 (__m128i a) - /// VPMOVSXBW ymm, xmm + /// VPMOVSXBW ymm1, xmm2/m128 + /// VPMOVSXBW ymm1 {k1}{z}, xmm2/m128 /// public static Vector256 ConvertToVector256Int16(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cvtepu8_epi16 (__m128i a) - /// VPMOVZXBW ymm, xmm + /// VPMOVZXBW ymm1, xmm2/m128 + /// VPMOVZXBW ymm1 {k1}{z}, xmm2/m128 /// public static Vector256 ConvertToVector256Int16(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cvtepi8_epi32 (__m128i a) - /// VPMOVSXBD ymm, xmm + /// VPMOVSXBD ymm1, xmm2/m64 + /// VPMOVSXBD ymm1 {k1}{z}, xmm2/m64 /// public static Vector256 ConvertToVector256Int32(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cvtepu8_epi32 (__m128i a) - /// VPMOVZXBD ymm, xmm + /// VPMOVZXBD ymm1, xmm2/m64 + /// VPMOVZXBD ymm1 {k1}{z}, xmm2/m64 /// public static Vector256 ConvertToVector256Int32(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cvtepi16_epi32 (__m128i a) - /// VPMOVSXWD ymm, xmm + /// VPMOVSXWD ymm1, xmm2/m128 + /// VPMOVSXWD ymm1 {k1}{z}, xmm2/m128 /// public static Vector256 ConvertToVector256Int32(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cvtepu16_epi32 (__m128i a) - /// VPMOVZXWD ymm, xmm + /// VPMOVZXWD ymm1, xmm2/m128 + /// VPMOVZXWD ymm1 {k1}{z}, xmm2/m128 /// public static Vector256 ConvertToVector256Int32(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cvtepi8_epi64 (__m128i a) - /// VPMOVSXBQ ymm, xmm + /// VPMOVSXBQ ymm1, xmm2/m32 + /// VPMOVSXBQ ymm1 {k1}{z}, xmm2/m32 /// public static Vector256 ConvertToVector256Int64(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cvtepu8_epi64 (__m128i a) - /// VPMOVZXBQ ymm, xmm + /// VPMOVZXBQ ymm1, xmm2/m32 + /// VPMOVZXBQ ymm1 {k1}{z}, xmm2/m32 /// public static Vector256 ConvertToVector256Int64(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cvtepi16_epi64 (__m128i a) - /// VPMOVSXWQ ymm, xmm + /// VPMOVSXWQ ymm1, xmm2/m64 + /// VPMOVSXWQ ymm1 {k1}{z}, xmm2/m64 /// public static Vector256 ConvertToVector256Int64(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cvtepu16_epi64 (__m128i a) - /// VPMOVZXWQ ymm, xmm + /// VPMOVZXWQ ymm1, xmm2/m64 + /// VPMOVZXWQ ymm1 {k1}{z}, xmm2/m64 /// public static Vector256 ConvertToVector256Int64(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cvtepi32_epi64 (__m128i a) - /// VPMOVSXDQ ymm, xmm + /// VPMOVSXDQ ymm1, xmm2/m128 + /// VPMOVSXDQ ymm1 {k1}{z}, xmm2/m128 /// public static Vector256 ConvertToVector256Int64(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_cvtepu32_epi64 (__m128i a) - /// VPMOVZXDQ ymm, xmm + /// VPMOVZXDQ ymm1, xmm2/m128 + /// VPMOVZXDQ ymm1 {k1}{z}, xmm2/m128 /// public static Vector256 ConvertToVector256Int64(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// VPMOVSXBW ymm, m128 + /// VPMOVSXBW ymm1, m128 + /// VPMOVSXBW ymm1 {k1}{z}, m128 /// The native signature does not exist. We provide this additional overload for completeness. 
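For the widening conversions above, the signed element types sign-extend and the unsigned ones zero-extend; a sketch (illustrative, hypothetical helper):

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    // Sign-extends 16 sbyte elements to 16 short elements (VPMOVSXBW);
    // the Vector128<byte> overload zero-extends instead (VPMOVZXBW).
    static Vector256<short> Widen(Vector128<sbyte> value)
        => Avx2.ConvertToVector256Int16(value);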
/// public static unsafe Vector256 ConvertToVector256Int16(sbyte* address) { throw new PlatformNotSupportedException(); } /// - /// VPMOVZXBW ymm, m128 + /// VPMOVZXBW ymm1, m128 + /// VPMOVZXBW ymm1 {k1}{z}, m128 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector256 ConvertToVector256Int16(byte* address) { throw new PlatformNotSupportedException(); } /// - /// VPMOVSXBD ymm, m64 + /// VPMOVSXBD ymm1, m64 + /// VPMOVSXBD ymm1 {k1}{z}, m64 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector256 ConvertToVector256Int32(sbyte* address) { throw new PlatformNotSupportedException(); } /// - /// VPMOVZXBD ymm, m64 + /// VPMOVZXBD ymm1, m64 + /// VPMOVZXBD ymm1 {k1}{z}, m64 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector256 ConvertToVector256Int32(byte* address) { throw new PlatformNotSupportedException(); } /// - /// VPMOVSXWD ymm, m128 + /// VPMOVSXWD ymm1, m128 + /// VPMOVSXWD ymm1 {k1}{z}, m128 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector256 ConvertToVector256Int32(short* address) { throw new PlatformNotSupportedException(); } /// - /// VPMOVZXWD ymm, m128 + /// VPMOVZXWD ymm1, m128 + /// VPMOVZXWD ymm1 {k1}{z}, m128 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector256 ConvertToVector256Int32(ushort* address) { throw new PlatformNotSupportedException(); } /// - /// VPMOVSXBQ ymm, m32 + /// VPMOVSXBQ ymm1, m32 + /// VPMOVSXBQ ymm1 {k1}{z}, m32 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector256 ConvertToVector256Int64(sbyte* address) { throw new PlatformNotSupportedException(); } /// - /// VPMOVZXBQ ymm, m32 + /// VPMOVZXBQ ymm1, m32 + /// VPMOVZXBQ ymm1 {k1}{z}, m32 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector256 ConvertToVector256Int64(byte* address) { throw new PlatformNotSupportedException(); } /// - /// VPMOVSXWQ ymm, m64 + /// VPMOVSXWQ ymm1, m64 + /// VPMOVSXWQ ymm1 {k1}{z}, m64 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector256 ConvertToVector256Int64(short* address) { throw new PlatformNotSupportedException(); } /// - /// VPMOVZXWQ ymm, m64 + /// VPMOVZXWQ ymm1, m64 + /// VPMOVZXWQ ymm1 {k1}{z}, m64 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector256 ConvertToVector256Int64(ushort* address) { throw new PlatformNotSupportedException(); } /// - /// VPMOVSXDQ ymm, m128 + /// VPMOVSXDQ ymm1, m128 + /// VPMOVSXDQ ymm1 {k1}{z}, m128 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector256 ConvertToVector256Int64(int* address) { throw new PlatformNotSupportedException(); } /// - /// VPMOVZXDQ ymm, m128 + /// VPMOVZXDQ ymm1, m128 + /// VPMOVZXDQ ymm1 {k1}{z}, m128 /// The native signature does not exist. We provide this additional overload for completeness. 
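These pointer overloads fuse the memory load with the widening move, so no separate 128-bit load is needed; a sketch (illustrative only, hypothetical helper):

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    // One step from memory to widened vector (VPMOVZXBW ymm1, m128), equivalent
    // to loading a Vector128<byte> and then widening it.
    static unsafe Vector256<short> WidenLoad(byte* address)
        => Avx2.ConvertToVector256Int16(address);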
/// public static unsafe Vector256 ConvertToVector256Int64(uint* address) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8) - /// VEXTRACTI128 xmm, ymm, imm8 + /// VEXTRACTI128 xmm1/m128, ymm2, imm8 + /// VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static new Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8) - /// VEXTRACTI128 xmm, ymm, imm8 + /// VEXTRACTI128 xmm1/m128, ymm2, imm8 + /// VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static new Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8) - /// VEXTRACTI128 xmm, ymm, imm8 + /// VEXTRACTI128 xmm1/m128, ymm2, imm8 + /// VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static new Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8) - /// VEXTRACTI128 xmm, ymm, imm8 + /// VEXTRACTI128 xmm1/m128, ymm2, imm8 + /// VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static new Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8) - /// VEXTRACTI128 xmm, ymm, imm8 + /// VEXTRACTI128 xmm1/m128, ymm2, imm8 + /// VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static new Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8) - /// VEXTRACTI128 xmm, ymm, imm8 + /// VEXTRACTI128 xmm1/m128, ymm2, imm8 + /// VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static new Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8) - /// VEXTRACTI128 xmm, ymm, imm8 + /// VEXTRACTI128 xmm1/m128, ymm2, imm8 + /// VEXTRACTI64x2 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static new Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8) - /// VEXTRACTI128 xmm, ymm, imm8 + /// VEXTRACTI128 xmm1/m128, ymm2, imm8 + /// VEXTRACTI64x2 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static new Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_i32gather_epi32 (int const* base_addr, __m128i vindex, const int scale) - /// VPGATHERDD xmm, vm32x, xmm + /// VPGATHERDD xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. 
/// public static unsafe Vector128 GatherVector128(int* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_i32gather_epi32 (int const* base_addr, __m128i vindex, const int scale) - /// VPGATHERDD xmm, vm32x, xmm + /// VPGATHERDD xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(uint* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_i32gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale) - /// VPGATHERDQ xmm, vm32x, xmm + /// VPGATHERDQ xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(long* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_i32gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale) - /// VPGATHERDQ xmm, vm32x, xmm + /// VPGATHERDQ xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(ulong* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_i32gather_ps (float const* base_addr, __m128i vindex, const int scale) - /// VGATHERDPS xmm, vm32x, xmm + /// VGATHERDPS xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(float* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_i32gather_pd (double const* base_addr, __m128i vindex, const int scale) - /// VGATHERDPD xmm, vm32x, xmm + /// VGATHERDPD xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(double* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_i64gather_epi32 (int const* base_addr, __m128i vindex, const int scale) - /// VPGATHERQD xmm, vm64x, xmm + /// VPGATHERQD xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(int* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_i64gather_epi32 (int const* base_addr, __m128i vindex, const int scale) - /// VPGATHERQD xmm, vm64x, xmm + /// VPGATHERQD xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. 
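Across all of the gather overloads, scale is the byte stride applied to each index and is typically sizeof(element); a sketch (illustrative only, hypothetical helper):

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    // Gathers table[index[i]] for four indices in one instruction
    // (VPGATHERDD xmm1, vm32x, xmm2); scale 4 == sizeof(int).
    static unsafe Vector128<int> Gather(int* table, Vector128<int> index)
        => Avx2.GatherVector128(table, index, 4);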
/// public static unsafe Vector128 GatherVector128(uint* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_i64gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale) - /// VPGATHERQQ xmm, vm64x, xmm + /// VPGATHERQQ xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(long* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_i64gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale) - /// VPGATHERQQ xmm, vm64x, xmm + /// VPGATHERQQ xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(ulong* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_i64gather_ps (float const* base_addr, __m128i vindex, const int scale) - /// VGATHERQPS xmm, vm64x, xmm + /// VGATHERQPS xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(float* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_i64gather_pd (double const* base_addr, __m128i vindex, const int scale) - /// VGATHERQPD xmm, vm64x, xmm + /// VGATHERQPD xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(double* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_i32gather_epi32 (int const* base_addr, __m256i vindex, const int scale) - /// VPGATHERDD ymm, vm32y, ymm + /// VPGATHERDD ymm1, vm32y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherVector256(int* baseAddress, Vector256 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_i32gather_epi32 (int const* base_addr, __m256i vindex, const int scale) - /// VPGATHERDD ymm, vm32y, ymm + /// VPGATHERDD ymm1, vm32y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherVector256(uint* baseAddress, Vector256 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_i32gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale) - /// VPGATHERDQ ymm, vm32y, ymm + /// VPGATHERDQ ymm1, vm32x, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
/// public static unsafe Vector256 GatherVector256(long* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_i32gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale) - /// VPGATHERDQ ymm, vm32y, ymm + /// VPGATHERDQ ymm1, vm32x, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherVector256(ulong* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_i32gather_ps (float const* base_addr, __m256i vindex, const int scale) - /// VGATHERDPS ymm, vm32y, ymm + /// VGATHERDPS ymm1, vm32y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherVector256(float* baseAddress, Vector256 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_i32gather_pd (double const* base_addr, __m128i vindex, const int scale) - /// VGATHERDPD ymm, vm32y, ymm + /// VGATHERDPD ymm1, vm32x, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherVector256(double* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm256_i64gather_epi32 (int const* base_addr, __m256i vindex, const int scale) - /// VPGATHERQD xmm, vm64y, xmm + /// VPGATHERQD xmm1, vm64y, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(int* baseAddress, Vector256 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm256_i64gather_epi32 (int const* base_addr, __m256i vindex, const int scale) - /// VPGATHERQD xmm, vm64y, xmm + /// VPGATHERQD xmm1, vm64y, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(uint* baseAddress, Vector256 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_i64gather_epi64 (__int64 const* base_addr, __m256i vindex, const int scale) - /// VPGATHERQQ ymm, vm64y, ymm + /// VPGATHERQQ ymm1, vm64y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherVector256(long* baseAddress, Vector256 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_i64gather_epi64 (__int64 const* base_addr, __m256i vindex, const int scale) - /// VPGATHERQQ ymm, vm64y, ymm + /// VPGATHERQQ ymm1, vm64y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
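Note how the index vector width tracks the element count, not the result width: gathering four 64-bit elements needs only four 32-bit indices, hence a 128-bit index vector. A sketch (illustrative, hypothetical helper):

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    // Four dword indices address four doubles (VGATHERDPD ymm1, vm32x, ymm2);
    // scale 8 == sizeof(double).
    static unsafe Vector256<double> GatherDoubles(double* table, Vector128<int> index)
        => Avx2.GatherVector256(table, index, 8);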
/// public static unsafe Vector256 GatherVector256(ulong* baseAddress, Vector256 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm256_i64gather_ps (float const* base_addr, __m256i vindex, const int scale) - /// VGATHERQPS xmm, vm64y, xmm + /// VGATHERQPS xmm1, vm64y, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(float* baseAddress, Vector256 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_i64gather_pd (double const* base_addr, __m256i vindex, const int scale) - /// VGATHERQPD ymm, vm64y, ymm + /// VGATHERQPD ymm1, vm64y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherVector256(double* baseAddress, Vector256 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_mask_i32gather_epi32 (__m128i src, int const* base_addr, __m128i vindex, __m128i mask, const int scale) - /// VPGATHERDD xmm, vm32x, xmm + /// VPGATHERDD xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, int* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_mask_i32gather_epi32 (__m128i src, int const* base_addr, __m128i vindex, __m128i mask, const int scale) - /// VPGATHERDD xmm, vm32x, xmm + /// VPGATHERDD xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, uint* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_mask_i32gather_epi64 (__m128i src, __int64 const* base_addr, __m128i vindex, __m128i mask, const int scale) - /// VPGATHERDQ xmm, vm32x, xmm + /// VPGATHERDQ xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, long* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_mask_i32gather_epi64 (__m128i src, __int64 const* base_addr, __m128i vindex, __m128i mask, const int scale) - /// VPGATHERDQ xmm, vm32x, xmm + /// VPGATHERDQ xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. 
/// public static unsafe Vector128 GatherMaskVector128(Vector128 source, ulong* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_mask_i32gather_ps (__m128 src, float const* base_addr, __m128i vindex, __m128 mask, const int scale) - /// VGATHERDPS xmm, vm32x, xmm + /// VGATHERDPS xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, float* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_mask_i32gather_pd (__m128d src, double const* base_addr, __m128i vindex, __m128d mask, const int scale) - /// VGATHERDPD xmm, vm32x, xmm + /// VGATHERDPD xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, double* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_mask_i64gather_epi32 (__m128i src, int const* base_addr, __m128i vindex, __m128i mask, const int scale) - /// VPGATHERQD xmm, vm64x, xmm + /// VPGATHERQD xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, int* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_mask_i64gather_epi32 (__m128i src, int const* base_addr, __m128i vindex, __m128i mask, const int scale) - /// VPGATHERQD xmm, vm64x, xmm + /// VPGATHERQD xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, uint* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_mask_i64gather_epi64 (__m128i src, __int64 const* base_addr, __m128i vindex, __m128i mask, const int scale) - /// VPGATHERQQ xmm, vm64x, xmm + /// VPGATHERQQ xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, long* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_mask_i64gather_epi64 (__m128i src, __int64 const* base_addr, __m128i vindex, __m128i mask, const int scale) - /// VPGATHERQQ xmm, vm64x, xmm + /// VPGATHERQQ xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. 
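The masked gathers merge rather than overwrite: elements whose mask high bit is clear pass through from source, and their loads are suppressed. A sketch (illustrative only, hypothetical helper):

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    // Gathers only where mask's high bit is set; other elements come from
    // `source` and no memory access is issued for them.
    static unsafe Vector128<int> GatherWhere(Vector128<int> source, int* table,
                                             Vector128<int> index, Vector128<int> mask)
        => Avx2.GatherMaskVector128(source, table, index, mask, 4);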
/// public static unsafe Vector128 GatherMaskVector128(Vector128 source, ulong* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_mask_i64gather_ps (__m128 src, float const* base_addr, __m128i vindex, __m128 mask, const int scale) - /// VGATHERQPS xmm, vm64x, xmm + /// VGATHERQPS xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, float* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_mask_i64gather_pd (__m128d src, double const* base_addr, __m128i vindex, __m128d mask, const int scale) - /// VGATHERQPD xmm, vm64x, xmm + /// VGATHERQPD xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, double* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_mask_i32gather_epi32 (__m256i src, int const* base_addr, __m256i vindex, __m256i mask, const int scale) - /// VPGATHERDD ymm, vm32y, ymm + /// VPGATHERDD ymm1, vm32y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherMaskVector256(Vector256 source, int* baseAddress, Vector256 index, Vector256 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_mask_i32gather_epi32 (__m256i src, int const* base_addr, __m256i vindex, __m256i mask, const int scale) - /// VPGATHERDD ymm, vm32y, ymm + /// VPGATHERDD ymm1, vm32y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherMaskVector256(Vector256 source, uint* baseAddress, Vector256 index, Vector256 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_mask_i32gather_epi64 (__m256i src, __int64 const* base_addr, __m128i vindex, __m256i mask, const int scale) - /// VPGATHERDQ ymm, vm32y, ymm + /// VPGATHERDQ ymm1, vm32x, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherMaskVector256(Vector256 source, long* baseAddress, Vector128 index, Vector256 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_mask_i32gather_epi64 (__m256i src, __int64 const* base_addr, __m128i vindex, __m256i mask, const int scale) - /// VPGATHERDQ ymm, vm32y, ymm + /// VPGATHERDQ ymm1, vm32x, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
/// public static unsafe Vector256 GatherMaskVector256(Vector256 source, ulong* baseAddress, Vector128 index, Vector256 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_mask_i32gather_ps (__m256 src, float const* base_addr, __m256i vindex, __m256 mask, const int scale) - /// VPGATHERDPS ymm, vm32y, ymm + /// VGATHERDPS ymm1, vm32y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherMaskVector256(Vector256 source, float* baseAddress, Vector256 index, Vector256 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_mask_i32gather_pd (__m256d src, double const* base_addr, __m128i vindex, __m256d mask, const int scale) - /// VPGATHERDPD ymm, vm32y, ymm + /// VGATHERDPD ymm1, vm32x, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherMaskVector256(Vector256 source, double* baseAddress, Vector128 index, Vector256 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm256_mask_i64gather_epi32 (__m128i src, int const* base_addr, __m256i vindex, __m128i mask, const int scale) - /// VPGATHERQD xmm, vm32y, xmm + /// VPGATHERQD xmm1, vm64y, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, int* baseAddress, Vector256 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm256_mask_i64gather_epi32 (__m128i src, int const* base_addr, __m256i vindex, __m128i mask, const int scale) - /// VPGATHERQD xmm, vm32y, xmm + /// VPGATHERQD xmm1, vm64y, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, uint* baseAddress, Vector256 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_mask_i64gather_epi64 (__m256i src, __int64 const* base_addr, __m256i vindex, __m256i mask, const int scale) - /// VPGATHERQQ ymm, vm32y, ymm + /// VPGATHERQQ ymm1, vm64y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherMaskVector256(Vector256 source, long* baseAddress, Vector256 index, Vector256 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_mask_i64gather_epi64 (__m256i src, __int64 const* base_addr, __m256i vindex, __m256i mask, const int scale) - /// VPGATHERQQ ymm, vm32y, ymm + /// VPGATHERQQ ymm1, vm64y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
/// public static unsafe Vector256 GatherMaskVector256(Vector256 source, ulong* baseAddress, Vector256 index, Vector256 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm256_mask_i64gather_ps (__m128 src, float const* base_addr, __m256i vindex, __m128 mask, const int scale) - /// VGATHERQPS xmm, vm32y, xmm + /// VGATHERQPS xmm1, vm64y, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, float* baseAddress, Vector256 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_mask_i64gather_pd (__m256d src, double const* base_addr, __m256i vindex, __m256d mask, const int scale) - /// VGATHERQPD ymm, vm32y, ymm + /// VGATHERQPD ymm1, vm64y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherMaskVector256(Vector256 source, double* baseAddress, Vector256 index, Vector256 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_hadd_epi16 (__m256i a, __m256i b) - /// VPHADDW ymm, ymm, ymm/m256 + /// VPHADDW ymm1, ymm2, ymm3/m256 /// public static Vector256 HorizontalAdd(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_hadd_epi32 (__m256i a, __m256i b) - /// VPHADDD ymm, ymm, ymm/m256 + /// VPHADDD ymm1, ymm2, ymm3/m256 /// public static Vector256 HorizontalAdd(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_hadds_epi16 (__m256i a, __m256i b) - /// VPHADDSW ymm, ymm, ymm/m256 + /// VPHADDSW ymm1, ymm2, ymm3/m256 /// public static Vector256 HorizontalAddSaturate(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_hsub_epi16 (__m256i a, __m256i b) - /// VPHSUBW ymm, ymm, ymm/m256 + /// VPHSUBW ymm1, ymm2, ymm3/m256 /// public static Vector256 HorizontalSubtract(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_hsub_epi32 (__m256i a, __m256i b) - /// VPHSUBD ymm, ymm, ymm/m256 + /// VPHSUBD ymm1, ymm2, ymm3/m256 /// public static Vector256 HorizontalSubtract(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_hsubs_epi16 (__m256i a, __m256i b) - /// VPHSUBSW ymm, ymm, ymm/m256 + /// VPHSUBSW ymm1, ymm2, ymm3/m256 /// public static Vector256 HorizontalSubtractSaturate(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8) - /// VINSERTI128 ymm, ymm, xmm, imm8 + /// VINSERTI128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static new Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8) - /// VINSERTI128 ymm, ymm, xmm, imm8 + /// VINSERTI128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static new Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) { throw new
PlatformNotSupportedException(); } - /// /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8) - /// VINSERTI128 ymm, ymm, xmm, imm8 + /// VINSERTI128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static new Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8) - /// VINSERTI128 ymm, ymm, xmm, imm8 + /// VINSERTI128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static new Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8) - /// VINSERTI128 ymm, ymm, xmm, imm8 + /// VINSERTI128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static new Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8) - /// VINSERTI128 ymm, ymm, xmm, imm8 + /// VINSERTI128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static new Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8) - /// VINSERTI128 ymm, ymm, xmm, imm8 + /// VINSERTI128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTI64x2 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static new Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8) - /// VINSERTI128 ymm, ymm, xmm, imm8 + /// VINSERTI128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTI64x2 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static new Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr) - /// VMOVNTDQA ymm, m256 + /// VMOVNTDQA ymm1, m256 /// public static unsafe Vector256 LoadAlignedVector256NonTemporal(sbyte* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr) - /// VMOVNTDQA ymm, m256 + /// VMOVNTDQA ymm1, m256 /// public static unsafe Vector256 LoadAlignedVector256NonTemporal(byte* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr) - /// VMOVNTDQA ymm, m256 + /// VMOVNTDQA ymm1, m256 /// public static unsafe Vector256 LoadAlignedVector256NonTemporal(short* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr) - /// VMOVNTDQA ymm, m256 + /// VMOVNTDQA ymm1, m256 /// public static unsafe Vector256 LoadAlignedVector256NonTemporal(ushort* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr) - /// VMOVNTDQA ymm, m256 + /// VMOVNTDQA ymm1, m256 /// public static unsafe Vector256 LoadAlignedVector256NonTemporal(int* address) { throw new PlatformNotSupportedException(); 
} /// /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr) - /// VMOVNTDQA ymm, m256 + /// VMOVNTDQA ymm1, m256 /// public static unsafe Vector256 LoadAlignedVector256NonTemporal(uint* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr) - /// VMOVNTDQA ymm, m256 + /// VMOVNTDQA ymm1, m256 /// public static unsafe Vector256 LoadAlignedVector256NonTemporal(long* address) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr) - /// VMOVNTDQA ymm, m256 + /// VMOVNTDQA ymm1, m256 /// public static unsafe Vector256 LoadAlignedVector256NonTemporal(ulong* address) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_maskload_epi32 (int const* mem_addr, __m128i mask) - /// VPMASKMOVD xmm, xmm, m128 + /// VPMASKMOVD xmm1, xmm2, m128 /// public static unsafe Vector128 MaskLoad(int* address, Vector128 mask) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_maskload_epi32 (int const* mem_addr, __m128i mask) - /// VPMASKMOVD xmm, xmm, m128 + /// VPMASKMOVD xmm1, xmm2, m128 /// public static unsafe Vector128 MaskLoad(uint* address, Vector128 mask) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_maskload_epi64 (__int64 const* mem_addr, __m128i mask) - /// VPMASKMOVQ xmm, xmm, m128 + /// VPMASKMOVQ xmm1, xmm2, m128 /// public static unsafe Vector128 MaskLoad(long* address, Vector128 mask) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_maskload_epi64 (__int64 const* mem_addr, __m128i mask) - /// VPMASKMOVQ xmm, xmm, m128 + /// VPMASKMOVQ xmm1, xmm2, m128 /// public static unsafe Vector128 MaskLoad(ulong* address, Vector128 mask) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_maskload_epi32 (int const* mem_addr, __m256i mask) - /// VPMASKMOVD ymm, ymm, m256 + /// VPMASKMOVD ymm1, ymm2, m256 /// public static unsafe Vector256 MaskLoad(int* address, Vector256 mask) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_maskload_epi32 (int const* mem_addr, __m256i mask) - /// VPMASKMOVD ymm, ymm, m256 + /// VPMASKMOVD ymm1, ymm2, m256 /// public static unsafe Vector256 MaskLoad(uint* address, Vector256 mask) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_maskload_epi64 (__int64 const* mem_addr, __m256i mask) - /// VPMASKMOVQ ymm, ymm, m256 + /// VPMASKMOVQ ymm1, ymm2, m256 /// public static unsafe Vector256 MaskLoad(long* address, Vector256 mask) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_maskload_epi64 (__int64 const* mem_addr, __m256i mask) - /// VPMASKMOVQ ymm, ymm, m256 + /// VPMASKMOVQ ymm1, ymm2, m256 /// public static unsafe Vector256 MaskLoad(ulong* address, Vector256 mask) { throw new PlatformNotSupportedException(); } /// /// void _mm_maskstore_epi32 (int* mem_addr, __m128i mask, __m128i a) - /// VPMASKMOVD m128, xmm, xmm + /// VPMASKMOVD m128, xmm1, xmm2 /// public static unsafe void MaskStore(int* address, Vector128 mask, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_maskstore_epi32 (int* mem_addr, __m128i mask, __m128i a) - /// VPMASKMOVD m128, xmm, xmm + /// VPMASKMOVD m128, xmm1, xmm2 /// public static unsafe void MaskStore(uint* address, Vector128 mask, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_maskstore_epi64 (__int64* mem_addr, __m128i mask, __m128i a) - /// VPMASKMOVQ m128, xmm, xmm + /// VPMASKMOVQ m128, xmm1, xmm2 /// 
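As a usage sketch (illustrative only, not part of this patch; names are invented), MaskLoad and MaskStore pair naturally for partial-vector copies: masked-off lanes read as zero on load and are left untouched in memory on store.

    static unsafe void CopyFirstTwoInts(int* source, int* destination)
    {
        Vector128<int> mask = Vector128.Create(-1, -1, 0, 0); // high bit set = lane participates
        Vector128<int> data = Avx2.MaskLoad(source, mask);    // lanes 2 and 3 load as zero
        Avx2.MaskStore(destination, mask, data);              // lanes 2 and 3 of destination are untouched
    }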
public static unsafe void MaskStore(long* address, Vector128 mask, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_maskstore_epi64 (__int64* mem_addr, __m128i mask, __m128i a) - /// VPMASKMOVQ m128, xmm, xmm + /// VPMASKMOVQ m128, xmm1, xmm2 /// public static unsafe void MaskStore(ulong* address, Vector128 mask, Vector128 source) { throw new PlatformNotSupportedException(); } - /// /// void _mm256_maskstore_epi32 (int* mem_addr, __m256i mask, __m256i a) - /// VPMASKMOVD m256, ymm, ymm + /// VPMASKMOVD m256, ymm1, ymm2 /// public static unsafe void MaskStore(int* address, Vector256 mask, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_maskstore_epi32 (int* mem_addr, __m256i mask, __m256i a) - /// VPMASKMOVD m256, ymm, ymm + /// VPMASKMOVD m256, ymm1, ymm2 /// public static unsafe void MaskStore(uint* address, Vector256 mask, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_maskstore_epi64 (__int64* mem_addr, __m256i mask, __m256i a) - /// VPMASKMOVQ m256, ymm, ymm + /// VPMASKMOVQ m256, ymm1, ymm2 /// public static unsafe void MaskStore(long* address, Vector256 mask, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// void _mm256_maskstore_epi64 (__int64* mem_addr, __m256i mask, __m256i a) - /// VPMASKMOVQ m256, ymm, ymm + /// VPMASKMOVQ m256, ymm1, ymm2 /// public static unsafe void MaskStore(ulong* address, Vector256 mask, Vector256 source) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_madd_epi16 (__m256i a, __m256i b) - /// VPMADDWD ymm, ymm, ymm/m256 + /// VPMADDWD ymm1, ymm2, ymm3/m256 + /// VPMADDWD ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 MultiplyAddAdjacent(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_maddubs_epi16 (__m256i a, __m256i b) - /// VPMADDUBSW ymm, ymm, ymm/m256 + /// VPMADDUBSW ymm1, ymm2, ymm3/m256 + /// VPMADDUBSW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 MultiplyAddAdjacent(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_max_epi8 (__m256i a, __m256i b) - /// VPMAXSB ymm, ymm, ymm/m256 + /// VPMAXSB ymm1, ymm2, ymm3/m256 + /// VPMAXSB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Max(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_max_epu8 (__m256i a, __m256i b) - /// VPMAXUB ymm, ymm, ymm/m256 + /// VPMAXUB ymm1, ymm2, ymm3/m256 + /// VPMAXUB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Max(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_max_epi16 (__m256i a, __m256i b) - /// VPMAXSW ymm, ymm, ymm/m256 + /// VPMAXSW ymm1, ymm2, ymm3/m256 + /// VPMAXSW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Max(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_max_epu16 (__m256i a, __m256i b) - /// VPMAXUW ymm, ymm, ymm/m256 + /// VPMAXUW ymm1, ymm2, ymm3/m256 + /// VPMAXUW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Max(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_max_epi32 (__m256i a, __m256i b) - /// VPMAXSD ymm, ymm, ymm/m256 + /// VPMAXSD ymm1, ymm2, ymm3/m256 + /// VPMAXSD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Max(Vector256 left, Vector256 right) { throw new 
PlatformNotSupportedException(); } /// /// __m256i _mm256_max_epu32 (__m256i a, __m256i b) - /// VPMAXUD ymm, ymm, ymm/m256 + /// VPMAXUD ymm1, ymm2, ymm3/m256 + /// VPMAXUD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Max(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_min_epi8 (__m256i a, __m256i b) - /// VPMINSB ymm, ymm, ymm/m256 + /// VPMINSB ymm1, ymm2, ymm3/m256 + /// VPMINSB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Min(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_min_epu8 (__m256i a, __m256i b) - /// VPMINUB ymm, ymm, ymm/m256 + /// VPMINUB ymm1, ymm2, ymm3/m256 + /// VPMINUB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Min(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_min_epi16 (__m256i a, __m256i b) - /// VPMINSW ymm, ymm, ymm/m256 + /// VPMINSW ymm1, ymm2, ymm3/m256 + /// VPMINSW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Min(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_min_epu16 (__m256i a, __m256i b) - /// VPMINUW ymm, ymm, ymm/m256 + /// VPMINUW ymm1, ymm2, ymm3/m256 + /// VPMINUW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Min(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_min_epi32 (__m256i a, __m256i b) - /// VPMINSD ymm, ymm, ymm/m256 + /// VPMINSD ymm1, ymm2, ymm3/m256 + /// VPMINSD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Min(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_min_epu32 (__m256i a, __m256i b) - /// VPMINUD ymm, ymm, ymm/m256 + /// VPMINUD ymm1, ymm2, ymm3/m256 + /// VPMINUD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Min(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// int _mm256_movemask_epi8 (__m256i a) - /// VPMOVMSKB reg, ymm + /// VPMOVMSKB r32, ymm1 /// public static int MoveMask(Vector256 value) { throw new PlatformNotSupportedException(); } /// /// int _mm256_movemask_epi8 (__m256i a) - /// VPMOVMSKB reg, ymm + /// VPMOVMSKB r32, ymm1 /// public static int MoveMask(Vector256 value) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_mpsadbw_epu8 (__m256i a, __m256i b, const int imm8) - /// VMPSADBW ymm, ymm, ymm/m256, imm8 + /// VMPSADBW ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 MultipleSumAbsoluteDifferences(Vector256 left, Vector256 right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_mul_epi32 (__m256i a, __m256i b) - /// VPMULDQ ymm, ymm, ymm/m256 + /// VPMULDQ ymm1, ymm2, ymm3/m256 + /// VPMULDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Multiply(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_mul_epu32 (__m256i a, __m256i b) - /// VPMULUDQ ymm, ymm, ymm/m256 + /// VPMULUDQ ymm1, ymm2, ymm3/m256 + /// VPMULUDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Multiply(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_mulhi_epi16 (__m256i a, __m256i b) - /// VPMULHW ymm, ymm, ymm/m256 + /// VPMULHW ymm1, ymm2, ymm3/m256 + /// VPMULHW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 MultiplyHigh(Vector256 left, 
Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_mulhi_epu16 (__m256i a, __m256i b) - /// VPMULHUW ymm, ymm, ymm/m256 + /// VPMULHUW ymm1, ymm2, ymm3/m256 + /// VPMULHUW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 MultiplyHigh(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_mulhrs_epi16 (__m256i a, __m256i b) - /// VPMULHRSW ymm, ymm, ymm/m256 + /// VPMULHRSW ymm1, ymm2, ymm3/m256 + /// VPMULHRSW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 MultiplyHighRoundScale(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_mullo_epi16 (__m256i a, __m256i b) - /// VPMULLW ymm, ymm, ymm/m256 + /// VPMULLW ymm1, ymm2, ymm3/m256 + /// VPMULLW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 MultiplyLow(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_mullo_epi16 (__m256i a, __m256i b) - /// VPMULLW ymm, ymm, ymm/m256 + /// VPMULLW ymm1, ymm2, ymm3/m256 + /// VPMULLW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 MultiplyLow(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } - /// /// __m256i _mm256_mullo_epi32 (__m256i a, __m256i b) - /// VPMULLD ymm, ymm, ymm/m256 + /// VPMULLD ymm1, ymm2, ymm3/m256 + /// VPMULLD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 MultiplyLow(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_mullo_epi32 (__m256i a, __m256i b) - /// VPMULLD ymm, ymm, ymm/m256 + /// VPMULLD ymm1, ymm2, ymm3/m256 + /// VPMULLD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 MultiplyLow(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_or_si256 (__m256i a, __m256i b) - /// VPOR ymm, ymm, ymm/m256 + /// VPOR ymm1, ymm2, ymm3/m256 /// public static Vector256 Or(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_or_si256 (__m256i a, __m256i b) - /// VPOR ymm, ymm, ymm/m256 + /// VPOR ymm1, ymm2, ymm3/m256 /// public static Vector256 Or(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_or_si256 (__m256i a, __m256i b) - /// VPOR ymm, ymm, ymm/m256 + /// VPOR ymm1, ymm2, ymm3/m256 /// public static Vector256 Or(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_or_si256 (__m256i a, __m256i b) - /// VPOR ymm, ymm, ymm/m256 + /// VPOR ymm1, ymm2, ymm3/m256 /// public static Vector256 Or(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_or_si256 (__m256i a, __m256i b) - /// VPOR ymm, ymm, ymm/m256 + /// VPOR ymm1, ymm2, ymm3/m256 + /// VPORD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Or(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_or_si256 (__m256i a, __m256i b) - /// VPOR ymm, ymm, ymm/m256 + /// VPOR ymm1, ymm2, ymm3/m256 + /// VPORD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Or(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_or_si256 (__m256i a, __m256i b) - /// VPOR ymm, ymm, ymm/m256 + /// VPOR ymm1, ymm2, ymm3/m256 + /// VPORQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Or(Vector256 left, Vector256 
right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_or_si256 (__m256i a, __m256i b) - /// VPOR ymm, ymm, ymm/m256 + /// VPOR ymm1, ymm2, ymm3/m256 + /// VPORQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Or(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_packs_epi16 (__m256i a, __m256i b) - /// VPACKSSWB ymm, ymm, ymm/m256 + /// VPACKSSWB ymm1, ymm2, ymm3/m256 + /// VPACKSSWB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 PackSignedSaturate(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_packs_epi32 (__m256i a, __m256i b) - /// VPACKSSDW ymm, ymm, ymm/m256 + /// VPACKSSDW ymm1, ymm2, ymm3/m256 + /// VPACKSSDW ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 PackSignedSaturate(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + /// /// __m256i _mm256_packus_epi16 (__m256i a, __m256i b) - /// VPACKUSWB ymm, ymm, ymm/m256 + /// VPACKUSWB ymm1, ymm2, ymm3/m256 + /// VPACKUSWB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 PackUnsignedSaturate(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_packus_epi32 (__m256i a, __m256i b) - /// VPACKUSDW ymm, ymm, ymm/m256 + /// VPACKUSDW ymm1, ymm2, ymm3/m256 + /// VPACKUSDW ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 PackUnsignedSaturate(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8) - /// VPERM2I128 ymm, ymm, ymm/m256, imm8 + /// VPERM2I128 ymm1, ymm2, ymm3/m256, imm8 /// public static new Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8) - /// VPERM2I128 ymm, ymm, ymm/m256, imm8 + /// VPERM2I128 ymm1, ymm2, ymm3/m256, imm8 /// public static new Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8) - /// VPERM2I128 ymm, ymm, ymm/m256, imm8 + /// VPERM2I128 ymm1, ymm2, ymm3/m256, imm8 /// public static new Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8) - /// VPERM2I128 ymm, ymm, ymm/m256, imm8 + /// VPERM2I128 ymm1, ymm2, ymm3/m256, imm8 /// public static new Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8) - /// VPERM2I128 ymm, ymm, ymm/m256, imm8 + /// VPERM2I128 ymm1, ymm2, ymm3/m256, imm8 /// public static new Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8) - /// VPERM2I128 ymm, ymm, ymm/m256, imm8 + /// VPERM2I128 ymm1, ymm2, ymm3/m256, imm8 /// public static new Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256i 
_mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8) - /// VPERM2I128 ymm, ymm, ymm/m256, imm8 + /// VPERM2I128 ymm1, ymm2, ymm3/m256, imm8 /// public static new Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8) - /// VPERM2I128 ymm, ymm, ymm/m256, imm8 + /// VPERM2I128 ymm1, ymm2, ymm3/m256, imm8 /// public static new Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_permute4x64_epi64 (__m256i a, const int imm8) - /// VPERMQ ymm, ymm/m256, imm8 + /// VPERMQ ymm1, ymm2/m256, imm8 + /// VPERMQ ymm1 {k1}{z}, ymm2/m256/m64bcst, imm8 /// public static Vector256 Permute4x64(Vector256 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_permute4x64_epi64 (__m256i a, const int imm8) - /// VPERMQ ymm, ymm/m256, imm8 + /// VPERMQ ymm1, ymm2/m256, imm8 + /// VPERMQ ymm1 {k1}{z}, ymm2/m256/m64bcst, imm8 /// public static Vector256 Permute4x64(Vector256 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_permute4x64_pd (__m256d a, const int imm8) - /// VPERMPD ymm, ymm/m256, imm8 + /// VPERMPD ymm1, ymm2/m256, imm8 + /// VPERMPD ymm1 {k1}{z}, ymm2/m256/m64bcst, imm8 /// public static Vector256 Permute4x64(Vector256 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_permutevar8x32_epi32 (__m256i a, __m256i idx) - /// VPERMD ymm, ymm/m256, ymm + /// VPERMD ymm1, ymm2, ymm3/m256 + /// VPERMD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 PermuteVar8x32(Vector256 left, Vector256 control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_permutevar8x32_epi32 (__m256i a, __m256i idx) - /// VPERMD ymm, ymm/m256, ymm + /// VPERMD ymm1, ymm2, ymm3/m256 + /// VPERMD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 PermuteVar8x32(Vector256 left, Vector256 control) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_permutevar8x32_ps (__m256 a, __m256i idx) - /// VPERMPS ymm, ymm/m256, ymm + /// VPERMPS ymm1, ymm2, ymm3/m256 + /// VPERMPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 PermuteVar8x32(Vector256 left, Vector256 control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_sll_epi16 (__m256i a, __m128i count) - /// VPSLLW ymm, ymm, xmm/m128 + /// VPSLLW ymm1, ymm2, xmm3/m128 + /// VPSLLW ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftLeftLogical(Vector256 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_sll_epi16 (__m256i a, __m128i count) - /// VPSLLW ymm, ymm, xmm/m128 + /// VPSLLW ymm1, ymm2, xmm3/m128 + /// VPSLLW ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftLeftLogical(Vector256 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_sll_epi32 (__m256i a, __m128i count) - /// VPSLLD ymm, ymm, xmm/m128 + /// VPSLLD ymm1, ymm2, xmm3/m128 + /// VPSLLD ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftLeftLogical(Vector256 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_sll_epi32 (__m256i a, __m128i count) - /// VPSLLD ymm, ymm, xmm/m128 + /// VPSLLD ymm1, ymm2,
xmm3/m128 + /// VPSLLD ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftLeftLogical(Vector256 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_sll_epi64 (__m256i a, __m128i count) - /// VPSLLQ ymm, ymm, xmm/m128 + /// VPSLLQ ymm1, ymm2, xmm3/m128 + /// VPSLLQ ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftLeftLogical(Vector256 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_sll_epi64 (__m256i a, __m128i count) - /// VPSLLQ ymm, ymm, xmm/m128 + /// VPSLLQ ymm1, ymm2, xmm3/m128 + /// VPSLLQ ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftLeftLogical(Vector256 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_slli_epi16 (__m256i a, int imm8) - /// VPSLLW ymm, ymm, imm8 + /// VPSLLW ymm1, ymm2, imm8 + /// VPSLLW ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftLeftLogical(Vector256 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_slli_epi16 (__m256i a, int imm8) - /// VPSLLW ymm, ymm, imm8 + /// VPSLLW ymm1, ymm2, imm8 + /// VPSLLW ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftLeftLogical(Vector256 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_slli_epi32 (__m256i a, int imm8) - /// VPSLLD ymm, ymm, imm8 + /// VPSLLD ymm1, ymm2, imm8 + /// VPSLLD ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftLeftLogical(Vector256 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_slli_epi32 (__m256i a, int imm8) - /// VPSLLD ymm, ymm, imm8 + /// VPSLLD ymm1, ymm2, imm8 + /// VPSLLD ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftLeftLogical(Vector256 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_slli_epi64 (__m256i a, int imm8) - /// VPSLLQ ymm, ymm, imm8 + /// VPSLLQ ymm1, ymm2, imm8 + /// VPSLLQ ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftLeftLogical(Vector256 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_slli_epi64 (__m256i a, int imm8) - /// VPSLLQ ymm, ymm, imm8 + /// VPSLLQ ymm1, ymm2, imm8 + /// VPSLLQ ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftLeftLogical(Vector256 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8) - /// VPSLLDQ ymm, ymm, imm8 + /// VPSLLDQ ymm1, ymm2/m256, imm8 /// public static Vector256 ShiftLeftLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8) - /// VPSLLDQ ymm, ymm, imm8 + /// VPSLLDQ ymm1, ymm2/m256, imm8 /// public static Vector256 ShiftLeftLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8) - /// VPSLLDQ ymm, ymm, imm8 + /// VPSLLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. 
/// public static Vector256 ShiftLeftLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8) - /// VPSLLDQ ymm, ymm, imm8 + /// VPSLLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. /// public static Vector256 ShiftLeftLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8) - /// VPSLLDQ ymm, ymm, imm8 + /// VPSLLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. /// public static Vector256 ShiftLeftLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8) - /// VPSLLDQ ymm, ymm, imm8 + /// VPSLLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. /// public static Vector256 ShiftLeftLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8) - /// VPSLLDQ ymm, ymm, imm8 + /// VPSLLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. /// public static Vector256 ShiftLeftLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8) - /// VPSLLDQ ymm, ymm, imm8 + /// VPSLLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. 
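To make the remark above concrete (an illustrative sketch, not part of this patch): the shift moves whole bytes within each 128-bit lane independently of the element type, so shifting a Vector256<short> left by 2 bytes moves every 16-bit element up one position within its own lane.

    Vector256<short> v = Vector256.Create((short)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    Vector256<short> r = Avx2.ShiftLeftLogical128BitLane(v, 2); // 2 bytes = one short, per 128-bit lane
    // lower lane: 0, 1, 2, ..., 7; upper lane: 0, 9, 10, ..., 15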
/// public static Vector256 ShiftLeftLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } - /// - /// __m256i _mm256_sllv_epi32 (__m256i a, __m256i count) - /// VPSLLVD ymm, ymm, ymm/m256 - /// - public static Vector256 ShiftLeftLogicalVariable(Vector256 value, Vector256 count) { throw new PlatformNotSupportedException(); } - /// - /// __m256i _mm256_sllv_epi32 (__m256i a, __m256i count) - /// VPSLLVD ymm, ymm, ymm/m256 - /// - public static Vector256 ShiftLeftLogicalVariable(Vector256 value, Vector256 count) { throw new PlatformNotSupportedException(); } - /// - /// __m256i _mm256_sllv_epi64 (__m256i a, __m256i count) - /// VPSLLVQ ymm, ymm, ymm/m256 - /// - public static Vector256 ShiftLeftLogicalVariable(Vector256 value, Vector256 count) { throw new PlatformNotSupportedException(); } - /// - /// __m256i _mm256_sllv_epi64 (__m256i a, __m256i count) - /// VPSLLVQ ymm, ymm, ymm/m256 - /// - public static Vector256 ShiftLeftLogicalVariable(Vector256 value, Vector256 count) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_sllv_epi32 (__m128i a, __m128i count) - /// VPSLLVD xmm, ymm, xmm/m128 + /// VPSLLVD xmm1, xmm2, xmm3/m128 + /// VPSLLVD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 ShiftLeftLogicalVariable(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_sllv_epi32 (__m128i a, __m128i count) - /// VPSLLVD xmm, ymm, xmm/m128 + /// VPSLLVD xmm1, xmm2, xmm3/m128 + /// VPSLLVD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 ShiftLeftLogicalVariable(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_sllv_epi64 (__m128i a, __m128i count) - /// VPSLLVQ xmm, ymm, xmm/m128 + /// VPSLLVQ xmm1, xmm2, xmm3/m128 + /// VPSLLVQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 ShiftLeftLogicalVariable(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_sllv_epi64 (__m128i a, __m128i count) - /// VPSLLVQ xmm, ymm, xmm/m128 + /// VPSLLVQ xmm1, xmm2, xmm3/m128 + /// VPSLLVQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 ShiftLeftLogicalVariable(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } + /// + /// __m256i _mm256_sllv_epi32 (__m256i a, __m256i count) + /// VPSLLVD ymm1, ymm2, ymm3/m256 + /// VPSLLVD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst + /// + public static Vector256 ShiftLeftLogicalVariable(Vector256 value, Vector256 count) { throw new PlatformNotSupportedException(); } + /// + /// __m256i _mm256_sllv_epi32 (__m256i a, __m256i count) + /// VPSLLVD ymm1, ymm2, ymm3/m256 + /// VPSLLVD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst + /// + public static Vector256 ShiftLeftLogicalVariable(Vector256 value, Vector256 count) { throw new PlatformNotSupportedException(); } + /// + /// __m256i _mm256_sllv_epi64 (__m256i a, __m256i count) + /// VPSLLVQ ymm1, ymm2, ymm3/m256 + /// VPSLLVQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst + /// + public static Vector256 ShiftLeftLogicalVariable(Vector256 value, Vector256 count) { throw new PlatformNotSupportedException(); } + /// + /// __m256i _mm256_sllv_epi64 (__m256i a, __m256i count) + /// VPSLLVQ ymm1, ymm2, ymm3/m256 + /// VPSLLVQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst + /// + public static Vector256 ShiftLeftLogicalVariable(Vector256 value, Vector256 count) { throw new PlatformNotSupportedException(); } /// /// _mm256_sra_epi16 (__m256i a, 
__m128i count) - /// VPSRAW ymm, ymm, xmm/m128 + /// VPSRAW ymm1, ymm2, xmm3/m128 + /// VPSRAW ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftRightArithmetic(Vector256 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// _mm256_sra_epi32 (__m256i a, __m128i count) - /// VPSRAD ymm, ymm, xmm/m128 + /// VPSRAD ymm1, ymm2, xmm3/m128 + /// VPSRAD ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftRightArithmetic(Vector256 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_srai_epi16 (__m256i a, int imm8) - /// VPSRAW ymm, ymm, imm8 + /// VPSRAW ymm1, ymm2, imm8 + /// VPSRAW ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftRightArithmetic(Vector256 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_srai_epi32 (__m256i a, int imm8) - /// VPSRAD ymm, ymm, imm8 + /// VPSRAD ymm1, ymm2, imm8 + /// VPSRAD ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftRightArithmetic(Vector256 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } - /// - /// __m256i _mm256_srav_epi32 (__m256i a, __m256i count) - /// VPSRAVD ymm, ymm, ymm/m256 - /// - public static Vector256 ShiftRightArithmeticVariable(Vector256 value, Vector256 count) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_srav_epi32 (__m128i a, __m128i count) - /// VPSRAVD xmm, xmm, xmm/m128 + /// VPSRAVD xmm1, xmm2, xmm3/m128 + /// VPSRAVD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 ShiftRightArithmeticVariable(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } + /// + /// __m256i _mm256_srav_epi32 (__m256i a, __m256i count) + /// VPSRAVD ymm1, ymm2, ymm3/m256 + /// VPSRAVD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst + /// + public static Vector256 ShiftRightArithmeticVariable(Vector256 value, Vector256 count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_srl_epi16 (__m256i a, __m128i count) - /// VPSRLW ymm, ymm, xmm/m128 + /// VPSRLW ymm1, ymm2, xmm3/m128 + /// VPSRLW ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftRightLogical(Vector256 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_srl_epi16 (__m256i a, __m128i count) - /// VPSRLW ymm, ymm, xmm/m128 + /// VPSRLW ymm1, ymm2, xmm3/m128 + /// VPSRLW ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftRightLogical(Vector256 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_srl_epi32 (__m256i a, __m128i count) - /// VPSRLD ymm, ymm, xmm/m128 + /// VPSRLD ymm1, ymm2, xmm3/m128 + /// VPSRLD ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftRightLogical(Vector256 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_srl_epi32 (__m256i a, __m128i count) - /// VPSRLD ymm, ymm, xmm/m128 + /// VPSRLD ymm1, ymm2, xmm3/m128 + /// VPSRLD ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftRightLogical(Vector256 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_srl_epi64 (__m256i a, __m128i count) - /// VPSRLQ ymm, ymm, xmm/m128 + /// VPSRLQ ymm1, ymm2, xmm3/m128 + /// VPSRLQ ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftRightLogical(Vector256 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_srl_epi64 (__m256i a, __m128i count) - /// VPSRLQ 
ymm, ymm, xmm/m128 + /// VPSRLQ ymm1, ymm2, xmm3/m128 + /// VPSRLQ ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftRightLogical(Vector256 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_srli_epi16 (__m256i a, int imm8) - /// VPSRLW ymm, ymm, imm8 + /// VPSRLW ymm1, ymm2, imm8 + /// VPSRLW ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftRightLogical(Vector256 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_srli_epi16 (__m256i a, int imm8) - /// VPSRLW ymm, ymm, imm8 + /// VPSRLW ymm1, ymm2, imm8 + /// VPSRLW ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftRightLogical(Vector256 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_srli_epi32 (__m256i a, int imm8) - /// VPSRLD ymm, ymm, imm8 + /// VPSRLD ymm1, ymm2, imm8 + /// VPSRLD ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftRightLogical(Vector256 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_srli_epi32 (__m256i a, int imm8) - /// VPSRLD ymm, ymm, imm8 + /// VPSRLD ymm1, ymm2, imm8 + /// VPSRLD ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftRightLogical(Vector256 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_srli_epi64 (__m256i a, int imm8) - /// VPSRLQ ymm, ymm, imm8 + /// VPSRLQ ymm1, ymm2, imm8 + /// VPSRLQ ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftRightLogical(Vector256 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_srli_epi64 (__m256i a, int imm8) - /// VPSRLQ ymm, ymm, imm8 + /// VPSRLQ ymm1, ymm2, imm8 + /// VPSRLQ ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftRightLogical(Vector256 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8) - /// VPSRLDQ ymm, ymm, imm8 + /// VPSRLDQ ymm1, ymm2/m256, imm8 /// public static Vector256 ShiftRightLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8) - /// VPSRLDQ ymm, ymm, imm8 + /// VPSRLDQ ymm1, ymm2/m256, imm8 /// public static Vector256 ShiftRightLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8) - /// VPSRLDQ ymm, ymm, imm8 + /// VPSRLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors. /// public static Vector256 ShiftRightLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8) - /// VPSRLDQ ymm, ymm, imm8 + /// VPSRLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors. /// public static Vector256 ShiftRightLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8) - /// VPSRLDQ ymm, ymm, imm8 + /// VPSRLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors.
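In contrast to the byte-wise lane shifts above, the variable shifts documented just below (a sketch for illustration, not part of this patch) shift each element by its own count taken from a second vector:

    Vector256<uint> values = Vector256.Create(1u, 2, 4, 8, 16, 32, 64, 128);
    Vector256<uint> counts = Vector256.Create(0u, 1, 2, 3, 4, 5, 6, 7);
    Vector256<uint> result = Avx2.ShiftRightLogicalVariable(values, counts); // every element becomes 1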
/// public static Vector256 ShiftRightLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8) - /// VPSRLDQ ymm, ymm, imm8 + /// VPSRLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors. /// public static Vector256 ShiftRightLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8) - /// VPSRLDQ ymm, ymm, imm8 + /// VPSRLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors. /// public static Vector256 ShiftRightLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8) - /// VPSRLDQ ymm, ymm, imm8 + /// VPSRLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors. /// public static Vector256 ShiftRightLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } - /// - /// __m256i _mm256_srlv_epi32 (__m256i a, __m256i count) - /// VPSRLVD ymm, ymm, ymm/m256 - /// - public static Vector256 ShiftRightLogicalVariable(Vector256 value, Vector256 count) { throw new PlatformNotSupportedException(); } - /// - /// __m256i _mm256_srlv_epi32 (__m256i a, __m256i count) - /// VPSRLVD ymm, ymm, ymm/m256 - /// - public static Vector256 ShiftRightLogicalVariable(Vector256 value, Vector256 count) { throw new PlatformNotSupportedException(); } - /// - /// __m256i _mm256_srlv_epi64 (__m256i a, __m256i count) - /// VPSRLVQ ymm, ymm, ymm/m256 - /// - public static Vector256 ShiftRightLogicalVariable(Vector256 value, Vector256 count) { throw new PlatformNotSupportedException(); } - /// - /// __m256i _mm256_srlv_epi64 (__m256i a, __m256i count) - /// VPSRLVQ ymm, ymm, ymm/m256 - /// - public static Vector256 ShiftRightLogicalVariable(Vector256 value, Vector256 count) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_srlv_epi32 (__m128i a, __m128i count) - /// VPSRLVD xmm, xmm, xmm/m128 + /// VPSRLVD xmm1, xmm2, xmm3/m128 + /// VPSRLVD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 ShiftRightLogicalVariable(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_srlv_epi32 (__m128i a, __m128i count) - /// VPSRLVD xmm, xmm, xmm/m128 + /// VPSRLVD xmm1, xmm2, xmm3/m128 + /// VPSRLVD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 ShiftRightLogicalVariable(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_srlv_epi64 (__m128i a, __m128i count) - /// VPSRLVQ xmm, xmm, xmm/m128 + /// VPSRLVQ xmm1, xmm2, xmm3/m128 + /// VPSRLVQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 ShiftRightLogicalVariable(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_srlv_epi64 (__m128i a, __m128i count) - /// VPSRLVQ xmm, xmm, xmm/m128 + /// VPSRLVQ xmm1, xmm2, xmm3/m128 + /// VPSRLVQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 ShiftRightLogicalVariable(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } + /// + /// __m256i _mm256_srlv_epi32
(__m256i a, __m256i count) + /// VPSRLVD ymm1, ymm2, ymm3/m256 + /// VPSRLVD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst + /// + public static Vector256 ShiftRightLogicalVariable(Vector256 value, Vector256 count) { throw new PlatformNotSupportedException(); } + /// + /// __m256i _mm256_srlv_epi32 (__m256i a, __m256i count) + /// VPSRLVD ymm1, ymm2, ymm3/m256 + /// VPSRLVD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst + /// + public static Vector256 ShiftRightLogicalVariable(Vector256 value, Vector256 count) { throw new PlatformNotSupportedException(); } + /// + /// __m256i _mm256_srlv_epi64 (__m256i a, __m256i count) + /// VPSRLVQ ymm1, ymm2, ymm3/m256 + /// VPSRLVQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst + /// + public static Vector256 ShiftRightLogicalVariable(Vector256 value, Vector256 count) { throw new PlatformNotSupportedException(); } + /// + /// __m256i _mm256_srlv_epi64 (__m256i a, __m256i count) + /// VPSRLVQ ymm1, ymm2, ymm3/m256 + /// VPSRLVQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst + /// + public static Vector256 ShiftRightLogicalVariable(Vector256 value, Vector256 count) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_shuffle_epi8 (__m256i a, __m256i b) - /// VPSHUFB ymm, ymm, ymm/m256 + /// VPSHUFB ymm1, ymm2, ymm3/m256 + /// VPSHUFB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Shuffle(Vector256 value, Vector256 mask) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_shuffle_epi8 (__m256i a, __m256i b) - /// VPSHUFB ymm, ymm, ymm/m256 + /// VPSHUFB ymm1, ymm2, ymm3/m256 + /// VPSHUFB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Shuffle(Vector256 value, Vector256 mask) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_shuffle_epi32 (__m256i a, const int imm8) - /// VPSHUFD ymm, ymm/m256, imm8 + /// VPSHUFD ymm1, ymm2/m256, imm8 + /// VPSHUFD ymm1 {k1}{z}, ymm2/m256/m32bcst, imm8 /// public static Vector256 Shuffle(Vector256 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_shuffle_epi32 (__m256i a, const int imm8) - /// VPSHUFD ymm, ymm/m256, imm8 + /// VPSHUFD ymm1, ymm2/m256, imm8 + /// VPSHUFD ymm1 {k1}{z}, ymm2/m256/m32bcst, imm8 /// public static Vector256 Shuffle(Vector256 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_shufflehi_epi16 (__m256i a, const int imm8) - /// VPSHUFHW ymm, ymm/m256, imm8 + /// VPSHUFHW ymm1, ymm2/m256, imm8 + /// VPSHUFHW ymm1 {k1}{z}, ymm2/m256, imm8 /// public static Vector256 ShuffleHigh(Vector256 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_shufflehi_epi16 (__m256i a, const int imm8) - /// VPSHUFHW ymm, ymm/m256, imm8 + /// VPSHUFHW ymm1, ymm2/m256, imm8 + /// VPSHUFHW ymm1 {k1}{z}, ymm2/m256, imm8 /// public static Vector256 ShuffleHigh(Vector256 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_shufflelo_epi16 (__m256i a, const int imm8) - /// VPSHUFLW ymm, ymm/m256, imm8 + /// VPSHUFLW ymm1, ymm2/m256, imm8 + /// VPSHUFLW ymm1 {k1}{z}, ymm2/m256, imm8 /// public static Vector256 ShuffleLow(Vector256 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_shufflelo_epi16 (__m256i a, const int imm8) - /// VPSHUFLW ymm, ymm/m256, imm8 + /// VPSHUFLW ymm1, ymm2/m256, imm8 + /// VPSHUFLW ymm1 {k1}{z}, ymm2/m256, imm8 /// public static Vector256 ShuffleLow(Vector256 value, 
[ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_sign_epi8 (__m256i a, __m256i b) - /// VPSIGNB ymm, ymm, ymm/m256 + /// VPSIGNB ymm1, ymm2, ymm3/m256 /// public static Vector256 Sign(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_sign_epi16 (__m256i a, __m256i b) - /// VPSIGNW ymm, ymm, ymm/m256 + /// VPSIGNW ymm1, ymm2, ymm3/m256 /// public static Vector256 Sign(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_sign_epi32 (__m256i a, __m256i b) - /// VPSIGND ymm, ymm, ymm/m256 + /// VPSIGND ymm1, ymm2, ymm3/m256 /// public static Vector256 Sign(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_sub_epi8 (__m256i a, __m256i b) - /// VPSUBB ymm, ymm, ymm/m256 + /// VPSUBB ymm1, ymm2, ymm3/m256 + /// VPSUBB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Subtract(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_sub_epi8 (__m256i a, __m256i b) - /// VPSUBB ymm, ymm, ymm/m256 + /// VPSUBB ymm1, ymm2, ymm3/m256 + /// VPSUBB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Subtract(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_sub_epi16 (__m256i a, __m256i b) - /// VPSUBW ymm, ymm, ymm/m256 + /// VPSUBW ymm1, ymm2, ymm3/m256 + /// VPSUBW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Subtract(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_sub_epi16 (__m256i a, __m256i b) - /// VPSUBW ymm, ymm, ymm/m256 + /// VPSUBW ymm1, ymm2, ymm3/m256 + /// VPSUBW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Subtract(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_sub_epi32 (__m256i a, __m256i b) - /// VPSUBD ymm, ymm, ymm/m256 + /// VPSUBD ymm1, ymm2, ymm3/m256 + /// VPSUBD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Subtract(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_sub_epi32 (__m256i a, __m256i b) - /// VPSUBD ymm, ymm, ymm/m256 + /// VPSUBD ymm1, ymm2, ymm3/m256 + /// VPSUBD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Subtract(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_sub_epi64 (__m256i a, __m256i b) - /// VPSUBQ ymm, ymm, ymm/m256 + /// VPSUBQ ymm1, ymm2, ymm3/m256 + /// VPSUBQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Subtract(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_sub_epi64 (__m256i a, __m256i b) - /// VPSUBQ ymm, ymm, ymm/m256 + /// VPSUBQ ymm1, ymm2, ymm3/m256 + /// VPSUBQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Subtract(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_subs_epi8 (__m256i a, __m256i b) - /// VPSUBSB ymm, ymm, ymm/m256 + /// VPSUBSB ymm1, ymm2, ymm3/m256 + /// VPSUBSB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 SubtractSaturate(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_subs_epi16 (__m256i a, __m256i b) - /// VPSUBSW ymm, ymm, ymm/m256 + /// VPSUBSW ymm1, ymm2, ymm3/m256 + /// VPSUBSW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256
SubtractSaturate(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_subs_epu8 (__m256i a, __m256i b) - /// VPSUBUSB ymm, ymm, ymm/m256 + /// VPSUBUSB ymm1, ymm2, ymm3/m256 + /// VPSUBUSB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 SubtractSaturate(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_subs_epu16 (__m256i a, __m256i b) - /// VPSUBUSW ymm, ymm, ymm/m256 + /// VPSUBUSW ymm1, ymm2, ymm3/m256 + /// VPSUBUSW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 SubtractSaturate(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_sad_epu8 (__m256i a, __m256i b) - /// VPSADBW ymm, ymm, ymm/m256 + /// VPSADBW ymm1, ymm2, ymm3/m256 + /// VPSADBW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 SumAbsoluteDifferences(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_unpackhi_epi8 (__m256i a, __m256i b) - /// VPUNPCKHBW ymm, ymm, ymm/m256 + /// VPUNPCKHBW ymm1, ymm2, ymm3/m256 + /// VPUNPCKHBW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 UnpackHigh(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_unpackhi_epi8 (__m256i a, __m256i b) - /// VPUNPCKHBW ymm, ymm, ymm/m256 + /// VPUNPCKHBW ymm1, ymm2, ymm3/m256 + /// VPUNPCKHBW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 UnpackHigh(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_unpackhi_epi16 (__m256i a, __m256i b) - /// VPUNPCKHWD ymm, ymm, ymm/m256 + /// VPUNPCKHWD ymm1, ymm2, ymm3/m256 + /// VPUNPCKHWD ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 UnpackHigh(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_unpackhi_epi16 (__m256i a, __m256i b) - /// VPUNPCKHWD ymm, ymm, ymm/m256 + /// VPUNPCKHWD ymm1, ymm2, ymm3/m256 + /// VPUNPCKHWD ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 UnpackHigh(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_unpackhi_epi32 (__m256i a, __m256i b) - /// VPUNPCKHDQ ymm, ymm, ymm/m256 + /// VPUNPCKHDQ ymm1, ymm2, ymm3/m256 + /// VPUNPCKHDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 UnpackHigh(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_unpackhi_epi32 (__m256i a, __m256i b) - /// VPUNPCKHDQ ymm, ymm, ymm/m256 + /// VPUNPCKHDQ ymm1, ymm2, ymm3/m256 + /// VPUNPCKHDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 UnpackHigh(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_unpackhi_epi64 (__m256i a, __m256i b) - /// VPUNPCKHQDQ ymm, ymm, ymm/m256 + /// VPUNPCKHQDQ ymm1, ymm2, ymm3/m256 + /// VPUNPCKHQDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 UnpackHigh(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_unpackhi_epi64 (__m256i a, __m256i b) - /// VPUNPCKHQDQ ymm, ymm, ymm/m256 + /// VPUNPCKHQDQ ymm1, ymm2, ymm3/m256 + /// VPUNPCKHQDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 UnpackHigh(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_unpacklo_epi8 (__m256i a, __m256i b) - /// VPUNPCKLBW ymm, ymm, ymm/m256 + ///
VPUNPCKLBW ymm1, ymm2, ymm3/m256 + /// VPUNPCKLBW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 UnpackLow(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_unpacklo_epi8 (__m256i a, __m256i b) - /// VPUNPCKLBW ymm, ymm, ymm/m256 + /// VPUNPCKLBW ymm1, ymm2, ymm3/m256 + /// VPUNPCKLBW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 UnpackLow(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_unpacklo_epi16 (__m256i a, __m256i b) - /// VPUNPCKLWD ymm, ymm, ymm/m256 + /// VPUNPCKLWD ymm1, ymm2, ymm3/m256 + /// VPUNPCKLWD ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 UnpackLow(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_unpacklo_epi16 (__m256i a, __m256i b) - /// VPUNPCKLWD ymm, ymm, ymm/m256 + /// VPUNPCKLWD ymm1, ymm2, ymm3/m256 + /// VPUNPCKLWD ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 UnpackLow(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_unpacklo_epi32 (__m256i a, __m256i b) - /// VPUNPCKLDQ ymm, ymm, ymm/m256 + /// VPUNPCKLDQ ymm1, ymm2, ymm3/m256 + /// VPUNPCKLDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 UnpackLow(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_unpacklo_epi32 (__m256i a, __m256i b) - /// VPUNPCKLDQ ymm, ymm, ymm/m256 + /// VPUNPCKLDQ ymm1, ymm2, ymm3/m256 + /// VPUNPCKLDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 UnpackLow(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_unpacklo_epi64 (__m256i a, __m256i b) - /// VPUNPCKLQDQ ymm, ymm, ymm/m256 + /// VPUNPCKLQDQ ymm1, ymm2, ymm3/m256 + /// VPUNPCKLQDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 UnpackLow(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_unpacklo_epi64 (__m256i a, __m256i b) - /// VPUNPCKLQDQ ymm, ymm, ymm/m256 + /// VPUNPCKLQDQ ymm1, ymm2, ymm3/m256 + /// VPUNPCKLQDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 UnpackLow(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_xor_si256 (__m256i a, __m256i b) - /// VPXOR ymm, ymm, ymm/m256 + /// VPXOR ymm1, ymm2, ymm3/m256 /// public static Vector256 Xor(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_xor_si256 (__m256i a, __m256i b) - /// VPXOR ymm, ymm, ymm/m256 + /// VPXOR ymm1, ymm2, ymm3/m256 /// public static Vector256 Xor(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_xor_si256 (__m256i a, __m256i b) - /// VPXOR ymm, ymm, ymm/m256 + /// VPXOR ymm1, ymm2, ymm3/m256 /// public static Vector256 Xor(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_xor_si256 (__m256i a, __m256i b) - /// VPXOR ymm, ymm, ymm/m256 + /// VPXOR ymm1, ymm2, ymm3/m256 /// public static Vector256 Xor(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_xor_si256 (__m256i a, __m256i b) - /// VPXOR ymm, ymm, ymm/m256 + /// VPXOR ymm1, ymm2, ymm3/m256 + /// VPXORD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Xor(Vector256 left, Vector256 right) { throw new 
PlatformNotSupportedException(); } /// /// __m256i _mm256_xor_si256 (__m256i a, __m256i b) - /// VPXOR ymm, ymm, ymm/m256 + /// VPXOR ymm1, ymm2, ymm3/m256 + /// VPXORD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Xor(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_xor_si256 (__m256i a, __m256i b) - /// VPXOR ymm, ymm, ymm/m256 + /// VPXOR ymm1, ymm2, ymm3/m256 + /// VPXORQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Xor(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_xor_si256 (__m256i a, __m256i b) - /// VPXOR ymm, ymm, ymm/m256 + /// VPXOR ymm1, ymm2, ymm3/m256 + /// VPXORQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Xor(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.cs index e275baffece76e..3783778cc857ab 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.cs @@ -27,829 +27,896 @@ internal X64() { } /// /// __m256i _mm256_abs_epi8 (__m256i a) - /// VPABSB ymm, ymm/m256 + /// VPABSB ymm1, ymm2/m256 + /// VPABSB ymm1 {k1}{z}, ymm2/m256 /// public static Vector256 Abs(Vector256 value) => Abs(value); /// /// __m256i _mm256_abs_epi16 (__m256i a) - /// VPABSW ymm, ymm/m256 + /// VPABSW ymm1, ymm2/m256 + /// VPABSW ymm1 {k1}{z}, ymm2/m256 /// public static Vector256 Abs(Vector256 value) => Abs(value); /// /// __m256i _mm256_abs_epi32 (__m256i a) - /// VPABSD ymm, ymm/m256 + /// VPABSD ymm1, ymm2/m256 + /// VPABSD ymm1 {k1}{z}, ymm2/m256/m32bcst /// public static Vector256 Abs(Vector256 value) => Abs(value); /// /// __m256i _mm256_add_epi8 (__m256i a, __m256i b) - /// VPADDB ymm, ymm, ymm/m256 + /// VPADDB ymm1, ymm2, ymm3/m256 + /// VPADDB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Add(Vector256 left, Vector256 right) => Add(left, right); /// /// __m256i _mm256_add_epi8 (__m256i a, __m256i b) - /// VPADDB ymm, ymm, ymm/m256 + /// VPADDB ymm1, ymm2, ymm3/m256 + /// VPADDB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Add(Vector256 left, Vector256 right) => Add(left, right); /// /// __m256i _mm256_add_epi16 (__m256i a, __m256i b) - /// VPADDW ymm, ymm, ymm/m256 + /// VPADDW ymm1, ymm2, ymm3/m256 + /// VPADDW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Add(Vector256 left, Vector256 right) => Add(left, right); /// /// __m256i _mm256_add_epi16 (__m256i a, __m256i b) - /// VPADDW ymm, ymm, ymm/m256 + /// VPADDW ymm1, ymm2, ymm3/m256 + /// VPADDW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Add(Vector256 left, Vector256 right) => Add(left, right); /// /// __m256i _mm256_add_epi32 (__m256i a, __m256i b) - /// VPADDD ymm, ymm, ymm/m256 + /// VPADDD ymm1, ymm2, ymm3/m256 + /// VPADDD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Add(Vector256 left, Vector256 right) => Add(left, right); /// /// __m256i _mm256_add_epi32 (__m256i a, __m256i b) - /// VPADDD ymm, ymm, ymm/m256 + /// VPADDD ymm1, ymm2, ymm3/m256 + /// VPADDD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Add(Vector256 left, Vector256 right) => Add(left, right); /// /// __m256i _mm256_add_epi64 (__m256i a, __m256i b) - /// VPADDQ ymm, ymm, ymm/m256 + /// 
VPADDQ ymm1, ymm2, ymm3/m256 + /// VPADDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Add(Vector256 left, Vector256 right) => Add(left, right); /// /// __m256i _mm256_add_epi64 (__m256i a, __m256i b) - /// VPADDQ ymm, ymm, ymm/m256 + /// VPADDQ ymm1, ymm2, ymm3/m256 + /// VPADDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Add(Vector256 left, Vector256 right) => Add(left, right); /// /// __m256i _mm256_adds_epi8 (__m256i a, __m256i b) - /// VPADDSB ymm, ymm, ymm/m256 + /// VPADDSB ymm1, ymm2, ymm3/m256 + /// VPADDSB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 AddSaturate(Vector256 left, Vector256 right) => AddSaturate(left, right); /// /// __m256i _mm256_adds_epu8 (__m256i a, __m256i b) - /// VPADDUSB ymm, ymm, ymm/m256 + /// VPADDUSB ymm1, ymm2, ymm3/m256 + /// VPADDUSB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 AddSaturate(Vector256 left, Vector256 right) => AddSaturate(left, right); /// /// __m256i _mm256_adds_epi16 (__m256i a, __m256i b) - /// VPADDSW ymm, ymm, ymm/m256 + /// VPADDSW ymm1, ymm2, ymm3/m256 + /// VPADDSW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 AddSaturate(Vector256 left, Vector256 right) => AddSaturate(left, right); /// /// __m256i _mm256_adds_epu16 (__m256i a, __m256i b) - /// VPADDUSW ymm, ymm, ymm/m256 + /// VPADDUSW ymm1, ymm2, ymm3/m256 + /// VPADDUSW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 AddSaturate(Vector256 left, Vector256 right) => AddSaturate(left, right); /// /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count) - /// VPALIGNR ymm, ymm, ymm/m256, imm8 + /// VPALIGNR ymm1, ymm2, ymm3/m256, imm8 + /// VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8 /// public static Vector256 AlignRight(Vector256 left, Vector256 right, [ConstantExpected] byte mask) => AlignRight(left, right, mask); - /// /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count) - /// VPALIGNR ymm, ymm, ymm/m256, imm8 + /// VPALIGNR ymm1, ymm2, ymm3/m256, imm8 + /// VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8 /// public static Vector256 AlignRight(Vector256 left, Vector256 right, [ConstantExpected] byte mask) => AlignRight(left, right, mask); - /// /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count) - /// VPALIGNR ymm, ymm, ymm/m256, imm8 + /// VPALIGNR ymm1, ymm2, ymm3/m256, imm8 + /// VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8 /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors. /// public static Vector256 AlignRight(Vector256 left, Vector256 right, [ConstantExpected] byte mask) => AlignRight(left, right, mask); - /// /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count) - /// VPALIGNR ymm, ymm, ymm/m256, imm8 + /// VPALIGNR ymm1, ymm2, ymm3/m256, imm8 + /// VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8 /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors. /// public static Vector256 AlignRight(Vector256 left, Vector256 right, [ConstantExpected] byte mask) => AlignRight(left, right, mask); - /// /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count) - /// VPALIGNR ymm, ymm, ymm/m256, imm8 + /// VPALIGNR ymm1, ymm2, ymm3/m256, imm8 + /// VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8 /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors. 
/// public static Vector256 AlignRight(Vector256 left, Vector256 right, [ConstantExpected] byte mask) => AlignRight(left, right, mask); - /// /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count) - /// VPALIGNR ymm, ymm, ymm/m256, imm8 + /// VPALIGNR ymm1, ymm2, ymm3/m256, imm8 + /// VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8 /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors. /// public static Vector256 AlignRight(Vector256 left, Vector256 right, [ConstantExpected] byte mask) => AlignRight(left, right, mask); - /// /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count) - /// VPALIGNR ymm, ymm, ymm/m256, imm8 + /// VPALIGNR ymm1, ymm2, ymm3/m256, imm8 + /// VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8 /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors. /// public static Vector256 AlignRight(Vector256 left, Vector256 right, [ConstantExpected] byte mask) => AlignRight(left, right, mask); - /// /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count) - /// VPALIGNR ymm, ymm, ymm/m256, imm8 + /// VPALIGNR ymm1, ymm2, ymm3/m256, imm8 + /// VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8 /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors. /// public static Vector256 AlignRight(Vector256 left, Vector256 right, [ConstantExpected] byte mask) => AlignRight(left, right, mask); /// /// __m256i _mm256_and_si256 (__m256i a, __m256i b) - /// VPAND ymm, ymm, ymm/m256 + /// VPAND ymm1, ymm2, ymm3/m256 /// public static Vector256 And(Vector256 left, Vector256 right) => And(left, right); /// /// __m256i _mm256_and_si256 (__m256i a, __m256i b) - /// VPAND ymm, ymm, ymm/m256 + /// VPAND ymm1, ymm2, ymm3/m256 /// public static Vector256 And(Vector256 left, Vector256 right) => And(left, right); /// /// __m256i _mm256_and_si256 (__m256i a, __m256i b) - /// VPAND ymm, ymm, ymm/m256 + /// VPAND ymm1, ymm2, ymm3/m256 /// public static Vector256 And(Vector256 left, Vector256 right) => And(left, right); /// /// __m256i _mm256_and_si256 (__m256i a, __m256i b) - /// VPAND ymm, ymm, ymm/m256 + /// VPAND ymm1, ymm2, ymm3/m256 /// public static Vector256 And(Vector256 left, Vector256 right) => And(left, right); /// /// __m256i _mm256_and_si256 (__m256i a, __m256i b) - /// VPAND ymm, ymm, ymm/m256 + /// VPAND ymm1, ymm2, ymm3/m256 + /// VPANDD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 And(Vector256 left, Vector256 right) => And(left, right); /// /// __m256i _mm256_and_si256 (__m256i a, __m256i b) - /// VPAND ymm, ymm, ymm/m256 + /// VPAND ymm1, ymm2, ymm3/m256 + /// VPANDD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 And(Vector256 left, Vector256 right) => And(left, right); /// /// __m256i _mm256_and_si256 (__m256i a, __m256i b) - /// VPAND ymm, ymm, ymm/m256 + /// VPAND ymm1, ymm2, ymm3/m256 + /// VPANDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 And(Vector256 left, Vector256 right) => And(left, right); /// /// __m256i _mm256_and_si256 (__m256i a, __m256i b) - /// VPAND ymm, ymm, ymm/m256 + /// VPAND ymm1, ymm2, ymm3/m256 + /// VPANDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 And(Vector256 left, Vector256 right) => And(left, right); /// /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b) - /// VPANDN ymm, ymm, ymm/m256 + /// VPANDN ymm1, ymm2, ymm3/m256 /// public static Vector256 AndNot(Vector256 left, Vector256 right) => AndNot(left, right); 
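The And entries above follow the pattern used throughout this patch: the legacy VEX form is listed first, then the EVEX form, whose {k1}{z} write-mask and m32bcst/m64bcst embedded-broadcast operands exist only for the dword/qword element types. As a rough sketch of how such an intrinsic is consumed from C# (illustrative only, not part of the patch; the helper name is invented):

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    static Vector256<uint> MaskBits(Vector256<uint> value, Vector256<uint> mask)
    {
        // Avx2.And emits VPAND; on AVX-512VL-capable hardware the JIT may
        // instead select the EVEX form (VPANDD) documented above.
        return Avx2.IsSupported ? Avx2.And(value, mask) : value & mask;
    }

Note that the managed API exposes neither the write-mask nor the broadcast operand directly; they appear only in the encoding the JIT selects.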
/// /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b) - /// VPANDN ymm, ymm, ymm/m256 + /// VPANDN ymm1, ymm2, ymm3/m256 /// public static Vector256 AndNot(Vector256 left, Vector256 right) => AndNot(left, right); /// /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b) - /// VPANDN ymm, ymm, ymm/m256 + /// VPANDN ymm1, ymm2, ymm3/m256 /// public static Vector256 AndNot(Vector256 left, Vector256 right) => AndNot(left, right); /// /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b) - /// VPANDN ymm, ymm, ymm/m256 + /// VPANDN ymm1, ymm2, ymm3/m256 /// public static Vector256 AndNot(Vector256 left, Vector256 right) => AndNot(left, right); /// /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b) - /// VPANDN ymm, ymm, ymm/m256 + /// VPANDN ymm1, ymm2, ymm3/m256 + /// VPANDND ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 AndNot(Vector256 left, Vector256 right) => AndNot(left, right); /// /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b) - /// VPANDN ymm, ymm, ymm/m256 + /// VPANDN ymm1, ymm2, ymm3/m256 + /// VPANDND ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 AndNot(Vector256 left, Vector256 right) => AndNot(left, right); /// /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b) - /// VPANDN ymm, ymm, ymm/m256 + /// VPANDN ymm1, ymm2, ymm3/m256 + /// VPANDNQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 AndNot(Vector256 left, Vector256 right) => AndNot(left, right); /// /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b) - /// VPANDN ymm, ymm, ymm/m256 + /// VPANDN ymm1, ymm2, ymm3/m256 + /// VPANDNQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 AndNot(Vector256 left, Vector256 right) => AndNot(left, right); /// /// __m256i _mm256_avg_epu8 (__m256i a, __m256i b) - /// VPAVGB ymm, ymm, ymm/m256 + /// VPAVGB ymm1, ymm2, ymm3/m256 + /// VPAVGB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Average(Vector256 left, Vector256 right) => Average(left, right); /// /// __m256i _mm256_avg_epu16 (__m256i a, __m256i b) - /// VPAVGW ymm, ymm, ymm/m256 + /// VPAVGW ymm1, ymm2, ymm3/m256 + /// VPAVGW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Average(Vector256 left, Vector256 right) => Average(left, right); /// /// __m128i _mm_blend_epi32 (__m128i a, __m128i b, const int imm8) - /// VPBLENDD xmm, xmm, xmm/m128, imm8 + /// VPBLENDD xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 Blend(Vector128 left, Vector128 right, [ConstantExpected] byte control) => Blend(left, right, control); /// /// __m128i _mm_blend_epi32 (__m128i a, __m128i b, const int imm8) - /// VPBLENDD xmm, xmm, xmm/m128, imm8 + /// VPBLENDD xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 Blend(Vector128 left, Vector128 right, [ConstantExpected] byte control) => Blend(left, right, control); /// /// __m256i _mm256_blend_epi16 (__m256i a, __m256i b, const int imm8) - /// VPBLENDW ymm, ymm, ymm/m256, imm8 + /// VPBLENDW ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Blend(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Blend(left, right, control); /// /// __m256i _mm256_blend_epi16 (__m256i a, __m256i b, const int imm8) - /// VPBLENDW ymm, ymm, ymm/m256, imm8 + /// VPBLENDW ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Blend(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Blend(left, right, control); /// /// __m256i _mm256_blend_epi32 (__m256i a, __m256i b, const int imm8) - /// VPBLENDD ymm, ymm, ymm/m256, imm8 + /// VPBLENDD ymm1, ymm2, ymm3/m256,
imm8 /// public static Vector256 Blend(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Blend(left, right, control); /// /// __m256i _mm256_blend_epi32 (__m256i a, __m256i b, const int imm8) - /// VPBLENDD ymm, ymm, ymm/m256, imm8 + /// VPBLENDD ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 Blend(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Blend(left, right, control); /// /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask) - /// VPBLENDVB ymm, ymm, ymm/m256, ymm + /// VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4 /// public static Vector256 BlendVariable(Vector256 left, Vector256 right, Vector256 mask) => BlendVariable(left, right, mask); /// /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask) - /// VPBLENDVB ymm, ymm, ymm/m256, ymm + /// VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4 /// public static Vector256 BlendVariable(Vector256 left, Vector256 right, Vector256 mask) => BlendVariable(left, right, mask); - /// /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask) - /// VPBLENDVB ymm, ymm, ymm/m256, ymm + /// VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4 /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements. /// public static Vector256 BlendVariable(Vector256 left, Vector256 right, Vector256 mask) => BlendVariable(left, right, mask); /// /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask) - /// VPBLENDVB ymm, ymm, ymm/m256, ymm + /// VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4 /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements. /// public static Vector256 BlendVariable(Vector256 left, Vector256 right, Vector256 mask) => BlendVariable(left, right, mask); - /// /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask) - /// VPBLENDVB ymm, ymm, ymm/m256, ymm + /// VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4 /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements. /// public static Vector256 BlendVariable(Vector256 left, Vector256 right, Vector256 mask) => BlendVariable(left, right, mask); /// /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask) - /// VPBLENDVB ymm, ymm, ymm/m256, ymm + /// VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4 /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements. /// public static Vector256 BlendVariable(Vector256 left, Vector256 right, Vector256 mask) => BlendVariable(left, right, mask); - /// /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask) - /// VPBLENDVB ymm, ymm, ymm/m256, ymm + /// VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4 /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements. /// public static Vector256 BlendVariable(Vector256 left, Vector256 right, Vector256 mask) => BlendVariable(left, right, mask); /// /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask) - /// VPBLENDVB ymm, ymm, ymm/m256, ymm + /// VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4 /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements. 
/// public static Vector256 BlendVariable(Vector256 left, Vector256 right, Vector256 mask) => BlendVariable(left, right, mask); /// /// __m128i _mm_broadcastb_epi8 (__m128i a) - /// VPBROADCASTB xmm, xmm + /// VPBROADCASTB xmm1, xmm2/m8 + /// VPBROADCASTB xmm1 {k1}{z}, xmm2/m8 /// public static Vector128 BroadcastScalarToVector128(Vector128 value) => BroadcastScalarToVector128(value); - /// /// __m128i _mm_broadcastb_epi8 (__m128i a) - /// VPBROADCASTB xmm, xmm + /// VPBROADCASTB xmm1, xmm2/m8 + /// VPBROADCASTB xmm1 {k1}{z}, xmm2/m8 /// public static Vector128 BroadcastScalarToVector128(Vector128 value) => BroadcastScalarToVector128(value); - /// /// __m128i _mm_broadcastw_epi16 (__m128i a) - /// VPBROADCASTW xmm, xmm + /// VPBROADCASTW xmm1, xmm2/m16 + /// VPBROADCASTW xmm1 {k1}{z}, xmm2/m16 /// public static Vector128 BroadcastScalarToVector128(Vector128 value) => BroadcastScalarToVector128(value); - /// /// __m128i _mm_broadcastw_epi16 (__m128i a) - /// VPBROADCASTW xmm, xmm + /// VPBROADCASTW xmm1, xmm2/m16 + /// VPBROADCASTW xmm1 {k1}{z}, xmm2/m16 /// public static Vector128 BroadcastScalarToVector128(Vector128 value) => BroadcastScalarToVector128(value); - /// /// __m128i _mm_broadcastd_epi32 (__m128i a) - /// VPBROADCASTD xmm, xmm + /// VPBROADCASTD xmm1, xmm2/m32 + /// VPBROADCASTD xmm1 {k1}{z}, xmm2/m32 /// public static Vector128 BroadcastScalarToVector128(Vector128 value) => BroadcastScalarToVector128(value); - /// /// __m128i _mm_broadcastd_epi32 (__m128i a) - /// VPBROADCASTD xmm, xmm + /// VPBROADCASTD xmm1, xmm2/m32 + /// VPBROADCASTD xmm1 {k1}{z}, xmm2/m32 /// public static Vector128 BroadcastScalarToVector128(Vector128 value) => BroadcastScalarToVector128(value); - /// /// __m128i _mm_broadcastq_epi64 (__m128i a) - /// VPBROADCASTQ xmm, xmm + /// VPBROADCASTQ xmm1, xmm2/m64 + /// VPBROADCASTQ xmm1 {k1}{z}, xmm2/m64 /// public static Vector128 BroadcastScalarToVector128(Vector128 value) => BroadcastScalarToVector128(value); - /// /// __m128i _mm_broadcastq_epi64 (__m128i a) - /// VPBROADCASTQ xmm, xmm + /// VPBROADCASTQ xmm1, xmm2/m64 + /// VPBROADCASTQ xmm1 {k1}{z}, xmm2/m64 /// public static Vector128 BroadcastScalarToVector128(Vector128 value) => BroadcastScalarToVector128(value); - /// /// __m128 _mm_broadcastss_ps (__m128 a) - /// VBROADCASTSS xmm, xmm + /// VBROADCASTSS xmm1, xmm2/m32 + /// VBROADCASTSS xmm1 {k1}{z}, xmm2/m32 /// public static Vector128 BroadcastScalarToVector128(Vector128 value) => BroadcastScalarToVector128(value); - /// /// __m128d _mm_broadcastsd_pd (__m128d a) - /// VMOVDDUP xmm, xmm + /// VMOVDDUP xmm1, xmm2/m64 /// public static Vector128 BroadcastScalarToVector128(Vector128 value) => BroadcastScalarToVector128(value); /// /// __m128i _mm_broadcastb_epi8 (__m128i a) - /// VPBROADCASTB xmm, m8 + /// VPBROADCASTB xmm1, m8 + /// VPBROADCASTB xmm1 {k1}{z}, m8 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector128 BroadcastScalarToVector128(byte* source) => BroadcastScalarToVector128(source); /// /// __m128i _mm_broadcastb_epi8 (__m128i a) - /// VPBROADCASTB xmm, m8 + /// VPBROADCASTB xmm1, m8 + /// VPBROADCASTB xmm1 {k1}{z}, m8 /// The above native signature does not directly correspond to the managed signature.
/// public static unsafe Vector128 BroadcastScalarToVector128(sbyte* source) => BroadcastScalarToVector128(source); - /// /// __m128i _mm_broadcastw_epi16 (__m128i a) - /// VPBROADCASTW xmm, m16 + /// VPBROADCASTW xmm1, m16 + /// VPBROADCASTW xmm1 {k1}{z}, m16 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector128 BroadcastScalarToVector128(short* source) => BroadcastScalarToVector128(source); /// /// __m128i _mm_broadcastw_epi16 (__m128i a) - /// VPBROADCASTW xmm, m16 + /// VPBROADCASTW xmm1, m16 + /// VPBROADCASTW xmm1 {k1}{z}, m16 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector128 BroadcastScalarToVector128(ushort* source) => BroadcastScalarToVector128(source); - /// /// __m128i _mm_broadcastd_epi32 (__m128i a) - /// VPBROADCASTD xmm, m32 + /// VPBROADCASTD xmm1, m32 + /// VPBROADCASTD xmm1 {k1}{z}, m32 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector128 BroadcastScalarToVector128(int* source) => BroadcastScalarToVector128(source); /// /// __m128i _mm_broadcastd_epi32 (__m128i a) - /// VPBROADCASTD xmm, m32 + /// VPBROADCASTD xmm1, m32 + /// VPBROADCASTD xmm1 {k1}{z}, m32 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector128 BroadcastScalarToVector128(uint* source) => BroadcastScalarToVector128(source); - /// /// __m128i _mm_broadcastq_epi64 (__m128i a) - /// VPBROADCASTQ xmm, m64 + /// VPBROADCASTQ xmm1, m64 + /// VPBROADCASTQ xmm1 {k1}{z}, m64 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector128 BroadcastScalarToVector128(long* source) => BroadcastScalarToVector128(source); /// /// __m128i _mm_broadcastq_epi64 (__m128i a) - /// VPBROADCASTQ xmm, m64 + /// VPBROADCASTQ xmm1, m64 + /// VPBROADCASTQ xmm1 {k1}{z}, m64 /// The above native signature does not directly correspond to the managed signature. 
/// public static unsafe Vector128 BroadcastScalarToVector128(ulong* source) => BroadcastScalarToVector128(source); /// /// __m256i _mm256_broadcastb_epi8 (__m128i a) - /// VPBROADCASTB ymm, xmm + /// VPBROADCASTB ymm1, xmm2/m8 + /// VPBROADCASTB ymm1 {k1}{z}, xmm2/m8 /// public static Vector256 BroadcastScalarToVector256(Vector128 value) => BroadcastScalarToVector256(value); - /// /// __m256i _mm256_broadcastb_epi8 (__m128i a) - /// VPBROADCASTB ymm, xmm + /// VPBROADCASTB ymm1, xmm2/m8 + /// VPBROADCASTB ymm1 {k1}{z}, xmm2/m8 /// public static Vector256 BroadcastScalarToVector256(Vector128 value) => BroadcastScalarToVector256(value); - /// /// __m256i _mm256_broadcastw_epi16 (__m128i a) - /// VPBROADCASTW ymm, xmm + /// VPBROADCASTW ymm1, xmm2/m16 + /// VPBROADCASTW ymm1 {k1}{z}, xmm2/m16 /// public static Vector256 BroadcastScalarToVector256(Vector128 value) => BroadcastScalarToVector256(value); - /// /// __m256i _mm256_broadcastw_epi16 (__m128i a) - /// VPBROADCASTW ymm, xmm + /// VPBROADCASTW ymm1, xmm2/m16 + /// VPBROADCASTW ymm1 {k1}{z}, xmm2/m16 /// public static Vector256 BroadcastScalarToVector256(Vector128 value) => BroadcastScalarToVector256(value); - /// /// __m256i _mm256_broadcastd_epi32 (__m128i a) - /// VPBROADCASTD ymm, xmm + /// VPBROADCASTD ymm1, xmm2/m32 + /// VPBROADCASTD ymm1 {k1}{z}, xmm2/m32 /// public static Vector256 BroadcastScalarToVector256(Vector128 value) => BroadcastScalarToVector256(value); - /// /// __m256i _mm256_broadcastd_epi32 (__m128i a) - /// VPBROADCASTD ymm, xmm + /// VPBROADCASTD ymm1, xmm2/m32 + /// VPBROADCASTD ymm1 {k1}{z}, xmm2/m32 /// public static Vector256 BroadcastScalarToVector256(Vector128 value) => BroadcastScalarToVector256(value); - /// /// __m256i _mm256_broadcastq_epi64 (__m128i a) - /// VPBROADCASTQ ymm, xmm + /// VPBROADCASTQ ymm1, xmm2/m64 + /// VPBROADCASTQ ymm1 {k1}{z}, xmm2/m64 /// public static Vector256 BroadcastScalarToVector256(Vector128 value) => BroadcastScalarToVector256(value); - /// /// __m256i _mm256_broadcastq_epi64 (__m128i a) - /// VPBROADCASTQ ymm, xmm + /// VPBROADCASTQ ymm1, xmm2/m64 + /// VPBROADCASTQ ymm1 {k1}{z}, xmm2/m64 /// public static Vector256 BroadcastScalarToVector256(Vector128 value) => BroadcastScalarToVector256(value); - /// /// __m256 _mm256_broadcastss_ps (__m128 a) - /// VBROADCASTSS ymm, xmm + /// VBROADCASTSS ymm1, xmm2/m32 + /// VBROADCASTSS ymm1 {k1}{z}, xmm2/m32 /// public static Vector256 BroadcastScalarToVector256(Vector128 value) => BroadcastScalarToVector256(value); - /// /// __m256d _mm256_broadcastsd_pd (__m128d a) - /// VBROADCASTSD ymm, xmm + /// VBROADCASTSD ymm1, xmm2/m64 + /// VBROADCASTSD ymm1 {k1}{z}, xmm2/m64 /// public static Vector256 BroadcastScalarToVector256(Vector128 value) => BroadcastScalarToVector256(value); /// /// __m256i _mm256_broadcastb_epi8 (__m128i a) - /// VPBROADCASTB ymm, m8 + /// VPBROADCASTB ymm1, m8 + /// VPBROADCASTB ymm1 {k1}{z}, m8 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastScalarToVector256(byte* source) => BroadcastScalarToVector256(source); /// /// __m256i _mm256_broadcastb_epi8 (__m128i a) - /// VPBROADCASTB ymm, m8 + /// VPBROADCASTB ymm1, m8 + /// VPBROADCASTB ymm1 {k1}{z}, m8 /// The above native signature does not directly correspond to the managed signature. 
/// public static unsafe Vector256 BroadcastScalarToVector256(sbyte* source) => BroadcastScalarToVector256(source); - /// /// __m256i _mm256_broadcastw_epi16 (__m128i a) - /// VPBROADCASTW ymm, m16 + /// VPBROADCASTW ymm1, m16 + /// VPBROADCASTW ymm1 {k1}{z}, m16 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastScalarToVector256(short* source) => BroadcastScalarToVector256(source); /// /// __m256i _mm256_broadcastw_epi16 (__m128i a) - /// VPBROADCASTW ymm, m16 + /// VPBROADCASTW ymm1, m16 + /// VPBROADCASTW ymm1 {k1}{z}, m16 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastScalarToVector256(ushort* source) => BroadcastScalarToVector256(source); - /// /// __m256i _mm256_broadcastd_epi32 (__m128i a) - /// VPBROADCASTD ymm, m32 + /// VPBROADCASTD ymm1, m32 + /// VPBROADCASTD ymm1 {k1}{z}, m32 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastScalarToVector256(int* source) => BroadcastScalarToVector256(source); /// /// __m256i _mm256_broadcastd_epi32 (__m128i a) - /// VPBROADCASTD ymm, m32 + /// VPBROADCASTD ymm1, m32 + /// VPBROADCASTD ymm1 {k1}{z}, m32 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastScalarToVector256(uint* source) => BroadcastScalarToVector256(source); - /// /// __m256i _mm256_broadcastq_epi64 (__m128i a) - /// VPBROADCASTQ ymm, m64 + /// VPBROADCASTQ ymm1, m64 + /// VPBROADCASTQ ymm1 {k1}{z}, m64 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastScalarToVector256(long* source) => BroadcastScalarToVector256(source); /// /// __m256i _mm256_broadcastq_epi64 (__m128i a) - /// VPBROADCASTQ ymm, m64 + /// VPBROADCASTQ ymm1, m64 + /// VPBROADCASTQ ymm1 {k1}{z}, m64 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastScalarToVector256(ulong* source) => BroadcastScalarToVector256(source); /// /// __m256i _mm256_broadcastsi128_si256 (__m128i a) - /// VBROADCASTI128 ymm, m128 + /// VBROADCASTI128 ymm1, m128 + /// VBROADCASTI32x4 ymm1 {k1}{z}, m128 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastVector128ToVector256(sbyte* address) => BroadcastVector128ToVector256(address); /// /// __m256i _mm256_broadcastsi128_si256 (__m128i a) - /// VBROADCASTI128 ymm, m128 + /// VBROADCASTI128 ymm1, m128 + /// VBROADCASTI32x4 ymm1 {k1}{z}, m128 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastVector128ToVector256(byte* address) => BroadcastVector128ToVector256(address); /// /// __m256i _mm256_broadcastsi128_si256 (__m128i a) - /// VBROADCASTI128 ymm, m128 + /// VBROADCASTI128 ymm1, m128 + /// VBROADCASTI32x4 ymm1 {k1}{z}, m128 /// The above native signature does not directly correspond to the managed signature. 
/// public static unsafe Vector256 BroadcastVector128ToVector256(short* address) => BroadcastVector128ToVector256(address); /// /// __m256i _mm256_broadcastsi128_si256 (__m128i a) - /// VBROADCASTI128 ymm, m128 + /// VBROADCASTI128 ymm1, m128 + /// VBROADCASTI32x4 ymm1 {k1}{z}, m128 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastVector128ToVector256(ushort* address) => BroadcastVector128ToVector256(address); /// /// __m256i _mm256_broadcastsi128_si256 (__m128i a) - /// VBROADCASTI128 ymm, m128 + /// VBROADCASTI128 ymm1, m128 + /// VBROADCASTI32x4 ymm1 {k1}{z}, m128 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastVector128ToVector256(int* address) => BroadcastVector128ToVector256(address); /// /// __m256i _mm256_broadcastsi128_si256 (__m128i a) - /// VBROADCASTI128 ymm, m128 + /// VBROADCASTI128 ymm1, m128 + /// VBROADCASTI32x4 ymm1 {k1}{z}, m128 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastVector128ToVector256(uint* address) => BroadcastVector128ToVector256(address); /// /// __m256i _mm256_broadcastsi128_si256 (__m128i a) - /// VBROADCASTI128 ymm, m128 + /// VBROADCASTI128 ymm1, m128 + /// VBROADCASTI64x2 ymm1 {k1}{z}, m128 /// The above native signature does not directly correspond to the managed signature. /// public static unsafe Vector256 BroadcastVector128ToVector256(long* address) => BroadcastVector128ToVector256(address); /// /// __m256i _mm256_broadcastsi128_si256 (__m128i a) - /// VBROADCASTI128 ymm, m128 + /// VBROADCASTI128 ymm1, m128 + /// VBROADCASTI64x2 ymm1 {k1}{z}, m128 /// The above native signature does not directly correspond to the managed signature. 
/// public static unsafe Vector256 BroadcastVector128ToVector256(ulong* address) => BroadcastVector128ToVector256(address); /// /// __m256i _mm256_cmpeq_epi8 (__m256i a, __m256i b) - /// VPCMPEQB ymm, ymm, ymm/m256 + /// VPCMPEQB ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareEqual(Vector256 left, Vector256 right) => CompareEqual(left, right); /// /// __m256i _mm256_cmpeq_epi8 (__m256i a, __m256i b) - /// VPCMPEQB ymm, ymm, ymm/m256 + /// VPCMPEQB ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareEqual(Vector256 left, Vector256 right) => CompareEqual(left, right); /// /// __m256i _mm256_cmpeq_epi16 (__m256i a, __m256i b) - /// VPCMPEQW ymm, ymm, ymm/m256 + /// VPCMPEQW ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareEqual(Vector256 left, Vector256 right) => CompareEqual(left, right); /// /// __m256i _mm256_cmpeq_epi16 (__m256i a, __m256i b) - /// VPCMPEQW ymm, ymm, ymm/m256 + /// VPCMPEQW ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareEqual(Vector256 left, Vector256 right) => CompareEqual(left, right); /// /// __m256i _mm256_cmpeq_epi32 (__m256i a, __m256i b) - /// VPCMPEQD ymm, ymm, ymm/m256 + /// VPCMPEQD ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareEqual(Vector256 left, Vector256 right) => CompareEqual(left, right); /// /// __m256i _mm256_cmpeq_epi32 (__m256i a, __m256i b) - /// VPCMPEQD ymm, ymm, ymm/m256 + /// VPCMPEQD ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareEqual(Vector256 left, Vector256 right) => CompareEqual(left, right); /// /// __m256i _mm256_cmpeq_epi64 (__m256i a, __m256i b) - /// VPCMPEQQ ymm, ymm, ymm/m256 + /// VPCMPEQQ ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareEqual(Vector256 left, Vector256 right) => CompareEqual(left, right); /// /// __m256i _mm256_cmpeq_epi64 (__m256i a, __m256i b) - /// VPCMPEQQ ymm, ymm, ymm/m256 + /// VPCMPEQQ ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareEqual(Vector256 left, Vector256 right) => CompareEqual(left, right); /// /// __m256i _mm256_cmpgt_epi8 (__m256i a, __m256i b) - /// VPCMPGTB ymm, ymm, ymm/m256 + /// VPCMPGTB ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareGreaterThan(Vector256 left, Vector256 right) => CompareGreaterThan(left, right); /// /// __m256i _mm256_cmpgt_epi16 (__m256i a, __m256i b) - /// VPCMPGTW ymm, ymm, ymm/m256 + /// VPCMPGTW ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareGreaterThan(Vector256 left, Vector256 right) => CompareGreaterThan(left, right); /// /// __m256i _mm256_cmpgt_epi32 (__m256i a, __m256i b) - /// VPCMPGTD ymm, ymm, ymm/m256 + /// VPCMPGTD ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareGreaterThan(Vector256 left, Vector256 right) => CompareGreaterThan(left, right); /// /// __m256i _mm256_cmpgt_epi64 (__m256i a, __m256i b) - /// VPCMPGTQ ymm, ymm, ymm/m256 + /// VPCMPGTQ ymm1, ymm2, ymm3/m256 /// public static Vector256 CompareGreaterThan(Vector256 left, Vector256 right) => CompareGreaterThan(left, right); /// /// int _mm256_cvtsi256_si32 (__m256i a) - /// MOVD reg/m32, xmm + /// VMOVD r/m32, xmm1 /// public static int ConvertToInt32(Vector256 value) => ConvertToInt32(value); /// /// int _mm256_cvtsi256_si32 (__m256i a) - /// MOVD reg/m32, xmm + /// VMOVD r/m32, xmm1 /// public static uint ConvertToUInt32(Vector256 value) => ConvertToUInt32(value); /// /// __m256i _mm256_cvtepi8_epi16 (__m128i a) - /// VPMOVSXBW ymm, xmm + /// VPMOVSXBW ymm1, xmm2/m128 + /// VPMOVSXBW ymm1 {k1}{z}, xmm2/m128 /// public static Vector256 ConvertToVector256Int16(Vector128 value) =>
ConvertToVector256Int16(value); /// /// __m256i _mm256_cvtepu8_epi16 (__m128i a) - /// VPMOVZXBW ymm, xmm + /// VPMOVZXBW ymm1, xmm2/m128 + /// VPMOVZXBW ymm1 {k1}{z}, xmm2/m128 /// public static Vector256 ConvertToVector256Int16(Vector128 value) => ConvertToVector256Int16(value); /// /// __m256i _mm256_cvtepi8_epi32 (__m128i a) - /// VPMOVSXBD ymm, xmm + /// VPMOVSXBD ymm1, xmm2/m64 + /// VPMOVSXBD ymm1 {k1}{z}, xmm2/m64 /// public static Vector256 ConvertToVector256Int32(Vector128 value) => ConvertToVector256Int32(value); /// /// __m256i _mm256_cvtepu8_epi32 (__m128i a) - /// VPMOVZXBD ymm, xmm + /// VPMOVZXBD ymm1, xmm2/m64 + /// VPMOVZXBD ymm1 {k1}{z}, xmm2/m64 /// public static Vector256 ConvertToVector256Int32(Vector128 value) => ConvertToVector256Int32(value); /// /// __m256i _mm256_cvtepi16_epi32 (__m128i a) - /// VPMOVSXWD ymm, xmm + /// VPMOVSXWD ymm1, xmm2/m128 + /// VPMOVSXWD ymm1 {k1}{z}, xmm2/m128 /// public static Vector256 ConvertToVector256Int32(Vector128 value) => ConvertToVector256Int32(value); /// /// __m256i _mm256_cvtepu16_epi32 (__m128i a) - /// VPMOVZXWD ymm, xmm + /// VPMOVZXWD ymm1, xmm2/m128 + /// VPMOVZXWD ymm1 {k1}{z}, xmm2/m128 /// public static Vector256 ConvertToVector256Int32(Vector128 value) => ConvertToVector256Int32(value); /// /// __m256i _mm256_cvtepi8_epi64 (__m128i a) - /// VPMOVSXBQ ymm, xmm + /// VPMOVSXBQ ymm1, xmm2/m32 + /// VPMOVSXBQ ymm1 {k1}{z}, xmm2/m32 /// public static Vector256 ConvertToVector256Int64(Vector128 value) => ConvertToVector256Int64(value); /// /// __m256i _mm256_cvtepu8_epi64 (__m128i a) - /// VPMOVZXBQ ymm, xmm + /// VPMOVZXBQ ymm1, xmm2/m32 + /// VPMOVZXBQ ymm1 {k1}{z}, xmm2/m32 /// public static Vector256 ConvertToVector256Int64(Vector128 value) => ConvertToVector256Int64(value); /// /// __m256i _mm256_cvtepi16_epi64 (__m128i a) - /// VPMOVSXWQ ymm, xmm + /// VPMOVSXWQ ymm1, xmm2/m64 + /// VPMOVSXWQ ymm1 {k1}{z}, xmm2/m64 /// public static Vector256 ConvertToVector256Int64(Vector128 value) => ConvertToVector256Int64(value); /// /// __m256i _mm256_cvtepu16_epi64 (__m128i a) - /// VPMOVZXWQ ymm, xmm + /// VPMOVZXWQ ymm1, xmm2/m64 + /// VPMOVZXWQ ymm1 {k1}{z}, xmm2/m64 /// public static Vector256 ConvertToVector256Int64(Vector128 value) => ConvertToVector256Int64(value); /// /// __m256i _mm256_cvtepi32_epi64 (__m128i a) - /// VPMOVSXDQ ymm, xmm + /// VPMOVSXDQ ymm1, xmm2/m128 + /// VPMOVSXDQ ymm1 {k1}{z}, xmm2/m128 /// public static Vector256 ConvertToVector256Int64(Vector128 value) => ConvertToVector256Int64(value); /// /// __m256i _mm256_cvtepu32_epi64 (__m128i a) - /// VPMOVZXDQ ymm, xmm + /// VPMOVZXDQ ymm1, xmm2/m128 + /// VPMOVZXDQ ymm1 {k1}{z}, xmm2/m128 /// public static Vector256 ConvertToVector256Int64(Vector128 value) => ConvertToVector256Int64(value); /// - /// VPMOVSXBW ymm, m128 + /// VPMOVSXBW ymm1, m128 + /// VPMOVSXBW ymm1 {k1}{z}, m128 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector256 ConvertToVector256Int16(sbyte* address) => ConvertToVector256Int16(address); /// - /// VPMOVZXBW ymm, m128 + /// VPMOVZXBW ymm1, m128 + /// VPMOVZXBW ymm1 {k1}{z}, m128 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector256 ConvertToVector256Int16(byte* address) => ConvertToVector256Int16(address); /// - /// VPMOVSXBD ymm, m64 + /// VPMOVSXBD ymm1, m64 + /// VPMOVSXBD ymm1 {k1}{z}, m64 /// The native signature does not exist. 
We provide this additional overload for completeness. /// public static unsafe Vector256 ConvertToVector256Int32(sbyte* address) => ConvertToVector256Int32(address); /// - /// VPMOVZXBD ymm, m64 + /// VPMOVZXBD ymm1, m64 + /// VPMOVZXBD ymm1 {k1}{z}, m64 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector256 ConvertToVector256Int32(byte* address) => ConvertToVector256Int32(address); /// - /// VPMOVSXWD ymm, m128 + /// VPMOVSXWD ymm1, m128 + /// VPMOVSXWD ymm1 {k1}{z}, m128 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector256 ConvertToVector256Int32(short* address) => ConvertToVector256Int32(address); /// - /// VPMOVZXWD ymm, m128 + /// VPMOVZXWD ymm1, m128 + /// VPMOVZXWD ymm1 {k1}{z}, m128 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector256 ConvertToVector256Int32(ushort* address) => ConvertToVector256Int32(address); /// - /// VPMOVSXBQ ymm, m32 + /// VPMOVSXBQ ymm1, m32 + /// VPMOVSXBQ ymm1 {k1}{z}, m32 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector256 ConvertToVector256Int64(sbyte* address) => ConvertToVector256Int64(address); /// - /// VPMOVZXBQ ymm, m32 + /// VPMOVZXBQ ymm1, m32 + /// VPMOVZXBQ ymm1 {k1}{z}, m32 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector256 ConvertToVector256Int64(byte* address) => ConvertToVector256Int64(address); /// - /// VPMOVSXWQ ymm, m64 + /// VPMOVSXWQ ymm1, m64 + /// VPMOVSXWQ ymm1 {k1}{z}, m64 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector256 ConvertToVector256Int64(short* address) => ConvertToVector256Int64(address); /// - /// VPMOVZXWQ ymm, m64 + /// VPMOVZXWQ ymm1, m64 + /// VPMOVZXWQ ymm1 {k1}{z}, m64 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector256 ConvertToVector256Int64(ushort* address) => ConvertToVector256Int64(address); /// - /// VPMOVSXDQ ymm, m128 + /// VPMOVSXDQ ymm1, m128 + /// VPMOVSXDQ ymm1 {k1}{z}, m128 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector256 ConvertToVector256Int64(int* address) => ConvertToVector256Int64(address); /// - /// VPMOVZXDQ ymm, m128 + /// VPMOVZXDQ ymm1, m128 + /// VPMOVZXDQ ymm1 {k1}{z}, m128 /// The native signature does not exist. We provide this additional overload for completeness. 
/// public static unsafe Vector256 ConvertToVector256Int64(uint* address) => ConvertToVector256Int64(address); /// /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8) - /// VEXTRACTI128 xmm, ymm, imm8 + /// VEXTRACTI128 xmm1/m128, ymm2, imm8 + /// VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static new Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) => ExtractVector128(value, index); - /// /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8) - /// VEXTRACTI128 xmm, ymm, imm8 + /// VEXTRACTI128 xmm1/m128, ymm2, imm8 + /// VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static new Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) => ExtractVector128(value, index); - /// /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8) - /// VEXTRACTI128 xmm, ymm, imm8 + /// VEXTRACTI128 xmm1/m128, ymm2, imm8 + /// VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static new Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) => ExtractVector128(value, index); - /// /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8) - /// VEXTRACTI128 xmm, ymm, imm8 + /// VEXTRACTI128 xmm1/m128, ymm2, imm8 + /// VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static new Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) => ExtractVector128(value, index); - /// /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8) - /// VEXTRACTI128 xmm, ymm, imm8 + /// VEXTRACTI128 xmm1/m128, ymm2, imm8 + /// VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static new Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) => ExtractVector128(value, index); - /// /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8) - /// VEXTRACTI128 xmm, ymm, imm8 + /// VEXTRACTI128 xmm1/m128, ymm2, imm8 + /// VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static new Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) => ExtractVector128(value, index); - /// /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8) - /// VEXTRACTI128 xmm, ymm, imm8 + /// VEXTRACTI128 xmm1/m128, ymm2, imm8 + /// VEXTRACTI64x2 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static new Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) => ExtractVector128(value, index); - /// /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8) - /// VEXTRACTI128 xmm, ymm, imm8 + /// VEXTRACTI128 xmm1/m128, ymm2, imm8 + /// VEXTRACTI64x2 xmm1/m128 {k1}{z}, ymm2, imm8 /// public static new Vector128 ExtractVector128(Vector256 value, [ConstantExpected] byte index) => ExtractVector128(value, index); /// /// __m128i _mm_i32gather_epi32 (int const* base_addr, __m128i vindex, const int scale) - /// VPGATHERDD xmm, vm32x, xmm + /// VPGATHERDD xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(int* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -865,7 +932,7 @@ public static unsafe Vector128 GatherVector128(int* baseAddress, Vector128< } /// /// __m128i _mm_i32gather_epi32 (int const* base_addr, __m128i vindex, const int scale) - /// VPGATHERDD xmm, vm32x, xmm + /// VPGATHERDD xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. 
/// public static unsafe Vector128 GatherVector128(uint* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -881,7 +948,7 @@ public static unsafe Vector128 GatherVector128(uint* baseAddress, Vector12 } /// /// __m128i _mm_i32gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale) - /// VPGATHERDQ xmm, vm32x, xmm + /// VPGATHERDQ xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(long* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -897,7 +964,7 @@ public static unsafe Vector128 GatherVector128(long* baseAddress, Vector12 } /// /// __m128i _mm_i32gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale) - /// VPGATHERDQ xmm, vm32x, xmm + /// VPGATHERDQ xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(ulong* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -913,7 +980,7 @@ public static unsafe Vector128 GatherVector128(ulong* baseAddress, Vector } /// /// __m128 _mm_i32gather_ps (float const* base_addr, __m128i vindex, const int scale) - /// VGATHERDPS xmm, vm32x, xmm + /// VGATHERDPS xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(float* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -929,7 +996,7 @@ public static unsafe Vector128 GatherVector128(float* baseAddress, Vector } /// /// __m128d _mm_i32gather_pd (double const* base_addr, __m128i vindex, const int scale) - /// VGATHERDPD xmm, vm32x, xmm + /// VGATHERDPD xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(double* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -945,7 +1012,7 @@ public static unsafe Vector128 GatherVector128(double* baseAddress, Vect } /// /// __m128i _mm_i64gather_epi32 (int const* base_addr, __m128i vindex, const int scale) - /// VPGATHERQD xmm, vm64x, xmm + /// VPGATHERQD xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(int* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -961,7 +1028,7 @@ public static unsafe Vector128 GatherVector128(int* baseAddress, Vector128< } /// /// __m128i _mm_i64gather_epi32 (int const* base_addr, __m128i vindex, const int scale) - /// VPGATHERQD xmm, vm64x, xmm + /// VPGATHERQD xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. 
/// public static unsafe Vector128 GatherVector128(uint* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -977,7 +1044,7 @@ public static unsafe Vector128 GatherVector128(uint* baseAddress, Vector12 } /// /// __m128i _mm_i64gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale) - /// VPGATHERQQ xmm, vm64x, xmm + /// VPGATHERQQ xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(long* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -993,7 +1060,7 @@ public static unsafe Vector128 GatherVector128(long* baseAddress, Vector12 } /// /// __m128i _mm_i64gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale) - /// VPGATHERQQ xmm, vm64x, xmm + /// VPGATHERQQ xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(ulong* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1009,7 +1076,7 @@ public static unsafe Vector128 GatherVector128(ulong* baseAddress, Vector } /// /// __m128 _mm_i64gather_ps (float const* base_addr, __m128i vindex, const int scale) - /// VGATHERQPS xmm, vm64x, xmm + /// VGATHERQPS xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(float* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1025,7 +1092,7 @@ public static unsafe Vector128 GatherVector128(float* baseAddress, Vector } /// /// __m128d _mm_i64gather_pd (double const* base_addr, __m128i vindex, const int scale) - /// VGATHERQPD xmm, vm64x, xmm + /// VGATHERQPD xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(double* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1041,7 +1108,7 @@ public static unsafe Vector128 GatherVector128(double* baseAddress, Vect } /// /// __m256i _mm256_i32gather_epi32 (int const* base_addr, __m256i vindex, const int scale) - /// VPGATHERDD ymm, vm32y, ymm + /// VPGATHERDD ymm1, vm32y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherVector256(int* baseAddress, Vector256 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1057,7 +1124,7 @@ public static unsafe Vector256 GatherVector256(int* baseAddress, Vector256< } /// /// __m256i _mm256_i32gather_epi32 (int const* base_addr, __m256i vindex, const int scale) - /// VPGATHERDD ymm, vm32y, ymm + /// VPGATHERDD ymm1, vm32y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. 
/// public static unsafe Vector256 GatherVector256(uint* baseAddress, Vector256 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1073,7 +1140,7 @@ public static unsafe Vector256 GatherVector256(uint* baseAddress, Vector25 } /// /// __m256i _mm256_i32gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale) - /// VPGATHERDQ ymm, vm32y, ymm + /// VPGATHERDQ ymm1, vm32x, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherVector256(long* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1089,7 +1156,7 @@ public static unsafe Vector256 GatherVector256(long* baseAddress, Vector12 } /// /// __m256i _mm256_i32gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale) - /// VPGATHERDQ ymm, vm32y, ymm + /// VPGATHERDQ ymm1, vm32x, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherVector256(ulong* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1105,7 +1172,7 @@ public static unsafe Vector256 GatherVector256(ulong* baseAddress, Vector } /// /// __m256 _mm256_i32gather_ps (float const* base_addr, __m256i vindex, const int scale) - /// VGATHERDPS ymm, vm32y, ymm + /// VGATHERDPS ymm1, vm32y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherVector256(float* baseAddress, Vector256 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1121,7 +1188,7 @@ public static unsafe Vector256 GatherVector256(float* baseAddress, Vector } /// /// __m256d _mm256_i32gather_pd (double const* base_addr, __m128i vindex, const int scale) - /// VGATHERDPD ymm, vm32y, ymm + /// VGATHERDPD ymm1, vm32x, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherVector256(double* baseAddress, Vector128 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1137,7 +1204,7 @@ public static unsafe Vector256 GatherVector256(double* baseAddress, Vect } /// /// __m128i _mm256_i64gather_epi32 (int const* base_addr, __m256i vindex, const int scale) - /// VPGATHERQD xmm, vm64y, xmm + /// VPGATHERQD xmm1, vm64y, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(int* baseAddress, Vector256 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1153,7 +1220,7 @@ public static unsafe Vector128 GatherVector128(int* baseAddress, Vector256< } /// /// __m128i _mm256_i64gather_epi32 (int const* base_addr, __m256i vindex, const int scale) - /// VPGATHERQD xmm, vm64y, xmm + /// VPGATHERQD xmm1, vm64y, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
/// public static unsafe Vector128 GatherVector128(uint* baseAddress, Vector256 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1169,7 +1236,7 @@ public static unsafe Vector128 GatherVector128(uint* baseAddress, Vector25 } /// /// __m256i _mm256_i64gather_epi64 (__int64 const* base_addr, __m256i vindex, const int scale) - /// VPGATHERQQ ymm, vm64y, ymm + /// VPGATHERQQ ymm1, vm64y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherVector256(long* baseAddress, Vector256 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1185,7 +1252,7 @@ public static unsafe Vector256 GatherVector256(long* baseAddress, Vector25 } /// /// __m256i _mm256_i64gather_epi64 (__int64 const* base_addr, __m256i vindex, const int scale) - /// VPGATHERQQ ymm, vm64y, ymm + /// VPGATHERQQ ymm1, vm64y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherVector256(ulong* baseAddress, Vector256 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1201,7 +1268,7 @@ public static unsafe Vector256 GatherVector256(ulong* baseAddress, Vector } /// /// __m128 _mm256_i64gather_ps (float const* base_addr, __m256i vindex, const int scale) - /// VGATHERQPS xmm, vm64y, xmm + /// VGATHERQPS xmm1, vm64y, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherVector128(float* baseAddress, Vector256 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1217,7 +1284,7 @@ public static unsafe Vector128 GatherVector128(float* baseAddress, Vector } /// /// __m256d _mm256_i64gather_pd (double const* base_addr, __m256i vindex, const int scale) - /// VGATHERQPD ymm, vm64y, ymm + /// VGATHERQPD ymm1, vm64y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherVector256(double* baseAddress, Vector256 index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1234,7 +1301,7 @@ public static unsafe Vector256 GatherVector256(double* baseAddress, Vect /// /// __m128i _mm_mask_i32gather_epi32 (__m128i src, int const* base_addr, __m128i vindex, __m128i mask, const int scale) - /// VPGATHERDD xmm, vm32x, xmm + /// VPGATHERDD xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, int* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1250,7 +1317,7 @@ public static unsafe Vector128 GatherMaskVector128(Vector128 source, i } /// /// __m128i _mm_mask_i32gather_epi32 (__m128i src, int const* base_addr, __m128i vindex, __m128i mask, const int scale) - /// VPGATHERDD xmm, vm32x, xmm + /// VPGATHERDD xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. 
/// public static unsafe Vector128 GatherMaskVector128(Vector128 source, uint* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1266,7 +1333,7 @@ public static unsafe Vector128 GatherMaskVector128(Vector128 source, } /// /// __m128i _mm_mask_i32gather_epi64 (__m128i src, __int64 const* base_addr, __m128i vindex, __m128i mask, const int scale) - /// VPGATHERDQ xmm, vm32x, xmm + /// VPGATHERDQ xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, long* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1282,7 +1349,7 @@ public static unsafe Vector128 GatherMaskVector128(Vector128 source, } /// /// __m128i _mm_mask_i32gather_epi64 (__m128i src, __int64 const* base_addr, __m128i vindex, __m128i mask, const int scale) - /// VPGATHERDQ xmm, vm32x, xmm + /// VPGATHERDQ xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, ulong* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1298,7 +1365,7 @@ public static unsafe Vector128 GatherMaskVector128(Vector128 sourc } /// /// __m128 _mm_mask_i32gather_ps (__m128 src, float const* base_addr, __m128i vindex, __m128 mask, const int scale) - /// VGATHERDPS xmm, vm32x, xmm + /// VGATHERDPS xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, float* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1314,7 +1381,7 @@ public static unsafe Vector128 GatherMaskVector128(Vector128 sourc } /// /// __m128d _mm_mask_i32gather_pd (__m128d src, double const* base_addr, __m128i vindex, __m128d mask, const int scale) - /// VGATHERDPD xmm, vm32x, xmm + /// VGATHERDPD xmm1, vm32x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, double* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1330,7 +1397,7 @@ public static unsafe Vector128 GatherMaskVector128(Vector128 sou } /// /// __m128i _mm_mask_i64gather_epi32 (__m128i src, int const* base_addr, __m128i vindex, __m128i mask, const int scale) - /// VPGATHERQD xmm, vm64x, xmm + /// VPGATHERQD xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, int* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1346,7 +1413,7 @@ public static unsafe Vector128 GatherMaskVector128(Vector128 source, i } /// /// __m128i _mm_mask_i64gather_epi32 (__m128i src, int const* base_addr, __m128i vindex, __m128i mask, const int scale) - /// VPGATHERQD xmm, vm64x, xmm + /// VPGATHERQD xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. 
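A companion sketch for the masked gathers above (hypothetical values): only lanes whose mask element has its most significant bit set are loaded; the remaining lanes are passed through unchanged from source.

    if (Avx2.IsSupported)
    {
        unsafe
        {
            int* data = stackalloc int[4] { 100, 101, 102, 103 };

            Vector128<int> index  = Vector128.Create(0, 1, 2, 3);
            Vector128<int> mask   = Vector128.Create(-1, 0, -1, 0); // gather lanes 0 and 2 only
            Vector128<int> source = Vector128.Create(-7);           // fallback for masked-off lanes
            Vector128<int> result = Avx2.GatherMaskVector128(source, data, index, mask, 4);

            // result == <100, -7, 102, -7>
        }
    }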
/// public static unsafe Vector128 GatherMaskVector128(Vector128 source, uint* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1362,7 +1429,7 @@ public static unsafe Vector128 GatherMaskVector128(Vector128 source, } /// /// __m128i _mm_mask_i64gather_epi64 (__m128i src, __int64 const* base_addr, __m128i vindex, __m128i mask, const int scale) - /// VPGATHERQQ xmm, vm64x, xmm + /// VPGATHERQQ xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, long* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1378,7 +1445,7 @@ public static unsafe Vector128 GatherMaskVector128(Vector128 source, } /// /// __m128i _mm_mask_i64gather_epi64 (__m128i src, __int64 const* base_addr, __m128i vindex, __m128i mask, const int scale) - /// VPGATHERQQ xmm, vm64x, xmm + /// VPGATHERQQ xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, ulong* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1394,7 +1461,7 @@ public static unsafe Vector128 GatherMaskVector128(Vector128 sourc } /// /// __m128 _mm_mask_i64gather_ps (__m128 src, float const* base_addr, __m128i vindex, __m128 mask, const int scale) - /// VGATHERQPS xmm, vm64x, xmm + /// VGATHERQPS xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, float* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1410,7 +1477,7 @@ public static unsafe Vector128 GatherMaskVector128(Vector128 sourc } /// /// __m128d _mm_mask_i64gather_pd (__m128d src, double const* base_addr, __m128i vindex, __m128d mask, const int scale) - /// VGATHERQPD xmm, vm64x, xmm + /// VGATHERQPD xmm1, vm64x, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, double* baseAddress, Vector128 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1426,7 +1493,7 @@ public static unsafe Vector128 GatherMaskVector128(Vector128 sou } /// /// __m256i _mm256_mask_i32gather_epi32 (__m256i src, int const* base_addr, __m256i vindex, __m256i mask, const int scale) - /// VPGATHERDD ymm, vm32y, ymm + /// VPGATHERDD ymm1, vm32y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherMaskVector256(Vector256 source, int* baseAddress, Vector256 index, Vector256 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1442,7 +1509,7 @@ public static unsafe Vector256 GatherMaskVector256(Vector256 source, i } /// /// __m256i _mm256_mask_i32gather_epi32 (__m256i src, int const* base_addr, __m256i vindex, __m256i mask, const int scale) - /// VPGATHERDD ymm, vm32y, ymm + /// VPGATHERDD ymm1, vm32y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. 
/// public static unsafe Vector256 GatherMaskVector256(Vector256 source, uint* baseAddress, Vector256 index, Vector256 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1458,7 +1525,7 @@ public static unsafe Vector256 GatherMaskVector256(Vector256 source, } /// /// __m256i _mm256_mask_i32gather_epi64 (__m256i src, __int64 const* base_addr, __m128i vindex, __m256i mask, const int scale) - /// VPGATHERDQ ymm, vm32y, ymm + /// VPGATHERDQ ymm1, vm32x, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherMaskVector256(Vector256 source, long* baseAddress, Vector128 index, Vector256 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1474,7 +1541,7 @@ public static unsafe Vector256 GatherMaskVector256(Vector256 source, } /// /// __m256i _mm256_mask_i32gather_epi64 (__m256i src, __int64 const* base_addr, __m128i vindex, __m256i mask, const int scale) - /// VPGATHERDQ ymm, vm32y, ymm + /// VPGATHERDQ ymm1, vm32x, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherMaskVector256(Vector256 source, ulong* baseAddress, Vector128 index, Vector256 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1490,7 +1557,7 @@ public static unsafe Vector256 GatherMaskVector256(Vector256 sourc } /// /// __m256 _mm256_mask_i32gather_ps (__m256 src, float const* base_addr, __m256i vindex, __m256 mask, const int scale) - /// VPGATHERDPS ymm, vm32y, ymm + /// VGATHERDPS ymm1, vm32y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherMaskVector256(Vector256 source, float* baseAddress, Vector256 index, Vector256 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1506,7 +1573,7 @@ public static unsafe Vector256 GatherMaskVector256(Vector256 sourc } /// /// __m256d _mm256_mask_i32gather_pd (__m256d src, double const* base_addr, __m128i vindex, __m256d mask, const int scale) - /// VPGATHERDPD ymm, vm32y, ymm + /// VGATHERDPD ymm1, vm32x, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherMaskVector256(Vector256 source, double* baseAddress, Vector128 index, Vector256 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1522,7 +1589,7 @@ public static unsafe Vector256 GatherMaskVector256(Vector256 sou } /// /// __m128i _mm256_mask_i64gather_epi32 (__m128i src, int const* base_addr, __m256i vindex, __m128i mask, const int scale) - /// VPGATHERQD xmm, vm32y, xmm + /// VPGATHERQD xmm1, vm64y, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, int* baseAddress, Vector256 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1538,7 +1605,7 @@ public static unsafe Vector128 GatherMaskVector128(Vector128 source, i } /// /// __m128i _mm256_mask_i64gather_epi32 (__m128i src, int const* base_addr, __m256i vindex, __m128i mask, const int scale) - /// VPGATHERQD xmm, vm32y, xmm + /// VPGATHERQD xmm1, vm64y, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
/// public static unsafe Vector128 GatherMaskVector128(Vector128 source, uint* baseAddress, Vector256 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1554,7 +1621,7 @@ public static unsafe Vector128 GatherMaskVector128(Vector128 source, } /// /// __m256i _mm256_mask_i64gather_epi64 (__m256i src, __int64 const* base_addr, __m256i vindex, __m256i mask, const int scale) - /// VPGATHERQQ ymm, vm32y, ymm + /// VPGATHERQQ ymm1, vm64y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherMaskVector256(Vector256 source, long* baseAddress, Vector256 index, Vector256 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1570,7 +1637,7 @@ public static unsafe Vector256 GatherMaskVector256(Vector256 source, } /// /// __m256i _mm256_mask_i64gather_epi64 (__m256i src, __int64 const* base_addr, __m256i vindex, __m256i mask, const int scale) - /// VPGATHERQQ ymm, vm32y, ymm + /// VPGATHERQQ ymm1, vm64y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector256 GatherMaskVector256(Vector256 source, ulong* baseAddress, Vector256 index, Vector256 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1586,7 +1653,7 @@ public static unsafe Vector256 GatherMaskVector256(Vector256 sourc } /// /// __m128 _mm256_mask_i64gather_ps (__m128 src, float const* base_addr, __m256i vindex, __m128 mask, const int scale) - /// VGATHERQPS xmm, vm32y, xmm + /// VGATHERQPS xmm1, vm64y, xmm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. /// public static unsafe Vector128 GatherMaskVector128(Vector128 source, float* baseAddress, Vector256 index, Vector128 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1602,7 +1669,7 @@ public static unsafe Vector128 GatherMaskVector128(Vector128 sourc } /// /// __m256d _mm256_mask_i64gather_pd (__m256d src, double const* base_addr, __m256i vindex, __m256d mask, const int scale) - /// VGATHERQPD ymm, vm32y, ymm + /// VGATHERQPD ymm1, vm64y, ymm2 /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
/// public static unsafe Vector256 GatherMaskVector256(Vector256 source, double* baseAddress, Vector256 index, Vector256 mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) @@ -1619,1058 +1686,1189 @@ public static unsafe Vector256 GatherMaskVector256(Vector256 sou /// /// __m256i _mm256_hadd_epi16 (__m256i a, __m256i b) - /// VPHADDW ymm, ymm, ymm/m256 + /// VPHADDW ymm1, ymm2, ymm3/m256 /// public static Vector256 HorizontalAdd(Vector256 left, Vector256 right) => HorizontalAdd(left, right); /// /// __m256i _mm256_hadd_epi32 (__m256i a, __m256i b) - /// VPHADDD ymm, ymm, ymm/m256 + /// VPHADDD ymm1, ymm2, ymm3/m256 /// public static Vector256 HorizontalAdd(Vector256 left, Vector256 right) => HorizontalAdd(left, right); /// /// __m256i _mm256_hadds_epi16 (__m256i a, __m256i b) - /// VPHADDSW ymm, ymm, ymm/m256 + /// VPHADDSW ymm1, ymm2, ymm3/m256 /// public static Vector256 HorizontalAddSaturate(Vector256 left, Vector256 right) => HorizontalAddSaturate(left, right); /// /// __m256i _mm256_hsub_epi16 (__m256i a, __m256i b) - /// VPHSUBW ymm, ymm, ymm/m256 + /// VPHSUBW ymm1, ymm2, ymm3/m256 /// public static Vector256 HorizontalSubtract(Vector256 left, Vector256 right) => HorizontalSubtract(left, right); /// /// __m256i _mm256_hsub_epi32 (__m256i a, __m256i b) - /// VPHSUBD ymm, ymm, ymm/m256 + /// VPHSUBD ymm1, ymm2, ymm3/m256 /// public static Vector256 HorizontalSubtract(Vector256 left, Vector256 right) => HorizontalSubtract(left, right); /// /// __m256i _mm256_hsubs_epi16 (__m256i a, __m256i b) - /// VPHSUBSW ymm, ymm, ymm/m256 + /// VPHSUBSW ymm1, ymm2, ymm3/m256 /// public static Vector256 HorizontalSubtractSaturate(Vector256 left, Vector256 right) => HorizontalSubtractSaturate(left, right); /// /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8) - /// VINSERTI128 ymm, ymm, xmm, imm8 + /// VINSERTI128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static new Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) => InsertVector128(value, data, index); - /// /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8) - /// VINSERTI128 ymm, ymm, xmm, imm8 + /// VINSERTI128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static new Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) => InsertVector128(value, data, index); - /// /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8) - /// VINSERTI128 ymm, ymm, xmm, imm8 + /// VINSERTI128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static new Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) => InsertVector128(value, data, index); - /// /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8) - /// VINSERTI128 ymm, ymm, xmm, imm8 + /// VINSERTI128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static new Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) => InsertVector128(value, data, index); - /// /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8) - /// VINSERTI128 ymm, ymm, xmm, imm8 + /// VINSERTI128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static new Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] 
byte index) => InsertVector128(value, data, index); - /// /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8) - /// VINSERTI128 ymm, ymm, xmm, imm8 + /// VINSERTI128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static new Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) => InsertVector128(value, data, index); - /// /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8) - /// VINSERTI128 ymm, ymm, xmm, imm8 + /// VINSERTI128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTI64x2 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static new Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) => InsertVector128(value, data, index); - /// /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8) - /// VINSERTI128 ymm, ymm, xmm, imm8 + /// VINSERTI128 ymm1, ymm2, xmm3/m128, imm8 + /// VINSERTI64x2 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8 /// public static new Vector256 InsertVector128(Vector256 value, Vector128 data, [ConstantExpected] byte index) => InsertVector128(value, data, index); /// /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr) - /// VMOVNTDQA ymm, m256 + /// VMOVNTDQA ymm1, m256 /// public static unsafe Vector256 LoadAlignedVector256NonTemporal(sbyte* address) => LoadAlignedVector256NonTemporal(address); /// /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr) - /// VMOVNTDQA ymm, m256 + /// VMOVNTDQA ymm1, m256 /// public static unsafe Vector256 LoadAlignedVector256NonTemporal(byte* address) => LoadAlignedVector256NonTemporal(address); /// /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr) - /// VMOVNTDQA ymm, m256 + /// VMOVNTDQA ymm1, m256 /// public static unsafe Vector256 LoadAlignedVector256NonTemporal(short* address) => LoadAlignedVector256NonTemporal(address); /// /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr) - /// VMOVNTDQA ymm, m256 + /// VMOVNTDQA ymm1, m256 /// public static unsafe Vector256 LoadAlignedVector256NonTemporal(ushort* address) => LoadAlignedVector256NonTemporal(address); /// /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr) - /// VMOVNTDQA ymm, m256 + /// VMOVNTDQA ymm1, m256 /// public static unsafe Vector256 LoadAlignedVector256NonTemporal(int* address) => LoadAlignedVector256NonTemporal(address); /// /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr) - /// VMOVNTDQA ymm, m256 + /// VMOVNTDQA ymm1, m256 /// public static unsafe Vector256 LoadAlignedVector256NonTemporal(uint* address) => LoadAlignedVector256NonTemporal(address); /// /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr) - /// VMOVNTDQA ymm, m256 + /// VMOVNTDQA ymm1, m256 /// public static unsafe Vector256 LoadAlignedVector256NonTemporal(long* address) => LoadAlignedVector256NonTemporal(address); /// /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr) - /// VMOVNTDQA ymm, m256 + /// VMOVNTDQA ymm1, m256 /// public static unsafe Vector256 LoadAlignedVector256NonTemporal(ulong* address) => LoadAlignedVector256NonTemporal(address); /// /// __m128i _mm_maskload_epi32 (int const* mem_addr, __m128i mask) - /// VPMASKMOVD xmm, xmm, m128 + /// VPMASKMOVD xmm1, xmm2, m128 /// public static unsafe Vector128 MaskLoad(int* address, Vector128 mask) => MaskLoad(address, mask); /// /// __m128i _mm_maskload_epi32 (int const* mem_addr, __m128i mask) - /// VPMASKMOVD xmm, xmm, m128 + /// VPMASKMOVD xmm1, xmm2, m128 /// public static unsafe 
Vector128 MaskLoad(uint* address, Vector128 mask) => MaskLoad(address, mask); /// /// __m128i _mm_maskload_epi64 (__int64 const* mem_addr, __m128i mask) - /// VPMASKMOVQ xmm, xmm, m128 + /// VPMASKMOVQ xmm1, xmm2, m128 /// public static unsafe Vector128 MaskLoad(long* address, Vector128 mask) => MaskLoad(address, mask); /// /// __m128i _mm_maskload_epi64 (__int64 const* mem_addr, __m128i mask) - /// VPMASKMOVQ xmm, xmm, m128 + /// VPMASKMOVQ xmm1, xmm2, m128 /// public static unsafe Vector128 MaskLoad(ulong* address, Vector128 mask) => MaskLoad(address, mask); - /// /// __m256i _mm256_maskload_epi32 (int const* mem_addr, __m256i mask) - /// VPMASKMOVD ymm, ymm, m256 + /// VPMASKMOVD ymm1, ymm2, m256 /// public static unsafe Vector256 MaskLoad(int* address, Vector256 mask) => MaskLoad(address, mask); /// /// __m256i _mm256_maskload_epi32 (int const* mem_addr, __m256i mask) - /// VPMASKMOVD ymm, ymm, m256 + /// VPMASKMOVD ymm1, ymm2, m256 /// public static unsafe Vector256 MaskLoad(uint* address, Vector256 mask) => MaskLoad(address, mask); /// /// __m256i _mm256_maskload_epi64 (__int64 const* mem_addr, __m256i mask) - /// VPMASKMOVQ ymm, ymm, m256 + /// VPMASKMOVQ ymm1, ymm2, m256 /// public static unsafe Vector256 MaskLoad(long* address, Vector256 mask) => MaskLoad(address, mask); /// /// __m256i _mm256_maskload_epi64 (__int64 const* mem_addr, __m256i mask) - /// VPMASKMOVQ ymm, ymm, m256 + /// VPMASKMOVQ ymm1, ymm2, m256 /// public static unsafe Vector256 MaskLoad(ulong* address, Vector256 mask) => MaskLoad(address, mask); /// /// void _mm_maskstore_epi32 (int* mem_addr, __m128i mask, __m128i a) - /// VPMASKMOVD m128, xmm, xmm + /// VPMASKMOVD m128, xmm1, xmm2 /// public static unsafe void MaskStore(int* address, Vector128 mask, Vector128 source) => MaskStore(address, mask, source); /// /// void _mm_maskstore_epi32 (int* mem_addr, __m128i mask, __m128i a) - /// VPMASKMOVD m128, xmm, xmm + /// VPMASKMOVD m128, xmm1, xmm2 /// public static unsafe void MaskStore(uint* address, Vector128 mask, Vector128 source) => MaskStore(address, mask, source); /// /// void _mm_maskstore_epi64 (__int64* mem_addr, __m128i mask, __m128i a) - /// VPMASKMOVQ m128, xmm, xmm + /// VPMASKMOVQ m128, xmm1, xmm2 /// public static unsafe void MaskStore(long* address, Vector128 mask, Vector128 source) => MaskStore(address, mask, source); /// /// void _mm_maskstore_epi64 (__int64* mem_addr, __m128i mask, __m128i a) - /// VPMASKMOVQ m128, xmm, xmm + /// VPMASKMOVQ m128, xmm1, xmm2 /// public static unsafe void MaskStore(ulong* address, Vector128 mask, Vector128 source) => MaskStore(address, mask, source); - /// /// void _mm256_maskstore_epi32 (int* mem_addr, __m256i mask, __m256i a) - /// VPMASKMOVD m256, ymm, ymm + /// VPMASKMOVD m256, ymm1, ymm2 /// public static unsafe void MaskStore(int* address, Vector256 mask, Vector256 source) => MaskStore(address, mask, source); /// /// void _mm256_maskstore_epi32 (int* mem_addr, __m256i mask, __m256i a) - /// VPMASKMOVD m256, ymm, ymm + /// VPMASKMOVD m256, ymm1, ymm2 /// public static unsafe void MaskStore(uint* address, Vector256 mask, Vector256 source) => MaskStore(address, mask, source); /// /// void _mm256_maskstore_epi64 (__int64* mem_addr, __m256i mask, __m256i a) - /// VPMASKMOVQ m256, ymm, ymm + /// VPMASKMOVQ m256, ymm1, ymm2 /// public static unsafe void MaskStore(long* address, Vector256 mask, Vector256 source) => MaskStore(address, mask, source); /// /// void _mm256_maskstore_epi64 (__int64* mem_addr, __m256i mask, __m256i a) - /// VPMASKMOVQ m256, ymm, ymm + /// 
VPMASKMOVQ m256, ymm1, ymm2 /// public static unsafe void MaskStore(ulong* address, Vector256 mask, Vector256 source) => MaskStore(address, mask, source); /// /// __m256i _mm256_madd_epi16 (__m256i a, __m256i b) - /// VPMADDWD ymm, ymm, ymm/m256 + /// VPMADDWD ymm1, ymm2, ymm3/m256 + /// VPMADDWD ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 MultiplyAddAdjacent(Vector256 left, Vector256 right) => MultiplyAddAdjacent(left, right); - /// /// __m256i _mm256_maddubs_epi16 (__m256i a, __m256i b) - /// VPMADDUBSW ymm, ymm, ymm/m256 + /// VPMADDUBSW ymm1, ymm2, ymm3/m256 + /// VPMADDUBSW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 MultiplyAddAdjacent(Vector256 left, Vector256 right) => MultiplyAddAdjacent(left, right); /// /// __m256i _mm256_max_epi8 (__m256i a, __m256i b) - /// VPMAXSB ymm, ymm, ymm/m256 + /// VPMAXSB ymm1, ymm2, ymm3/m256 + /// VPMAXSB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Max(Vector256 left, Vector256 right) => Max(left, right); /// /// __m256i _mm256_max_epu8 (__m256i a, __m256i b) - /// VPMAXUB ymm, ymm, ymm/m256 + /// VPMAXUB ymm1, ymm2, ymm3/m256 + /// VPMAXUB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Max(Vector256 left, Vector256 right) => Max(left, right); /// /// __m256i _mm256_max_epi16 (__m256i a, __m256i b) - /// VPMAXSW ymm, ymm, ymm/m256 + /// VPMAXSW ymm1, ymm2, ymm3/m256 + /// VPMAXSW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Max(Vector256 left, Vector256 right) => Max(left, right); /// /// __m256i _mm256_max_epu16 (__m256i a, __m256i b) - /// VPMAXUW ymm, ymm, ymm/m256 + /// VPMAXUW ymm1, ymm2, ymm3/m256 + /// VPMAXUW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Max(Vector256 left, Vector256 right) => Max(left, right); /// /// __m256i _mm256_max_epi32 (__m256i a, __m256i b) - /// VPMAXSD ymm, ymm, ymm/m256 + /// VPMAXSD ymm1, ymm2, ymm3/m256 + /// VPMAXSD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Max(Vector256 left, Vector256 right) => Max(left, right); /// /// __m256i _mm256_max_epu32 (__m256i a, __m256i b) - /// VPMAXUD ymm, ymm, ymm/m256 + /// VPMAXUD ymm1, ymm2, ymm3/m256 + /// VPMAXUD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Max(Vector256 left, Vector256 right) => Max(left, right); /// /// __m256i _mm256_min_epi8 (__m256i a, __m256i b) - /// VPMINSB ymm, ymm, ymm/m256 + /// VPMINSB ymm1, ymm2, ymm3/m256 + /// VPMINSB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Min(Vector256 left, Vector256 right) => Min(left, right); /// /// __m256i _mm256_min_epu8 (__m256i a, __m256i b) - /// VPMINUB ymm, ymm, ymm/m256 + /// VPMINUB ymm1, ymm2, ymm3/m256 + /// VPMINUB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Min(Vector256 left, Vector256 right) => Min(left, right); /// /// __m256i _mm256_min_epi16 (__m256i a, __m256i b) - /// VPMINSW ymm, ymm, ymm/m256 + /// VPMINSW ymm1, ymm2, ymm3/m256 + /// VPMINSW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Min(Vector256 left, Vector256 right) => Min(left, right); /// /// __m256i _mm256_min_epu16 (__m256i a, __m256i b) - /// VPMINUW ymm, ymm, ymm/m256 + /// VPMINUW ymm1, ymm2, ymm3/m256 + /// VPMINUW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Min(Vector256 left, Vector256 right) => Min(left, right); /// /// __m256i _mm256_min_epi32 (__m256i a, __m256i b) - /// VPMINSD ymm, ymm, ymm/m256 + /// VPMINSD ymm1, ymm2, ymm3/m256 + /// VPMINSD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Min(Vector256 left, Vector256 right) => 
Min(left, right); /// /// __m256i _mm256_min_epu32 (__m256i a, __m256i b) - /// VPMINUD ymm, ymm, ymm/m256 + /// VPMINUD ymm1, ymm2, ymm3/m256 + /// VPMINUD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Min(Vector256 left, Vector256 right) => Min(left, right); /// /// int _mm256_movemask_epi8 (__m256i a) - /// VPMOVMSKB reg, ymm + /// VPMOVMSKB r32, ymm1 /// public static int MoveMask(Vector256 value) => MoveMask(value); /// /// int _mm256_movemask_epi8 (__m256i a) - /// VPMOVMSKB reg, ymm + /// VPMOVMSKB r32, ymm1 /// public static int MoveMask(Vector256 value) => MoveMask(value); /// /// __m256i _mm256_mpsadbw_epu8 (__m256i a, __m256i b, const int imm8) - /// VMPSADBW ymm, ymm, ymm/m256, imm8 + /// VMPSADBW ymm1, ymm2, ymm3/m256, imm8 /// public static Vector256 MultipleSumAbsoluteDifferences(Vector256 left, Vector256 right, [ConstantExpected] byte mask) => MultipleSumAbsoluteDifferences(left, right, mask); /// /// __m256i _mm256_mul_epi32 (__m256i a, __m256i b) - /// VPMULDQ ymm, ymm, ymm/m256 + /// VPMULDQ ymm1, ymm2, ymm3/m256 + /// VPMULDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Multiply(Vector256 left, Vector256 right) => Multiply(left, right); /// /// __m256i _mm256_mul_epu32 (__m256i a, __m256i b) - /// VPMULUDQ ymm, ymm, ymm/m256 + /// VPMULUDQ ymm1, ymm2, ymm3/m256 + /// VPMULUDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Multiply(Vector256 left, Vector256 right) => Multiply(left, right); /// /// __m256i _mm256_mulhi_epi16 (__m256i a, __m256i b) - /// VPMULHW ymm, ymm, ymm/m256 + /// VPMULHW ymm1, ymm2, ymm3/m256 + /// VPMULHW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 MultiplyHigh(Vector256 left, Vector256 right) => MultiplyHigh(left, right); /// /// __m256i _mm256_mulhi_epu16 (__m256i a, __m256i b) - /// VPMULHUW ymm, ymm, ymm/m256 + /// VPMULHUW ymm1, ymm2, ymm3/m256 + /// VPMULHUW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 MultiplyHigh(Vector256 left, Vector256 right) => MultiplyHigh(left, right); /// /// __m256i _mm256_mulhrs_epi16 (__m256i a, __m256i b) - /// VPMULHRSW ymm, ymm, ymm/m256 + /// VPMULHRSW ymm1, ymm2, ymm3/m256 + /// VPMULHRSW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 MultiplyHighRoundScale(Vector256 left, Vector256 right) => MultiplyHighRoundScale(left, right); /// /// __m256i _mm256_mullo_epi16 (__m256i a, __m256i b) - /// VPMULLW ymm, ymm, ymm/m256 + /// VPMULLW ymm1, ymm2, ymm3/m256 + /// VPMULLW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 MultiplyLow(Vector256 left, Vector256 right) => MultiplyLow(left, right); /// /// __m256i _mm256_mullo_epi16 (__m256i a, __m256i b) - /// VPMULLW ymm, ymm, ymm/m256 + /// VPMULLW ymm1, ymm2, ymm3/m256 + /// VPMULLW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 MultiplyLow(Vector256 left, Vector256 right) => MultiplyLow(left, right); - /// /// __m256i _mm256_mullo_epi32 (__m256i a, __m256i b) - /// VPMULLD ymm, ymm, ymm/m256 + /// VPMULLD ymm1, ymm2, ymm3/m256 + /// VPMULLD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 MultiplyLow(Vector256 left, Vector256 right) => MultiplyLow(left, right); /// /// __m256i _mm256_mullo_epi32 (__m256i a, __m256i b) - /// VPMULLD ymm, ymm, ymm/m256 + /// VPMULLD ymm1, ymm2, ymm3/m256 + /// VPMULLD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 MultiplyLow(Vector256 left, Vector256 right) => MultiplyLow(left, right); /// /// __m256i _mm256_or_si256 (__m256i a, __m256i b) - /// VPOR ymm, ymm, ymm/m256 + /// VPOR 
ymm1, ymm2, ymm3/m256 /// public static Vector256 Or(Vector256 left, Vector256 right) => Or(left, right); /// /// __m256i _mm256_or_si256 (__m256i a, __m256i b) - /// VPOR ymm, ymm, ymm/m256 + /// VPOR ymm1, ymm2, ymm3/m256 /// public static Vector256 Or(Vector256 left, Vector256 right) => Or(left, right); /// /// __m256i _mm256_or_si256 (__m256i a, __m256i b) - /// VPOR ymm, ymm, ymm/m256 + /// VPOR ymm1, ymm2, ymm3/m256 /// public static Vector256 Or(Vector256 left, Vector256 right) => Or(left, right); /// /// __m256i _mm256_or_si256 (__m256i a, __m256i b) - /// VPOR ymm, ymm, ymm/m256 + /// VPOR ymm1, ymm2, ymm3/m256 /// public static Vector256 Or(Vector256 left, Vector256 right) => Or(left, right); /// /// __m256i _mm256_or_si256 (__m256i a, __m256i b) - /// VPOR ymm, ymm, ymm/m256 + /// VPOR ymm1, ymm2, ymm3/m256 + /// VPORD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Or(Vector256 left, Vector256 right) => Or(left, right); /// /// __m256i _mm256_or_si256 (__m256i a, __m256i b) - /// VPOR ymm, ymm, ymm/m256 + /// VPOR ymm1, ymm2, ymm3/m256 + /// VPORD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Or(Vector256 left, Vector256 right) => Or(left, right); /// /// __m256i _mm256_or_si256 (__m256i a, __m256i b) - /// VPOR ymm, ymm, ymm/m256 + /// VPOR ymm1, ymm2, ymm3/m256 + /// VPORQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Or(Vector256 left, Vector256 right) => Or(left, right); /// /// __m256i _mm256_or_si256 (__m256i a, __m256i b) - /// VPOR ymm, ymm, ymm/m256 + /// VPOR ymm1, ymm2, ymm3/m256 + /// VPORQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Or(Vector256 left, Vector256 right) => Or(left, right); /// /// __m256i _mm256_packs_epi16 (__m256i a, __m256i b) - /// VPACKSSWB ymm, ymm, ymm/m256 + /// VPACKSSWB ymm1, ymm2, ymm3/m256 + /// VPACKSSWB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 PackSignedSaturate(Vector256 left, Vector256 right) => PackSignedSaturate(left, right); /// /// __m256i _mm256_packs_epi32 (__m256i a, __m256i b) - /// VPACKSSDW ymm, ymm, ymm/m256 + /// VPACKSSDW ymm1, ymm2, ymm3/m256 + /// VPACKSSDW ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 PackSignedSaturate(Vector256 left, Vector256 right) => PackSignedSaturate(left, right); + /// /// __m256i _mm256_packus_epi16 (__m256i a, __m256i b) - /// VPACKUSWB ymm, ymm, ymm/m256 + /// VPACKUSWB ymm1, ymm2, ymm3/m256 + /// VPACKUSWB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 PackUnsignedSaturate(Vector256 left, Vector256 right) => PackUnsignedSaturate(left, right); /// /// __m256i _mm256_packus_epi32 (__m256i a, __m256i b) - /// VPACKUSDW ymm, ymm, ymm/m256 + /// VPACKUSDW ymm1, ymm2, ymm3/m256 + /// VPACKUSDW ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 PackUnsignedSaturate(Vector256 left, Vector256 right) => PackUnsignedSaturate(left, right); /// /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8) - /// VPERM2I128 ymm, ymm, ymm/m256, imm8 + /// VPERM2I128 ymm1, ymm2, ymm3/m256, imm8 /// public static new Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Permute2x128(left, right, control); /// /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8) - /// VPERM2I128 ymm, ymm, ymm/m256, imm8 + /// VPERM2I128 ymm1, ymm2, ymm3/m256, imm8 /// public static new Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Permute2x128(left, right, 
control); /// /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8) - /// VPERM2I128 ymm, ymm, ymm/m256, imm8 + /// VPERM2I128 ymm1, ymm2, ymm3/m256, imm8 /// public static new Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Permute2x128(left, right, control); /// /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8) - /// VPERM2I128 ymm, ymm, ymm/m256, imm8 + /// VPERM2I128 ymm1, ymm2, ymm3/m256, imm8 /// public static new Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Permute2x128(left, right, control); /// /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8) - /// VPERM2I128 ymm, ymm, ymm/m256, imm8 + /// VPERM2I128 ymm1, ymm2, ymm3/m256, imm8 /// public static new Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Permute2x128(left, right, control); /// /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8) - /// VPERM2I128 ymm, ymm, ymm/m256, imm8 + /// VPERM2I128 ymm1, ymm2, ymm3/m256, imm8 /// public static new Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Permute2x128(left, right, control); /// /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8) - /// VPERM2I128 ymm, ymm, ymm/m256, imm8 + /// VPERM2I128 ymm1, ymm2, ymm3/m256, imm8 /// public static new Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Permute2x128(left, right, control); /// /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8) - /// VPERM2I128 ymm, ymm, ymm/m256, imm8 + /// VPERM2I128 ymm1, ymm2, ymm3/m256, imm8 /// public static new Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Permute2x128(left, right, control); /// /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8) - /// VPERM2I128 ymm, ymm, ymm/m256, imm8 + /// VPERM2I128 ymm1, ymm2, ymm3/m256, imm8 /// public static new Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Permute2x128(left, right, control); /// /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8) - /// VPERM2I128 ymm, ymm, ymm/m256, imm8 + /// VPERM2I128 ymm1, ymm2, ymm3/m256, imm8 /// public static new Vector256 Permute2x128(Vector256 left, Vector256 right, [ConstantExpected] byte control) => Permute2x128(left, right, control); /// /// __m256i _mm256_permute4x64_epi64 (__m256i a, const int imm8) - /// VPERMQ ymm, ymm/m256, imm8 + /// VPERMQ ymm1, ymm2/m256, imm8 + /// VPERMQ ymm1 {k1}{z}, ymm2/m256/m64bcst, imm8 /// public static Vector256 Permute4x64(Vector256 value, [ConstantExpected] byte control) => Permute4x64(value, control); /// /// __m256i _mm256_permute4x64_epi64 (__m256i a, const int imm8) - /// VPERMQ ymm, ymm/m256, imm8 + /// VPERMQ ymm1, ymm2/m256, imm8 + /// VPERMQ ymm1 {k1}{z}, ymm2/m256/m64bcst, imm8 /// public static Vector256 Permute4x64(Vector256 value, [ConstantExpected] byte control) => Permute4x64(value, control); /// /// __m256d _mm256_permute4x64_pd (__m256d a, const int imm8) - /// VPERMPD ymm, ymm/m256, imm8 + /// VPERMPD ymm1, ymm2/m256, imm8 + /// VPERMPD ymm1 {k1}{z}, ymm2/m256/m64bcst, imm8 /// public static Vector256 Permute4x64(Vector256 value, [ConstantExpected] byte control) => Permute4x64(value, control); /// /// __m256i _mm256_permutevar8x32_epi32 (__m256i a, __m256i idx) - /// VPERMD ymm, ymm/m256, ymm + /// VPERMD ymm1, ymm2, ymm3/m256 + /// VPERMD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 PermuteVar8x32(Vector256 left, Vector256 control) => PermuteVar8x32(left, control); /// /// __m256i _mm256_permutevar8x32_epi32 (__m256i a, __m256i idx) - /// VPERMD ymm, ymm/m256, ymm + /// VPERMD ymm1, ymm2, ymm3/m256 + /// VPERMD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 PermuteVar8x32(Vector256 left, Vector256 control) => PermuteVar8x32(left, control); /// /// __m256 _mm256_permutevar8x32_ps (__m256 a, __m256i idx) - /// VPERMPS ymm, ymm/m256, ymm + ///
VPERMPS ymm1, ymm2, ymm3/m256 + /// VPERMPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 PermuteVar8x32(Vector256 left, Vector256 control) => PermuteVar8x32(left, control); /// /// __m256i _mm256_sll_epi16 (__m256i a, __m128i count) - /// VPSLLW ymm, ymm, xmm/m128 + /// VPSLLW ymm1, ymm2, xmm3/m128 + /// VPSLLW ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftLeftLogical(Vector256 value, Vector128 count) => ShiftLeftLogical(value, count); /// /// __m256i _mm256_sll_epi16 (__m256i a, __m128i count) - /// VPSLLW ymm, ymm, xmm/m128 + /// VPSLLW ymm1, ymm2, xmm3/m128 + /// VPSLLW ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftLeftLogical(Vector256 value, Vector128 count) => ShiftLeftLogical(value, count); /// /// __m256i _mm256_sll_epi32 (__m256i a, __m128i count) - /// VPSLLD ymm, ymm, xmm/m128 + /// VPSLLD ymm1, ymm2, xmm3/m128 + /// VPSLLD ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftLeftLogical(Vector256 value, Vector128 count) => ShiftLeftLogical(value, count); /// /// __m256i _mm256_sll_epi32 (__m256i a, __m128i count) - /// VPSLLD ymm, ymm, xmm/m128 + /// VPSLLD ymm1, ymm2, xmm3/m128 + /// VPSLLD ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftLeftLogical(Vector256 value, Vector128 count) => ShiftLeftLogical(value, count); /// /// __m256i _mm256_sll_epi64 (__m256i a, __m128i count) - /// VPSLLQ ymm, ymm, xmm/m128 + /// VPSLLQ ymm1, ymm2, xmm3/m128 + /// VPSLLQ ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftLeftLogical(Vector256 value, Vector128 count) => ShiftLeftLogical(value, count); /// /// __m256i _mm256_sll_epi64 (__m256i a, __m128i count) - /// VPSLLQ ymm, ymm, xmm/m128 + /// VPSLLQ ymm1, ymm2, xmm3/m128 + /// VPSLLQ ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftLeftLogical(Vector256 value, Vector128 count) => ShiftLeftLogical(value, count); /// /// __m256i _mm256_slli_epi16 (__m256i a, int imm8) - /// VPSLLW ymm, ymm, imm8 + /// VPSLLW ymm1, ymm2, imm8 + /// VPSLLW ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftLeftLogical(Vector256 value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count); /// /// __m256i _mm256_slli_epi16 (__m256i a, int imm8) - /// VPSLLW ymm, ymm, imm8 + /// VPSLLW ymm1, ymm2, imm8 + /// VPSLLW ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftLeftLogical(Vector256 value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count); /// /// __m256i _mm256_slli_epi32 (__m256i a, int imm8) - /// VPSLLD ymm, ymm, imm8 + /// VPSLLD ymm1, ymm2, imm8 + /// VPSLLD ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftLeftLogical(Vector256 value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count); /// /// __m256i _mm256_slli_epi32 (__m256i a, int imm8) - /// VPSLLD ymm, ymm, imm8 + /// VPSLLD ymm1, ymm2, imm8 + /// VPSLLD ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftLeftLogical(Vector256 value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count); /// /// __m256i _mm256_slli_epi64 (__m256i a, int imm8) - /// VPSLLQ ymm, ymm, imm8 + /// VPSLLQ ymm1, ymm2, imm8 + /// VPSLLQ ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftLeftLogical(Vector256 value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count); /// /// __m256i _mm256_slli_epi64 (__m256i a, int imm8) - /// VPSLLQ ymm, ymm, imm8 + /// VPSLLQ ymm1, ymm2, imm8 + /// VPSLLQ ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftLeftLogical(Vector256 value, [ConstantExpected] byte count) =>
ShiftLeftLogical(value, count); /// /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8) - /// VPSLLDQ ymm, ymm, imm8 + /// VPSLLDQ ymm1, ymm2/m256, imm8 /// public static Vector256 ShiftLeftLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes); /// /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8) - /// VPSLLDQ ymm, ymm, imm8 + /// VPSLLDQ ymm1, ymm2/m256, imm8 /// public static Vector256 ShiftLeftLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes); /// /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8) - /// VPSLLDQ ymm, ymm, imm8 + /// VPSLLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. /// public static Vector256 ShiftLeftLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes); /// /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8) - /// VPSLLDQ ymm, ymm, imm8 + /// VPSLLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. /// public static Vector256 ShiftLeftLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes); /// /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8) - /// VPSLLDQ ymm, ymm, imm8 + /// VPSLLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. /// public static Vector256 ShiftLeftLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes); /// /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8) - /// VPSLLDQ ymm, ymm, imm8 + /// VPSLLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. /// public static Vector256 ShiftLeftLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes); /// /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8) - /// VPSLLDQ ymm, ymm, imm8 + /// VPSLLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. /// public static Vector256 ShiftLeftLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes); /// /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8) - /// VPSLLDQ ymm, ymm, imm8 + /// VPSLLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. 
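To make the remark above concrete, a hypothetical sketch (not from the patch): ShiftLeftLogical128BitLane shifts whole bytes rather than element bits, and the two 128-bit lanes of a 256-bit vector are shifted independently.

    if (Avx2.IsSupported)
    {
        Vector256<byte> v = Vector256.Create(
            (byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,     // lower 128-bit lane
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); // upper 128-bit lane

        // Shift each 128-bit lane left by two whole bytes (VPSLLDQ ymm, ymm, 2).
        Vector256<byte> shifted = Avx2.ShiftLeftLogical128BitLane(v, 2);

        // Lower lane: <0, 0, 1, 2, ..., 14>; upper lane: <0, 0, 17, 18, ..., 30>.
    }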
/// public static Vector256 ShiftLeftLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes); - /// - /// __m256i _mm256_sllv_epi32 (__m256i a, __m256i count) - /// VPSLLVD ymm, ymm, ymm/m256 - /// - public static Vector256 ShiftLeftLogicalVariable(Vector256 value, Vector256 count) => ShiftLeftLogicalVariable(value, count); - /// - /// __m256i _mm256_sllv_epi32 (__m256i a, __m256i count) - /// VPSLLVD ymm, ymm, ymm/m256 - /// - public static Vector256 ShiftLeftLogicalVariable(Vector256 value, Vector256 count) => ShiftLeftLogicalVariable(value, count); - /// - /// __m256i _mm256_sllv_epi64 (__m256i a, __m256i count) - /// VPSLLVQ ymm, ymm, ymm/m256 - /// - public static Vector256 ShiftLeftLogicalVariable(Vector256 value, Vector256 count) => ShiftLeftLogicalVariable(value, count); - /// - /// __m256i _mm256_sllv_epi64 (__m256i a, __m256i count) - /// VPSLLVQ ymm, ymm, ymm/m256 - /// - public static Vector256 ShiftLeftLogicalVariable(Vector256 value, Vector256 count) => ShiftLeftLogicalVariable(value, count); - /// /// __m128i _mm_sllv_epi32 (__m128i a, __m128i count) - /// VPSLLVD xmm, xmm, xmm/m128 + /// VPSLLVD xmm1, xmm2, xmm3/m128 + /// VPSLLVD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 ShiftLeftLogicalVariable(Vector128 value, Vector128 count) => ShiftLeftLogicalVariable(value, count); /// /// __m128i _mm_sllv_epi32 (__m128i a, __m128i count) - /// VPSLLVD xmm, xmm, xmm/m128 + /// VPSLLVD xmm1, xmm2, xmm3/m128 + /// VPSLLVD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 ShiftLeftLogicalVariable(Vector128 value, Vector128 count) => ShiftLeftLogicalVariable(value, count); /// /// __m128i _mm_sllv_epi64 (__m128i a, __m128i count) - /// VPSLLVQ xmm, xmm, xmm/m128 + /// VPSLLVQ xmm1, xmm2, xmm3/m128 + /// VPSLLVQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 ShiftLeftLogicalVariable(Vector128 value, Vector128 count) => ShiftLeftLogicalVariable(value, count); /// /// __m128i _mm_sllv_epi64 (__m128i a, __m128i count) - /// VPSLLVQ xmm, xmm, xmm/m128 + /// VPSLLVQ xmm1, xmm2, xmm3/m128 + /// VPSLLVQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 ShiftLeftLogicalVariable(Vector128 value, Vector128 count) => ShiftLeftLogicalVariable(value, count); + /// + /// __m256i _mm256_sllv_epi32 (__m256i a, __m256i count) + /// VPSLLVD ymm1, ymm2, ymm3/m256 + /// VPSLLVD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst + /// + public static Vector256 ShiftLeftLogicalVariable(Vector256 value, Vector256 count) => ShiftLeftLogicalVariable(value, count); + /// + /// __m256i _mm256_sllv_epi32 (__m256i a, __m256i count) + /// VPSLLVD ymm1, ymm2, ymm3/m256 + /// VPSLLVD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst + /// + public static Vector256 ShiftLeftLogicalVariable(Vector256 value, Vector256 count) => ShiftLeftLogicalVariable(value, count); + /// + /// __m256i _mm256_sllv_epi64 (__m256i a, __m256i count) + /// VPSLLVQ ymm1, ymm2, ymm3/m256 + /// VPSLLVQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst + /// + public static Vector256 ShiftLeftLogicalVariable(Vector256 value, Vector256 count) => ShiftLeftLogicalVariable(value, count); + /// + /// __m256i _mm256_sllv_epi64 (__m256i a, __m256i count) + /// VPSLLVQ ymm1, ymm2, ymm3/m256 + /// VPSLLVQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst + /// + public static Vector256 ShiftLeftLogicalVariable(Vector256 value, Vector256 count) => ShiftLeftLogicalVariable(value, count); /// /// _mm256_sra_epi16 (__m256i a, __m128i count) - /// VPSRAW ymm, ymm, xmm/m128 + 
/// VPSRAW ymm1, ymm2, xmm3/m128 + /// VPSRAW ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftRightArithmetic(Vector256 value, Vector128 count) => ShiftRightArithmetic(value, count); /// /// _mm256_sra_epi32 (__m256i a, __m128i count) - /// VPSRAD ymm, ymm, xmm/m128 + /// VPSRAD ymm1, ymm2, xmm3/m128 + /// VPSRAD ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftRightArithmetic(Vector256 value, Vector128 count) => ShiftRightArithmetic(value, count); /// /// __m256i _mm256_srai_epi16 (__m256i a, int imm8) - /// VPSRAW ymm, ymm, imm8 + /// VPSRAW ymm1, ymm2, imm8 + /// VPSRAW ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftRightArithmetic(Vector256 value, [ConstantExpected] byte count) => ShiftRightArithmetic(value, count); /// /// __m256i _mm256_srai_epi32 (__m256i a, int imm8) - /// VPSRAD ymm, ymm, imm8 + /// VPSRAD ymm1, ymm2, imm8 + /// VPSRAD ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftRightArithmetic(Vector256 value, [ConstantExpected] byte count) => ShiftRightArithmetic(value, count); - /// - /// __m256i _mm256_srav_epi32 (__m256i a, __m256i count) - /// VPSRAVD ymm, ymm, ymm/m256 - /// - public static Vector256 ShiftRightArithmeticVariable(Vector256 value, Vector256 count) => ShiftRightArithmeticVariable(value, count); - /// /// __m128i _mm_srav_epi32 (__m128i a, __m128i count) - /// VPSRAVD xmm, xmm, xmm/m128 + /// VPSRAVD xmm1, xmm2, xmm3/m128 + /// VPSRAVD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 ShiftRightArithmeticVariable(Vector128 value, Vector128 count) => ShiftRightArithmeticVariable(value, count); + /// + /// __m256i _mm256_srav_epi32 (__m256i a, __m256i count) + /// VPSRAVD ymm1, ymm2, ymm3/m256 + /// VPSRAVD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst + /// + public static Vector256 ShiftRightArithmeticVariable(Vector256 value, Vector256 count) => ShiftRightArithmeticVariable(value, count); /// /// __m256i _mm256_srl_epi16 (__m256i a, __m128i count) - /// VPSRLW ymm, ymm, xmm/m128 + /// VPSRLW ymm1, ymm2, xmm3/m128 + /// VPSRLW ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftRightLogical(Vector256 value, Vector128 count) => ShiftRightLogical(value, count); /// /// __m256i _mm256_srl_epi16 (__m256i a, __m128i count) - /// VPSRLW ymm, ymm, xmm/m128 + /// VPSRLW ymm1, ymm2, xmm3/m128 + /// VPSRLW ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftRightLogical(Vector256 value, Vector128 count) => ShiftRightLogical(value, count); /// /// __m256i _mm256_srl_epi32 (__m256i a, __m128i count) - /// VPSRLD ymm, ymm, xmm/m128 + /// VPSRLD ymm1, ymm2, xmm3/m128 + /// VPSRLD ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftRightLogical(Vector256 value, Vector128 count) => ShiftRightLogical(value, count); /// /// __m256i _mm256_srl_epi32 (__m256i a, __m128i count) - /// VPSRLD ymm, ymm, xmm/m128 + /// VPSRLD ymm1, ymm2, xmm3/m128 + /// VPSRLD ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftRightLogical(Vector256 value, Vector128 count) => ShiftRightLogical(value, count); /// /// __m256i _mm256_srl_epi64 (__m256i a, __m128i count) - /// VPSRLQ ymm, ymm, xmm/m128 + /// VPSRLQ ymm1, ymm2, xmm3/m128 + /// VPSRLQ ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 ShiftRightLogical(Vector256 value, Vector128 count) => ShiftRightLogical(value, count); /// /// __m256i _mm256_srl_epi64 (__m256i a, __m128i count) - /// VPSRLQ ymm, ymm, xmm/m128 + /// VPSRLQ ymm1, ymm2, xmm3/m128 + /// VPSRLQ ymm1 {k1}{z}, ymm2, xmm3/m128 /// public static Vector256 
ShiftRightLogical(Vector256 value, Vector128 count) => ShiftRightLogical(value, count); /// /// __m256i _mm256_srli_epi16 (__m256i a, int imm8) - /// VPSRLW ymm, ymm, imm8 + /// VPSRLW ymm1, ymm2, imm8 + /// VPSRLW ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftRightLogical(Vector256 value, [ConstantExpected] byte count) => ShiftRightLogical(value, count); /// /// __m256i _mm256_srli_epi16 (__m256i a, int imm8) - /// VPSRLW ymm, ymm, imm8 + /// VPSRLW ymm1, ymm2, imm8 + /// VPSRLW ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftRightLogical(Vector256 value, [ConstantExpected] byte count) => ShiftRightLogical(value, count); /// /// __m256i _mm256_srli_epi32 (__m256i a, int imm8) - /// VPSRLD ymm, ymm, imm8 + /// VPSRLD ymm1, ymm2, imm8 + /// VPSRLD ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftRightLogical(Vector256 value, [ConstantExpected] byte count) => ShiftRightLogical(value, count); /// /// __m256i _mm256_srli_epi32 (__m256i a, int imm8) - /// VPSRLD ymm, ymm, imm8 + /// VPSRLD ymm1, ymm2, imm8 + /// VPSRLD ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftRightLogical(Vector256 value, [ConstantExpected] byte count) => ShiftRightLogical(value, count); /// /// __m256i _mm256_srli_epi64 (__m256i a, int imm8) - /// VPSRLQ ymm, ymm, imm8 + /// VPSRLQ ymm1, ymm2, imm8 + /// VPSRLQ ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftRightLogical(Vector256 value, [ConstantExpected] byte count) => ShiftRightLogical(value, count); /// /// __m256i _mm256_srli_epi64 (__m256i a, int imm8) - /// VPSRLQ ymm, ymm, imm8 + /// VPSRLQ ymm1, ymm2, imm8 + /// VPSRLQ ymm1 {k1}{z}, ymm2, imm8 /// public static Vector256 ShiftRightLogical(Vector256 value, [ConstantExpected] byte count) => ShiftRightLogical(value, count); /// /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8) - /// VPSRLDQ ymm, ymm, imm8 + /// VPSRLDQ ymm1, ymm2/m256, imm8 /// public static Vector256 ShiftRightLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes); /// /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8) - /// VPSRLDQ ymm, ymm, imm8 + /// VPSRLDQ ymm1, ymm2/m256, imm8 /// public static Vector256 ShiftRightLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes); /// /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8) - /// VPSRLDQ ymm, ymm, imm8 + /// VPSRLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors. /// public static Vector256 ShiftRightLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes); /// /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8) - /// VPSRLDQ ymm, ymm, imm8 + /// VPSRLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors. /// public static Vector256 ShiftRightLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes); /// /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8) - /// VPSRLDQ ymm, ymm, imm8 + /// VPSRLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors.
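A brief hypothetical contrast of the right-shift flavors documented above: arithmetic shifts (VPSRAD) replicate the sign bit, while logical shifts (VPSRLD) fill with zeros.

    if (Avx2.IsSupported)
    {
        Vector256<int> v = Vector256.Create(-64, 64, -8, 8, -1, 1, int.MinValue, int.MaxValue);

        Vector256<int> arithmetic = Avx2.ShiftRightArithmetic(v, 2); // lane 0: -64 >> 2 == -16
        Vector256<int> logical    = Avx2.ShiftRightLogical(v, 2);    // lane 0: 0x3FFFFFF0 == 1073741808
    }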
/// public static Vector256 ShiftRightLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes); /// /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8) - /// VPSRLDQ ymm, ymm, imm8 + /// VPSRLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors. /// public static Vector256 ShiftRightLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes); /// /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8) - /// VPSRLDQ ymm, ymm, imm8 + /// VPSRLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors. /// public static Vector256 ShiftRightLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes); /// /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8) - /// VPSRLDQ ymm, ymm, imm8 + /// VPSRLDQ ymm1, ymm2/m256, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors. /// public static Vector256 ShiftRightLogical128BitLane(Vector256 value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes); - /// - /// __m256i _mm256_srlv_epi32 (__m256i a, __m256i count) - /// VPSRLVD ymm, ymm, ymm/m256 - /// - public static Vector256 ShiftRightLogicalVariable(Vector256 value, Vector256 count) => ShiftRightLogicalVariable(value, count); - /// - /// __m256i _mm256_srlv_epi32 (__m256i a, __m256i count) - /// VPSRLVD ymm, ymm, ymm/m256 - /// - public static Vector256 ShiftRightLogicalVariable(Vector256 value, Vector256 count) => ShiftRightLogicalVariable(value, count); - /// - /// __m256i _mm256_srlv_epi64 (__m256i a, __m256i count) - /// VPSRLVQ ymm, ymm, ymm/m256 - /// - public static Vector256 ShiftRightLogicalVariable(Vector256 value, Vector256 count) => ShiftRightLogicalVariable(value, count); - /// - /// __m256i _mm256_srlv_epi64 (__m256i a, __m256i count) - /// VPSRLVQ ymm, ymm, ymm/m256 - /// - public static Vector256 ShiftRightLogicalVariable(Vector256 value, Vector256 count) => ShiftRightLogicalVariable(value, count); - /// /// __m128i _mm_srlv_epi32 (__m128i a, __m128i count) - /// VPSRLVD xmm, xmm, xmm/m128 + /// VPSRLVD xmm1, xmm2, xmm3/m128 + /// VPSRLVD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 ShiftRightLogicalVariable(Vector128 value, Vector128 count) => ShiftRightLogicalVariable(value, count); /// /// __m128i _mm_srlv_epi32 (__m128i a, __m128i count) - /// VPSRLVD xmm, xmm, xmm/m128 + /// VPSRLVD xmm1, xmm2, xmm3/m128 + /// VPSRLVD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 ShiftRightLogicalVariable(Vector128 value, Vector128 count) => ShiftRightLogicalVariable(value, count); /// /// __m128i _mm_srlv_epi64 (__m128i a, __m128i count) - /// VPSRLVQ xmm, xmm, xmm/m128 + /// VPSRLVQ xmm1, xmm2, xmm3/m128 + /// VPSRLVQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 ShiftRightLogicalVariable(Vector128 value, Vector128 count) => ShiftRightLogicalVariable(value, count); /// /// __m128i _mm_srlv_epi64 (__m128i a, __m128i count) - /// VPSRLVQ xmm, xmm, xmm/m128 + /// VPSRLVQ xmm1, xmm2, xmm3/m128 + /// VPSRLVQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 ShiftRightLogicalVariable(Vector128 value, Vector128 count) => ShiftRightLogicalVariable(value, count); + /// + /// __m256i _mm256_srlv_epi32 (__m256i a, __m256i
count) + /// VPSRLVD ymm1, ymm2, ymm3/m256 + /// VPSRLVD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst + /// + public static Vector256 ShiftRightLogicalVariable(Vector256 value, Vector256 count) => ShiftRightLogicalVariable(value, count); + /// + /// __m256i _mm256_srlv_epi32 (__m256i a, __m256i count) + /// VPSRLVD ymm1, ymm2, ymm3/m256 + /// VPSRLVD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst + /// + public static Vector256 ShiftRightLogicalVariable(Vector256 value, Vector256 count) => ShiftRightLogicalVariable(value, count); + /// + /// __m256i _mm256_srlv_epi64 (__m256i a, __m256i count) + /// VPSRLVQ ymm1, ymm2, ymm3/m256 + /// VPSRLVQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst + /// + public static Vector256 ShiftRightLogicalVariable(Vector256 value, Vector256 count) => ShiftRightLogicalVariable(value, count); + /// + /// __m256i _mm256_srlv_epi64 (__m256i a, __m256i count) + /// VPSRLVQ ymm1, ymm2, ymm3/m256 + /// VPSRLVQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst + /// + public static Vector256 ShiftRightLogicalVariable(Vector256 value, Vector256 count) => ShiftRightLogicalVariable(value, count); /// /// __m256i _mm256_shuffle_epi8 (__m256i a, __m256i b) - /// VPSHUFB ymm, ymm, ymm/m256 + /// VPSHUFB ymm1, ymm2, ymm3/m256 + /// VPSHUFB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Shuffle(Vector256 value, Vector256 mask) => Shuffle(value, mask); /// /// __m256i _mm256_shuffle_epi8 (__m256i a, __m256i b) - /// VPSHUFB ymm, ymm, ymm/m256 + /// VPSHUFB ymm1, ymm2, ymm3/m256 + /// VPSHUFB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Shuffle(Vector256 value, Vector256 mask) => Shuffle(value, mask); /// /// __m256i _mm256_shuffle_epi32 (__m256i a, const int imm8) - /// VPSHUFD ymm, ymm/m256, imm8 + /// VPSHUFD ymm1, ymm2/m256, imm8 + /// VPSHUFD ymm1 {k1}{z}, ymm2/m256/m32bcst, imm8 /// public static Vector256 Shuffle(Vector256 value, [ConstantExpected] byte control) => Shuffle(value, control); /// /// __m256i _mm256_shuffle_epi32 (__m256i a, const int imm8) - /// VPSHUFD ymm, ymm/m256, imm8 + /// VPSHUFD ymm1, ymm2/m256, imm8 + /// VPSHUFD ymm1 {k1}{z}, ymm2/m256/m32bcst, imm8 /// public static Vector256 Shuffle(Vector256 value, [ConstantExpected] byte control) => Shuffle(value, control); /// /// __m256i _mm256_shufflehi_epi16 (__m256i a, const int imm8) - /// VPSHUFHW ymm, ymm/m256, imm8 + /// VPSHUFHW ymm1, ymm2/m256, imm8 + /// VPSHUFHW ymm1 {k1}{z}, ymm2/m256, imm8 /// public static Vector256 ShuffleHigh(Vector256 value, [ConstantExpected] byte control) => ShuffleHigh(value, control); /// /// __m256i _mm256_shufflehi_epi16 (__m256i a, const int imm8) - /// VPSHUFHW ymm, ymm/m256, imm8 + /// VPSHUFHW ymm1, ymm2/m256, imm8 + /// VPSHUFHW ymm1 {k1}{z}, ymm2/m256, imm8 /// public static Vector256 ShuffleHigh(Vector256 value, [ConstantExpected] byte control) => ShuffleHigh(value, control); /// /// __m256i _mm256_shufflelo_epi16 (__m256i a, const int imm8) - /// VPSHUFLW ymm, ymm/m256, imm8 + /// VPSHUFLW ymm1, ymm2/m256, imm8 + /// VPSHUFLW ymm1 {k1}{z}, ymm2/m256, imm8 /// public static Vector256 ShuffleLow(Vector256 value, [ConstantExpected] byte control) => ShuffleLow(value, control); /// /// __m256i _mm256_shufflelo_epi16 (__m256i a, const int imm8) - /// VPSHUFLW ymm, ymm/m256, imm8 + /// VPSHUFLW ymm1, ymm2/m256, imm8 + /// VPSHUFLW ymm1 {k1}{z}, ymm2/m256, imm8 /// public static Vector256 ShuffleLow(Vector256 value, [ConstantExpected] byte control) => ShuffleLow(value, control); /// /// __m256i _mm256_sign_epi8 (__m256i a, __m256i b) - /// VPSIGNB ymm, ymm, ymm/m256 + /// VPSIGNB 
ymm1, ymm2, ymm3/m256 /// public static Vector256 Sign(Vector256 left, Vector256 right) => Sign(left, right); /// /// __m256i _mm256_sign_epi16 (__m256i a, __m256i b) - /// VPSIGNW ymm, ymm, ymm/m256 + /// VPSIGNW ymm1, ymm2, ymm3/m256 /// public static Vector256 Sign(Vector256 left, Vector256 right) => Sign(left, right); /// /// __m256i _mm256_sign_epi32 (__m256i a, __m256i b) - /// VPSIGND ymm, ymm, ymm/m256 + /// VPSIGND ymm1, ymm2, ymm3/m256 /// public static Vector256 Sign(Vector256 left, Vector256 right) => Sign(left, right); /// /// __m256i _mm256_sub_epi8 (__m256i a, __m256i b) - /// VPSUBB ymm, ymm, ymm/m256 + /// VPSUBB ymm1, ymm2, ymm3/m256 + /// VPSUBB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Subtract(Vector256 left, Vector256 right) => Subtract(left, right); /// /// __m256i _mm256_sub_epi8 (__m256i a, __m256i b) - /// VPSUBB ymm, ymm, ymm/m256 + /// VPSUBB ymm1, ymm2, ymm3/m256 + /// VPSUBB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Subtract(Vector256 left, Vector256 right) => Subtract(left, right); /// /// __m256i _mm256_sub_epi16 (__m256i a, __m256i b) - /// VPSUBW ymm, ymm, ymm/m256 + /// VPSUBW ymm1, ymm2, ymm3/m256 + /// VPSUBW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Subtract(Vector256 left, Vector256 right) => Subtract(left, right); /// /// __m256i _mm256_sub_epi16 (__m256i a, __m256i b) - /// VPSUBW ymm, ymm, ymm/m256 + /// VPSUBW ymm1, ymm2, ymm3/m256 + /// VPSUBW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Subtract(Vector256 left, Vector256 right) => Subtract(left, right); /// /// __m256i _mm256_sub_epi32 (__m256i a, __m256i b) - /// VPSUBD ymm, ymm, ymm/m256 + /// VPSUBD ymm1, ymm2, ymm3/m256 + /// VPSUBD ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Subtract(Vector256 left, Vector256 right) => Subtract(left, right); /// /// __m256i _mm256_sub_epi32 (__m256i a, __m256i b) - /// VPSUBD ymm, ymm, ymm/m256 + /// VPSUBD ymm1, ymm2, ymm3/m256 + /// VPSUBD ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Subtract(Vector256 left, Vector256 right) => Subtract(left, right); /// /// __m256i _mm256_sub_epi64 (__m256i a, __m256i b) - /// VPSUBQ ymm, ymm, ymm/m256 + /// VPSUBQ ymm1, ymm2, ymm3/m256 + /// VPSUBQ ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Subtract(Vector256 left, Vector256 right) => Subtract(left, right); /// /// __m256i _mm256_sub_epi64 (__m256i a, __m256i b) - /// VPSUBQ ymm, ymm, ymm/m256 + /// VPSUBQ ymm1, ymm2, ymm3/m256 + /// VPSUBQ ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 Subtract(Vector256 left, Vector256 right) => Subtract(left, right); /// /// __m256i _mm256_subs_epi8 (__m256i a, __m256i b) - /// VPSUBSB ymm, ymm, ymm/m256 + /// VPSUBSB ymm1, ymm2, ymm3/m256 + /// VPSUBSB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 SubtractSaturate(Vector256 left, Vector256 right) => SubtractSaturate(left, right); /// /// __m256i _mm256_subs_epi16 (__m256i a, __m256i b) - /// VPSUBSW ymm, ymm, ymm/m256 + /// VPSUBSW ymm1, ymm2, ymm3/m256 + /// VPSUBSW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 SubtractSaturate(Vector256 left, Vector256 right) => SubtractSaturate(left, right); /// /// __m256i _mm256_subs_epu8 (__m256i a, __m256i b) - /// VPSUBUSB ymm, ymm, ymm/m256 + /// VPSUBUSB ymm1, ymm2, ymm3/m256 + /// VPSUBUSB ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 SubtractSaturate(Vector256 left, Vector256 right) => SubtractSaturate(left, right); /// /// __m256i _mm256_subs_epu16 (__m256i a, __m256i b) - /// VPSUBUSW
ymm, ymm, ymm/m256 + /// VPSUBUSW ymm1, ymm2, ymm3/m256 + /// VPSUBUSW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 SubtractSaturate(Vector256 left, Vector256 right) => SubtractSaturate(left, right); /// /// __m256i _mm256_sad_epu8 (__m256i a, __m256i b) - /// VPSADBW ymm, ymm, ymm/m256 + /// VPSADBW ymm1, ymm2, ymm3/m256 + /// VPSADBW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 SumAbsoluteDifferences(Vector256 left, Vector256 right) => SumAbsoluteDifferences(left, right); /// /// __m256i _mm256_unpackhi_epi8 (__m256i a, __m256i b) - /// VPUNPCKHBW ymm, ymm, ymm/m256 + /// VPUNPCKHBW ymm1, ymm2, ymm3/m256 + /// VPUNPCKHBW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 UnpackHigh(Vector256 left, Vector256 right) => UnpackHigh(left, right); /// /// __m256i _mm256_unpackhi_epi8 (__m256i a, __m256i b) - /// VPUNPCKHBW ymm, ymm, ymm/m256 + /// VPUNPCKHBW ymm1, ymm2, ymm3/m256 + /// VPUNPCKHBW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 UnpackHigh(Vector256 left, Vector256 right) => UnpackHigh(left, right); /// /// __m256i _mm256_unpackhi_epi16 (__m256i a, __m256i b) - /// VPUNPCKHWD ymm, ymm, ymm/m256 + /// VPUNPCKHWD ymm1, ymm2, ymm3/m256 + /// VPUNPCKHWD ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 UnpackHigh(Vector256 left, Vector256 right) => UnpackHigh(left, right); /// /// __m256i _mm256_unpackhi_epi16 (__m256i a, __m256i b) - /// VPUNPCKHWD ymm, ymm, ymm/m256 + /// VPUNPCKHWD ymm1, ymm2, ymm3/m256 + /// VPUNPCKHWD ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 UnpackHigh(Vector256 left, Vector256 right) => UnpackHigh(left, right); /// /// __m256i _mm256_unpackhi_epi32 (__m256i a, __m256i b) - /// VPUNPCKHDQ ymm, ymm, ymm/m256 + /// VPUNPCKHDQ ymm1, ymm2, ymm3/m256 + /// VPUNPCKHDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 UnpackHigh(Vector256 left, Vector256 right) => UnpackHigh(left, right); /// /// __m256i _mm256_unpackhi_epi32 (__m256i a, __m256i b) - /// VPUNPCKHDQ ymm, ymm, ymm/m256 + /// VPUNPCKHDQ ymm1, ymm2, ymm3/m256 + /// VPUNPCKHDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 UnpackHigh(Vector256 left, Vector256 right) => UnpackHigh(left, right); /// /// __m256i _mm256_unpackhi_epi64 (__m256i a, __m256i b) - /// VPUNPCKHQDQ ymm, ymm, ymm/m256 + /// VPUNPCKHQDQ ymm1, ymm2, ymm3/m256 + /// VPUNPCKHQDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 UnpackHigh(Vector256 left, Vector256 right) => UnpackHigh(left, right); /// /// __m256i _mm256_unpackhi_epi64 (__m256i a, __m256i b) - /// VPUNPCKHQDQ ymm, ymm, ymm/m256 + /// VPUNPCKHQDQ ymm1, ymm2, ymm3/m256 + /// VPUNPCKHQDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 UnpackHigh(Vector256 left, Vector256 right) => UnpackHigh(left, right); /// /// __m256i _mm256_unpacklo_epi8 (__m256i a, __m256i b) - /// VPUNPCKLBW ymm, ymm, ymm/m256 + /// VPUNPCKLBW ymm1, ymm2, ymm3/m256 + /// VPUNPCKLBW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 UnpackLow(Vector256 left, Vector256 right) => UnpackLow(left, right); /// /// __m256i _mm256_unpacklo_epi8 (__m256i a, __m256i b) - /// VPUNPCKLBW ymm, ymm, ymm/m256 + /// VPUNPCKLBW ymm1, ymm2, ymm3/m256 + /// VPUNPCKLBW ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 UnpackLow(Vector256 left, Vector256 right) => UnpackLow(left, right); /// /// __m256i _mm256_unpacklo_epi16 (__m256i a, __m256i b) - /// VPUNPCKLWD ymm, ymm, ymm/m256 + /// VPUNPCKLWD ymm1, ymm2, ymm3/m256 + /// VPUNPCKLWD ymm1 {k1}{z}, ymm2, ymm3/m256 ///
public static Vector256 UnpackLow(Vector256 left, Vector256 right) => UnpackLow(left, right); /// /// __m256i _mm256_unpacklo_epi16 (__m256i a, __m256i b) - /// VPUNPCKLWD ymm, ymm, ymm/m256 + /// VPUNPCKLWD ymm1, ymm2, ymm3/m256 + /// VPUNPCKLWD ymm1 {k1}{z}, ymm2, ymm3/m256 /// public static Vector256 UnpackLow(Vector256 left, Vector256 right) => UnpackLow(left, right); /// /// __m256i _mm256_unpacklo_epi32 (__m256i a, __m256i b) - /// VPUNPCKLDQ ymm, ymm, ymm/m256 + /// VPUNPCKLDQ ymm1, ymm2, ymm3/m256 + /// VPUNPCKLDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 UnpackLow(Vector256 left, Vector256 right) => UnpackLow(left, right); /// /// __m256i _mm256_unpacklo_epi32 (__m256i a, __m256i b) - /// VPUNPCKLDQ ymm, ymm, ymm/m256 + /// VPUNPCKLDQ ymm1, ymm2, ymm3/m256 + /// VPUNPCKLDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 UnpackLow(Vector256 left, Vector256 right) => UnpackLow(left, right); /// /// __m256i _mm256_unpacklo_epi64 (__m256i a, __m256i b) - /// VPUNPCKLQDQ ymm, ymm, ymm/m256 + /// VPUNPCKLQDQ ymm1, ymm2, ymm3/m256 + /// VPUNPCKLQDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 UnpackLow(Vector256 left, Vector256 right) => UnpackLow(left, right); /// /// __m256i _mm256_unpacklo_epi64 (__m256i a, __m256i b) - /// VPUNPCKLQDQ ymm, ymm, ymm/m256 + /// VPUNPCKLQDQ ymm1, ymm2, ymm3/m256 + /// VPUNPCKLQDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 UnpackLow(Vector256 left, Vector256 right) => UnpackLow(left, right); /// /// __m256i _mm256_xor_si256 (__m256i a, __m256i b) - /// VPXOR ymm, ymm, ymm/m256 + /// VPXOR ymm1, ymm2, ymm3/m256 /// public static Vector256 Xor(Vector256 left, Vector256 right) => Xor(left, right); /// /// __m256i _mm256_xor_si256 (__m256i a, __m256i b) - /// VPXOR ymm, ymm, ymm/m256 + /// VPXOR ymm1, ymm2, ymm3/m256 /// public static Vector256 Xor(Vector256 left, Vector256 right) => Xor(left, right); /// /// __m256i _mm256_xor_si256 (__m256i a, __m256i b) - /// VPXOR ymm, ymm, ymm/m256 + /// VPXOR ymm1, ymm2, ymm3/m256 /// public static Vector256 Xor(Vector256 left, Vector256 right) => Xor(left, right); /// /// __m256i _mm256_xor_si256 (__m256i a, __m256i b) - /// VPXOR ymm, ymm, ymm/m256 + /// VPXOR ymm1, ymm2, ymm3/m256 /// public static Vector256 Xor(Vector256 left, Vector256 right) => Xor(left, right); /// /// __m256i _mm256_xor_si256 (__m256i a, __m256i b) - /// VPXOR ymm, ymm, ymm/m256 + /// VPXOR ymm1, ymm2, ymm3/m256 + /// VPXORD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Xor(Vector256 left, Vector256 right) => Xor(left, right); /// /// __m256i _mm256_xor_si256 (__m256i a, __m256i b) - /// VPXOR ymm, ymm, ymm/m256 + /// VPXOR ymm1, ymm2, ymm3/m256 + /// VPXORD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 Xor(Vector256 left, Vector256 right) => Xor(left, right); /// /// __m256i _mm256_xor_si256 (__m256i a, __m256i b) - /// VPXOR ymm, ymm, ymm/m256 + /// VPXOR ymm1, ymm2, ymm3/m256 + /// VPXORQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Xor(Vector256 left, Vector256 right) => Xor(left, right); /// /// __m256i _mm256_xor_si256 (__m256i a, __m256i b) - /// VPXOR ymm, ymm, ymm/m256 + /// VPXOR ymm1, ymm2, ymm3/m256 + /// VPXORQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 Xor(Vector256 left, Vector256 right) => Xor(left, right); } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.cs 
b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.cs index b0fe1c60f8b33c..c5f60ff2e7b8c6 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.cs @@ -1,7 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; namespace System.Runtime.Intrinsics.X86 diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512DQ.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512DQ.cs index 01004e8b8e9ee5..6a63728a1ac3dd 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512DQ.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512DQ.cs @@ -1,7 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; namespace System.Runtime.Intrinsics.X86 diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.PlatformNotSupported.cs index 63f62aff615aa3..77bd4d333c1d4e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.PlatformNotSupported.cs @@ -32,461 +32,461 @@ internal X64() { } /// /// __m512i _mm512_and_si512 (__m512i a, __m512i b) - /// VPAND zmm, zmm, zmm/m512 + /// VPANDD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// - public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_and_si512 (__m512i a, __m512i b) - /// VPAND zmm, zmm, zmm/m512 + /// VPANDD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// - public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_and_si512 (__m512i a, __m512i b) - /// VPAND zmm, zmm, zmm/m512 + /// VPANDD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_and_si512 (__m512i a, __m512i b) - /// VPAND zmm, zmm, zmm/m512 + /// VPANDD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_and_si512 (__m512i a, __m512i b) - /// VPAND zmm, zmm, zmm/m512 + /// VPANDD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_and_si512 (__m512i a, __m512i b) - /// VPAND zmm, zmm, zmm/m512 + /// VPANDD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_and_si512 (__m512i a, __m512i 
b) - /// VPAND zmm, zmm, zmm/m512 + /// VPANDQ zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst /// public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_and_si512 (__m512i a, __m512i b) - /// VPAND zmm, zmm, zmm/m512 + /// VPANDQ zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst /// public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512 _mm512_and_ps (__m512 a, __m512 b) - /// VANDPS zmm, zmm, zmm/m512 + /// VANDPS zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512d _mm512_and_pd (__m512d a, __m512d b) - /// VANDPD zmm, zmm, zmm/m512 + /// VANDPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst /// public static Vector512 And(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) - /// VPANDN zmm, zmm, zmm/m512 + /// VPANDND zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// - public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) - /// VPANDN zmm, zmm, zmm/m512 + /// VPANDND zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// - public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) - /// VPANDN zmm, zmm, zmm/m512 + /// VPANDND zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) - /// VPANDN zmm, zmm, zmm/m512 + /// VPANDND zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) - /// VPANDN zmm, zmm, zmm/m512 + /// VPANDND zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) - /// VPANDN zmm, zmm, zmm/m512 + /// VPANDND zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) - /// VPANDN zmm, zmm, zmm/m512 + /// VPANDNQ zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst /// public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) - /// VPANDN zmm, zmm, zmm/m512 + /// VPANDNQ zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst /// public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512 _mm512_andnot_ps (__m512 a, __m512 b) - /// VANDNPS zmm, zmm, zmm/m512 + /// VANDNPS zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512d _mm512_andnot_pd (__m512d a, __m512d b) - /// 
VANDNPD zmm, zmm, zmm/m512 + /// VANDNPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst /// public static Vector512 AndNot(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } - /// - /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) - /// VMOVDQU32 zmm, m512 - /// - public static unsafe Vector512 LoadVector512(sbyte* address) { throw new PlatformNotSupportedException(); } - /// - /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) - /// VMOVDQU32 zmm, m512 - /// - public static unsafe Vector512 LoadVector512(byte* address) { throw new PlatformNotSupportedException(); } - /// - /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) - /// VMOVDQU32 zmm, m512 - /// - public static unsafe Vector512 LoadVector512(short* address) { throw new PlatformNotSupportedException(); } - /// - /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) - /// VMOVDQU32 zmm, m512 - /// - public static unsafe Vector512 LoadVector512(ushort* address) { throw new PlatformNotSupportedException(); } - /// - /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) - /// VMOVDQU32 zmm, m512 - /// - public static unsafe Vector512 LoadVector512(int* address) { throw new PlatformNotSupportedException(); } - /// - /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) - /// VMOVDQU32 zmm, m512 - /// - public static unsafe Vector512 LoadVector512(uint* address) { throw new PlatformNotSupportedException(); } - /// - /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) - /// VMOVDQU64 zmm, m512 - /// - public static unsafe Vector512 LoadVector512(long* address) { throw new PlatformNotSupportedException(); } - /// - /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) - /// VMOVDQU64 zmm, m512 - /// - public static unsafe Vector512 LoadVector512(ulong* address) { throw new PlatformNotSupportedException(); } - /// - /// __m512 _mm512_loadu_ps (float const * mem_addr) - /// VMOVUPS zmm, zmm/m512 - /// - public static unsafe Vector512 LoadVector512(float* address) { throw new PlatformNotSupportedException(); } - /// - /// __m512d _mm512_loadu_pd (double const * mem_addr) - /// VMOVUPD zmm, zmm/m512 - /// - public static unsafe Vector512 LoadVector512(double* address) { throw new PlatformNotSupportedException(); } - /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) - /// VMOVDQA32 zmm, m512 + /// VMOVDQA32 zmm1 {k1}{z}, m512 /// - public static unsafe Vector512 LoadAlignedVector512(sbyte* address) { throw new PlatformNotSupportedException(); } + public static unsafe Vector512 LoadAlignedVector512(byte* address) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) - /// VMOVDQA32 zmm, m512 + /// VMOVDQA32 zmm1 {k1}{z}, m512 /// - public static unsafe Vector512 LoadAlignedVector512(byte* address) { throw new PlatformNotSupportedException(); } + public static unsafe Vector512 LoadAlignedVector512(sbyte* address) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) - /// VMOVDQA32 zmm, m512 + /// VMOVDQA32 zmm1 {k1}{z}, m512 /// public static unsafe Vector512 LoadAlignedVector512(short* address) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) - /// VMOVDQA32 zmm, m512 + /// VMOVDQA32 zmm1 {k1}{z}, m512 /// public static unsafe Vector512 LoadAlignedVector512(ushort* address) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) - /// VMOVDQA32 zmm, m512 
+ /// VMOVDQA32 zmm1 {k1}{z}, m512 /// public static unsafe Vector512 LoadAlignedVector512(int* address) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) - /// VMOVDQA32 zmm, m512 + /// VMOVDQA32 zmm1 {k1}{z}, m512 /// public static unsafe Vector512 LoadAlignedVector512(uint* address) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) - /// VMOVDQA64 zmm, m512 + /// VMOVDQA64 zmm1 {k1}{z}, m512 /// public static unsafe Vector512 LoadAlignedVector512(long* address) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) - /// VMOVDQA64 zmm, m512 + /// VMOVDQA64 zmm1 {k1}{z}, m512 /// public static unsafe Vector512 LoadAlignedVector512(ulong* address) { throw new PlatformNotSupportedException(); } /// /// __m512 _mm512_load_ps (float const * mem_addr) - /// VMOVAPS zmm, zmm/m512 + /// VMOVAPS zmm1 {k1}{z}, m512 /// public static unsafe Vector512 LoadAlignedVector512(float* address) { throw new PlatformNotSupportedException(); } /// /// __m512d _mm512_load_pd (double const * mem_addr) - /// VMOVAPD zmm, zmm/m512 + /// VMOVAPD zmm1 {k1}{z}, m512 /// public static unsafe Vector512 LoadAlignedVector512(double* address) { throw new PlatformNotSupportedException(); } /// - /// __m512 _mm512_or_ps (__m512 a, __m512 b) - /// VORPS zmm, zmm, zmm/m512 + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU8 zmm1 {k1}{z}, m512 /// - public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + public static unsafe Vector512 LoadVector512(sbyte* address) { throw new PlatformNotSupportedException(); } /// - /// __m512d _mm512_or_pd (__m512d a, __m512d b) - /// VORPD zmm, zmm, zmm/m512 + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU8 zmm1 {k1}{z}, m512 /// - public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + public static unsafe Vector512 LoadVector512(byte* address) { throw new PlatformNotSupportedException(); } /// - /// __m512i _mm512_or_si512 (__m512i a, __m512i b) - /// VPOR zmm, zmm, zmm/m512 + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU16 zmm1 {k1}{z}, m512 /// - public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + public static unsafe Vector512 LoadVector512(short* address) { throw new PlatformNotSupportedException(); } + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU16 zmm1 {k1}{z}, m512 + /// + public static unsafe Vector512 LoadVector512(ushort* address) { throw new PlatformNotSupportedException(); } + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU32 zmm1 {k1}{z}, m512 + /// + public static unsafe Vector512 LoadVector512(int* address) { throw new PlatformNotSupportedException(); } + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU32 zmm1 {k1}{z}, m512 + /// + public static unsafe Vector512 LoadVector512(uint* address) { throw new PlatformNotSupportedException(); } + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU64 zmm1 {k1}{z}, m512 + /// + public static unsafe Vector512 LoadVector512(long* address) { throw new PlatformNotSupportedException(); } + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU64 zmm1 {k1}{z}, m512 + /// + public static unsafe 
Vector512 LoadVector512(ulong* address) { throw new PlatformNotSupportedException(); } + /// + /// __m512 _mm512_loadu_ps (float const * mem_addr) + /// VMOVUPS zmm1 {k1}{z}, m512 + /// + public static unsafe Vector512 LoadVector512(float* address) { throw new PlatformNotSupportedException(); } + /// + /// __m512d _mm512_loadu_pd (double const * mem_addr) + /// VMOVUPD zmm1 {k1}{z}, m512 + /// + public static unsafe Vector512 LoadVector512(double* address) { throw new PlatformNotSupportedException(); } + /// /// __m512i _mm512_or_si512 (__m512i a, __m512i b) - /// VPOR zmm, zmm, zmm/m512 + /// VPORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_or_si512 (__m512i a, __m512i b) - /// VPOR zmm, zmm, zmm/m512 + /// VPORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst + /// + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + /// + /// __m512i _mm512_or_si512 (__m512i a, __m512i b) + /// VPORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_or_si512 (__m512i a, __m512i b) - /// VPOR zmm, zmm, zmm/m512 + /// VPORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_or_si512 (__m512i a, __m512i b) - /// VPOR zmm, zmm, zmm/m512 + /// VPORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_or_si512 (__m512i a, __m512i b) - /// VPOR zmm, zmm, zmm/m512 + /// VPORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_or_si512 (__m512i a, __m512i b) - /// VPOR zmm, zmm, zmm/m512 + /// VPORQ zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst /// public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_or_si512 (__m512i a, __m512i b) - /// VPOR zmm, zmm, zmm/m512 + /// VPORQ zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst /// public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + /// + /// __m512 _mm512_or_ps (__m512 a, __m512 b) + /// VORPS zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst + /// + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + /// + /// __m512d _mm512_or_pd (__m512d a, __m512d b) + /// VORPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst + /// + public static Vector512 Or(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQU32 m512, zmm + /// VMOVDQU8 m512 {k1}{z}, zmm1 /// public static unsafe void Store(sbyte* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQU32 m512, zmm + /// VMOVDQU8 m512 {k1}{z}, zmm1 /// public static unsafe void Store(byte* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQU32 m512, zmm + /// VMOVDQU16 m512 {k1}{z}, zmm1 /// public static unsafe void Store(short* 
address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQU32 m512, zmm + /// VMOVDQU16 m512 {k1}{z}, zmm1 /// public static unsafe void Store(ushort* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQU32 m512, zmm + /// VMOVDQU32 m512 {k1}{z}, zmm1 /// public static unsafe void Store(int* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQU32 m512, zmm + /// VMOVDQU32 m512 {k1}{z}, zmm1 /// public static unsafe void Store(uint* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQU64 m512, zmm + /// VMOVDQU64 m512 {k1}{z}, zmm1 /// public static unsafe void Store(long* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQU64 m512, zmm + /// VMOVDQU64 m512 {k1}{z}, zmm1 /// public static unsafe void Store(ulong* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_storeu_ps (float * mem_addr, __m512 a) - /// VMOVUPS m512, zmm + /// VMOVUPS m512 {k1}{z}, zmm1 /// public static unsafe void Store(float* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_storeu_pd (double * mem_addr, __m512d a) - /// VMOVUPD m512, zmm + /// VMOVUPD m512 {k1}{z}, zmm1 /// public static unsafe void Store(double* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQA32 m512, zmm + /// VMOVDQA32 m512 {k1}{z}, zmm1 /// - public static unsafe void StoreAligned(sbyte* address, Vector512 source) { throw new PlatformNotSupportedException(); } + public static unsafe void StoreAligned(byte* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQA32 m512, zmm + /// VMOVDQA32 m512 {k1}{z}, zmm1 /// - public static unsafe void StoreAligned(byte* address, Vector512 source) { throw new PlatformNotSupportedException(); } + public static unsafe void StoreAligned(sbyte* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQA32 m512, zmm + /// VMOVDQA32 m512 {k1}{z}, zmm1 /// public static unsafe void StoreAligned(short* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQA32 m512, zmm + /// VMOVDQA32 m512 {k1}{z}, zmm1 /// public static unsafe void StoreAligned(ushort* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQA32 m512, zmm + /// VMOVDQA32 m512 {k1}{z}, zmm1 /// public static unsafe void StoreAligned(int* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQA32 m512, zmm + /// VMOVDQA32 m512 {k1}{z}, zmm1 /// public static unsafe void StoreAligned(uint* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i 
a) - /// VMOVDQA64 m512, zmm + /// VMOVDQA64 m512 {k1}{z}, zmm1 /// public static unsafe void StoreAligned(long* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQA64 m512, zmm + /// VMOVDQA64 m512 {k1}{z}, zmm1 /// public static unsafe void StoreAligned(ulong* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_store_ps (float * mem_addr, __m512 a) - /// VMOVAPS m512, zmm + /// VMOVAPS m512 {k1}{z}, zmm1 /// public static unsafe void StoreAligned(float* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_store_pd (double * mem_addr, __m512d a) - /// VMOVAPD m512, zmm + /// VMOVAPD m512 {k1}{z}, zmm1 /// public static unsafe void StoreAligned(double* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) - /// VMOVNTDQ m512, zmm + /// VMOVNTDQ m512, zmm1 /// public static unsafe void StoreAlignedNonTemporal(sbyte* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) - /// VMOVNTDQ m512, zmm + /// VMOVNTDQ m512, zmm1 /// public static unsafe void StoreAlignedNonTemporal(byte* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) - /// VMOVNTDQ m512, zmm + /// VMOVNTDQ m512, zmm1 /// public static unsafe void StoreAlignedNonTemporal(short* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) - /// VMOVNTDQ m512, zmm + /// VMOVNTDQ m512, zmm1 /// public static unsafe void StoreAlignedNonTemporal(ushort* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) - /// VMOVNTDQ m512, zmm + /// VMOVNTDQ m512, zmm1 /// public static unsafe void StoreAlignedNonTemporal(int* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) - /// VMOVNTDQ m512, zmm + /// VMOVNTDQ m512, zmm1 /// public static unsafe void StoreAlignedNonTemporal(uint* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) - /// VMOVNTDQ m512, zmm + /// VMOVNTDQ m512, zmm1 /// public static unsafe void StoreAlignedNonTemporal(long* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) - /// VMOVNTDQ m512, zmm + /// VMOVNTDQ m512, zmm1 /// public static unsafe void StoreAlignedNonTemporal(ulong* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_stream_ps (float * mem_addr, __m512 a) - /// MOVNTPS m512, zmm + /// VMOVNTPS m512, zmm1 /// public static unsafe void StoreAlignedNonTemporal(float* address, Vector512 source) { throw new PlatformNotSupportedException(); } /// /// void _mm512_stream_pd (double * mem_addr, __m512d a) - /// MOVNTPD m512, zmm + /// VMOVNTPD m512, zmm1 /// public static unsafe void StoreAlignedNonTemporal(double* address, Vector512 source) { throw new PlatformNotSupportedException(); } - /// - /// __m512 _mm512_xor_ps (__m512 a, __m512 b) - /// VXORPS zmm, zmm, zmm/m512 - /// - public static Vector512 Xor(Vector512 left,
Vector512 right) { throw new PlatformNotSupportedException(); } - /// - /// __m512d _mm512_xor_pd (__m512d a, __m512d b) - /// VXORPS zmm, zmm, zmm/m512 - /// - public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) - /// VPXOR zmm, zmm, zmm/m512 + /// VPXORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// - public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) - /// VPXOR zmm, zmm, zmm/m512 + /// VPXORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// - public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) - /// VPXOR zmm, zmm, zmm/m512 + /// VPXORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) - /// VPXOR zmm, zmm, zmm/m512 + /// VPXORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) - /// VPXOR zmm, zmm, zmm/m512 + /// VPXORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) - /// VPXOR zmm, zmm, zmm/m512 + /// VPXORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) - /// VPXOR zmm, zmm, zmm/m512 + /// VPXORQ zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst /// public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) - /// VPXOR zmm, zmm, zmm/m512 + /// VPXORQ zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst /// public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + /// + /// __m512 _mm512_xor_ps (__m512 a, __m512 b) + /// VXORPS zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst + /// + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + /// + /// __m512d _mm512_xor_pd (__m512d a, __m512d b) + /// VXORPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst + /// + public static Vector512 Xor(Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.cs index e8d64a3373aaa3..29a9e0a36ca115 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.cs @@ -1,7 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
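One convention in the EVEX listings above is worth spelling out: AVX-512 splits the type-agnostic SSE/AVX bitwise opcodes into per-element-size forms (VPXORD/VPXORQ, VPORD/VPORQ, VPANDD/VPANDQ) so that masking and embedded broadcast know the element width. A minimal sketch of how the managed API maps onto this (illustrative only, not part of this patch; the XorFormsSketch name is hypothetical, and the code assumes .NET 8+ with Avx512F.IsSupported):

// Hypothetical demo program; the same Xor API lowers to the dword form for
// 32-bit elements and the qword form for 64-bit elements, matching the
// per-element-size EVEX listings documented above.
using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

internal static class XorFormsSketch
{
    private static void Main()
    {
        if (!Avx512F.IsSupported) return;

        Vector512<int> a = Vector512.Create(0x0F0F0F0F);
        Vector512<int> b = Vector512.Create(0x00FF00FF);
        Vector512<int> r32 = Avx512F.Xor(a, b);              // VPXORD
        Console.WriteLine(r32.GetElement(0).ToString("X8")); // 0FF00FF0

        Vector512<long> c = Vector512.Create(0x0123_4567_89AB_CDEFL);
        Vector512<long> d = Vector512.Create(-1L);
        Vector512<long> r64 = Avx512F.Xor(c, d);              // VPXORQ
        Console.WriteLine(r64.GetElement(0).ToString("X16")); // FEDCBA9876543210
    }
}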
-using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; namespace System.Runtime.Intrinsics.X86 @@ -33,461 +32,461 @@ internal X64() { } /// /// __m512i _mm512_and_si512 (__m512i a, __m512i b) - /// VPAND zmm, zmm, zmm/m512 + /// VPANDD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// - public static Vector512 And(Vector512 left, Vector512 right) => And(left, right); + public static Vector512 And(Vector512 left, Vector512 right) => And(left, right); /// /// __m512i _mm512_and_si512 (__m512i a, __m512i b) - /// VPAND zmm, zmm, zmm/m512 + /// VPANDD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// - public static Vector512 And(Vector512 left, Vector512 right) => And(left, right); + public static Vector512 And(Vector512 left, Vector512 right) => And(left, right); /// /// __m512i _mm512_and_si512 (__m512i a, __m512i b) - /// VPAND zmm, zmm, zmm/m512 + /// VPANDD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 And(Vector512 left, Vector512 right) => And(left, right); /// /// __m512i _mm512_and_si512 (__m512i a, __m512i b) - /// VPAND zmm, zmm, zmm/m512 + /// VPANDD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 And(Vector512 left, Vector512 right) => And(left, right); /// /// __m512i _mm512_and_si512 (__m512i a, __m512i b) - /// VPAND zmm, zmm, zmm/m512 + /// VPANDD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 And(Vector512 left, Vector512 right) => And(left, right); /// /// __m512i _mm512_and_si512 (__m512i a, __m512i b) - /// VPAND zmm, zmm, zmm/m512 + /// VPANDD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 And(Vector512 left, Vector512 right) => And(left, right); /// /// __m512i _mm512_and_si512 (__m512i a, __m512i b) - /// VPAND zmm, zmm, zmm/m512 + /// VPANDQ zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst /// public static Vector512 And(Vector512 left, Vector512 right) => And(left, right); /// /// __m512i _mm512_and_si512 (__m512i a, __m512i b) - /// VPAND zmm, zmm, zmm/m512 + /// VPANDQ zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst /// public static Vector512 And(Vector512 left, Vector512 right) => And(left, right); /// /// __m512 _mm512_and_ps (__m512 a, __m512 b) - /// VANDPS zmm, zmm, zmm/m512 + /// VANDPS zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 And(Vector512 left, Vector512 right) => And(left, right); /// /// __m512d _mm512_and_pd (__m512d a, __m512d b) - /// VANDPD zmm, zmm, zmm/m512 + /// VANDPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst /// public static Vector512 And(Vector512 left, Vector512 right) => And(left, right); /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) - /// VPANDN zmm, zmm, zmm/m512 + /// VPANDND zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// - public static Vector512 AndNot(Vector512 left, Vector512 right) => AndNot(left, right); + public static Vector512 AndNot(Vector512 left, Vector512 right) => AndNot(left, right); /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) - /// VPANDN zmm, zmm, zmm/m512 + /// VPANDND zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// - public static Vector512 AndNot(Vector512 left, Vector512 right) => AndNot(left, right); + public static Vector512 AndNot(Vector512 left, Vector512 right) => AndNot(left, right); /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) - /// VPANDN zmm, zmm, zmm/m512 + /// VPANDND zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 AndNot(Vector512 left, Vector512 right) => AndNot(left, right); /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) - /// VPANDN zmm, zmm, zmm/m512 + /// 
VPANDND zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 AndNot(Vector512 left, Vector512 right) => AndNot(left, right); /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) - /// VPANDN zmm, zmm, zmm/m512 + /// VPANDND zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 AndNot(Vector512 left, Vector512 right) => AndNot(left, right); /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) - /// VPANDN zmm, zmm, zmm/m512 + /// VPANDND zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 AndNot(Vector512 left, Vector512 right) => AndNot(left, right); /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) - /// VPANDN zmm, zmm, zmm/m512 + /// VPANDNQ zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst /// public static Vector512 AndNot(Vector512 left, Vector512 right) => AndNot(left, right); /// /// __m512i _mm512_andnot_si512 (__m512i a, __m512i b) - /// VPANDN zmm, zmm, zmm/m512 + /// VPANDNQ zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst /// public static Vector512 AndNot(Vector512 left, Vector512 right) => AndNot(left, right); /// /// __m512 _mm512_andnot_ps (__m512 a, __m512 b) - /// VANDNPS zmm, zmm, zmm/m512 + /// VANDNPS zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 AndNot(Vector512 left, Vector512 right) => AndNot(left, right); /// /// __m512d _mm512_andnot_pd (__m512d a, __m512d b) - /// VANDNPD zmm, zmm, zmm/m512 + /// VANDNPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst /// public static Vector512 AndNot(Vector512 left, Vector512 right) => AndNot(left, right); - /// - /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) - /// VMOVDQU32 zmm, m512 - /// - public static unsafe Vector512 LoadVector512(sbyte* address) => LoadVector512(address); - /// - /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) - /// VMOVDQU32 zmm, m512 - /// - public static unsafe Vector512 LoadVector512(byte* address) => LoadVector512(address); - /// - /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) - /// VMOVDQU32 zmm, m512 - /// - public static unsafe Vector512 LoadVector512(short* address) => LoadVector512(address); - /// - /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) - /// VMOVDQU32 zmm, m512 - /// - public static unsafe Vector512 LoadVector512(ushort* address) => LoadVector512(address); - /// - /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) - /// VMOVDQU32 zmm, m512 - /// - public static unsafe Vector512 LoadVector512(int* address) => LoadVector512(address); - /// - /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) - /// VMOVDQU32 zmm, m512 - /// - public static unsafe Vector512 LoadVector512(uint* address) => LoadVector512(address); - /// - /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) - /// VMOVDQU64 zmm, m512 - /// - public static unsafe Vector512 LoadVector512(long* address) => LoadVector512(address); - /// - /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) - /// VMOVDQU64 zmm, m512 - /// - public static unsafe Vector512 LoadVector512(ulong* address) => LoadVector512(address); - /// - /// __m512 _mm512_loadu_ps (float const * mem_addr) - /// VMOVUPS zmm, zmm/m512 - /// - public static unsafe Vector512 LoadVector512(float* address) => LoadVector512(address); - /// - /// __m512d _mm512_loadu_pd (double const * mem_addr) - /// VMOVUPD zmm, zmm/m512 - /// - public static unsafe Vector512 LoadVector512(double* address) => LoadVector512(address); - /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) - /// VMOVDQA32 zmm, m512 + /// VMOVDQA32 zmm1 {k1}{z}, m512 /// - public 
static unsafe Vector512 LoadAlignedVector512(sbyte* address) => LoadAlignedVector512(address); + public static unsafe Vector512 LoadAlignedVector512(byte* address) => LoadAlignedVector512(address); /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) - /// VMOVDQA32 zmm, m512 + /// VMOVDQA32 zmm1 {k1}{z}, m512 /// - public static unsafe Vector512 LoadAlignedVector512(byte* address) => LoadAlignedVector512(address); + public static unsafe Vector512 LoadAlignedVector512(sbyte* address) => LoadAlignedVector512(address); /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) - /// VMOVDQA32 zmm, m512 + /// VMOVDQA32 zmm1 {k1}{z}, m512 /// public static unsafe Vector512 LoadAlignedVector512(short* address) => LoadAlignedVector512(address); /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) - /// VMOVDQA32 zmm, m512 + /// VMOVDQA32 zmm1 {k1}{z}, m512 /// public static unsafe Vector512 LoadAlignedVector512(ushort* address) => LoadAlignedVector512(address); /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) - /// VMOVDQA32 zmm, m512 + /// VMOVDQA32 zmm1 {k1}{z}, m512 /// public static unsafe Vector512 LoadAlignedVector512(int* address) => LoadAlignedVector512(address); /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) - /// VMOVDQA32 zmm, m512 + /// VMOVDQA32 zmm1 {k1}{z}, m512 /// public static unsafe Vector512 LoadAlignedVector512(uint* address) => LoadAlignedVector512(address); /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) - /// VMOVDQA64 zmm, m512 + /// VMOVDQA64 zmm1 {k1}{z}, m512 /// public static unsafe Vector512 LoadAlignedVector512(long* address) => LoadAlignedVector512(address); /// /// __m512i _mm512_load_si512 (__m512i const * mem_addr) - /// VMOVDQA64 zmm, m512 + /// VMOVDQA64 zmm1 {k1}{z}, m512 /// public static unsafe Vector512 LoadAlignedVector512(ulong* address) => LoadAlignedVector512(address); /// /// __m512 _mm512_load_ps (float const * mem_addr) - /// VMOVAPS zmm, zmm/m512 + /// VMOVAPS zmm1 {k1}{z}, m512 /// public static unsafe Vector512 LoadAlignedVector512(float* address) => LoadAlignedVector512(address); /// /// __m512d _mm512_load_pd (double const * mem_addr) - /// VMOVAPD zmm, zmm/m512 + /// VMOVAPD zmm1 {k1}{z}, m512 /// public static unsafe Vector512 LoadAlignedVector512(double* address) => LoadAlignedVector512(address); /// - /// __m512 _mm512_or_ps (__m512 a, __m512 b) - /// VORPS zmm, zmm, zmm/m512 + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU8 zmm1 {k1}{z}, m512 /// - public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); + public static unsafe Vector512 LoadVector512(sbyte* address) => LoadVector512(address); /// - /// __m512d _mm512_or_pd (__m512d a, __m512d b) - /// VORPD zmm, zmm, zmm/m512 + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU8 zmm1 {k1}{z}, m512 /// - public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); + public static unsafe Vector512 LoadVector512(byte* address) => LoadVector512(address); /// - /// __m512i _mm512_or_si512 (__m512i a, __m512i b) - /// VPOR zmm, zmm, zmm/m512 + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU16 zmm1 {k1}{z}, m512 /// - public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); + public static unsafe Vector512 LoadVector512(short* address) => LoadVector512(address); + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU16 zmm1 {k1}{z}, m512 + /// + public static unsafe Vector512 
LoadVector512(ushort* address) => LoadVector512(address); + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU32 zmm1 {k1}{z}, m512 + /// + public static unsafe Vector512 LoadVector512(int* address) => LoadVector512(address); + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU32 zmm1 {k1}{z}, m512 + /// + public static unsafe Vector512 LoadVector512(uint* address) => LoadVector512(address); + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU64 zmm1 {k1}{z}, m512 + /// + public static unsafe Vector512 LoadVector512(long* address) => LoadVector512(address); + /// + /// __m512i _mm512_loadu_si512 (__m512i const * mem_addr) + /// VMOVDQU64 zmm1 {k1}{z}, m512 + /// + public static unsafe Vector512 LoadVector512(ulong* address) => LoadVector512(address); + /// + /// __m512 _mm512_loadu_ps (float const * mem_addr) + /// VMOVUPS zmm1 {k1}{z}, m512 + /// + public static unsafe Vector512 LoadVector512(float* address) => LoadVector512(address); + /// + /// __m512d _mm512_loadu_pd (double const * mem_addr) + /// VMOVUPD zmm1 {k1}{z}, m512 + /// + public static unsafe Vector512 LoadVector512(double* address) => LoadVector512(address); + /// /// __m512i _mm512_or_si512 (__m512i a, __m512i b) - /// VPOR zmm, zmm, zmm/m512 + /// VPORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); /// /// __m512i _mm512_or_si512 (__m512i a, __m512i b) - /// VPOR zmm, zmm, zmm/m512 + /// VPORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst + /// + public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); + /// + /// __m512i _mm512_or_si512 (__m512i a, __m512i b) + /// VPORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); /// /// __m512i _mm512_or_si512 (__m512i a, __m512i b) - /// VPOR zmm, zmm, zmm/m512 + /// VPORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); /// /// __m512i _mm512_or_si512 (__m512i a, __m512i b) - /// VPOR zmm, zmm, zmm/m512 + /// VPORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); /// /// __m512i _mm512_or_si512 (__m512i a, __m512i b) - /// VPOR zmm, zmm, zmm/m512 + /// VPORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); /// /// __m512i _mm512_or_si512 (__m512i a, __m512i b) - /// VPOR zmm, zmm, zmm/m512 + /// VPORQ zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst /// public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); /// /// __m512i _mm512_or_si512 (__m512i a, __m512i b) - /// VPOR zmm, zmm, zmm/m512 + /// VPORQ zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst /// public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); + /// + /// __m512 _mm512_or_ps (__m512 a, __m512 b) + /// VORPS zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst + /// + public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); + /// + /// __m512d _mm512_or_pd (__m512d a, __m512d b) + /// VORPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst + /// + public static Vector512 Or(Vector512 left, Vector512 right) => Or(left, right); /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQU32 m512, zmm + /// VMOVDQU8 m512 {k1}{z}, zmm1 /// public static unsafe void Store(sbyte* address, Vector512 source) => Store(address, 
source); /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQU32 m512, zmm + /// VMOVDQU8 m512 {k1}{z}, zmm1 /// public static unsafe void Store(byte* address, Vector512 source) => Store(address, source); /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQU32 m512, zmm + /// VMOVDQU16 m512 {k1}{z}, zmm1 /// public static unsafe void Store(short* address, Vector512 source) => Store(address, source); /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQU32 m512, zmm + /// VMOVDQU16 m512 {k1}{z}, zmm1 /// public static unsafe void Store(ushort* address, Vector512 source) => Store(address, source); /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQU32 m512, zmm + /// VMOVDQU32 m512 {k1}{z}, zmm1 /// public static unsafe void Store(int* address, Vector512 source) => Store(address, source); /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQU32 m512, zmm + /// VMOVDQU32 m512 {k1}{z}, zmm1 /// public static unsafe void Store(uint* address, Vector512 source) => Store(address, source); /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQU64 m512, zmm + /// VMOVDQU64 m512 {k1}{z}, zmm1 /// public static unsafe void Store(long* address, Vector512 source) => Store(address, source); /// /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQU64 m512, zmm + /// VMOVDQU64 m512 {k1}{z}, zmm1 /// public static unsafe void Store(ulong* address, Vector512 source) => Store(address, source); /// /// void _mm512_storeu_ps (float * mem_addr, __m512 a) - /// VMOVUPS m512, zmm + /// VMOVUPS m512 {k1}{z}, zmm1 /// public static unsafe void Store(float* address, Vector512 source) => Store(address, source); /// /// void _mm512_storeu_pd (double * mem_addr, __m512d a) - /// VMOVUPD m512, zmm + /// VMOVUPD m512 {k1}{z}, zmm1 /// public static unsafe void Store(double* address, Vector512 source) => Store(address, source); /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQA32 m512, zmm + /// VMOVDQA32 m512 {k1}{z}, zmm1 /// - public static unsafe void StoreAligned(sbyte* address, Vector512 source) => StoreAligned(address, source); + public static unsafe void StoreAligned(byte* address, Vector512 source) => StoreAligned(address, source); /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQA32 m512, zmm + /// VMOVDQA32 m512 {k1}{z}, zmm1 /// - public static unsafe void StoreAligned(byte* address, Vector512 source) => StoreAligned(address, source); + public static unsafe void StoreAligned(sbyte* address, Vector512 source) => StoreAligned(address, source); /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQA32 m512, zmm + /// VMOVDQA32 m512 {k1}{z}, zmm1 /// public static unsafe void StoreAligned(short* address, Vector512 source) => StoreAligned(address, source); /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQA32 m512, zmm + /// VMOVDQA32 m512 {k1}{z}, zmm1 /// public static unsafe void StoreAligned(ushort* address, Vector512 source) => StoreAligned(address, source); /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQA32 m512, zmm + /// VMOVDQA32 m512 {k1}{z}, zmm1 /// public static unsafe void StoreAligned(int* address, Vector512 source) => StoreAligned(address, source); /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQA32 m512, zmm + /// VMOVDQA32 m512 {k1}{z}, zmm1 /// public static unsafe void 
StoreAligned(uint* address, Vector512 source) => StoreAligned(address, source); /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQA64 m512, zmm + /// VMOVDQA64 m512 {k1}{z}, zmm1 /// public static unsafe void StoreAligned(long* address, Vector512 source) => StoreAligned(address, source); /// /// void _mm512_store_si512 (__m512i * mem_addr, __m512i a) - /// VMOVDQA64 m512, zmm + /// VMOVDQA64 m512 {k1}{z}, zmm1 /// public static unsafe void StoreAligned(ulong* address, Vector512 source) => StoreAligned(address, source); /// /// void _mm512_store_ps (float * mem_addr, __m512 a) - /// VMOVAPS m512, zmm + /// VMOVAPS m512 {k1}{z}, zmm1 /// public static unsafe void StoreAligned(float* address, Vector512 source) => StoreAligned(address, source); /// /// void _mm512_store_pd (double * mem_addr, __m512d a) - /// VMOVAPD m512, zmm + /// VMOVAPD m512 {k1}{z}, zmm1 /// public static unsafe void StoreAligned(double* address, Vector512 source) => StoreAligned(address, source); /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) - /// VMOVNTDQ m512, zmm + /// VMOVNTDQ m512, zmm1 /// public static unsafe void StoreAlignedNonTemporal(sbyte* address, Vector512 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) - /// VMOVNTDQ m512, zmm + /// VMOVNTDQ m512, zmm1 /// public static unsafe void StoreAlignedNonTemporal(byte* address, Vector512 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) - /// VMOVNTDQ m512, zmm + /// VMOVNTDQ m512, zmm1 /// public static unsafe void StoreAlignedNonTemporal(short* address, Vector512 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) - /// VMOVNTDQ m512, zmm + /// VMOVNTDQ m512, zmm1 /// public static unsafe void StoreAlignedNonTemporal(ushort* address, Vector512 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) - /// VMOVNTDQ m512, zmm + /// VMOVNTDQ m512, zmm1 /// public static unsafe void StoreAlignedNonTemporal(int* address, Vector512 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) - /// VMOVNTDQ m512, zmm + /// VMOVNTDQ m512, zmm1 /// public static unsafe void StoreAlignedNonTemporal(uint* address, Vector512 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) - /// VMOVNTDQ m512, zmm + /// VMOVNTDQ m512, zmm1 /// public static unsafe void StoreAlignedNonTemporal(long* address, Vector512 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm512_stream_si512 (__m512i * mem_addr, __m512i a) - /// VMOVNTDQ m512, zmm + /// VMOVNTDQ m512, zmm1 /// public static unsafe void StoreAlignedNonTemporal(ulong* address, Vector512 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm512_stream_ps (float * mem_addr, __m512 a) - /// MOVNTPS m512, zmm + /// VMOVNTPS m512, zmm1 /// public static unsafe void StoreAlignedNonTemporal(float* address, Vector512 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm512_stream_pd (double * mem_addr, __m512d a) - /// MOVNTPD m512, zmm + /// VMOVNTPD m512, zmm1 /// public static unsafe void StoreAlignedNonTemporal(double* address, Vector512 source) => StoreAlignedNonTemporal(address, source); - /// - /// __m512 _mm512_xor_ps (__m512 a, __m512 b) - /// VXORPS zmm,
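To make the three store families concrete, a hedged sketch (illustrative names, assumes Avx512F.IsSupported): Store has no alignment requirement, StoreAligned requires a 64-byte-aligned destination, and StoreAlignedNonTemporal additionally uses a non-temporal hint to bypass the cache hierarchy, which can help for large streaming writes that will not be re-read soon.

using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

class StoreSketch
{
    static unsafe void Main()
    {
        if (!Avx512F.IsSupported) return;

        Vector512<float> v = Vector512.Create(1.0f);
        float* p = (float*)NativeMemory.AlignedAlloc(byteCount: 64, alignment: 64);

        Avx512F.Store(p, v);                   // VMOVUPS: any address is fine
        Avx512F.StoreAligned(p, v);            // VMOVAPS: p must be 64-byte aligned
        Avx512F.StoreAlignedNonTemporal(p, v); // VMOVNTPS: aligned, with a non-temporal (cache-bypassing) hint

        NativeMemory.AlignedFree(p);
    }
}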
zmm, zmm/m512 - /// - public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); - /// - /// __m512d _mm512_xor_pd (__m512d a, __m512d b) - /// VXORPS zmm, zmm, zmm/m512 - /// - public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) - /// VPXOR zmm, zmm, zmm/m512 + /// VPXORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// - public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); + public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) - /// VPXOR zmm, zmm, zmm/m512 + /// VPXORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// - public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); + public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) - /// VPXOR zmm, zmm, zmm/m512 + /// VPXORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) - /// VPXOR zmm, zmm, zmm/m512 + /// VPXORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) - /// VPXOR zmm, zmm, zmm/m512 + /// VPXORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) - /// VPXOR zmm, zmm, zmm/m512 + /// VPXORD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst /// public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) - /// VPXOR zmm, zmm, zmm/m512 + /// VPXORQ zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst /// public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); /// /// __m512i _mm512_xor_si512 (__m512i a, __m512i b) - /// VPXOR zmm, zmm, zmm/m512 + /// VPXORQ zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst /// public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); + /// + /// __m512 _mm512_xor_ps (__m512 a, __m512 b) + /// VXORPS zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst + /// + public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); + /// + /// __m512d _mm512_xor_pd (__m512d a, __m512d b) + /// VXORPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst + /// + public static Vector512 Xor(Vector512 left, Vector512 right) => Xor(left, right); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi1.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi1.PlatformNotSupported.cs index a9dc857b623d69..46fb386a395bf5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi1.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi1.PlatformNotSupported.cs @@ -25,49 +25,49 @@ internal X64() { } /// /// unsigned __int64 _andn_u64 (unsigned __int64 a, unsigned __int64 b) - /// ANDN r64a, r64b, reg/m64 + /// ANDN r64a, r64b, r/m64 /// This intrinsic is only available on 64-bit processes /// public static ulong AndNot(ulong left, ulong right) { throw new PlatformNotSupportedException(); } /// /// unsigned __int64 _bextr_u64 (unsigned __int64 a, unsigned int start, unsigned int len) - /// 
BEXTR r64a, reg/m64, r64b + /// BEXTR r64a, r/m64, r64b /// This intrinsic is only available on 64-bit processes /// public static ulong BitFieldExtract(ulong value, byte start, byte length) { throw new PlatformNotSupportedException(); } /// /// unsigned __int64 _bextr2_u64 (unsigned __int64 a, unsigned __int64 control) - /// BEXTR r64a, reg/m64, r64b + /// BEXTR r64a, r/m64, r64b /// This intrinsic is only available on 64-bit processes /// public static ulong BitFieldExtract(ulong value, ushort control) { throw new PlatformNotSupportedException(); } /// /// unsigned __int64 _blsi_u64 (unsigned __int64 a) - /// BLSI reg, reg/m64 + /// BLSI r64, r/m64 /// This intrinsic is only available on 64-bit processes /// public static ulong ExtractLowestSetBit(ulong value) { throw new PlatformNotSupportedException(); } /// /// unsigned __int64 _blsmsk_u64 (unsigned __int64 a) - /// BLSMSK reg, reg/m64 + /// BLSMSK r64, r/m64 /// This intrinsic is only available on 64-bit processes /// public static ulong GetMaskUpToLowestSetBit(ulong value) { throw new PlatformNotSupportedException(); } /// /// unsigned __int64 _blsr_u64 (unsigned __int64 a) - /// BLSR reg, reg/m64 + /// BLSR r64, r/m64 /// This intrinsic is only available on 64-bit processes /// public static ulong ResetLowestSetBit(ulong value) { throw new PlatformNotSupportedException(); } /// /// __int64 _mm_tzcnt_64 (unsigned __int64 a) - /// TZCNT reg, reg/m64 + /// TZCNT r64, r/m64 /// This intrinsic is only available on 64-bit processes /// public static ulong TrailingZeroCount(ulong value) { throw new PlatformNotSupportedException(); } @@ -75,43 +75,43 @@ internal X64() { } /// /// unsigned int _andn_u32 (unsigned int a, unsigned int b) - /// ANDN r32a, r32b, reg/m32 + /// ANDN r32a, r32b, r/m32 /// public static uint AndNot(uint left, uint right) { throw new PlatformNotSupportedException(); } /// /// unsigned int _bextr_u32 (unsigned int a, unsigned int start, unsigned int len) - /// BEXTR r32a, reg/m32, r32b + /// BEXTR r32a, r/m32, r32b /// public static uint BitFieldExtract(uint value, byte start, byte length) { throw new PlatformNotSupportedException(); } /// /// unsigned int _bextr2_u32 (unsigned int a, unsigned int control) - /// BEXTR r32a, reg/m32, r32b + /// BEXTR r32a, r/m32, r32b /// public static uint BitFieldExtract(uint value, ushort control) { throw new PlatformNotSupportedException(); } /// /// unsigned int _blsi_u32 (unsigned int a) - /// BLSI reg, reg/m32 + /// BLSI r32, r/m32 /// public static uint ExtractLowestSetBit(uint value) { throw new PlatformNotSupportedException(); } /// /// unsigned int _blsmsk_u32 (unsigned int a) - /// BLSMSK reg, reg/m32 + /// BLSMSK r32, r/m32 /// public static uint GetMaskUpToLowestSetBit(uint value) { throw new PlatformNotSupportedException(); } /// /// unsigned int _blsr_u32 (unsigned int a) - /// BLSR reg, reg/m32 + /// BLSR r32, r/m32 /// public static uint ResetLowestSetBit(uint value) { throw new PlatformNotSupportedException(); } /// /// int _mm_tzcnt_32 (unsigned int a) - /// TZCNT reg, reg/m32 + /// TZCNT r32, r/m32 /// public static uint TrailingZeroCount(uint value) { throw new PlatformNotSupportedException(); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi1.cs index 044ec940e75e08..eec6534a14232d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi1.cs +++ 
b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi1.cs @@ -25,49 +25,49 @@ internal X64() { } /// /// unsigned __int64 _andn_u64 (unsigned __int64 a, unsigned __int64 b) - /// ANDN r64a, r64b, reg/m64 + /// ANDN r64a, r64b, r/m64 /// This intrinsic is only available on 64-bit processes /// public static ulong AndNot(ulong left, ulong right) => AndNot(left, right); /// /// unsigned __int64 _bextr_u64 (unsigned __int64 a, unsigned int start, unsigned int len) - /// BEXTR r64a, reg/m64, r64b + /// BEXTR r64a, r/m64, r64b /// This intrinsic is only available on 64-bit processes /// public static ulong BitFieldExtract(ulong value, byte start, byte length) => BitFieldExtract(value, (ushort)(start | (length << 8))); /// /// unsigned __int64 _bextr2_u64 (unsigned __int64 a, unsigned __int64 control) - /// BEXTR r64a, reg/m64, r64b + /// BEXTR r64a, r/m64, r64b /// This intrinsic is only available on 64-bit processes /// public static ulong BitFieldExtract(ulong value, ushort control) => BitFieldExtract(value, control); /// /// unsigned __int64 _blsi_u64 (unsigned __int64 a) - /// BLSI reg, reg/m64 + /// BLSI r64, r/m64 /// This intrinsic is only available on 64-bit processes /// public static ulong ExtractLowestSetBit(ulong value) => ExtractLowestSetBit(value); /// /// unsigned __int64 _blsmsk_u64 (unsigned __int64 a) - /// BLSMSK reg, reg/m64 + /// BLSMSK r64, r/m64 /// This intrinsic is only available on 64-bit processes /// public static ulong GetMaskUpToLowestSetBit(ulong value) => GetMaskUpToLowestSetBit(value); /// /// unsigned __int64 _blsr_u64 (unsigned __int64 a) - /// BLSR reg, reg/m64 + /// BLSR r64, r/m64 /// This intrinsic is only available on 64-bit processes /// public static ulong ResetLowestSetBit(ulong value) => ResetLowestSetBit(value); /// /// __int64 _mm_tzcnt_64 (unsigned __int64 a) - /// TZCNT reg, reg/m64 + /// TZCNT r64, r/m64 /// This intrinsic is only available on 64-bit processes /// public static ulong TrailingZeroCount(ulong value) => TrailingZeroCount(value); @@ -75,43 +75,43 @@ internal X64() { } /// /// unsigned int _andn_u32 (unsigned int a, unsigned int b) - /// ANDN r32a, r32b, reg/m32 + /// ANDN r32a, r32b, r/m32 /// public static uint AndNot(uint left, uint right) => AndNot(left, right); /// /// unsigned int _bextr_u32 (unsigned int a, unsigned int start, unsigned int len) - /// BEXTR r32a, reg/m32, r32b + /// BEXTR r32a, r/m32, r32b /// public static uint BitFieldExtract(uint value, byte start, byte length) => BitFieldExtract(value, (ushort)(start | (length << 8))); /// /// unsigned int _bextr2_u32 (unsigned int a, unsigned int control) - /// BEXTR r32a, reg/m32, r32b + /// BEXTR r32a, r/m32, r32b /// public static uint BitFieldExtract(uint value, ushort control) => BitFieldExtract(value, control); /// /// unsigned int _blsi_u32 (unsigned int a) - /// BLSI reg, reg/m32 + /// BLSI r32, r/m32 /// public static uint ExtractLowestSetBit(uint value) => ExtractLowestSetBit(value); /// /// unsigned int _blsmsk_u32 (unsigned int a) - /// BLSMSK reg, reg/m32 + /// BLSMSK r32, r/m32 /// public static uint GetMaskUpToLowestSetBit(uint value) => GetMaskUpToLowestSetBit(value); /// /// unsigned int _blsr_u32 (unsigned int a) - /// BLSR reg, reg/m32 + /// BLSR r32, r/m32 /// public static uint ResetLowestSetBit(uint value) => ResetLowestSetBit(value); /// /// int _mm_tzcnt_32 (unsigned int a) - /// TZCNT reg, reg/m32 + /// TZCNT r32, r/m32 /// public static uint TrailingZeroCount(uint value) => TrailingZeroCount(value); } diff --git 
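The BMI1 helpers above map to short branch-free bit idioms; a usage sketch, not from the patch, with the classic equivalents and expected results in comments (values illustrative):

using System;
using System.Runtime.Intrinsics.X86;

class Bmi1Sketch
{
    static void Main()
    {
        if (!Bmi1.IsSupported) return;

        uint v = 0b1011_0000; // 176
        Console.WriteLine(Bmi1.BitFieldExtract(v, start: 4, length: 4)); // 11  ((v >> 4) & 0xF)
        Console.WriteLine(Bmi1.ExtractLowestSetBit(v));                  // 16  (v & (uint)-v)
        Console.WriteLine(Bmi1.GetMaskUpToLowestSetBit(v));              // 31  (v ^ (v - 1))
        Console.WriteLine(Bmi1.ResetLowestSetBit(v));                    // 160 (v & (v - 1))
        Console.WriteLine(Bmi1.TrailingZeroCount(v));                    // 4
    }
}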
a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi2.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi2.PlatformNotSupported.cs index 8d8fdcf2df2ddb..a412768afe76a8 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi2.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi2.PlatformNotSupported.cs @@ -25,14 +25,14 @@ internal X64() { } /// /// unsigned __int64 _bzhi_u64 (unsigned __int64 a, unsigned int index) - /// BZHI r64a, reg/m32, r64b + /// BZHI r64a, r/m64, r64b /// This intrinsic is only available on 64-bit processes /// public static ulong ZeroHighBits(ulong value, ulong index) { throw new PlatformNotSupportedException(); } /// /// unsigned __int64 _mulx_u64 (unsigned __int64 a, unsigned __int64 b, unsigned __int64* hi) - /// MULX r64a, r64b, reg/m64 + /// MULX r64a, r64b, r/m64 /// The above native signature does not directly correspond to the managed signature. /// This intrinsic is only available on 64-bit processes /// @@ -40,7 +40,7 @@ internal X64() { } /// /// unsigned __int64 _mulx_u64 (unsigned __int64 a, unsigned __int64 b, unsigned __int64* hi) - /// MULX r64a, r64b, reg/m64 + /// MULX r64a, r64b, r/m64 /// The above native signature does not directly correspond to the managed signature. /// This intrinsic is only available on 64-bit processes /// @@ -48,14 +48,14 @@ internal X64() { } /// /// unsigned __int64 _pdep_u64 (unsigned __int64 a, unsigned __int64 mask) - /// PDEP r64a, r64b, reg/m64 + /// PDEP r64a, r64b, r/m64 /// This intrinsic is only available on 64-bit processes /// public static ulong ParallelBitDeposit(ulong value, ulong mask) { throw new PlatformNotSupportedException(); } /// /// unsigned __int64 _pext_u64 (unsigned __int64 a, unsigned __int64 mask) - /// PEXT r64a, r64b, reg/m64 + /// PEXT r64a, r64b, r/m64 /// This intrinsic is only available on 64-bit processes /// public static ulong ParallelBitExtract(ulong value, ulong mask) { throw new PlatformNotSupportedException(); } @@ -63,33 +63,33 @@ internal X64() { } /// /// unsigned int _bzhi_u32 (unsigned int a, unsigned int index) - /// BZHI r32a, reg/m32, r32b + /// BZHI r32a, r/m32, r32b /// public static uint ZeroHighBits(uint value, uint index) { throw new PlatformNotSupportedException(); } /// /// unsigned int _mulx_u32 (unsigned int a, unsigned int b, unsigned int* hi) - /// MULX r32a, r32b, reg/m32 + /// MULX r32a, r32b, r/m32 /// The above native signature does not directly correspond to the managed signature. /// public static uint MultiplyNoFlags(uint left, uint right) { throw new PlatformNotSupportedException(); } /// /// unsigned int _mulx_u32 (unsigned int a, unsigned int b, unsigned int* hi) - /// MULX r32a, r32b, reg/m32 + /// MULX r32a, r32b, r/m32 /// The above native signature does not directly correspond to the managed signature. 
/// public static unsafe uint MultiplyNoFlags(uint left, uint right, uint* low) { throw new PlatformNotSupportedException(); } /// /// unsigned int _pdep_u32 (unsigned int a, unsigned int mask) - /// PDEP r32a, r32b, reg/m32 + /// PDEP r32a, r32b, r/m32 /// public static uint ParallelBitDeposit(uint value, uint mask) { throw new PlatformNotSupportedException(); } /// /// unsigned int _pext_u32 (unsigned int a, unsigned int mask) - /// PEXT r32a, r32b, reg/m32 + /// PEXT r32a, r32b, r/m32 /// public static uint ParallelBitExtract(uint value, uint mask) { throw new PlatformNotSupportedException(); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi2.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi2.cs index 2ed73efad2d861..31ac3531e3bded 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi2.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi2.cs @@ -25,14 +25,14 @@ internal X64() { } /// /// unsigned __int64 _bzhi_u64 (unsigned __int64 a, unsigned int index) - /// BZHI r64a, reg/m32, r64b + /// BZHI r64a, r/m64, r64b /// This intrinsic is only available on 64-bit processes /// public static ulong ZeroHighBits(ulong value, ulong index) => ZeroHighBits(value, index); /// /// unsigned __int64 _mulx_u64 (unsigned __int64 a, unsigned __int64 b, unsigned __int64* hi) - /// MULX r64a, r64b, reg/m64 + /// MULX r64a, r64b, r/m64 /// The above native signature does not directly correspond to the managed signature. /// This intrinsic is only available on 64-bit processes /// @@ -40,7 +40,7 @@ internal X64() { } /// /// unsigned __int64 _mulx_u64 (unsigned __int64 a, unsigned __int64 b, unsigned __int64* hi) - /// MULX r64a, r64b, reg/m64 + /// MULX r64a, r64b, r/m64 /// The above native signature does not directly correspond to the managed signature. /// This intrinsic is only available on 64-bit processes /// @@ -48,14 +48,14 @@ internal X64() { } /// /// unsigned __int64 _pdep_u64 (unsigned __int64 a, unsigned __int64 mask) - /// PDEP r64a, r64b, reg/m64 + /// PDEP r64a, r64b, r/m64 /// This intrinsic is only available on 64-bit processes /// public static ulong ParallelBitDeposit(ulong value, ulong mask) => ParallelBitDeposit(value, mask); /// /// unsigned __int64 _pext_u64 (unsigned __int64 a, unsigned __int64 mask) - /// PEXT r64a, r64b, reg/m64 + /// PEXT r64a, r64b, r/m64 /// This intrinsic is only available on 64-bit processes /// public static ulong ParallelBitExtract(ulong value, ulong mask) => ParallelBitExtract(value, mask); @@ -63,33 +63,33 @@ internal X64() { } /// /// unsigned int _bzhi_u32 (unsigned int a, unsigned int index) - /// BZHI r32a, reg/m32, r32b + /// BZHI r32a, r/m32, r32b /// public static uint ZeroHighBits(uint value, uint index) => ZeroHighBits(value, index); /// /// unsigned int _mulx_u32 (unsigned int a, unsigned int b, unsigned int* hi) - /// MULX r32a, r32b, reg/m32 + /// MULX r32a, r32b, r/m32 /// The above native signature does not directly correspond to the managed signature. /// public static uint MultiplyNoFlags(uint left, uint right) => MultiplyNoFlags(left, right); /// /// unsigned int _mulx_u32 (unsigned int a, unsigned int b, unsigned int* hi) - /// MULX r32a, r32b, reg/m32 + /// MULX r32a, r32b, r/m32 /// The above native signature does not directly correspond to the managed signature. 
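A usage sketch for the BMI2 surface (illustrative values, not part of the patch): PEXT gathers the bits of the value selected by the mask into the low bits, PDEP is its inverse and scatters low bits back out to the mask positions, BZHI clears all bits at and above an index, and MULX forms a full-width product without touching the flags.

using System;
using System.Runtime.Intrinsics.X86;

class Bmi2Sketch
{
    static unsafe void Main()
    {
        if (!Bmi2.IsSupported) return;

        uint mask = 0b1010_1010;
        Console.WriteLine(Bmi2.ParallelBitExtract(0b1110_0110u, mask)); // 13  (0b1101)
        Console.WriteLine(Bmi2.ParallelBitDeposit(0b1011u, mask));      // 138 (0b1000_1010)

        Console.WriteLine(Bmi2.ZeroHighBits(0xFFu, 4)); // 15 (bits 4 and up cleared)

        // MULX: 64-bit product of two 32-bit values; high half returned, low half via pointer.
        uint low = 0;
        uint high = Bmi2.MultiplyNoFlags(0x1234_5678u, 0x9ABC_DEF0u, &low);
        Console.WriteLine($"{high:X8}:{low:X8}");
    }
}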
/// public static unsafe uint MultiplyNoFlags(uint left, uint right, uint* low) => MultiplyNoFlags(left, right, low); /// /// unsigned int _pdep_u32 (unsigned int a, unsigned int mask) - /// PDEP r32a, r32b, reg/m32 + /// PDEP r32a, r32b, r/m32 /// public static uint ParallelBitDeposit(uint value, uint mask) => ParallelBitDeposit(value, mask); /// /// unsigned int _pext_u32 (unsigned int a, unsigned int mask) - /// PEXT r32a, r32b, reg/m32 + /// PEXT r32a, r32b, r/m32 /// public static uint ParallelBitExtract(uint value, uint mask) => ParallelBitExtract(value, mask); } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Fma.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Fma.PlatformNotSupported.cs index 64b3024a56f8b0..398f55f3c6b068 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Fma.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Fma.PlatformNotSupported.cs @@ -26,171 +26,203 @@ internal X64() { } /// /// __m128 _mm_fmadd_ps (__m128 a, __m128 b, __m128 c) - /// VFMADDPS xmm, xmm, xmm/m128 + /// VFMADDPS xmm1, xmm2, xmm3/m128 + /// VFMADDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 MultiplyAdd(Vector128 a, Vector128 b, Vector128 c) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_fmadd_pd (__m128d a, __m128d b, __m128d c) - /// VFMADDPD xmm, xmm, xmm/m128 + /// VFMADDPD xmm1, xmm2, xmm3/m128 + /// VFMADDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 MultiplyAdd(Vector128 a, Vector128 b, Vector128 c) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_fmadd_ps (__m256 a, __m256 b, __m256 c) - /// VFMADDPS ymm, ymm, ymm/m256 + /// VFMADDPS ymm1, ymm2, ymm3/m256 + /// VFMADDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 MultiplyAdd(Vector256 a, Vector256 b, Vector256 c) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_fmadd_pd (__m256d a, __m256d b, __m256d c) - /// VFMADDPS ymm, ymm, ymm/m256 + /// VFMADDPD ymm1, ymm2, ymm3/m256 + /// VFMADDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 MultiplyAdd(Vector256 a, Vector256 b, Vector256 c) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_fmadd_ss (__m128 a, __m128 b, __m128 c) - /// VFMADDSS xmm, xmm, xmm/m32 + /// VFMADDSS xmm1, xmm2, xmm3/m32 + /// VFMADDSS xmm1 {k1}{z}, xmm2, xmm3/m32{er} /// public static Vector128 MultiplyAddScalar(Vector128 a, Vector128 b, Vector128 c) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_fmadd_sd (__m128d a, __m128d b, __m128d c) - /// VFMADDSS xmm, xmm, xmm/m64 + /// VFMADDSD xmm1, xmm2, xmm3/m64 + /// VFMADDSD xmm1 {k1}{z}, xmm2, xmm3/m64{er} /// public static Vector128 MultiplyAddScalar(Vector128 a, Vector128 b, Vector128 c) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_fmaddsub_ps (__m128 a, __m128 b, __m128 c) - /// VFMADDSUBPS xmm, xmm, xmm/m128 + /// VFMADDSUBPS xmm1, xmm2, xmm3/m128 + /// VFMADDSUBPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 MultiplyAddSubtract(Vector128 a, Vector128 b, Vector128 c) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_fmaddsub_pd (__m128d a, __m128d b, __m128d c) - /// VFMADDSUBPD xmm, xmm, xmm/m128 + /// VFMADDSUBPD xmm1, xmm2, xmm3/m128 + /// VFMADDSUBPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 MultiplyAddSubtract(Vector128 a, Vector128 
b, Vector128 c) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_fmaddsub_ps (__m256 a, __m256 b, __m256 c) - /// VFMADDSUBPS ymm, ymm, ymm/m256 + /// VFMADDSUBPS ymm1, ymm2, ymm3/m256 + /// VFMADDSUBPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 MultiplyAddSubtract(Vector256 a, Vector256 b, Vector256 c) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_fmaddsub_pd (__m256d a, __m256d b, __m256d c) - /// VFMADDSUBPD ymm, ymm, ymm/m256 + /// VFMADDSUBPD ymm1, ymm2, ymm3/m256 + /// VFMADDSUBPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 MultiplyAddSubtract(Vector256 a, Vector256 b, Vector256 c) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_fmsub_ps (__m128 a, __m128 b, __m128 c) - /// VFMSUBPS xmm, xmm, xmm/m128 + /// VFMSUBPS xmm1, xmm2, xmm3/m128 + /// VFMSUBPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 MultiplySubtract(Vector128 a, Vector128 b, Vector128 c) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_fmsub_pd (__m128d a, __m128d b, __m128d c) - /// VFMSUBPS xmm, xmm, xmm/m128 + /// VFMSUBPD xmm1, xmm2, xmm3/m128 + /// VFMSUBPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 MultiplySubtract(Vector128 a, Vector128 b, Vector128 c) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_fmsub_ps (__m256 a, __m256 b, __m256 c) - /// VFMSUBPS ymm, ymm, ymm/m256 + /// VFMSUBPS ymm1, ymm2, ymm3/m256 + /// VFMSUBPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 MultiplySubtract(Vector256 a, Vector256 b, Vector256 c) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_fmsub_pd (__m256d a, __m256d b, __m256d c) - /// VFMSUBPD ymm, ymm, ymm/m256 + /// VFMSUBPD ymm1, ymm2, ymm3/m256 + /// VFMSUBPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 MultiplySubtract(Vector256 a, Vector256 b, Vector256 c) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_fmsub_ss (__m128 a, __m128 b, __m128 c) - /// VFMSUBSS xmm, xmm, xmm/m32 + /// VFMSUBSS xmm1, xmm2, xmm3/m32 + /// VFMSUBSS xmm1 {k1}{z}, xmm2, xmm3/m32{er} /// public static Vector128 MultiplySubtractScalar(Vector128 a, Vector128 b, Vector128 c) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_fmsub_sd (__m128d a, __m128d b, __m128d c) - /// VFMSUBSD xmm, xmm, xmm/m64 + /// VFMSUBSD xmm1, xmm2, xmm3/m64 + /// VFMSUBSD xmm1 {k1}{z}, xmm2, xmm3/m64{er} /// public static Vector128 MultiplySubtractScalar(Vector128 a, Vector128 b, Vector128 c) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_fmsubadd_ps (__m128 a, __m128 b, __m128 c) - /// VFMSUBADDPS xmm, xmm, xmm/m128 + /// VFMSUBADDPS xmm1, xmm2, xmm3/m128 + /// VFMSUBADDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 MultiplySubtractAdd(Vector128 a, Vector128 b, Vector128 c) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_fmsubadd_pd (__m128d a, __m128d b, __m128d c) - /// VFMSUBADDPD xmm, xmm, xmm/m128 + /// VFMSUBADDPD xmm1, xmm2, xmm3/m128 + /// VFMSUBADDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 MultiplySubtractAdd(Vector128 a, Vector128 b, Vector128 c) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_fmsubadd_ps (__m256 a, __m256 b, __m256 c) - /// VFMSUBADDPS ymm, ymm, ymm/m256 + /// VFMSUBADDPS ymm1, ymm2, ymm3/m256 + /// VFMSUBADDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 MultiplySubtractAdd(Vector256 a, 
Vector256 b, Vector256 c) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_fmsubadd_pd (__m256d a, __m256d b, __m256d c) - /// VFMSUBADDPD ymm, ymm, ymm/m256 + /// VFMSUBADDPD ymm1, ymm2, ymm3/m256 + /// VFMSUBADDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 MultiplySubtractAdd(Vector256 a, Vector256 b, Vector256 c) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_fnmadd_ps (__m128 a, __m128 b, __m128 c) - /// VFNMADDPS xmm, xmm, xmm/m128 + /// VFNMADDPS xmm1, xmm2, xmm3/m128 + /// VFNMADDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 MultiplyAddNegated(Vector128 a, Vector128 b, Vector128 c) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_fnmadd_pd (__m128d a, __m128d b, __m128d c) - /// VFNMADDPD xmm, xmm, xmm/m128 + /// VFNMADDPD xmm1, xmm2, xmm3/m128 + /// VFNMADDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 MultiplyAddNegated(Vector128 a, Vector128 b, Vector128 c) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_fnmadd_ps (__m256 a, __m256 b, __m256 c) - /// VFNMADDPS ymm, ymm, ymm/m256 + /// VFNMADDPS ymm1, ymm2, ymm3/m256 + /// VFNMADDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 MultiplyAddNegated(Vector256 a, Vector256 b, Vector256 c) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_fnmadd_pd (__m256d a, __m256d b, __m256d c) - /// VFNMADDPD ymm, ymm, ymm/m256 + /// VFNMADDPD ymm1, ymm2, ymm3/m256 + /// VFNMADDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 MultiplyAddNegated(Vector256 a, Vector256 b, Vector256 c) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_fnmadd_ss (__m128 a, __m128 b, __m128 c) - /// VFNMADDSS xmm, xmm, xmm/m32 + /// VFNMADDSS xmm1, xmm2, xmm3/m32 + /// VFNMADDSS xmm1 {k1}{z}, xmm2, xmm3/m32{er} /// public static Vector128 MultiplyAddNegatedScalar(Vector128 a, Vector128 b, Vector128 c) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_fnmadd_sd (__m128d a, __m128d b, __m128d c) - /// VFNMADDSD xmm, xmm, xmm/m64 + /// VFNMADDSD xmm1, xmm2, xmm3/m64 + /// VFNMADDSD xmm1 {k1}{z}, xmm2, xmm3/m64{er} /// public static Vector128 MultiplyAddNegatedScalar(Vector128 a, Vector128 b, Vector128 c) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_fnmsub_ps (__m128 a, __m128 b, __m128 c) - /// VFNMSUBPS xmm, xmm, xmm/m128 + /// VFNMSUBPS xmm1, xmm2, xmm3/m128 + /// VFNMSUBPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 MultiplySubtractNegated(Vector128 a, Vector128 b, Vector128 c) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_fnmsub_pd (__m128d a, __m128d b, __m128d c) - /// VFNMSUBPD xmm, xmm, xmm/m128 + /// VFNMSUBPD xmm1, xmm2, xmm3/m128 + /// VFNMSUBPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 MultiplySubtractNegated(Vector128 a, Vector128 b, Vector128 c) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_fnmsub_ps (__m256 a, __m256 b, __m256 c) - /// VFNMSUBPS ymm, ymm, ymm/m256 + /// VFNMSUBPS ymm1, ymm2, ymm3/m256 + /// VFNMSUBPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 MultiplySubtractNegated(Vector256 a, Vector256 b, Vector256 c) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_fnmsub_pd (__m256d a, __m256d b, __m256d c) - /// VFNMSUBPD ymm, ymm, ymm/m256 + /// VFNMSUBPD ymm1, ymm2, ymm3/m256 + /// VFNMSUBPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 
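The naming scheme encodes the sign pattern: MultiplyAdd computes (a * b) + c, MultiplySubtract computes (a * b) - c, and the Negated variants negate the product, each lane fused with a single rounding step. A small sketch (illustrative values, not from the patch):

using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

class FmaSketch
{
    static void Main()
    {
        if (!Fma.IsSupported) return;

        Vector128<float> a = Vector128.Create(2.0f);
        Vector128<float> b = Vector128.Create(3.0f);
        Vector128<float> c = Vector128.Create(1.0f);

        Console.WriteLine(Fma.MultiplyAdd(a, b, c));        //  (a * b) + c =  7 per lane
        Console.WriteLine(Fma.MultiplySubtract(a, b, c));   //  (a * b) - c =  5 per lane
        Console.WriteLine(Fma.MultiplyAddNegated(a, b, c)); // -(a * b) + c = -5 per lane
    }
}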
MultiplySubtractNegated(Vector256 a, Vector256 b, Vector256 c) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_fnmsub_ss (__m128 a, __m128 b, __m128 c) - /// VFNMSUBSS xmm, xmm, xmm/m32 + /// VFNMSUBSS xmm1, xmm2, xmm3/m32 + /// VFNMSUBSS xmm1 {k1}{z}, xmm2, xmm3/m32{er} /// public static Vector128 MultiplySubtractNegatedScalar(Vector128 a, Vector128 b, Vector128 c) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_fnmsub_sd (__m128d a, __m128d b, __m128d c) - /// VFNMSUBSD xmm, xmm, xmm/m64 + /// VFNMSUBSD xmm1, xmm2, xmm3/m64 + /// VFNMSUBSD xmm1 {k1}{z}, xmm2, xmm3/m64{er} /// public static Vector128 MultiplySubtractNegatedScalar(Vector128 a, Vector128 b, Vector128 c) { throw new PlatformNotSupportedException(); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Fma.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Fma.cs index d147e8b4a712b6..5f94aa053be3e5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Fma.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Fma.cs @@ -26,171 +26,203 @@ internal X64() { } /// /// __m128 _mm_fmadd_ps (__m128 a, __m128 b, __m128 c) - /// VFMADDPS xmm, xmm, xmm/m128 + /// VFMADDPS xmm1, xmm2, xmm3/m128 + /// VFMADDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 MultiplyAdd(Vector128 a, Vector128 b, Vector128 c) => MultiplyAdd(a, b, c); /// /// __m128d _mm_fmadd_pd (__m128d a, __m128d b, __m128d c) - /// VFMADDPD xmm, xmm, xmm/m128 + /// VFMADDPD xmm1, xmm2, xmm3/m128 + /// VFMADDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 MultiplyAdd(Vector128 a, Vector128 b, Vector128 c) => MultiplyAdd(a, b, c); /// /// __m256 _mm256_fmadd_ps (__m256 a, __m256 b, __m256 c) - /// VFMADDPS ymm, ymm, ymm/m256 + /// VFMADDPS ymm1, ymm2, ymm3/m256 + /// VFMADDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 MultiplyAdd(Vector256 a, Vector256 b, Vector256 c) => MultiplyAdd(a, b, c); /// /// __m256d _mm256_fmadd_pd (__m256d a, __m256d b, __m256d c) - /// VFMADDPS ymm, ymm, ymm/m256 + /// VFMADDPD ymm1, ymm2, ymm3/m256 + /// VFMADDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 MultiplyAdd(Vector256 a, Vector256 b, Vector256 c) => MultiplyAdd(a, b, c); /// /// __m128 _mm_fmadd_ss (__m128 a, __m128 b, __m128 c) - /// VFMADDSS xmm, xmm, xmm/m32 + /// VFMADDSS xmm1, xmm2, xmm3/m32 + /// VFMADDSS xmm1 {k1}{z}, xmm2, xmm3/m32{er} /// public static Vector128 MultiplyAddScalar(Vector128 a, Vector128 b, Vector128 c) => MultiplyAddScalar(a, b, c); /// /// __m128d _mm_fmadd_sd (__m128d a, __m128d b, __m128d c) - /// VFMADDSS xmm, xmm, xmm/m64 + /// VFMADDSD xmm1, xmm2, xmm3/m64 + /// VFMADDSD xmm1 {k1}{z}, xmm2, xmm3/m64{er} /// public static Vector128 MultiplyAddScalar(Vector128 a, Vector128 b, Vector128 c) => MultiplyAddScalar(a, b, c); /// /// __m128 _mm_fmaddsub_ps (__m128 a, __m128 b, __m128 c) - /// VFMADDSUBPS xmm, xmm, xmm/m128 + /// VFMADDSUBPS xmm1, xmm2, xmm3/m128 + /// VFMADDSUBPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 MultiplyAddSubtract(Vector128 a, Vector128 b, Vector128 c) => MultiplyAddSubtract(a, b, c); /// /// __m128d _mm_fmaddsub_pd (__m128d a, __m128d b, __m128d c) - /// VFMADDSUBPD xmm, xmm, xmm/m128 + /// VFMADDSUBPD xmm1, xmm2, xmm3/m128 + /// VFMADDSUBPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 MultiplyAddSubtract(Vector128 a, Vector128 b, Vector128 c) => 
MultiplyAddSubtract(a, b, c); /// /// __m256 _mm256_fmaddsub_ps (__m256 a, __m256 b, __m256 c) - /// VFMADDSUBPS ymm, ymm, ymm/m256 + /// VFMADDSUBPS ymm1, ymm2, ymm3/m256 + /// VFMADDSUBPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 MultiplyAddSubtract(Vector256 a, Vector256 b, Vector256 c) => MultiplyAddSubtract(a, b, c); /// /// __m256d _mm256_fmaddsub_pd (__m256d a, __m256d b, __m256d c) - /// VFMADDSUBPD ymm, ymm, ymm/m256 + /// VFMADDSUBPD ymm1, ymm2, ymm3/m256 + /// VFMADDSUBPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 MultiplyAddSubtract(Vector256 a, Vector256 b, Vector256 c) => MultiplyAddSubtract(a, b, c); /// /// __m128 _mm_fmsub_ps (__m128 a, __m128 b, __m128 c) - /// VFMSUBPS xmm, xmm, xmm/m128 + /// VFMSUBPS xmm1, xmm2, xmm3/m128 + /// VFMSUBPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 MultiplySubtract(Vector128 a, Vector128 b, Vector128 c) => MultiplySubtract(a, b, c); /// /// __m128d _mm_fmsub_pd (__m128d a, __m128d b, __m128d c) - /// VFMSUBPS xmm, xmm, xmm/m128 + /// VFMSUBPD xmm1, xmm2, xmm3/m128 + /// VFMSUBPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 MultiplySubtract(Vector128 a, Vector128 b, Vector128 c) => MultiplySubtract(a, b, c); /// /// __m256 _mm256_fmsub_ps (__m256 a, __m256 b, __m256 c) - /// VFMSUBPS ymm, ymm, ymm/m256 + /// VFMSUBPS ymm1, ymm2, ymm3/m256 + /// VFMSUBPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 MultiplySubtract(Vector256 a, Vector256 b, Vector256 c) => MultiplySubtract(a, b, c); /// /// __m256d _mm256_fmsub_pd (__m256d a, __m256d b, __m256d c) - /// VFMSUBPD ymm, ymm, ymm/m256 + /// VFMSUBPD ymm1, ymm2, ymm3/m256 + /// VFMSUBPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 MultiplySubtract(Vector256 a, Vector256 b, Vector256 c) => MultiplySubtract(a, b, c); /// /// __m128 _mm_fmsub_ss (__m128 a, __m128 b, __m128 c) - /// VFMSUBSS xmm, xmm, xmm/m32 + /// VFMSUBSS xmm1, xmm2, xmm3/m32 + /// VFMSUBSS xmm1 {k1}{z}, xmm2, xmm3/m32{er} /// public static Vector128 MultiplySubtractScalar(Vector128 a, Vector128 b, Vector128 c) => MultiplySubtractScalar(a, b, c); /// /// __m128d _mm_fmsub_sd (__m128d a, __m128d b, __m128d c) - /// VFMSUBSD xmm, xmm, xmm/m64 + /// VFMSUBSD xmm1, xmm2, xmm3/m64 + /// VFMSUBSD xmm1 {k1}{z}, xmm2, xmm3/m64{er} /// public static Vector128 MultiplySubtractScalar(Vector128 a, Vector128 b, Vector128 c) => MultiplySubtractScalar(a, b, c); /// /// __m128 _mm_fmsubadd_ps (__m128 a, __m128 b, __m128 c) - /// VFMSUBADDPS xmm, xmm, xmm/m128 + /// VFMSUBADDPS xmm1, xmm2, xmm3/m128 + /// VFMSUBADDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 MultiplySubtractAdd(Vector128 a, Vector128 b, Vector128 c) => MultiplySubtractAdd(a, b, c); /// /// __m128d _mm_fmsubadd_pd (__m128d a, __m128d b, __m128d c) - /// VFMSUBADDPD xmm, xmm, xmm/m128 + /// VFMSUBADDPD xmm1, xmm2, xmm3/m128 + /// VFMSUBADDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 MultiplySubtractAdd(Vector128 a, Vector128 b, Vector128 c) => MultiplySubtractAdd(a, b, c); /// /// __m256 _mm256_fmsubadd_ps (__m256 a, __m256 b, __m256 c) - /// VFMSUBADDPS ymm, ymm, ymm/m256 + /// VFMSUBADDPS ymm1, ymm2, ymm3/m256 + /// VFMSUBADDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 MultiplySubtractAdd(Vector256 a, Vector256 b, Vector256 c) => MultiplySubtractAdd(a, b, c); /// /// __m256d _mm256_fmsubadd_pd (__m256d a, __m256d b, __m256d c) - /// VFMSUBADDPD ymm, ymm, ymm/m256 + ///
VFMSUBADDPD ymm1, ymm2, ymm3/m256 + /// VFMSUBADDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 MultiplySubtractAdd(Vector256 a, Vector256 b, Vector256 c) => MultiplySubtractAdd(a, b, c); /// /// __m128 _mm_fnmadd_ps (__m128 a, __m128 b, __m128 c) - /// VFNMADDPS xmm, xmm, xmm/m128 + /// VFNMADDPS xmm1, xmm2, xmm3/m128 + /// VFNMADDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 MultiplyAddNegated(Vector128 a, Vector128 b, Vector128 c) => MultiplyAddNegated(a, b, c); /// /// __m128d _mm_fnmadd_pd (__m128d a, __m128d b, __m128d c) - /// VFNMADDPD xmm, xmm, xmm/m128 + /// VFNMADDPD xmm1, xmm2, xmm3/m128 + /// VFNMADDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 MultiplyAddNegated(Vector128 a, Vector128 b, Vector128 c) => MultiplyAddNegated(a, b, c); /// /// __m256 _mm256_fnmadd_ps (__m256 a, __m256 b, __m256 c) - /// VFNMADDPS ymm, ymm, ymm/m256 + /// VFNMADDPS ymm1, ymm2, ymm3/m256 + /// VFNMADDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 MultiplyAddNegated(Vector256 a, Vector256 b, Vector256 c) => MultiplyAddNegated(a, b, c); /// /// __m256d _mm256_fnmadd_pd (__m256d a, __m256d b, __m256d c) - /// VFNMADDPD ymm, ymm, ymm/m256 + /// VFNMADDPD ymm1, ymm2, ymm3/m256 + /// VFNMADDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 MultiplyAddNegated(Vector256 a, Vector256 b, Vector256 c) => MultiplyAddNegated(a, b, c); /// /// __m128 _mm_fnmadd_ss (__m128 a, __m128 b, __m128 c) - /// VFNMADDSS xmm, xmm, xmm/m32 + /// VFNMADDSS xmm1, xmm2, xmm3/m32 + /// VFNMADDSS xmm1 {k1}{z}, xmm2, xmm3/m32{er} /// public static Vector128 MultiplyAddNegatedScalar(Vector128 a, Vector128 b, Vector128 c) => MultiplyAddNegatedScalar(a, b, c); /// /// __m128d _mm_fnmadd_sd (__m128d a, __m128d b, __m128d c) - /// VFNMADDSD xmm, xmm, xmm/m64 + /// VFNMADDSD xmm1, xmm2, xmm3/m64 + /// VFNMADDSD xmm1 {k1}{z}, xmm2, xmm3/m64{er} /// public static Vector128 MultiplyAddNegatedScalar(Vector128 a, Vector128 b, Vector128 c) => MultiplyAddNegatedScalar(a, b, c); /// /// __m128 _mm_fnmsub_ps (__m128 a, __m128 b, __m128 c) - /// VFNMSUBPS xmm, xmm, xmm/m128 + /// VFNMSUBPS xmm1, xmm2, xmm3/m128 + /// VFNMSUBPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 MultiplySubtractNegated(Vector128 a, Vector128 b, Vector128 c) => MultiplySubtractNegated(a, b, c); /// /// __m128d _mm_fnmsub_pd (__m128d a, __m128d b, __m128d c) - /// VFNMSUBPD xmm, xmm, xmm/m128 + /// VFNMSUBPD xmm1, xmm2, xmm3/m128 + /// VFNMSUBPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 MultiplySubtractNegated(Vector128 a, Vector128 b, Vector128 c) => MultiplySubtractNegated(a, b, c); /// /// __m256 _mm256_fnmsub_ps (__m256 a, __m256 b, __m256 c) - /// VFNMSUBPS ymm, ymm, ymm/m256 + /// VFNMSUBPS ymm1, ymm2, ymm3/m256 + /// VFNMSUBPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst /// public static Vector256 MultiplySubtractNegated(Vector256 a, Vector256 b, Vector256 c) => MultiplySubtractNegated(a, b, c); /// /// __m256d _mm256_fnmsub_pd (__m256d a, __m256d b, __m256d c) - /// VFNMSUBPD ymm, ymm, ymm/m256 + /// VFNMSUBPD ymm1, ymm2, ymm3/m256 + /// VFNMSUBPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst /// public static Vector256 MultiplySubtractNegated(Vector256 a, Vector256 b, Vector256 c) => MultiplySubtractNegated(a, b, c); /// /// __m128 _mm_fnmsub_ss (__m128 a, __m128 b, __m128 c) - /// VFNMSUBSS xmm, xmm, xmm/m32 + /// VFNMSUBSS xmm1, xmm2, xmm3/m32 + /// VFNMSUBSS xmm1 {k1}{z}, xmm2, xmm3/m32{er} /// public static Vector128 
MultiplySubtractNegatedScalar(Vector128 a, Vector128 b, Vector128 c) => MultiplySubtractNegatedScalar(a, b, c); /// /// __m128d _mm_fnmsub_sd (__m128d a, __m128d b, __m128d c) - /// VFNMSUBSD xmm, xmm, xmm/m64 + /// VFNMSUBSD xmm1, xmm2, xmm3/m64 + /// VFNMSUBSD xmm1 {k1}{z}, xmm2, xmm3/m64{er} /// public static Vector128 MultiplySubtractNegatedScalar(Vector128 a, Vector128 b, Vector128 c) => MultiplySubtractNegatedScalar(a, b, c); } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Lzcnt.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Lzcnt.PlatformNotSupported.cs index 849a68476610e1..f15deb98b688f4 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Lzcnt.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Lzcnt.PlatformNotSupported.cs @@ -24,7 +24,7 @@ internal X64() { } /// /// unsigned __int64 _lzcnt_u64 (unsigned __int64 a) - /// LZCNT reg, reg/m64 + /// LZCNT r64, r/m64 /// This intrinsic is only available on 64-bit processes /// public static ulong LeadingZeroCount(ulong value) { throw new PlatformNotSupportedException(); } @@ -32,7 +32,7 @@ internal X64() { } /// /// unsigned int _lzcnt_u32 (unsigned int a) - /// LZCNT reg, reg/m32 + /// LZCNT r32, r/m32 /// public static uint LeadingZeroCount(uint value) { throw new PlatformNotSupportedException(); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Lzcnt.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Lzcnt.cs index 2493ed2aba1b1e..4fdfabd818966f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Lzcnt.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Lzcnt.cs @@ -25,7 +25,7 @@ internal X64() { } /// /// unsigned __int64 _lzcnt_u64 (unsigned __int64 a) - /// LZCNT reg, reg/m64 + /// LZCNT r64, r/m64 /// This intrinsic is only available on 64-bit processes /// public static ulong LeadingZeroCount(ulong value) => LeadingZeroCount(value); @@ -33,7 +33,7 @@ internal X64() { } /// /// unsigned int _lzcnt_u32 (unsigned int a) - /// LZCNT reg, reg/m32 + /// LZCNT r32, r/m32 /// public static uint LeadingZeroCount(uint value) => LeadingZeroCount(value); } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.PlatformNotSupported.cs index 6cb481911325cc..a815701bc83015 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.PlatformNotSupported.cs @@ -27,12 +27,14 @@ internal X64() { } /// /// __m128i _mm_clmulepi64_si128 (__m128i a, __m128i b, const int imm8) - /// PCLMULQDQ xmm, xmm/m128, imm8 + /// PCLMULQDQ xmm1, xmm2/m128, imm8 + /// VPCLMULQDQ xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 CarrylessMultiply(Vector128 left, Vector128 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_clmulepi64_si128 (__m128i a, __m128i b, const int imm8) - /// PCLMULQDQ xmm, xmm/m128, imm8 + /// PCLMULQDQ xmm1, xmm2/m128, imm8 + /// VPCLMULQDQ xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 CarrylessMultiply(Vector128 left, Vector128 right, [ConstantExpected] byte control) { throw new
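For CarrylessMultiply, the control byte picks which 64-bit half of each source participates: bit 0 selects the qword of left, bit 4 the qword of right, so 0x00 multiplies the two low qwords. A sketch of the GF(2)[x] (carryless) multiply, with illustrative values:

using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

class ClmulSketch
{
    static void Main()
    {
        if (!Pclmulqdq.IsSupported) return;

        Vector128<ulong> left  = Vector128.Create(0b0110UL, 0UL); // x^2 + x
        Vector128<ulong> right = Vector128.Create(0b0101UL, 0UL); // x^2 + 1

        // control 0x00: low qword of 'left' times low qword of 'right'.
        Vector128<ulong> product = Pclmulqdq.CarrylessMultiply(left, right, 0x00);

        // (x^2 + x)(x^2 + 1) = x^4 + x^3 + x^2 + x = 0b11110
        Console.WriteLine(product.GetElement(0)); // 30
    }
}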
PlatformNotSupportedException(); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.cs index 0423cd49d726f9..aa1c3d21b8c0b5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.cs @@ -27,12 +27,14 @@ internal X64() { } /// /// __m128i _mm_clmulepi64_si128 (__m128i a, __m128i b, const int imm8) - /// PCLMULQDQ xmm, xmm/m128, imm8 + /// PCLMULQDQ xmm1, xmm2/m128, imm8 + /// VPCLMULQDQ xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 CarrylessMultiply(Vector128 left, Vector128 right, [ConstantExpected] byte control) => CarrylessMultiply(left, right, control); /// /// __m128i _mm_clmulepi64_si128 (__m128i a, __m128i b, const int imm8) - /// PCLMULQDQ xmm, xmm/m128, imm8 + /// PCLMULQDQ xmm1, xmm2/m128, imm8 + /// VPCLMULQDQ xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 CarrylessMultiply(Vector128 left, Vector128 right, [ConstantExpected] byte control) => CarrylessMultiply(left, right, control); } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Popcnt.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Popcnt.PlatformNotSupported.cs index 1913605685c337..bd979943225397 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Popcnt.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Popcnt.PlatformNotSupported.cs @@ -24,7 +24,7 @@ internal X64() { } /// /// __int64 _mm_popcnt_u64 (unsigned __int64 a) - /// POPCNT reg64, reg/m64 + /// POPCNT r64, r/m64 /// This intrinsic is only available on 64-bit processes /// public static ulong PopCount(ulong value) { throw new PlatformNotSupportedException(); } @@ -32,7 +32,7 @@ internal X64() { } /// /// int _mm_popcnt_u32 (unsigned int a) - /// POPCNT reg, reg/m32 + /// POPCNT r32, r/m32 /// public static uint PopCount(uint value) { throw new PlatformNotSupportedException(); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Popcnt.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Popcnt.cs index 185f7fc00e6192..c04de74cefc46b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Popcnt.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Popcnt.cs @@ -25,7 +25,7 @@ internal X64() { } /// /// __int64 _mm_popcnt_u64 (unsigned __int64 a) - /// POPCNT reg64, reg/m64 + /// POPCNT r64, r/m64 /// This intrinsic is only available on 64-bit processes /// public static ulong PopCount(ulong value) => PopCount(value); @@ -33,7 +33,7 @@ internal X64() { } /// /// int _mm_popcnt_u32 (unsigned int a) - /// POPCNT reg, reg/m32 + /// POPCNT r32, r/m32 /// public static uint PopCount(uint value) => PopCount(value); } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse.PlatformNotSupported.cs index 5f7655fdb367c4..7541f8e61a7699 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse.PlatformNotSupported.cs @@ -24,550 +24,627 @@ internal X64() { } public 
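A brief usage sketch for the two count instructions above (illustrative values, not from the patch): LZCNT counts zero bits from the most significant end, POPCNT counts set bits.

using System;
using System.Runtime.Intrinsics.X86;

class CountSketch
{
    static void Main()
    {
        if (Lzcnt.IsSupported)
        {
            Console.WriteLine(Lzcnt.LeadingZeroCount(0x0000_00F0u)); // 24 (32-bit value, top bit is bit 7)
        }
        if (Popcnt.IsSupported)
        {
            Console.WriteLine(Popcnt.PopCount(0x0000_00F0u)); // 4
        }
    }
}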
static new bool IsSupported { [Intrinsic] get { return false; } } - /// - /// __int64 _mm_cvtss_si64 (__m128 a) - /// CVTSS2SI r64, xmm/m32 - /// This intrinsic is only available on 64-bit processes - /// - public static long ConvertToInt64(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_cvtsi64_ss (__m128 a, __int64 b) - /// CVTSI2SS xmm, reg/m64 + /// CVTSI2SS xmm1, r/m64 + /// VCVTSI2SS xmm1, xmm2, r/m64 /// This intrinsic is only available on 64-bit processes /// public static Vector128 ConvertScalarToVector128Single(Vector128 upper, long value) { throw new PlatformNotSupportedException(); } + /// + /// __int64 _mm_cvtss_si64 (__m128 a) + /// CVTSS2SI r64, xmm1/m32 + /// VCVTSS2SI r64, xmm1/m32 + /// This intrinsic is only available on 64-bit processes + /// + public static long ConvertToInt64(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __int64 _mm_cvttss_si64 (__m128 a) - /// CVTTSS2SI r64, xmm/m32 + /// CVTTSS2SI r64, xmm1/m32 + /// VCVTTSS2SI r64, xmm1/m32 /// This intrinsic is only available on 64-bit processes /// public static long ConvertToInt64WithTruncation(Vector128 value) { throw new PlatformNotSupportedException(); } - } /// /// __m128 _mm_add_ps (__m128 a, __m128 b) - /// ADDPS xmm, xmm/m128 + /// ADDPS xmm1, xmm2/m128 + /// VADDPS xmm1, xmm2, xmm3/m128 + /// VADDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Add(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_add_ss (__m128 a, __m128 b) - /// ADDSS xmm, xmm/m32 + /// ADDSS xmm1, xmm2/m32 + /// VADDSS xmm1, xmm2, xmm3/m32 + /// VADDSS xmm1 {k1}{z}, xmm2, xmm3/m32{er} /// public static Vector128 AddScalar(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_and_ps (__m128 a, __m128 b) - /// ANDPS xmm, xmm/m128 + /// ANDPS xmm1, xmm2/m128 + /// VANDPS xmm1, xmm2, xmm3/m128 + /// VANDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 And(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_andnot_ps (__m128 a, __m128 b) - /// ANDNPS xmm, xmm/m128 + /// ANDNPS xmm1, xmm2/m128 + /// VANDNPS xmm1, xmm2, xmm3/m128 + /// VANDNPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 AndNot(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_cmpeq_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(0) + /// CMPPS xmm1, xmm2/m128, imm8(0) + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(0) /// public static Vector128 CompareEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// int _mm_comieq_ss (__m128 a, __m128 b) - /// COMISS xmm, xmm/m32 + /// __m128 _mm_cmpeq_ps (__m128 a, __m128 b) + /// CMPPS xmm1, xmm2/m128, imm8(0) + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(0) /// - public static bool CompareScalarOrderedEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// int _mm_ucomieq_ss (__m128 a, __m128 b) - /// UCOMISS xmm, xmm/m32 + /// __m128 _mm_cmpgt_ps (__m128 a, __m128 b) + /// CMPPS xmm1, xmm2/m128, imm8(1) ; with swapped operands + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(1) ; with swapped operands /// - public static bool CompareScalarUnorderedEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + 
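The compare surface splits in two: the packed CMPPS forms produce a per-element mask (all-ones bits where the predicate holds, zero elsewhere), while the scalar COMISS/UCOMISS forms compare only the low elements and materialize a bool. A sketch with illustrative values, not part of the patch:

using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

class CompareSketch
{
    static void Main()
    {
        if (!Sse.IsSupported) return;

        Vector128<float> a = Vector128.Create(1.0f, 2.0f, 3.0f, 4.0f);
        Vector128<float> b = Vector128.Create(1.0f, 5.0f, 3.0f, 0.0f);

        // Packed: a lane-wise mask, reinterpreted here as ints to show the bit pattern.
        Console.WriteLine(Sse.CompareEqual(a, b).AsInt32()); // <-1, 0, -1, 0>

        // Scalar: compares only the low elements.
        Console.WriteLine(Sse.CompareScalarOrderedEqual(a, b)); // True
    }
}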
public static Vector128 CompareGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cmpeq_ss (__m128 a, __m128 b) - /// CMPSS xmm, xmm/m32, imm8(0) + /// __m128 _mm_cmpge_ps (__m128 a, __m128 b) + /// CMPPS xmm1, xmm2/m128, imm8(2) ; with swapped operands + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(2) ; with swapped operands /// - public static Vector128 CompareScalarEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cmpgt_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(1) with swapped operands + /// __m128 _mm_cmplt_ps (__m128 a, __m128 b) + /// CMPPS xmm1, xmm2/m128, imm8(1) + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(1) /// - public static Vector128 CompareGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// int _mm_comigt_ss (__m128 a, __m128 b) - /// COMISS xmm, xmm/m32 + /// __m128 _mm_cmple_ps (__m128 a, __m128 b) + /// CMPPS xmm1, xmm2/m128, imm8(2) + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(2) /// - public static bool CompareScalarOrderedGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareLessThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// int _mm_ucomigt_ss (__m128 a, __m128 b) - /// UCOMISS xmm, xmm/m32 + /// __m128 _mm_cmpneq_ps (__m128 a, __m128 b) + /// CMPPS xmm1, xmm2/m128, imm8(4) + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(4) /// - public static bool CompareScalarUnorderedGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareNotEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cmpgt_ss (__m128 a, __m128 b) - /// CMPSS xmm, xmm/m32, imm8(1) with swapped operands + /// __m128 _mm_cmpngt_ps (__m128 a, __m128 b) + /// CMPPS xmm1, xmm2/m128, imm8(5) ; with swapped operands + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(5) ; with swapped operands /// - public static Vector128 CompareScalarGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareNotGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cmpge_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(2) with swapped operands + /// __m128 _mm_cmpnge_ps (__m128 a, __m128 b) + /// CMPPS xmm1, xmm2/m128, imm8(6) ; with swapped operands + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(6) ; with swapped operands /// - public static Vector128 CompareGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareNotGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// int _mm_comige_ss (__m128 a, __m128 b) - /// COMISS xmm, xmm/m32 + /// __m128 _mm_cmpnlt_ps (__m128 a, __m128 b) + /// CMPPS xmm1, xmm2/m128, imm8(5) + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(5) /// - public static bool CompareScalarOrderedGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static 
Vector128 CompareNotLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// int _mm_ucomige_ss (__m128 a, __m128 b) - /// UCOMISS xmm, xmm/m32 + /// __m128 _mm_cmpnle_ps (__m128 a, __m128 b) + /// CMPPS xmm1, xmm2/m128, imm8(6) + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(6) /// - public static bool CompareScalarUnorderedGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareNotLessThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cmpge_ss (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m32, imm8(2) with swapped operands + /// __m128 _mm_cmpord_ps (__m128 a, __m128 b) + /// CMPPS xmm1, xmm2/m128, imm8(7) + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(7) /// - public static Vector128 CompareScalarGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + public static Vector128 CompareOrdered(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cmplt_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(1) + /// __m128 _mm_cmpeq_ss (__m128 a, __m128 b) + /// CMPSS xmm1, xmm2/m32, imm8(0) + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(0) /// - public static Vector128 CompareLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareScalarEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// int _mm_comilt_ss (__m128 a, __m128 b) - /// COMISS xmm, xmm/m32 + /// __m128 _mm_cmpgt_ss (__m128 a, __m128 b) + /// CMPSS xmm1, xmm2/m32, imm8(1) ; with swapped operands + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(1) ; with swapped operands /// - public static bool CompareScalarOrderedLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareScalarGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// int _mm_ucomilt_ss (__m128 a, __m128 b) - /// UCOMISS xmm, xmm/m32 + /// __m128 _mm_cmpge_ss (__m128 a, __m128 b) + /// CMPSS xmm1, xmm2/m32, imm8(2) ; with swapped operands + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(2) ; with swapped operands /// - public static bool CompareScalarUnorderedLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareScalarGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_cmplt_ss (__m128 a, __m128 b) - /// CMPSS xmm, xmm/m32, imm8(1) + /// CMPSS xmm1, xmm2/m32, imm8(1) + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(1) /// public static Vector128 CompareScalarLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// - /// __m128 _mm_cmple_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(2) + /// __m128 _mm_cmple_ss (__m128 a, __m128 b) + /// CMPSS xmm1, xmm2/m32, imm8(2) + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(2) /// - public static Vector128 CompareLessThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareScalarLessThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// int _mm_comile_ss (__m128 a, __m128 b) - /// COMISS xmm, xmm/m32 + /// __m128 _mm_cmpneq_ss (__m128 a, __m128 b) + /// CMPSS xmm1, xmm2/m32, imm8(4) + /// VCMPSS 
xmm1, xmm2, xmm3/m32, imm8(4) /// - public static bool CompareScalarOrderedLessThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareScalarNotEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// int _mm_ucomile_ss (__m128 a, __m128 b) - /// UCOMISS xmm, xmm/m32 + /// __m128 _mm_cmpngt_ss (__m128 a, __m128 b) + /// CMPSS xmm1, xmm2/m32, imm8(5) ; with swapped operands + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(5) ; with swapped operands /// - public static bool CompareScalarUnorderedLessThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareScalarNotGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cmple_ss (__m128 a, __m128 b) - /// CMPSS xmm, xmm/m32, imm8(2) + /// __m128 _mm_cmpnge_ss (__m128 a, __m128 b) + /// CMPSS xmm1, xmm2/m32, imm8(6) ; with swapped operands + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(6) ; with swapped operands /// - public static Vector128 CompareScalarLessThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareScalarNotGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cmpneq_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(4) + /// __m128 _mm_cmpnlt_ss (__m128 a, __m128 b) + /// CMPSS xmm1, xmm2/m32, imm8(5) + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(5) /// - public static Vector128 CompareNotEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareScalarNotLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// int _mm_comineq_ss (__m128 a, __m128 b) - /// COMISS xmm, xmm/m32 + /// __m128 _mm_cmpnle_ss (__m128 a, __m128 b) + /// CMPSS xmm1, xmm2/m32, imm8(6) + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(6) /// - public static bool CompareScalarOrderedNotEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + public static Vector128 CompareScalarNotLessThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// int _mm_ucomineq_ss (__m128 a, __m128 b) - /// UCOMISS xmm, xmm/m32 + /// __m128 _mm_cmpord_ss (__m128 a, __m128 b) + /// CMPSS xmm1, xmm2/m32, imm8(7) + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(7) /// - public static bool CompareScalarUnorderedNotEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareScalarOrdered(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cmpneq_ss (__m128 a, __m128 b) - /// CMPSS xmm, xmm/m32, imm8(4) + /// int _mm_comieq_ss (__m128 a, __m128 b) + /// COMISS xmm1, xmm2/m32 ; ZF=1 && PF=0 + /// VCOMISS xmm1, xmm2/m32 ; ZF=1 && PF=0 + /// VCOMISS xmm1, xmm2/m32{sae} ; ZF=1 && PF=0 /// - public static Vector128 CompareScalarNotEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static bool CompareScalarOrderedEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cmpngt_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(5) with swapped operands + /// int _mm_comigt_ss (__m128 a, __m128 b) + /// COMISS xmm1, xmm2/m32 ; ZF=0 && CF=0 + /// VCOMISS xmm1, xmm2/m32 ; ZF=0 
&& CF=0 + /// VCOMISS xmm1, xmm2/m32{sae} ; ZF=0 && CF=0 /// - public static Vector128 CompareNotGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static bool CompareScalarOrderedGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cmpngt_ss (__m128 a, __m128 b) - /// CMPSS xmm, xmm/m32, imm8(5) with swapped operands + /// int _mm_comige_ss (__m128 a, __m128 b) + /// COMISS xmm1, xmm2/m32 ; CF=0 + /// VCOMISS xmm1, xmm2/m32 ; CF=0 + /// VCOMISS xmm1, xmm2/m32{sae} ; CF=0 /// - public static Vector128 CompareScalarNotGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static bool CompareScalarOrderedGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cmpnge_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(6) with swapped operands + /// int _mm_comilt_ss (__m128 a, __m128 b) + /// COMISS xmm1, xmm2/m32 ; PF=0 && CF=1 + /// VCOMISS xmm1, xmm2/m32 ; PF=0 && CF=1 + /// VCOMISS xmm1, xmm2/m32{sae} ; PF=0 && CF=1 /// - public static Vector128 CompareNotGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static bool CompareScalarOrderedLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cmpnge_ss (__m128 a, __m128 b) - /// CMPSS xmm, xmm/m32, imm8(6) with swapped operands + /// int _mm_comile_ss (__m128 a, __m128 b) + /// COMISS xmm1, xmm2/m32 ; PF=0 && (ZF=1 || CF=1) + /// VCOMISS xmm1, xmm2/m32 ; PF=0 && (ZF=1 || CF=1) + /// VCOMISS xmm1, xmm2/m32{sae} ; PF=0 && (ZF=1 || CF=1) /// - public static Vector128 CompareScalarNotGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static bool CompareScalarOrderedLessThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cmpnlt_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(5) + /// int _mm_comineq_ss (__m128 a, __m128 b) + /// COMISS xmm1, xmm2/m32 ; ZF=0 || PF=1 + /// VCOMISS xmm1, xmm2/m32 ; ZF=0 || PF=1 + /// VCOMISS xmm1, xmm2/m32{sae} ; ZF=0 || PF=1 /// - public static Vector128 CompareNotLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + public static bool CompareScalarOrderedNotEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cmpnlt_ss (__m128 a, __m128 b) - /// CMPSS xmm, xmm/m32, imm8(5) + /// __m128 _mm_cmpunord_ss (__m128 a, __m128 b) + /// CMPSS xmm1, xmm2/m32, imm8(3) + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(3) /// - public static Vector128 CompareScalarNotLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareScalarUnordered(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cmpnle_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(6) + /// int _mm_ucomieq_ss (__m128 a, __m128 b) + /// UCOMISS xmm1, xmm2/m32 ; ZF=1 && PF=0 + /// VUCOMISS xmm1, xmm2/m32 ; ZF=1 && PF=0 + /// VUCOMISS xmm1, xmm2/m32{sae} ; ZF=1 && PF=0 /// - public static Vector128 CompareNotLessThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static bool CompareScalarUnorderedEqual(Vector128 left, Vector128 right) { throw new 
PlatformNotSupportedException(); } /// - /// __m128 _mm_cmpnle_ss (__m128 a, __m128 b) - /// CMPSS xmm, xmm/m32, imm8(6) + /// int _mm_ucomigt_ss (__m128 a, __m128 b) + /// UCOMISS xmm1, xmm2/m32 ; ZF=0 && CF=0 + /// VUCOMISS xmm1, xmm2/m32 ; ZF=0 && CF=0 + /// VUCOMISS xmm1, xmm2/m32{sae} ; ZF=0 && CF=0 /// - public static Vector128 CompareScalarNotLessThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static bool CompareScalarUnorderedGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cmpord_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(7) + /// int _mm_ucomige_ss (__m128 a, __m128 b) + /// UCOMISS xmm1, xmm2/m32 ; CF=0 + /// VUCOMISS xmm1, xmm2/m32 ; CF=0 + /// VUCOMISS xmm1, xmm2/m32{sae} ; CF=0 /// - public static Vector128 CompareOrdered(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static bool CompareScalarUnorderedGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cmpord_ss (__m128 a, __m128 b) - /// CMPSS xmm, xmm/m32, imm8(7) + /// int _mm_ucomilt_ss (__m128 a, __m128 b) + /// UCOMISS xmm1, xmm2/m32 ; PF=0 && CF=1 + /// VUCOMISS xmm1, xmm2/m32 ; PF=0 && CF=1 + /// VUCOMISS xmm1, xmm2/m32{sae} ; PF=0 && CF=1 /// - public static Vector128 CompareScalarOrdered(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static bool CompareScalarUnorderedLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cmpunord_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(3) + /// int _mm_ucomile_ss (__m128 a, __m128 b) + /// UCOMISS xmm1, xmm2/m32 ; PF=0 && (ZF=1 || CF=1) + /// VUCOMISS xmm1, xmm2/m32 ; PF=0 && (ZF=1 || CF=1) + /// VUCOMISS xmm1, xmm2/m32{sae} ; PF=0 && (ZF=1 || CF=1) /// - public static Vector128 CompareUnordered(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static bool CompareScalarUnorderedLessThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cmpunord_ss (__m128 a, __m128 b) - /// CMPSS xmm, xmm/m32, imm8(3) + /// int _mm_ucomineq_ss (__m128 a, __m128 b) + /// UCOMISS xmm1, xmm2/m32 ; ZF=0 || PF=1 + /// VUCOMISS xmm1, xmm2/m32 ; ZF=0 || PF=1 + /// VUCOMISS xmm1, xmm2/m32{sae} ; ZF=0 || PF=1 /// - public static Vector128 CompareScalarUnordered(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + public static bool CompareScalarUnorderedNotEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// int _mm_cvtss_si32 (__m128 a) - /// CVTSS2SI r32, xmm/m32 + /// __m128 _mm_cmpunord_ps (__m128 a, __m128 b) + /// CMPPS xmm1, xmm2/m128, imm8(3) + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(3) /// - public static int ConvertToInt32(Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 CompareUnordered(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_cvtsi32_ss (__m128 a, int b) - /// CVTSI2SS xmm, reg/m32 + /// CVTSI2SS xmm1, r/m32 + /// VCVTSI2SS xmm1, xmm2, r/m32 /// public static Vector128 ConvertScalarToVector128Single(Vector128 upper, int value) { throw new PlatformNotSupportedException(); } + /// + /// int _mm_cvtss_si32 (__m128 a) + /// CVTSS2SI r32, xmm1/m32 + /// VCVTSS2SI r32, xmm1/m32 + /// + 
public static int ConvertToInt32(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// int _mm_cvttss_si32 (__m128 a) - /// CVTTSS2SI r32, xmm/m32 + /// CVTTSS2SI r32, xmm1/m32 + /// VCVTTSS2SI r32, xmm1/m32 /// public static int ConvertToInt32WithTruncation(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_div_ps (__m128 a, __m128 b) - /// DIVPS xmm, xmm/m128 + /// DIVPS xmm1, xmm2/m128 + /// VDIVPS xmm1, xmm2, xmm3/m128 + /// VDIVPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Divide(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_div_ss (__m128 a, __m128 b) - /// DIVSS xmm, xmm/m32 + /// DIVSS xmm1, xmm2/m32 + /// VDIVSS xmm1, xmm2, xmm3/m32 /// public static Vector128 DivideScalar(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// - /// __m128 _mm_loadu_ps (float const* mem_address) - /// MOVUPS xmm, m128 - /// - public static unsafe Vector128 LoadVector128(float* address) { throw new PlatformNotSupportedException(); } - - /// - /// __m128 _mm_load_ss (float const* mem_address) - /// MOVSS xmm, m32 - /// - public static unsafe Vector128 LoadScalarVector128(float* address) { throw new PlatformNotSupportedException(); } - /// /// __m128 _mm_load_ps (float const* mem_address) - /// MOVAPS xmm, m128 + /// MOVAPS xmm1, m128 + /// VMOVAPS xmm1, m128 + /// VMOVAPS xmm1 {k1}{z}, m128 /// public static unsafe Vector128 LoadAlignedVector128(float* address) { throw new PlatformNotSupportedException(); } - /// /// __m128 _mm_loadh_pi (__m128 a, __m64 const* mem_addr) - /// MOVHPS xmm, m64 + /// MOVHPS xmm1, m64 + /// VMOVHPS xmm1, xmm2, m64 /// public static unsafe Vector128 LoadHigh(Vector128 lower, float* address) { throw new PlatformNotSupportedException(); } - /// /// __m128 _mm_loadl_pi (__m128 a, __m64 const* mem_addr) - /// MOVLPS xmm, m64 + /// MOVLPS xmm1, m64 + /// VMOVLPS xmm1, xmm2, m64 /// public static unsafe Vector128 LoadLow(Vector128 upper, float* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128 _mm_load_ss (float const* mem_address) + /// MOVSS xmm1, m32 + /// VMOVSS xmm1, m32 + /// VMOVSS xmm1 {k1}, m32 + /// + public static unsafe Vector128 LoadScalarVector128(float* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128 _mm_loadu_ps (float const* mem_address) + /// MOVUPS xmm1, m128 + /// VMOVUPS xmm1, m128 + /// VMOVUPS xmm1 {k1}{z}, m128 + /// + public static unsafe Vector128 LoadVector128(float* address) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_max_ps (__m128 a, __m128 b) - /// MAXPS xmm, xmm/m128 + /// MAXPS xmm1, xmm2/m128 + /// VMAXPS xmm1, xmm2, xmm3/m128 + /// VMAXPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Max(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_max_ss (__m128 a, __m128 b) - /// MAXSS xmm, xmm/m32 + /// MAXSS xmm1, xmm2/m32 + /// VMAXSS xmm1, xmm2, xmm3/m32 /// public static Vector128 MaxScalar(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_min_ps (__m128 a, __m128 b) - /// MINPS xmm, xmm/m128 + /// MINPS xmm1, xmm2/m128 + /// VMINPS xmm1, xmm2, xmm3/m128 + /// VMINPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Min(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); }
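For reviewers skimming the reordered comparison block above, here is a minimal sketch of how these packed-compare intrinsics are typically consumed on the supported path (illustrative only, not part of the patch; the helper name LessThanMask is hypothetical):

    using System;
    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    internal static class SseCompareExample
    {
        // Returns a 4-bit mask with bit i set when left[i] < right[i].
        internal static int LessThanMask(Vector128<float> left, Vector128<float> right)
        {
            if (!Sse.IsSupported)
            {
                throw new PlatformNotSupportedException();
            }

            // CMPPS/VCMPPS with imm8(1) produces an all-ones lane for each
            // true element; MOVMSKPS then packs the four lane sign bits
            // into the low bits of an integer.
            Vector128<float> mask = Sse.CompareLessThan(left, right);
            return Sse.MoveMask(mask);
        }
    }
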
/// /// __m128 _mm_min_ss (__m128 a, __m128 b) - /// MINSS xmm, xmm/m32 + /// MINSS xmm1, xmm2/m32 + /// VMINSS xmm1, xmm2, xmm3/m32 /// public static Vector128 MinScalar(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// - /// __m128 _mm_move_ss (__m128 a, __m128 b) - /// MOVSS xmm, xmm - /// - public static Vector128 MoveScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m128 _mm_movehl_ps (__m128 a, __m128 b) - /// MOVHLPS xmm, xmm + /// MOVHLPS xmm1, xmm2 + /// VMOVHLPS xmm1, xmm2, xmm3 /// public static Vector128 MoveHighToLow(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// /// __m128 _mm_movelh_ps (__m128 a, __m128 b) - /// MOVLHPS xmm, xmm + /// MOVLHPS xmm1, xmm2 + /// VMOVLHPS xmm1, xmm2, xmm3 /// public static Vector128 MoveLowToHigh(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// /// int _mm_movemask_ps (__m128 a) - /// MOVMSKPS reg, xmm + /// MOVMSKPS r32, xmm1 + /// VMOVMSKPS r32, xmm1 /// public static int MoveMask(Vector128 value) { throw new PlatformNotSupportedException(); } + /// + /// __m128 _mm_move_ss (__m128 a, __m128 b) + /// MOVSS xmm1, xmm2 + /// VMOVSS xmm1, xmm2, xmm3 + /// VMOVSS xmm1 {k1}{z}, xmm2, xmm3 + /// + public static Vector128 MoveScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_mul_ps (__m128 a, __m128 b) - /// MULPS xmm, xmm/m128 + /// MULPS xmm1, xmm2/m128 + /// VMULPS xmm1, xmm2, xmm3/m128 + /// VMULPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Multiply(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_mul_ss (__m128 a, __m128 b) - /// MULPS xmm, xmm/m32 + /// MULSS xmm1, xmm2/m32 + /// VMULSS xmm1, xmm2, xmm3/m32 /// public static Vector128 MultiplyScalar(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + /// + /// __m128 _mm_or_ps (__m128 a, __m128 b) + /// ORPS xmm1, xmm2/m128 + /// VORPS xmm1, xmm2, xmm3/m128 + /// VORPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst + /// + public static Vector128 Or(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// void _mm_prefetch(char* p, int i) /// PREFETCHT0 m8 /// public static unsafe void Prefetch0(void* address) { throw new PlatformNotSupportedException(); } - /// /// void _mm_prefetch(char* p, int i) /// PREFETCHT1 m8 /// public static unsafe void Prefetch1(void* address) { throw new PlatformNotSupportedException(); } - /// /// void _mm_prefetch(char* p, int i) /// PREFETCHT2 m8 /// public static unsafe void Prefetch2(void* address) { throw new PlatformNotSupportedException(); } - /// /// void _mm_prefetch(char* p, int i) /// PREFETCHNTA m8 /// public static unsafe void PrefetchNonTemporal(void* address) { throw new PlatformNotSupportedException(); } - /// - /// __m128 _mm_or_ps (__m128 a, __m128 b) - /// ORPS xmm, xmm/m128 - /// - public static Vector128 Or(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// /// __m128 _mm_rcp_ps (__m128 a) - /// RCPPS xmm, xmm/m128 + /// RCPPS xmm1, xmm2/m128 + /// VRCPPS xmm1, xmm2/m128 /// public static Vector128 Reciprocal(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_rcp_ss (__m128 a) - /// RCPSS xmm, xmm/m32 + /// RCPSS xmm1, xmm2/m32 + /// VRCPSS xmm1, xmm2, xmm3/m32 /// public static Vector128 ReciprocalScalar(Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m128 _mm_rcp_ss (__m128 a, __m128 b) - /// RCPSS 
xmm, xmm/m32 + /// RCPSS xmm1, xmm2/m32 + /// VRCPSS xmm1, xmm2, xmm3/m32 /// The above native signature does not exist. We provide this additional overload for consistency with the other scalar APIs. /// public static Vector128 ReciprocalScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_rsqrt_ps (__m128 a) - /// RSQRTPS xmm, xmm/m128 + /// RSQRTPS xmm1, xmm2/m128 + /// VRSQRTPS xmm1, xmm2/m128 /// public static Vector128 ReciprocalSqrt(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_rsqrt_ss (__m128 a) - /// RSQRTSS xmm, xmm/m32 + /// RSQRTSS xmm1, xmm2/m32 + /// VRSQRTSS xmm1, xmm2, xmm3/m32 /// public static Vector128 ReciprocalSqrtScalar(Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m128 _mm_rsqrt_ss (__m128 a, __m128 b) - /// RSQRTSS xmm, xmm/m32 + /// RSQRTSS xmm1, xmm2/m32 + /// VRSQRTSS xmm1, xmm2, xmm3/m32 /// The above native signature does not exist. We provide this additional overload for consistency with the other scalar APIs. /// public static Vector128 ReciprocalSqrtScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_shuffle_ps (__m128 a, __m128 b, unsigned int control) - /// SHUFPS xmm, xmm/m128, imm8 + /// SHUFPS xmm1, xmm2/m128, imm8 + /// VSHUFPS xmm1, xmm2, xmm3/m128, imm8 + /// VSHUFPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst, imm8 /// public static Vector128 Shuffle(Vector128 left, Vector128 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_sqrt_ps (__m128 a) - /// SQRTPS xmm, xmm/m128 + /// SQRTPS xmm1, xmm2/m128 + /// VSQRTPS xmm1, xmm2/m128 + /// VSQRTPS xmm1 {k1}{z}, xmm2/m128/m32bcst /// public static Vector128 Sqrt(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_sqrt_ss (__m128 a) - /// SQRTSS xmm, xmm/m32 + /// SQRTSS xmm1, xmm2/m32 + /// VSQRTSS xmm1, xmm2, xmm3/m32 + /// VSQRTSS xmm1 {k1}{z}, xmm2, xmm3/m32{er} /// public static Vector128 SqrtScalar(Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m128 _mm_sqrt_ss (__m128 a, __m128 b) - /// SQRTSS xmm, xmm/m32 + /// SQRTSS xmm1, xmm2/m32 + /// VSQRTSS xmm1, xmm2, xmm3/m32 + /// VSQRTSS xmm1 {k1}{z}, xmm2, xmm3/m32{er} /// The above native signature does not exist. We provide this additional overload for consistency with the other scalar APIs. 
/// public static Vector128 SqrtScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } + /// + /// void _mm_storeu_ps (float* mem_addr, __m128 a) + /// MOVUPS m128, xmm1 + /// VMOVUPS m128, xmm1 + /// VMOVUPS m128 {k1}{z}, xmm1 + /// + public static unsafe void Store(float* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_store_ps (float* mem_addr, __m128 a) - /// MOVAPS m128, xmm + /// MOVAPS m128, xmm1 + /// VMOVAPS m128, xmm1 + /// VMOVAPS m128 {k1}{z}, xmm1 /// public static unsafe void StoreAligned(float* address, Vector128 source) { throw new PlatformNotSupportedException(); } - /// /// void _mm_stream_ps (float* mem_addr, __m128 a) - /// MOVNTPS m128, xmm + /// MOVNTPS m128, xmm1 + /// VMOVNTPS m128, xmm1 /// public static unsafe void StoreAlignedNonTemporal(float* address, Vector128 source) { throw new PlatformNotSupportedException(); } - - /// - /// void _mm_storeu_ps (float* mem_addr, __m128 a) - /// MOVUPS m128, xmm - /// - public static unsafe void Store(float* address, Vector128 source) { throw new PlatformNotSupportedException(); } - /// /// void _mm_sfence(void) /// SFENCE /// public static void StoreFence() { throw new PlatformNotSupportedException(); } - - /// - /// void _mm_store_ss (float* mem_addr, __m128 a) - /// MOVSS m32, xmm - /// - public static unsafe void StoreScalar(float* address, Vector128 source) { throw new PlatformNotSupportedException(); } - /// /// void _mm_storeh_pi (__m64* mem_addr, __m128 a) - /// MOVHPS m64, xmm + /// MOVHPS m64, xmm1 + /// VMOVHPS m64, xmm1 /// public static unsafe void StoreHigh(float* address, Vector128 source) { throw new PlatformNotSupportedException(); } - /// /// void _mm_storel_pi (__m64* mem_addr, __m128 a) - /// MOVLPS m64, xmm + /// MOVLPS m64, xmm1 + /// VMOVLPS m64, xmm1 /// public static unsafe void StoreLow(float* address, Vector128 source) { throw new PlatformNotSupportedException(); } + /// + /// void _mm_store_ss (float* mem_addr, __m128 a) + /// MOVSS m32, xmm1 + /// VMOVSS m32, xmm1 + /// VMOVSS m32 {k1}, xmm1 + /// + public static unsafe void StoreScalar(float* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_sub_ps (__m128d a, __m128d b) - /// SUBPS xmm, xmm/m128 + /// SUBPS xmm1, xmm2/m128 + /// VSUBPS xmm1, xmm2, xmm3/m128 + /// VSUBPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Subtract(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_sub_ss (__m128 a, __m128 b) - /// SUBSS xmm, xmm/m32 + /// SUBSS xmm1, xmm2/m32 + /// VSUBSS xmm1, xmm2, xmm3/m32 + /// VSUBSS xmm1 {k1}{z}, xmm2, xmm3/m32{er} /// public static Vector128 SubtractScalar(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_unpackhi_ps (__m128 a, __m128 b) - /// UNPCKHPS xmm, xmm/m128 + /// UNPCKHPS xmm1, xmm2/m128 + /// VUNPCKHPS xmm1, xmm2, xmm3/m128 + /// VUNPCKHPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 UnpackHigh(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_unpacklo_ps (__m128 a, __m128 b) - /// UNPCKLPS xmm, xmm/m128 + /// UNPCKLPS xmm1, xmm2/m128 + /// VUNPCKLPS xmm1, xmm2, xmm3/m128 + /// VUNPCKLPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 UnpackLow(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_xor_ps (__m128 a, __m128 b) - /// XORPS xmm, xmm/m128 + /// 
XORPS xmm1, xmm2/m128 + /// VXORPS xmm1, xmm2, xmm3/m128 + /// VXORPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Xor(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse.cs index 3ca759b1472551..67bd57161ab2f9 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse.cs @@ -24,22 +24,25 @@ internal X64() { } public static new bool IsSupported { get => IsSupported; } - /// - /// __int64 _mm_cvtss_si64 (__m128 a) - /// CVTSS2SI r64, xmm/m32 - /// This intrinsic is only available on 64-bit processes - /// - public static long ConvertToInt64(Vector128 value) => ConvertToInt64(value); /// /// __m128 _mm_cvtsi64_ss (__m128 a, __int64 b) - /// CVTSI2SS xmm, reg/m64 + /// CVTSI2SS xmm1, r/m64 + /// VCVTSI2SS xmm1, xmm2, r/m64 /// This intrinsic is only available on 64-bit processes /// public static Vector128 ConvertScalarToVector128Single(Vector128 upper, long value) => ConvertScalarToVector128Single(upper, value); + /// + /// __int64 _mm_cvtss_si64 (__m128 a) + /// CVTSS2SI r64, xmm1/m32 + /// VCVTSS2SI r64, xmm1/m32 + /// This intrinsic is only available on 64-bit processes + /// + public static long ConvertToInt64(Vector128 value) => ConvertToInt64(value); /// /// __int64 _mm_cvttss_si64 (__m128 a) - /// CVTTSS2SI r64, xmm/m32 + /// CVTTSS2SI r64, xmm1/m32 + /// VCVTTSS2SI r64, xmm1/m32 /// This intrinsic is only available on 64-bit processes /// public static long ConvertToInt64WithTruncation(Vector128 value) => ConvertToInt64WithTruncation(value); @@ -47,367 +50,414 @@ internal X64() { } /// /// __m128 _mm_add_ps (__m128 a, __m128 b) - /// ADDPS xmm, xmm/m128 + /// ADDPS xmm1, xmm2/m128 + /// VADDPS xmm1, xmm2, xmm3/m128 + /// VADDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Add(Vector128 left, Vector128 right) => Add(left, right); /// /// __m128 _mm_add_ss (__m128 a, __m128 b) - /// ADDSS xmm, xmm/m32 + /// ADDSS xmm1, xmm2/m32 + /// VADDSS xmm1, xmm2, xmm3/m32 + /// VADDSS xmm1 {k1}{z}, xmm2, xmm3/m32{er} /// public static Vector128 AddScalar(Vector128 left, Vector128 right) => AddScalar(left, right); /// /// __m128 _mm_and_ps (__m128 a, __m128 b) - /// ANDPS xmm, xmm/m128 + /// ANDPS xmm1, xmm2/m128 + /// VANDPS xmm1, xmm2, xmm3/m128 + /// VANDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 And(Vector128 left, Vector128 right) => And(left, right); /// /// __m128 _mm_andnot_ps (__m128 a, __m128 b) - /// ANDNPS xmm, xmm/m128 + /// ANDNPS xmm1, xmm2/m128 + /// VANDNPS xmm1, xmm2, xmm3/m128 + /// VANDNPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 AndNot(Vector128 left, Vector128 right) => AndNot(left, right); /// /// __m128 _mm_cmpeq_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(0) + /// CMPPS xmm1, xmm2/m128, imm8(0) + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(0) /// public static Vector128 CompareEqual(Vector128 left, Vector128 right) => CompareEqual(left, right); - - /// - /// int _mm_comieq_ss (__m128 a, __m128 b) - /// COMISS xmm, xmm/m32 - /// - public static bool CompareScalarOrderedEqual(Vector128 left, Vector128 right) => CompareScalarOrderedEqual(left, right); - /// - /// int _mm_ucomieq_ss (__m128 a, __m128 b) - /// UCOMISS xmm, xmm/m32 + /// __m128 _mm_cmpgt_ps (__m128 a, __m128 
b) + /// CMPPS xmm1, xmm2/m128, imm8(1) ; with swapped operands + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(1) ; with swapped operands /// - public static bool CompareScalarUnorderedEqual(Vector128 left, Vector128 right) => CompareScalarUnorderedEqual(left, right); - + public static Vector128 CompareGreaterThan(Vector128 left, Vector128 right) => CompareGreaterThan(left, right); /// - /// __m128 _mm_cmpeq_ss (__m128 a, __m128 b) - /// CMPSS xmm, xmm/m32, imm8(0) + /// __m128 _mm_cmpge_ps (__m128 a, __m128 b) + /// CMPPS xmm1, xmm2/m128, imm8(2) ; with swapped operands + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(2) ; with swapped operands /// - public static Vector128 CompareScalarEqual(Vector128 left, Vector128 right) => CompareScalarEqual(left, right); - + public static Vector128 CompareGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareGreaterThanOrEqual(left, right); /// - /// __m128 _mm_cmpgt_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(1) with swapped operands + /// __m128 _mm_cmplt_ps (__m128 a, __m128 b) + /// CMPPS xmm1, xmm2/m128, imm8(1) + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(1) /// - public static Vector128 CompareGreaterThan(Vector128 left, Vector128 right) => CompareGreaterThan(left, right); - + public static Vector128 CompareLessThan(Vector128 left, Vector128 right) => CompareLessThan(left, right); /// - /// int _mm_comigt_ss (__m128 a, __m128 b) - /// COMISS xmm, xmm/m32 + /// __m128 _mm_cmple_ps (__m128 a, __m128 b) + /// CMPPS xmm1, xmm2/m128, imm8(2) + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(2) /// - public static bool CompareScalarOrderedGreaterThan(Vector128 left, Vector128 right) => CompareScalarOrderedGreaterThan(left, right); - + public static Vector128 CompareLessThanOrEqual(Vector128 left, Vector128 right) => CompareLessThanOrEqual(left, right); /// - /// int _mm_ucomigt_ss (__m128 a, __m128 b) - /// UCOMISS xmm, xmm/m32 + /// __m128 _mm_cmpneq_ps (__m128 a, __m128 b) + /// CMPPS xmm1, xmm2/m128, imm8(4) + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(4) /// - public static bool CompareScalarUnorderedGreaterThan(Vector128 left, Vector128 right) => CompareScalarUnorderedGreaterThan(left, right); - + public static Vector128 CompareNotEqual(Vector128 left, Vector128 right) => CompareNotEqual(left, right); /// - /// __m128 _mm_cmpgt_ss (__m128 a, __m128 b) - /// CMPSS xmm, xmm/m32, imm8(1) with swapped operands + /// __m128 _mm_cmpngt_ps (__m128 a, __m128 b) + /// CMPPS xmm1, xmm2/m128, imm8(5) ; with swapped operands + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(5) ; with swapped operands /// - public static Vector128 CompareScalarGreaterThan(Vector128 left, Vector128 right) => CompareScalarGreaterThan(left, right); - + public static Vector128 CompareNotGreaterThan(Vector128 left, Vector128 right) => CompareNotGreaterThan(left, right); /// - /// __m128 _mm_cmpge_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(2) with swapped operands + /// __m128 _mm_cmpnge_ps (__m128 a, __m128 b) + /// CMPPS xmm1, xmm2/m128, imm8(6) ; with swapped operands + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(6) ; with swapped operands /// - public static Vector128 CompareGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareGreaterThanOrEqual(left, right); - + public static Vector128 CompareNotGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareNotGreaterThanOrEqual(left, right); /// - /// int _mm_comige_ss (__m128 a, __m128 b) - /// COMISS xmm, xmm/m32 + /// __m128 _mm_cmpnlt_ps (__m128 a, __m128 b) + /// CMPPS xmm1, xmm2/m128, imm8(5) + /// VCMPPS xmm1, xmm2, 
xmm3/m128, imm8(5) /// - public static bool CompareScalarOrderedGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareScalarOrderedGreaterThanOrEqual(left, right); - + public static Vector128 CompareNotLessThan(Vector128 left, Vector128 right) => CompareNotLessThan(left, right); /// - /// int _mm_ucomige_ss (__m128 a, __m128 b) - /// UCOMISS xmm, xmm/m32 + /// __m128 _mm_cmpnle_ps (__m128 a, __m128 b) + /// CMPPS xmm1, xmm2/m128, imm8(6) + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(6) /// - public static bool CompareScalarUnorderedGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareScalarUnorderedGreaterThanOrEqual(left, right); - + public static Vector128 CompareNotLessThanOrEqual(Vector128 left, Vector128 right) => CompareNotLessThanOrEqual(left, right); /// - /// __m128 _mm_cmpge_ss (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m32, imm8(2) with swapped operands + /// __m128 _mm_cmpord_ps (__m128 a, __m128 b) + /// CMPPS xmm1, xmm2/m128, imm8(7) + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(7) /// - public static Vector128 CompareScalarGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareScalarGreaterThanOrEqual(left, right); + public static Vector128 CompareOrdered(Vector128 left, Vector128 right) => CompareOrdered(left, right); /// - /// __m128 _mm_cmplt_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(1) + /// __m128 _mm_cmpeq_ss (__m128 a, __m128 b) + /// CMPSS xmm1, xmm2/m32, imm8(0) + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(0) /// - public static Vector128 CompareLessThan(Vector128 left, Vector128 right) => CompareLessThan(left, right); - + public static Vector128 CompareScalarEqual(Vector128 left, Vector128 right) => CompareScalarEqual(left, right); /// - /// int _mm_comilt_ss (__m128 a, __m128 b) - /// COMISS xmm, xmm/m32 + /// __m128 _mm_cmpgt_ss (__m128 a, __m128 b) + /// CMPSS xmm1, xmm2/m32, imm8(1) ; with swapped operands + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(1) ; with swapped operands /// - public static bool CompareScalarOrderedLessThan(Vector128 left, Vector128 right) => CompareScalarOrderedLessThan(left, right); - + public static Vector128 CompareScalarGreaterThan(Vector128 left, Vector128 right) => CompareScalarGreaterThan(left, right); /// - /// int _mm_ucomilt_ss (__m128 a, __m128 b) - /// UCOMISS xmm, xmm/m32 + /// __m128 _mm_cmpge_ss (__m128 a, __m128 b) + /// CMPSS xmm1, xmm2/m32, imm8(2) ; with swapped operands + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(2) ; with swapped operands /// - public static bool CompareScalarUnorderedLessThan(Vector128 left, Vector128 right) => CompareScalarUnorderedLessThan(left, right); - + public static Vector128 CompareScalarGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareScalarGreaterThanOrEqual(left, right); /// /// __m128 _mm_cmplt_ss (__m128 a, __m128 b) - /// CMPSS xmm, xmm/m32, imm8(1) + /// CMPSS xmm1, xmm2/m32, imm8(1) + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(1) /// public static Vector128 CompareScalarLessThan(Vector128 left, Vector128 right) => CompareScalarLessThan(left, right); - /// - /// __m128 _mm_cmple_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(2) + /// __m128 _mm_cmple_ss (__m128 a, __m128 b) + /// CMPSS xmm1, xmm2/m32, imm8(2) + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(2) /// - public static Vector128 CompareLessThanOrEqual(Vector128 left, Vector128 right) => CompareLessThanOrEqual(left, right); - + public static Vector128 CompareScalarLessThanOrEqual(Vector128 left, Vector128 right) => CompareScalarLessThanOrEqual(left, right); /// - /// int _mm_comile_ss (__m128 a, __m128 
b) - /// COMISS xmm, xmm/m32 + /// __m128 _mm_cmpneq_ss (__m128 a, __m128 b) + /// CMPSS xmm1, xmm2/m32, imm8(4) + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(4) /// - public static bool CompareScalarOrderedLessThanOrEqual(Vector128 left, Vector128 right) => CompareScalarOrderedLessThanOrEqual(left, right); - + public static Vector128 CompareScalarNotEqual(Vector128 left, Vector128 right) => CompareScalarNotEqual(left, right); /// - /// int _mm_ucomile_ss (__m128 a, __m128 b) - /// UCOMISS xmm, xmm/m32 + /// __m128 _mm_cmpngt_ss (__m128 a, __m128 b) + /// CMPSS xmm1, xmm2/m32, imm8(5) ; with swapped operands + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(5) ; with swapped operands /// - public static bool CompareScalarUnorderedLessThanOrEqual(Vector128 left, Vector128 right) => CompareScalarUnorderedLessThanOrEqual(left, right); - + public static Vector128 CompareScalarNotGreaterThan(Vector128 left, Vector128 right) => CompareScalarNotGreaterThan(left, right); /// - /// __m128 _mm_cmple_ss (__m128 a, __m128 b) - /// CMPSS xmm, xmm/m32, imm8(2) + /// __m128 _mm_cmpnge_ss (__m128 a, __m128 b) + /// CMPSS xmm1, xmm2/m32, imm8(6) ; with swapped operands + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(6) ; with swapped operands /// - public static Vector128 CompareScalarLessThanOrEqual(Vector128 left, Vector128 right) => CompareScalarLessThanOrEqual(left, right); - + public static Vector128 CompareScalarNotGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareScalarNotGreaterThanOrEqual(left, right); /// - /// __m128 _mm_cmpneq_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(4) + /// __m128 _mm_cmpnlt_ss (__m128 a, __m128 b) + /// CMPSS xmm1, xmm2/m32, imm8(5) + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(5) /// - public static Vector128 CompareNotEqual(Vector128 left, Vector128 right) => CompareNotEqual(left, right); - + public static Vector128 CompareScalarNotLessThan(Vector128 left, Vector128 right) => CompareScalarNotLessThan(left, right); /// - /// int _mm_comineq_ss (__m128 a, __m128 b) - /// COMISS xmm, xmm/m32 + /// __m128 _mm_cmpnle_ss (__m128 a, __m128 b) + /// CMPSS xmm1, xmm2/m32, imm8(6) + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(6) /// - public static bool CompareScalarOrderedNotEqual(Vector128 left, Vector128 right) => CompareScalarOrderedNotEqual(left, right); + public static Vector128 CompareScalarNotLessThanOrEqual(Vector128 left, Vector128 right) => CompareScalarNotLessThanOrEqual(left, right); /// - /// int _mm_ucomineq_ss (__m128 a, __m128 b) - /// UCOMISS xmm, xmm/m32 + /// __m128 _mm_cmpord_ss (__m128 a, __m128 b) + /// CMPSS xmm1, xmm2/m32, imm8(7) + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(7) /// - public static bool CompareScalarUnorderedNotEqual(Vector128 left, Vector128 right) => CompareScalarUnorderedNotEqual(left, right); - + public static Vector128 CompareScalarOrdered(Vector128 left, Vector128 right) => CompareScalarOrdered(left, right); /// - /// __m128 _mm_cmpneq_ss (__m128 a, __m128 b) - /// CMPSS xmm, xmm/m32, imm8(4) + /// int _mm_comieq_ss (__m128 a, __m128 b) + /// COMISS xmm1, xmm2/m32 ; ZF=1 && PF=0 + /// VCOMISS xmm1, xmm2/m32 ; ZF=1 && PF=0 + /// VCOMISS xmm1, xmm2/m32{sae} ; ZF=1 && PF=0 /// - public static Vector128 CompareScalarNotEqual(Vector128 left, Vector128 right) => CompareScalarNotEqual(left, right); - + public static bool CompareScalarOrderedEqual(Vector128 left, Vector128 right) => CompareScalarOrderedEqual(left, right); /// - /// __m128 _mm_cmpngt_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(5) with swapped operands + /// int _mm_comigt_ss 
(__m128 a, __m128 b) + /// COMISS xmm1, xmm2/m32 ; ZF=0 && CF=0 + /// VCOMISS xmm1, xmm2/m32 ; ZF=0 && CF=0 + /// VCOMISS xmm1, xmm2/m32{sae} ; ZF=0 && CF=0 /// - public static Vector128 CompareNotGreaterThan(Vector128 left, Vector128 right) => CompareNotGreaterThan(left, right); - + public static bool CompareScalarOrderedGreaterThan(Vector128 left, Vector128 right) => CompareScalarOrderedGreaterThan(left, right); /// - /// __m128 _mm_cmpngt_ss (__m128 a, __m128 b) - /// CMPSS xmm, xmm/m32, imm8(5) with swapped operands + /// int _mm_comige_ss (__m128 a, __m128 b) + /// COMISS xmm1, xmm2/m32 ; CF=0 + /// VCOMISS xmm1, xmm2/m32 ; CF=0 + /// VCOMISS xmm1, xmm2/m32{sae} ; CF=0 /// - public static Vector128 CompareScalarNotGreaterThan(Vector128 left, Vector128 right) => CompareScalarNotGreaterThan(left, right); - + public static bool CompareScalarOrderedGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareScalarOrderedGreaterThanOrEqual(left, right); /// - /// __m128 _mm_cmpnge_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(6) with swapped operands + /// int _mm_comilt_ss (__m128 a, __m128 b) + /// COMISS xmm1, xmm2/m32 ; PF=0 && CF=1 + /// VCOMISS xmm1, xmm2/m32 ; PF=0 && CF=1 + /// VCOMISS xmm1, xmm2/m32{sae} ; PF=0 && CF=1 /// - public static Vector128 CompareNotGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareNotGreaterThanOrEqual(left, right); - + public static bool CompareScalarOrderedLessThan(Vector128 left, Vector128 right) => CompareScalarOrderedLessThan(left, right); /// - /// __m128 _mm_cmpnge_ss (__m128 a, __m128 b) - /// CMPSS xmm, xmm/m32, imm8(6) with swapped operands + /// int _mm_comile_ss (__m128 a, __m128 b) + /// COMISS xmm1, xmm2/m32 ; PF=0 && (ZF=1 || CF=1) + /// VCOMISS xmm1, xmm2/m32 ; PF=0 && (ZF=1 || CF=1) + /// VCOMISS xmm1, xmm2/m32{sae} ; PF=0 && (ZF=1 || CF=1) /// - public static Vector128 CompareScalarNotGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareScalarNotGreaterThanOrEqual(left, right); - + public static bool CompareScalarOrderedLessThanOrEqual(Vector128 left, Vector128 right) => CompareScalarOrderedLessThanOrEqual(left, right); /// - /// __m128 _mm_cmpnlt_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(5) + /// int _mm_comineq_ss (__m128 a, __m128 b) + /// COMISS xmm1, xmm2/m32 ; ZF=0 || PF=1 + /// VCOMISS xmm1, xmm2/m32 ; ZF=0 || PF=1 + /// VCOMISS xmm1, xmm2/m32{sae} ; ZF=0 || PF=1 /// - public static Vector128 CompareNotLessThan(Vector128 left, Vector128 right) => CompareNotLessThan(left, right); + public static bool CompareScalarOrderedNotEqual(Vector128 left, Vector128 right) => CompareScalarOrderedNotEqual(left, right); /// - /// __m128 _mm_cmpnlt_ss (__m128 a, __m128 b) - /// CMPSS xmm, xmm/m32, imm8(5) + /// __m128 _mm_cmpunord_ss (__m128 a, __m128 b) + /// CMPSS xmm1, xmm2/m32, imm8(3) + /// VCMPSS xmm1, xmm2, xmm3/m32, imm8(3) /// - public static Vector128 CompareScalarNotLessThan(Vector128 left, Vector128 right) => CompareScalarNotLessThan(left, right); - + public static Vector128 CompareScalarUnordered(Vector128 left, Vector128 right) => CompareScalarUnordered(left, right); /// - /// __m128 _mm_cmpnle_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(6) + /// int _mm_ucomieq_ss (__m128 a, __m128 b) + /// UCOMISS xmm1, xmm2/m32 ; ZF=1 && PF=0 + /// VUCOMISS xmm1, xmm2/m32 ; ZF=1 && PF=0 + /// VUCOMISS xmm1, xmm2/m32{sae} ; ZF=1 && PF=0 /// - public static Vector128 CompareNotLessThanOrEqual(Vector128 left, Vector128 right) => CompareNotLessThanOrEqual(left, right); - + public static bool 
CompareScalarUnorderedEqual(Vector128 left, Vector128 right) => CompareScalarUnorderedEqual(left, right); /// - /// __m128 _mm_cmpnle_ss (__m128 a, __m128 b) - /// CMPSS xmm, xmm/m32, imm8(6) + /// int _mm_ucomigt_ss (__m128 a, __m128 b) + /// UCOMISS xmm1, xmm2/m32 ; ZF=0 && CF=0 + /// VUCOMISS xmm1, xmm2/m32 ; ZF=0 && CF=0 + /// VUCOMISS xmm1, xmm2/m32{sae} ; ZF=0 && CF=0 /// - public static Vector128 CompareScalarNotLessThanOrEqual(Vector128 left, Vector128 right) => CompareScalarNotLessThanOrEqual(left, right); - + public static bool CompareScalarUnorderedGreaterThan(Vector128 left, Vector128 right) => CompareScalarUnorderedGreaterThan(left, right); /// - /// __m128 _mm_cmpord_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(7) + /// int _mm_ucomige_ss (__m128 a, __m128 b) + /// UCOMISS xmm1, xmm2/m32 ; CF=0 + /// VUCOMISS xmm1, xmm2/m32 ; CF=0 + /// VUCOMISS xmm1, xmm2/m32{sae} ; CF=0 /// - public static Vector128 CompareOrdered(Vector128 left, Vector128 right) => CompareOrdered(left, right); - + public static bool CompareScalarUnorderedGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareScalarUnorderedGreaterThanOrEqual(left, right); /// - /// __m128 _mm_cmpord_ss (__m128 a, __m128 b) - /// CMPSS xmm, xmm/m32, imm8(7) + /// int _mm_ucomilt_ss (__m128 a, __m128 b) + /// UCOMISS xmm1, xmm2/m32 ; PF=0 && CF=1 + /// VUCOMISS xmm1, xmm2/m32 ; PF=0 && CF=1 + /// VUCOMISS xmm1, xmm2/m32{sae} ; PF=0 && CF=1 /// - public static Vector128 CompareScalarOrdered(Vector128 left, Vector128 right) => CompareScalarOrdered(left, right); - + public static bool CompareScalarUnorderedLessThan(Vector128 left, Vector128 right) => CompareScalarUnorderedLessThan(left, right); /// - /// __m128 _mm_cmpunord_ps (__m128 a, __m128 b) - /// CMPPS xmm, xmm/m128, imm8(3) + /// int _mm_ucomile_ss (__m128 a, __m128 b) + /// UCOMISS xmm1, xmm2/m32 ; PF=0 && (ZF=1 || CF=1) + /// VUCOMISS xmm1, xmm2/m32 ; PF=0 && (ZF=1 || CF=1) + /// VUCOMISS xmm1, xmm2/m32{sae} ; PF=0 && (ZF=1 || CF=1) /// - public static Vector128 CompareUnordered(Vector128 left, Vector128 right) => CompareUnordered(left, right); - + public static bool CompareScalarUnorderedLessThanOrEqual(Vector128 left, Vector128 right) => CompareScalarUnorderedLessThanOrEqual(left, right); /// - /// __m128 _mm_cmpunord_ss (__m128 a, __m128 b) - /// CMPSS xmm, xmm/m32, imm8(3) + /// int _mm_ucomineq_ss (__m128 a, __m128 b) + /// UCOMISS xmm1, xmm2/m32 ; ZF=0 || PF=1 + /// VUCOMISS xmm1, xmm2/m32 ; ZF=0 || PF=1 + /// VUCOMISS xmm1, xmm2/m32{sae} ; ZF=0 || PF=1 /// - public static Vector128 CompareScalarUnordered(Vector128 left, Vector128 right) => CompareScalarUnordered(left, right); + public static bool CompareScalarUnorderedNotEqual(Vector128 left, Vector128 right) => CompareScalarUnorderedNotEqual(left, right); /// - /// int _mm_cvtss_si32 (__m128 a) - /// CVTSS2SI r32, xmm/m32 + /// __m128 _mm_cmpunord_ps (__m128 a, __m128 b) + /// CMPPS xmm1, xmm2/m128, imm8(3) + /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(3) /// - public static int ConvertToInt32(Vector128 value) => ConvertToInt32(value); + public static Vector128 CompareUnordered(Vector128 left, Vector128 right) => CompareUnordered(left, right); /// /// __m128 _mm_cvtsi32_ss (__m128 a, int b) - /// CVTSI2SS xmm, reg/m32 + /// CVTSI2SS xmm1, r/m32 + /// VCVTSI2SS xmm1, xmm2, r/m32 /// public static Vector128 ConvertScalarToVector128Single(Vector128 upper, int value) => ConvertScalarToVector128Single(upper, value); + /// + /// int _mm_cvtss_si32 (__m128 a) + /// CVTSS2SI r32, xmm1/m32 + /// VCVTSS2SI 
r32, xmm1/m32 + /// + public static int ConvertToInt32(Vector128 value) => ConvertToInt32(value); /// /// int _mm_cvttss_si32 (__m128 a) - /// CVTTSS2SI r32, xmm/m32 + /// CVTTSS2SI r32, xmm1/m32 + /// VCVTTSS2SI r32, xmm1/m32 /// public static int ConvertToInt32WithTruncation(Vector128 value) => ConvertToInt32WithTruncation(value); /// /// __m128 _mm_div_ps (__m128 a, __m128 b) - /// DIVPS xmm, xmm/m128 + /// DIVPS xmm1, xmm2/m128 + /// VDIVPS xmm1, xmm2, xmm3/m128 + /// VDIVPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Divide(Vector128 left, Vector128 right) => Divide(left, right); /// /// __m128 _mm_div_ss (__m128 a, __m128 b) - /// DIVSS xmm, xmm/m32 + /// DIVSS xmm1, xmm2/m32 + /// VDIVSS xmm1, xmm2, xmm3/m32 /// public static Vector128 DivideScalar(Vector128 left, Vector128 right) => DivideScalar(left, right); - /// - /// __m128 _mm_loadu_ps (float const* mem_address) - /// MOVUPS xmm, m128 - /// - public static unsafe Vector128 LoadVector128(float* address) => LoadVector128(address); - - /// - /// __m128 _mm_load_ss (float const* mem_address) - /// MOVSS xmm, m32 - /// - public static unsafe Vector128 LoadScalarVector128(float* address) => LoadScalarVector128(address); - /// /// __m128 _mm_load_ps (float const* mem_address) - /// MOVAPS xmm, m128 + /// MOVAPS xmm1, m128 + /// VMOVAPS xmm1, m128 + /// VMOVAPS xmm1 {k1}{z}, m128 /// public static unsafe Vector128 LoadAlignedVector128(float* address) => LoadAlignedVector128(address); - /// /// __m128 _mm_loadh_pi (__m128 a, __m64 const* mem_addr) - /// MOVHPS xmm, m64 + /// MOVHPS xmm1, m64 + /// VMOVHPS xmm1, xmm2, m64 /// public static unsafe Vector128 LoadHigh(Vector128 lower, float* address) => LoadHigh(lower, address); - /// /// __m128 _mm_loadl_pi (__m128 a, __m64 const* mem_addr) - /// MOVLPS xmm, m64 + /// MOVLPS xmm1, m64 + /// VMOVLPS xmm1, xmm2, m64 /// public static unsafe Vector128 LoadLow(Vector128 upper, float* address) => LoadLow(upper, address); + /// + /// __m128 _mm_load_ss (float const* mem_address) + /// MOVSS xmm1, m32 + /// VMOVSS xmm1, m32 + /// VMOVSS xmm1 {k1}, m32 + /// + public static unsafe Vector128 LoadScalarVector128(float* address) => LoadScalarVector128(address); + /// + /// __m128 _mm_loadu_ps (float const* mem_address) + /// MOVUPS xmm1, m128 + /// VMOVUPS xmm1, m128 + /// VMOVUPS xmm1 {k1}{z}, m128 + /// + public static unsafe Vector128 LoadVector128(float* address) => LoadVector128(address); /// /// __m128 _mm_max_ps (__m128 a, __m128 b) - /// MAXPS xmm, xmm/m128 + /// MAXPS xmm1, xmm2/m128 + /// VMAXPS xmm1, xmm2, xmm3/m128 + /// VMAXPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Max(Vector128 left, Vector128 right) => Max(left, right); /// /// __m128 _mm_max_ss (__m128 a, __m128 b) - /// MAXSS xmm, xmm/m32 + /// MAXSS xmm1, xmm2/m32 + /// VMAXSS xmm1, xmm2, xmm3/m32 /// public static Vector128 MaxScalar(Vector128 left, Vector128 right) => MaxScalar(left, right); /// /// __m128 _mm_min_ps (__m128 a, __m128 b) - /// MINPS xmm, xmm/m128 + /// MINPS xmm1, xmm2/m128 + /// VMINPS xmm1, xmm2, xmm3/m128 + /// VMINPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Min(Vector128 left, Vector128 right) => Min(left, right); /// /// __m128 _mm_min_ss (__m128 a, __m128 b) - /// MINSS xmm, xmm/m32 + /// MINSS xmm1, xmm2/m32 + /// VMINSS xmm1, xmm2, xmm3/m32 /// public static Vector128 MinScalar(Vector128 left, Vector128 right) => MinScalar(left, right);
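Since the ZF/PF/CF annotations on the COMISS/UCOMISS rows above encode how the branchy scalar compares behave around NaN, a small sketch may help sanity-check them (illustrative only, not part of the patch; assumes an SSE-capable machine):

    using System;
    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    internal static class ComissNanExample
    {
        internal static void Demo()
        {
            if (!Sse.IsSupported)
            {
                return;
            }

            Vector128<float> nan = Vector128.CreateScalar(float.NaN);
            Vector128<float> one = Vector128.CreateScalar(1.0f);

            // An unordered operand sets ZF=PF=CF=1, so "equal" (ZF=1 && PF=0)
            // is false while "not equal" (ZF=0 || PF=1) is true.
            bool eq = Sse.CompareScalarOrderedEqual(nan, one);    // false
            bool ne = Sse.CompareScalarOrderedNotEqual(nan, one); // true
            Console.WriteLine($"eq={eq} ne={ne}");
        }
    }
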
- /// - /// __m128 _mm_move_ss (__m128 a, __m128 b) - /// MOVSS xmm, xmm - /// - public static Vector128 MoveScalar(Vector128 upper, Vector128 value) => MoveScalar(upper, value); - /// /// __m128 _mm_movehl_ps (__m128 a, __m128 b) - /// MOVHLPS xmm, xmm + /// MOVHLPS xmm1, xmm2 + /// VMOVHLPS xmm1, xmm2, xmm3 /// public static Vector128 MoveHighToLow(Vector128 left, Vector128 right) => MoveHighToLow(left, right); - /// /// __m128 _mm_movelh_ps (__m128 a, __m128 b) - /// MOVLHPS xmm, xmm + /// MOVLHPS xmm1, xmm2 + /// VMOVLHPS xmm1, xmm2, xmm3 /// public static Vector128 MoveLowToHigh(Vector128 left, Vector128 right) => MoveLowToHigh(left, right); - /// /// int _mm_movemask_ps (__m128 a) - /// MOVMSKPS reg, xmm + /// MOVMSKPS r32, xmm1 + /// VMOVMSKPS r32, xmm1 /// public static int MoveMask(Vector128 value) => MoveMask(value); + /// + /// __m128 _mm_move_ss (__m128 a, __m128 b) + /// MOVSS xmm1, xmm2 + /// VMOVSS xmm1, xmm2, xmm3 + /// VMOVSS xmm1 {k1}{z}, xmm2, xmm3 + /// + public static Vector128 MoveScalar(Vector128 upper, Vector128 value) => MoveScalar(upper, value); /// /// __m128 _mm_mul_ps (__m128 a, __m128 b) - /// MULPS xmm, xmm/m128 + /// MULPS xmm1, xmm2/m128 + /// VMULPS xmm1, xmm2, xmm3/m128 + /// VMULPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Multiply(Vector128 left, Vector128 right) => Multiply(left, right); /// /// __m128 _mm_mul_ss (__m128 a, __m128 b) - /// MULPS xmm, xmm/m32 + /// MULSS xmm1, xmm2/m32 + /// VMULSS xmm1, xmm2, xmm3/m32 /// public static Vector128 MultiplyScalar(Vector128 left, Vector128 right) => MultiplyScalar(left, right); /// /// __m128 _mm_or_ps (__m128 a, __m128 b) - /// ORPS xmm, xmm/m128 + /// ORPS xmm1, xmm2/m128 + /// VORPS xmm1, xmm2, xmm3/m128 + /// VORPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Or(Vector128 left, Vector128 right) => Or(left, right); @@ -416,19 +466,16 @@ internal X64() { } /// PREFETCHT0 m8 /// public static unsafe void Prefetch0(void* address) => Prefetch0(address); - /// /// void _mm_prefetch(char* p, int i) /// PREFETCHT1 m8 /// public static unsafe void Prefetch1(void* address) => Prefetch1(address); - /// /// void _mm_prefetch(char* p, int i) /// PREFETCHT2 m8 /// public static unsafe void Prefetch2(void* address) => Prefetch2(address); - /// /// void _mm_prefetch(char* p, int i) /// PREFETCHNTA m8 @@ -437,136 +484,160 @@ internal X64() { } /// /// __m128 _mm_rcp_ps (__m128 a) - /// RCPPS xmm, xmm/m128 + /// RCPPS xmm1, xmm2/m128 + /// VRCPPS xmm1, xmm2/m128 /// public static Vector128 Reciprocal(Vector128 value) => Reciprocal(value); /// /// __m128 _mm_rcp_ss (__m128 a) - /// RCPSS xmm, xmm/m32 + /// RCPSS xmm1, xmm2/m32 + /// VRCPSS xmm1, xmm2, xmm3/m32 /// public static Vector128 ReciprocalScalar(Vector128 value) => ReciprocalScalar(value); - /// /// __m128 _mm_rcp_ss (__m128 a, __m128 b) - /// RCPSS xmm, xmm/m32 + /// RCPSS xmm1, xmm2/m32 + /// VRCPSS xmm1, xmm2, xmm3/m32 /// The above native signature does not exist. We provide this additional overload for consistency with the other scalar APIs. 
/// public static Vector128 ReciprocalScalar(Vector128 upper, Vector128 value) => ReciprocalScalar(upper, value); /// /// __m128 _mm_rsqrt_ps (__m128 a) - /// RSQRTPS xmm, xmm/m128 + /// RSQRTPS xmm1, xmm2/m128 + /// VRSQRTPS xmm1, xmm2/m128 /// public static Vector128 ReciprocalSqrt(Vector128 value) => ReciprocalSqrt(value); /// /// __m128 _mm_rsqrt_ss (__m128 a) - /// RSQRTSS xmm, xmm/m32 + /// RSQRTSS xmm1, xmm2/m32 + /// VRSQRTSS xmm1, xmm2, xmm3/m32 /// public static Vector128 ReciprocalSqrtScalar(Vector128 value) => ReciprocalSqrtScalar(value); - /// /// __m128 _mm_rsqrt_ss (__m128 a, __m128 b) - /// RSQRTSS xmm, xmm/m32 + /// RSQRTSS xmm1, xmm2/m32 + /// VRSQRTSS xmm1, xmm2, xmm3/m32 /// The above native signature does not exist. We provide this additional overload for consistency with the other scalar APIs. /// public static Vector128 ReciprocalSqrtScalar(Vector128 upper, Vector128 value) => ReciprocalSqrtScalar(upper, value); /// /// __m128 _mm_shuffle_ps (__m128 a, __m128 b, unsigned int control) - /// SHUFPS xmm, xmm/m128, imm8 + /// SHUFPS xmm1, xmm2/m128, imm8 + /// VSHUFPS xmm1, xmm2, xmm3/m128, imm8 + /// VSHUFPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst, imm8 /// public static Vector128 Shuffle(Vector128 left, Vector128 right, [ConstantExpected] byte control) => Shuffle(left, right, control); /// /// __m128 _mm_sqrt_ps (__m128 a) - /// SQRTPS xmm, xmm/m128 + /// SQRTPS xmm1, xmm2/m128 + /// VSQRTPS xmm1, xmm2/m128 + /// VSQRTPS xmm1 {k1}{z}, xmm2/m128/m32bcst /// public static Vector128 Sqrt(Vector128 value) => Sqrt(value); /// /// __m128 _mm_sqrt_ss (__m128 a) - /// SQRTSS xmm, xmm/m32 + /// SQRTSS xmm1, xmm2/m32 + /// VSQRTSS xmm1, xmm2, xmm3/m32 + /// VSQRTSS xmm1 {k1}{z}, xmm2, xmm3/m32{er} /// public static Vector128 SqrtScalar(Vector128 value) => SqrtScalar(value); - /// /// __m128 _mm_sqrt_ss (__m128 a, __m128 b) - /// SQRTSS xmm, xmm/m32 + /// SQRTSS xmm1, xmm2/m32 + /// VSQRTSS xmm1, xmm2, xmm3/m32 + /// VSQRTSS xmm1 {k1}{z}, xmm2, xmm3/m32{er} /// The above native signature does not exist. We provide this additional overload for consistency with the other scalar APIs. 
/// public static Vector128 SqrtScalar(Vector128 upper, Vector128 value) => SqrtScalar(upper, value); + /// + /// void _mm_storeu_ps (float* mem_addr, __m128 a) + /// MOVUPS m128, xmm1 + /// VMOVUPS m128, xmm1 + /// VMOVUPS m128 {k1}, xmm1 + /// + public static unsafe void Store(float* address, Vector128 source) => Store(address, source); /// /// void _mm_store_ps (float* mem_addr, __m128 a) - /// MOVAPS m128, xmm + /// MOVAPS m128, xmm1 + /// VMOVAPS m128, xmm1 + /// VMOVAPS m128 {k1}, xmm1 /// public static unsafe void StoreAligned(float* address, Vector128 source) => StoreAligned(address, source); - /// /// void _mm_stream_ps (float* mem_addr, __m128 a) - /// MOVNTPS m128, xmm + /// MOVNTPS m128, xmm1 + /// VMOVNTPS m128, xmm1 /// public static unsafe void StoreAlignedNonTemporal(float* address, Vector128 source) => StoreAlignedNonTemporal(address, source); - - /// - /// void _mm_storeu_ps (float* mem_addr, __m128 a) - /// MOVUPS m128, xmm - /// - public static unsafe void Store(float* address, Vector128 source) => Store(address, source); - /// /// void _mm_sfence(void) /// SFENCE /// public static void StoreFence() => StoreFence(); - - /// - /// void _mm_store_ss (float* mem_addr, __m128 a) - /// MOVSS m32, xmm - /// - public static unsafe void StoreScalar(float* address, Vector128 source) => StoreScalar(address, source); - /// /// void _mm_storeh_pi (__m64* mem_addr, __m128 a) - /// MOVHPS m64, xmm + /// MOVHPS m64, xmm1 + /// VMOVHPS m64, xmm1 /// public static unsafe void StoreHigh(float* address, Vector128 source) => StoreHigh(address, source); - /// /// void _mm_storel_pi (__m64* mem_addr, __m128 a) - /// MOVLPS m64, xmm + /// MOVLPS m64, xmm1 + /// VMOVLPS m64, xmm1 /// public static unsafe void StoreLow(float* address, Vector128 source) => StoreLow(address, source); + /// + /// void _mm_store_ss (float* mem_addr, __m128 a) + /// MOVSS m32, xmm1 + /// VMOVSS m32, xmm1 + /// VMOVSS m32 {k1}, xmm1 + /// + public static unsafe void StoreScalar(float* address, Vector128 source) => StoreScalar(address, source); /// /// __m128d _mm_sub_ps (__m128d a, __m128d b) - /// SUBPS xmm, xmm/m128 + /// SUBPS xmm1, xmm2/m128 + /// VSUBPS xmm1, xmm2, xmm3/m128 + /// VSUBPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Subtract(Vector128 left, Vector128 right) => Subtract(left, right); /// /// __m128 _mm_sub_ss (__m128 a, __m128 b) - /// SUBSS xmm, xmm/m32 + /// SUBSS xmm1, xmm2/m32 + /// VSUBSS xmm1, xmm2, xmm3/m32 + /// VSUBSS xmm1 {k1}{z}, xmm2, xmm3/m32{er} /// public static Vector128 SubtractScalar(Vector128 left, Vector128 right) => SubtractScalar(left, right); /// /// __m128 _mm_unpackhi_ps (__m128 a, __m128 b) - /// UNPCKHPS xmm, xmm/m128 + /// UNPCKHPS xmm1, xmm2/m128 + /// VUNPCKHPS xmm1, xmm2, xmm3/m128 + /// VUNPCKHPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 UnpackHigh(Vector128 left, Vector128 right) => UnpackHigh(left, right); /// /// __m128 _mm_unpacklo_ps (__m128 a, __m128 b) - /// UNPCKLPS xmm, xmm/m128 + /// UNPCKLPS xmm1, xmm2/m128 + /// VUNPCKLPS xmm1, xmm2, xmm3/m128 + /// VUNPCKLPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 UnpackLow(Vector128 left, Vector128 right) => UnpackLow(left, right); /// /// __m128 _mm_xor_ps (__m128 a, __m128 b) - /// XORPS xmm, xmm/m128 + /// XORPS xmm1, xmm2/m128 + /// VXORPS xmm1, xmm2, xmm3/m128 + /// VXORPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Xor(Vector128 left, Vector128 right) => Xor(left, right); } diff --git
a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse2.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse2.PlatformNotSupported.cs index 3b6ae16a2d1af7..b19e56740ca2a0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse2.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse2.PlatformNotSupported.cs @@ -25,52 +25,55 @@ internal X64() { } public static new bool IsSupported { [Intrinsic] get { return false; } } /// - /// __int64 _mm_cvtsd_si64 (__m128d a) - /// CVTSD2SI r64, xmm/m64 + /// __m128d _mm_cvtsi64_sd (__m128d a, __int64 b) + /// CVTSI2SD xmm1, r/m64 + /// VCVTSI2SD xmm1, xmm2, r/m64 /// This intrinsic is only available on 64-bit processes /// - public static long ConvertToInt64(Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 ConvertScalarToVector128Double(Vector128 upper, long value) { throw new PlatformNotSupportedException(); } /// - /// __int64 _mm_cvtsi128_si64 (__m128i a) - /// MOVQ reg/m64, xmm + /// __m128i _mm_cvtsi64_si128 (__int64 a) + /// MOVQ xmm1, r/m64 + /// VMOVQ xmm1, r/m64 /// This intrinsic is only available on 64-bit processes /// - public static long ConvertToInt64(Vector128 value) { throw new PlatformNotSupportedException(); } - + public static Vector128 ConvertScalarToVector128Int64(long value) { throw new PlatformNotSupportedException(); } /// - /// __int64 _mm_cvtsi128_si64 (__m128i a) - /// MOVQ reg/m64, xmm + /// __m128i _mm_cvtsi64_si128 (__int64 a) + /// MOVQ xmm1, r/m64 + /// VMOVQ xmm1, r/m64 /// This intrinsic is only available on 64-bit processes /// - public static ulong ConvertToUInt64(Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 ConvertScalarToVector128UInt64(ulong value) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_cvtsi64_sd (__m128d a, __int64 b) - /// CVTSI2SD xmm, reg/m64 + /// __int64 _mm_cvtsi128_si64 (__m128i a) + /// MOVQ r/m64, xmm1 + /// VMOVQ r/m64, xmm1 /// This intrinsic is only available on 64-bit processes /// - public static Vector128 ConvertScalarToVector128Double(Vector128 upper, long value) { throw new PlatformNotSupportedException(); } - + public static long ConvertToInt64(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128i _mm_cvtsi64_si128 (__int64 a) - /// MOVQ xmm, reg/m64 + /// __int64 _mm_cvtsd_si64 (__m128d a) + /// CVTSD2SI r64, xmm1/m64 + /// VCVTSD2SI r64, xmm1/m64 /// This intrinsic is only available on 64-bit processes /// - public static Vector128 ConvertScalarToVector128Int64(long value) { throw new PlatformNotSupportedException(); } - + public static long ConvertToInt64(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128i _mm_cvtsi64_si128 (__int64 a) - /// MOVQ xmm, reg/m64 + /// __int64 _mm_cvttsd_si64 (__m128d a) + /// CVTTSD2SI r64, xmm1/m64 + /// VCVTTSD2SI r64, xmm1/m64 /// This intrinsic is only available on 64-bit processes /// - public static Vector128 ConvertScalarToVector128UInt64(ulong value) { throw new PlatformNotSupportedException(); } - + public static long ConvertToInt64WithTruncation(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __int64 _mm_cvttsd_si64 (__m128d a) - /// CVTTSD2SI reg, xmm/m64 + /// __int64 _mm_cvtsi128_si64 (__m128i a) + /// MOVQ r/m64, xmm1 + /// VMOVQ r/m64, xmm1 /// This intrinsic is only available on 64-bit processes /// - 
public static long ConvertToInt64WithTruncation(Vector128 value) { throw new PlatformNotSupportedException(); } + public static ulong ConvertToUInt64(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// void _mm_stream_si64(__int64 *p, __int64 a) @@ -88,635 +91,709 @@ internal X64() { } /// /// __m128i _mm_add_epi8 (__m128i a, __m128i b) - /// PADDB xmm, xmm/m128 + /// PADDB xmm1, xmm2/m128 + /// VPADDB xmm1, xmm2, xmm3/m128 + /// VPADDB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Add(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_add_epi8 (__m128i a, __m128i b) - /// PADDB xmm, xmm/m128 + /// PADDB xmm1, xmm2/m128 + /// VPADDB xmm1, xmm2, xmm3/m128 + /// VPADDB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Add(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_add_epi16 (__m128i a, __m128i b) - /// PADDW xmm, xmm/m128 + /// PADDW xmm1, xmm2/m128 + /// VPADDW xmm1, xmm2, xmm3/m128 + /// VPADDW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Add(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_add_epi16 (__m128i a, __m128i b) - /// PADDW xmm, xmm/m128 + /// PADDW xmm1, xmm2/m128 + /// VPADDW xmm1, xmm2, xmm3/m128 + /// VPADDW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Add(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_add_epi32 (__m128i a, __m128i b) - /// PADDD xmm, xmm/m128 + /// PADDD xmm1, xmm2/m128 + /// VPADDD xmm1, xmm2, xmm3/m128 + /// VPADDD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Add(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_add_epi32 (__m128i a, __m128i b) - /// PADDD xmm, xmm/m128 + /// PADDD xmm1, xmm2/m128 + /// VPADDD xmm1, xmm2, xmm3/m128 + /// VPADDD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Add(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_add_epi64 (__m128i a, __m128i b) - /// PADDQ xmm, xmm/m128 + /// PADDQ xmm1, xmm2/m128 + /// VPADDQ xmm1, xmm2, xmm3/m128 + /// VPADDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Add(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_add_epi64 (__m128i a, __m128i b) - /// PADDQ xmm, xmm/m128 + /// PADDQ xmm1, xmm2/m128 + /// VPADDQ xmm1, xmm2, xmm3/m128 + /// VPADDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Add(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_add_pd (__m128d a, __m128d b) - /// ADDPD xmm, xmm/m128 + /// ADDPD xmm1, xmm2/m128 + /// VADDPD xmm1, xmm2, xmm3/m128 + /// VADDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Add(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_add_sd (__m128d a, __m128d b) - /// ADDSD xmm, xmm/m64 + /// ADDSD xmm1, xmm2/m64 + /// VADDSD xmm1, xmm2, xmm3/m64 + /// VADDSD xmm1 {k1}{z}, xmm2, xmm3/m64{er} /// public static Vector128 AddScalar(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_adds_epi8 (__m128i a, __m128i b) - /// PADDSB xmm, xmm/m128 + /// PADDSB xmm1, xmm2/m128 + /// VPADDSB xmm1, xmm2, xmm3/m128 + /// VPADDSB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 AddSaturate(Vector128 
left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_adds_epu8 (__m128i a, __m128i b) - /// PADDUSB xmm, xmm/m128 + /// PADDUSB xmm1, xmm2/m128 + /// VPADDUSB xmm1, xmm2, xmm3/m128 + /// VPADDUSB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 AddSaturate(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_adds_epi16 (__m128i a, __m128i b) - /// PADDSW xmm, xmm/m128 + /// PADDSW xmm1, xmm2/m128 + /// VPADDSW xmm1, xmm2, xmm3/m128 + /// VPADDSW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 AddSaturate(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_adds_epu16 (__m128i a, __m128i b) - /// PADDUSW xmm, xmm/m128 + /// PADDUSW xmm1, xmm2/m128 + /// VPADDUSW xmm1, xmm2, xmm3/m128 + /// VPADDUSW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 AddSaturate(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_and_si128 (__m128i a, __m128i b) - /// PAND xmm, xmm/m128 + /// PAND xmm1, xmm2/m128 + /// VPAND xmm1, xmm2, xmm3/m128 /// public static Vector128 And(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_and_si128 (__m128i a, __m128i b) - /// PAND xmm, xmm/m128 + /// PAND xmm1, xmm2/m128 + /// VPAND xmm1, xmm2, xmm3/m128 /// public static Vector128 And(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_and_si128 (__m128i a, __m128i b) - /// PAND xmm, xmm/m128 + /// PAND xmm1, xmm2/m128 + /// VPAND xmm1, xmm2, xmm3/m128 /// public static Vector128 And(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_and_si128 (__m128i a, __m128i b) - /// PAND xmm, xmm/m128 + /// PAND xmm1, xmm2/m128 + /// VPAND xmm1, xmm2, xmm3/m128 /// public static Vector128 And(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_and_si128 (__m128i a, __m128i b) - /// PAND xmm, xmm/m128 + /// PAND xmm1, xmm2/m128 + /// VPAND xmm1, xmm2, xmm3/m128 + /// VPANDD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 And(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_and_si128 (__m128i a, __m128i b) - /// PAND xmm, xmm/m128 + /// PAND xmm1, xmm2/m128 + /// VPAND xmm1, xmm2, xmm3/m128 + /// VPANDD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 And(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_and_si128 (__m128i a, __m128i b) - /// PAND xmm, xmm/m128 + /// PAND xmm1, xmm2/m128 + /// VPAND xmm1, xmm2, xmm3/m128 + /// VPANDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 And(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_and_si128 (__m128i a, __m128i b) - /// PAND xmm, xmm/m128 + /// PAND xmm1, xmm2/m128 + /// VPAND xmm1, xmm2, xmm3/m128 + /// VPANDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 And(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_and_pd (__m128d a, __m128d b) - /// ANDPD xmm, xmm/m128 + /// ANDPD xmm1, xmm2/m128 + /// VANDPD xmm1, xmm2, xmm3/m128 + /// VANDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 And(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_andnot_si128 
(__m128i a, __m128i b) - /// PANDN xmm, xmm/m128 + /// PANDN xmm1, xmm2/m128 + /// VPANDN xmm1, xmm2, xmm3/m128 /// public static Vector128 AndNot(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_andnot_si128 (__m128i a, __m128i b) - /// PANDN xmm, xmm/m128 + /// PANDN xmm1, xmm2/m128 + /// VPANDN xmm1, xmm2, xmm3/m128 /// public static Vector128 AndNot(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_andnot_si128 (__m128i a, __m128i b) - /// PANDN xmm, xmm/m128 + /// PANDN xmm1, xmm2/m128 + /// VPANDN xmm1, xmm2, xmm3/m128 /// public static Vector128 AndNot(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_andnot_si128 (__m128i a, __m128i b) - /// PANDN xmm, xmm/m128 + /// PANDN xmm1, xmm2/m128 + /// VPANDN xmm1, xmm2, xmm3/m128 /// public static Vector128 AndNot(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_andnot_si128 (__m128i a, __m128i b) - /// PANDN xmm, xmm/m128 + /// PANDN xmm1, xmm2/m128 + /// VPANDN xmm1, xmm2, xmm3/m128 + /// VPANDND xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 AndNot(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_andnot_si128 (__m128i a, __m128i b) - /// PANDN xmm, xmm/m128 + /// PANDN xmm1, xmm2/m128 + /// VPANDN xmm1, xmm2, xmm3/m128 + /// VPANDND xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 AndNot(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_andnot_si128 (__m128i a, __m128i b) - /// PANDN xmm, xmm/m128 + /// PANDN xmm1, xmm2/m128 + /// VPANDN xmm1, xmm2, xmm3/m128 + /// VPANDNQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 AndNot(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_andnot_si128 (__m128i a, __m128i b) - /// PANDN xmm, xmm/m128 + /// PANDN xmm1, xmm2/m128 + /// VPANDN xmm1, xmm2, xmm3/m128 + /// VPANDNQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 AndNot(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_andnot_pd (__m128d a, __m128d b) - /// ADDNPD xmm, xmm/m128 + /// ANDNPD xmm1, xmm2/m128 + /// VANDNPD xmm1, xmm2, xmm3/m128 + /// VANDNPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 AndNot(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_avg_epu8 (__m128i a, __m128i b) - /// PAVGB xmm, xmm/m128 + /// PAVGB xmm1, xmm2/m128 + /// VPAVGB xmm1, xmm2, xmm3/m128 + /// VPAVGB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Average(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_avg_epu16 (__m128i a, __m128i b) - /// PAVGW xmm, xmm/m128 + /// PAVGW xmm1, xmm2/m128 + /// VPAVGW xmm1, xmm2, xmm3/m128 + /// VPAVGW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Average(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cmpeq_epi8 (__m128i a, __m128i b) - /// PCMPEQB xmm, xmm/m128 + /// PCMPEQB xmm1, xmm2/m128 + /// VPCMPEQB xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cmpeq_epi8 (__m128i a, __m128i b) - /// PCMPEQB xmm, xmm/m128 + /// PCMPEQB xmm1, xmm2/m128 + 
/// VPCMPEQB xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cmpeq_epi16 (__m128i a, __m128i b) - /// PCMPEQW xmm, xmm/m128 + /// PCMPEQW xmm1, xmm2/m128 + /// VPCMPEQW xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cmpeq_epi16 (__m128i a, __m128i b) - /// PCMPEQW xmm, xmm/m128 + /// PCMPEQW xmm1, xmm2/m128 + /// VPCMPEQW xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cmpeq_epi32 (__m128i a, __m128i b) - /// PCMPEQD xmm, xmm/m128 + /// PCMPEQD xmm1, xmm2/m128 + /// VPCMPEQD xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cmpeq_epi32 (__m128i a, __m128i b) - /// PCMPEQD xmm, xmm/m128 + /// PCMPEQD xmm1, xmm2/m128 + /// VPCMPEQD xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_cmpeq_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(0) + /// CMPPD xmm1, xmm2/m128, imm8(0) + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(0) /// public static Vector128 CompareEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// - /// int _mm_comieq_sd (__m128d a, __m128d b) - /// COMISD xmm, xmm/m64 - /// - public static bool CompareScalarOrderedEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - - /// - /// int _mm_ucomieq_sd (__m128d a, __m128d b) - /// UCOMISD xmm, xmm/m64 - /// - public static bool CompareScalarUnorderedEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - - /// - /// __m128d _mm_cmpeq_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(0) - /// - public static Vector128 CompareScalarEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_cmpgt_epi8 (__m128i a, __m128i b) - /// PCMPGTB xmm, xmm/m128 + /// PCMPGTB xmm1, xmm2/m128 + /// VPCMPGTB xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cmpgt_epi16 (__m128i a, __m128i b) - /// PCMPGTW xmm, xmm/m128 + /// PCMPGTW xmm1, xmm2/m128 + /// VPCMPGTW xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cmpgt_epi32 (__m128i a, __m128i b) - /// PCMPGTD xmm, xmm/m128 + /// PCMPGTD xmm1, xmm2/m128 + /// VPCMPGTD xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_cmpgt_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(1) with swapped operands + /// CMPPD xmm1, xmm2/m128, imm8(1) ; with swapped operands + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(1) ; with swapped operands /// public static Vector128 CompareGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// - /// int _mm_comigt_sd (__m128d a, __m128d b) - /// COMISD xmm, xmm/m64 - /// - public static bool CompareScalarOrderedGreaterThan(Vector128 left, Vector128 right) { 
throw new PlatformNotSupportedException(); } - - /// - /// int _mm_ucomigt_sd (__m128d a, __m128d b) - /// UCOMISD xmm, xmm/m64 - /// - public static bool CompareScalarUnorderedGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - - /// - /// __m128d _mm_cmpgt_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(1) with swapped operands - /// - public static Vector128 CompareScalarGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// /// __m128d _mm_cmpge_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(2) with swapped operands + /// CMPPD xmm1, xmm2/m128, imm8(2) ; with swapped operands + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(2) ; with swapped operands /// public static Vector128 CompareGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// - /// int _mm_comige_sd (__m128d a, __m128d b) - /// COMISD xmm, xmm/m64 - /// - public static bool CompareScalarOrderedGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - - /// - /// int _mm_ucomige_sd (__m128d a, __m128d b) - /// UCOMISD xmm, xmm/m64 - /// - public static bool CompareScalarUnorderedGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - - /// - /// __m128d _mm_cmpge_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(2) with swapped operands - /// - public static Vector128 CompareScalarGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_cmplt_epi8 (__m128i a, __m128i b) - /// PCMPGTB xmm, xmm/m128 + /// PCMPGTB xmm1, xmm2/m128 ; with swapped operands + /// VPCMPGTB xmm1, xmm2, xmm3/m128 ; with swapped operands /// public static Vector128 CompareLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cmplt_epi16 (__m128i a, __m128i b) - /// PCMPGTW xmm, xmm/m128 + /// PCMPGTW xmm1, xmm2/m128 ; with swapped operands + /// VPCMPGTW xmm1, xmm2, xmm3/m128 ; with swapped operands /// public static Vector128 CompareLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cmplt_epi32 (__m128i a, __m128i b) - /// PCMPGTD xmm, xmm/m128 + /// PCMPGTD xmm1, xmm2/m128 ; with swapped operands + /// VPCMPGTD xmm1, xmm2, xmm3/m128 ; with swapped operands /// public static Vector128 CompareLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_cmplt_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(1) + /// CMPPD xmm1, xmm2/m128, imm8(1) + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(1) /// public static Vector128 CompareLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// int _mm_comilt_sd (__m128d a, __m128d b) - /// COMISD xmm, xmm/m64 + /// __m128d _mm_cmple_pd (__m128d a, __m128d b) + /// CMPPD xmm1, xmm2/m128, imm8(2) + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(2) /// - public static bool CompareScalarOrderedLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareLessThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// int _mm_ucomilt_sd (__m128d a, __m128d b) - /// UCOMISD xmm, xmm/m64 + /// __m128d _mm_cmpneq_pd (__m128d a, __m128d b) + /// CMPPD xmm1, xmm2/m128, imm8(4) + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(4) /// 
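// Illustrative sketch (not from the patch): the "; with swapped operands" notes
// above mean the JIT emits the listed instruction with its operands reversed.
// SSE2 has no packed less-than, so CompareLessThan(a, b) is PCMPGTW applied to
// (b, a). Class name and values are hypothetical; assumes Sse2.IsSupported.
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

static class SwappedOperandsSketch
{
    static void Main()
    {
        Vector128<short> a = Vector128.Create((short)1);
        Vector128<short> b = Vector128.Create((short)2);
        // Same result as Sse2.CompareGreaterThan(b, a): each element is all
        // ones (-1) where a < b, zero otherwise.
        Vector128<short> lt = Sse2.CompareLessThan(a, b);
    }
}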
- public static bool CompareScalarUnorderedLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareNotEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_cmplt_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(1) + /// __m128d _mm_cmpngt_pd (__m128d a, __m128d b) + /// CMPPD xmm1, xmm2/m128, imm8(5) ; with swapped operands + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(5) ; with swapped operands /// - public static Vector128 CompareScalarLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareNotGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_cmple_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(2) + /// __m128d _mm_cmpnge_pd (__m128d a, __m128d b) + /// CMPPD xmm1, xmm2/m128, imm8(6) ; with swapped operands + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(6) ; with swapped operands /// - public static Vector128 CompareLessThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareNotGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// int _mm_comile_sd (__m128d a, __m128d b) - /// COMISD xmm, xmm/m64 + /// __m128d _mm_cmpnlt_pd (__m128d a, __m128d b) + /// CMPPD xmm1, xmm2/m128, imm8(5) + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(5) /// - public static bool CompareScalarOrderedLessThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareNotLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// int _mm_ucomile_sd (__m128d a, __m128d b) - /// UCOMISD xmm, xmm/m64 + /// __m128d _mm_cmpnle_pd (__m128d a, __m128d b) + /// CMPPD xmm1, xmm2/m128, imm8(6) + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(6) /// - public static bool CompareScalarUnorderedLessThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareNotLessThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_cmple_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(2) + /// __m128d _mm_cmpord_pd (__m128d a, __m128d b) + /// CMPPD xmm1, xmm2/m128, imm8(7) + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(7) /// - public static Vector128 CompareScalarLessThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + public static Vector128 CompareOrdered(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_cmpneq_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(4) + /// __m128d _mm_cmpeq_sd (__m128d a, __m128d b) + /// CMPSD xmm1, xmm2/m64, imm8(0) + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(0) /// - public static Vector128 CompareNotEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareScalarEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// int _mm_comineq_sd (__m128d a, __m128d b) - /// COMISD xmm, xmm/m64 + /// __m128d _mm_cmpgt_sd (__m128d a, __m128d b) + /// CMPSD xmm1, xmm2/m64, imm8(1) ; with swapped operands + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(1) ; with swapped operands /// - public static bool
CompareScalarOrderedNotEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareScalarGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// int _mm_ucomineq_sd (__m128d a, __m128d b) - /// UCOMISD xmm, xmm/m64 + /// __m128d _mm_cmpge_sd (__m128d a, __m128d b) + /// CMPSD xmm1, xmm2/m64, imm8(2) ; with swapped operands + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(2) ; with swapped operands /// - public static bool CompareScalarUnorderedNotEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareScalarGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_cmpneq_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(4) + /// __m128d _mm_cmplt_sd (__m128d a, __m128d b) + /// CMPSD xmm1, xmm2/m64, imm8(1) + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(1) /// - public static Vector128 CompareScalarNotEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareScalarLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_cmpngt_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(5) with swapped operands + /// __m128d _mm_cmple_sd (__m128d a, __m128d b) + /// CMPSD xmm1, xmm2/m64, imm8(2) + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(2) /// - public static Vector128 CompareNotGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareScalarLessThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_cmpngt_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(5) with swapped operands + /// __m128d _mm_cmpneq_sd (__m128d a, __m128d b) + /// CMPSD xmm1, xmm2/m64, imm8(4) + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(4) /// - public static Vector128 CompareScalarNotGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareScalarNotEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_cmpnge_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(6) with swapped operands + /// __m128d _mm_cmpngt_sd (__m128d a, __m128d b) + /// CMPSD xmm1, xmm2/m64, imm8(5) ; with swapped operands + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(5) ; with swapped operands /// - public static Vector128 CompareNotGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 CompareScalarNotGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_cmpnge_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(6) with swapped operands + /// CMPSD xmm1, xmm2/m64, imm8(6) ; with swapped operands + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(6) ; with swapped operands /// public static Vector128 CompareScalarNotGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - - /// - /// __m128d _mm_cmpnlt_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(5) - /// - public static Vector128 CompareNotLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// /// __m128d _mm_cmpnlt_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(5) + /// CMPSD xmm1, 
xmm2/m64, imm8(5) + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(5) /// public static Vector128 CompareScalarNotLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - - /// - /// __m128d _mm_cmpnle_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(6) - /// - public static Vector128 CompareNotLessThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// /// __m128d _mm_cmpnle_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(6) + /// CMPSD xmm1, xmm2/m64, imm8(6) + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(6) /// public static Vector128 CompareScalarNotLessThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// - /// __m128d _mm_cmpord_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(7) - /// - public static Vector128 CompareOrdered(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// /// __m128d _mm_cmpord_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(7) + /// CMPSD xmm1, xmm2/m64, imm8(7) + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(7) /// public static Vector128 CompareScalarOrdered(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// - /// __m128d _mm_cmpunord_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(3) + /// int _mm_comieq_sd (__m128d a, __m128d b) + /// COMISD xmm1, xmm2/m64 ; ZF=1 && PF=0 + /// VCOMISD xmm1, xmm2/m64 ; ZF=1 && PF=0 + /// VCOMISD xmm1, xmm2/m64{sae} ; ZF=1 && PF=0 /// - public static Vector128 CompareUnordered(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static bool CompareScalarOrderedEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_cmpunord_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(3) + /// int _mm_comigt_sd (__m128d a, __m128d b) + /// COMISD xmm1, xmm2/m64 ; ZF=0 && CF=0 + /// VCOMISD xmm1, xmm2/m64 ; ZF=0 && CF=0 + /// VCOMISD xmm1, xmm2/m64{sae} ; ZF=0 && CF=0 /// - public static Vector128 CompareScalarUnordered(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static bool CompareScalarOrderedGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128i _mm_cvtps_epi32 (__m128 a) - /// CVTPS2DQ xmm, xmm/m128 + /// int _mm_comige_sd (__m128d a, __m128d b) + /// COMISD xmm1, xmm2/m64 ; CF=0 + /// VCOMISD xmm1, xmm2/m64 ; CF=0 + /// VCOMISD xmm1, xmm2/m64{sae} ; CF=0 /// - public static Vector128 ConvertToVector128Int32(Vector128 value) { throw new PlatformNotSupportedException(); } + public static bool CompareScalarOrderedGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128i _mm_cvtpd_epi32 (__m128d a) - /// CVTPD2DQ xmm, xmm/m128 + /// int _mm_comilt_sd (__m128d a, __m128d b) + /// COMISD xmm1, xmm2/m64 ; PF=0 && CF=1 + /// VCOMISD xmm1, xmm2/m64 ; PF=0 && CF=1 + /// VCOMISD xmm1, xmm2/m64{sae} ; PF=0 && CF=1 /// - public static Vector128 ConvertToVector128Int32(Vector128 value) { throw new PlatformNotSupportedException(); } + public static bool CompareScalarOrderedLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cvtepi32_ps (__m128i a) - /// CVTDQ2PS xmm, xmm/m128 + /// int _mm_comile_sd (__m128d a, __m128d b) + /// COMISD xmm1, xmm2/m64 ; PF=0 && (ZF=1 || CF=1) + /// VCOMISD xmm1, xmm2/m64 ; PF=0 && (ZF=1 || CF=1) + 
/// VCOMISD xmm1, xmm2/m64{sae} ; PF=0 && (ZF=1 || CF=1) /// - public static Vector128 ConvertToVector128Single(Vector128 value) { throw new PlatformNotSupportedException(); } + public static bool CompareScalarOrderedLessThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_cvtpd_ps (__m128d a) - /// CVTPD2PS xmm, xmm/m128 + /// int _mm_comineq_sd (__m128d a, __m128d b) + /// COMISD xmm1, xmm2/m64 ; ZF=0 || PF=1 + /// VCOMISD xmm1, xmm2/m64 ; ZF=0 || PF=1 + /// VCOMISD xmm1, xmm2/m64{sae} ; ZF=0 || PF=1 /// - public static Vector128 ConvertToVector128Single(Vector128 value) { throw new PlatformNotSupportedException(); } + public static bool CompareScalarOrderedNotEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + /// - /// __m128d _mm_cvtepi32_pd (__m128i a) - /// CVTDQ2PD xmm, xmm/m128 + /// __m128d _mm_cmpunord_sd (__m128d a, __m128d b) + /// CMPSD xmm1, xmm2/m64, imm8(3) + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(3) /// - public static Vector128 ConvertToVector128Double(Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 CompareScalarUnordered(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_cvtps_pd (__m128 a) - /// CVTPS2PD xmm, xmm/m128 + /// int _mm_ucomieq_sd (__m128d a, __m128d b) + /// UCOMISD xmm1, xmm2/m64 ; ZF=1 && PF=0 + /// VUCOMISD xmm1, xmm2/m64 ; ZF=1 && PF=0 + /// VUCOMISD xmm1, xmm2/m64{sae} ; ZF=1 && PF=0 /// - public static Vector128 ConvertToVector128Double(Vector128 value) { throw new PlatformNotSupportedException(); } - + public static bool CompareScalarUnorderedEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// int _mm_cvtsd_si32 (__m128d a) - /// CVTSD2SI r32, xmm/m64 + /// int _mm_ucomigt_sd (__m128d a, __m128d b) + /// UCOMISD xmm1, xmm2/m64 ; ZF=0 && CF=0 + /// VUCOMISD xmm1, xmm2/m64 ; ZF=0 && CF=0 + /// VUCOMISD xmm1, xmm2/m64{sae} ; ZF=0 && CF=0 /// - public static int ConvertToInt32(Vector128 value) { throw new PlatformNotSupportedException(); } + public static bool CompareScalarUnorderedGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// int _mm_cvtsi128_si32 (__m128i a) - /// MOVD reg/m32, xmm + /// int _mm_ucomige_sd (__m128d a, __m128d b) + /// UCOMISD xmm1, xmm2/m64 ; CF=0 + /// VUCOMISD xmm1, xmm2/m64 ; CF=0 + /// VUCOMISD xmm1, xmm2/m64{sae} ; CF=0 /// - public static int ConvertToInt32(Vector128 value) { throw new PlatformNotSupportedException(); } + public static bool CompareScalarUnorderedGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// int _mm_cvtsi128_si32 (__m128i a) - /// MOVD reg/m32, xmm + /// int _mm_ucomilt_sd (__m128d a, __m128d b) + /// UCOMISD xmm1, xmm2/m64 ; PF=0 && CF=1 + /// VUCOMISD xmm1, xmm2/m64 ; PF=0 && CF=1 + /// VUCOMISD xmm1, xmm2/m64{sae} ; PF=0 && CF=1 /// - public static uint ConvertToUInt32(Vector128 value) { throw new PlatformNotSupportedException(); } + public static bool CompareScalarUnorderedLessThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + /// + /// int _mm_ucomile_sd (__m128d a, __m128d b) + /// UCOMISD xmm1, xmm2/m64 ; PF=0 && (ZF=1 || CF=1) + /// VUCOMISD xmm1, xmm2/m64 ; PF=0 && (ZF=1 || CF=1) + /// VUCOMISD xmm1, xmm2/m64{sae} ; PF=0 && (ZF=1 || CF=1) + /// + public static bool CompareScalarUnorderedLessThanOrEqual(Vector128 left, 
Vector128 right) { throw new PlatformNotSupportedException(); } + /// + /// int _mm_ucomineq_sd (__m128d a, __m128d b) + /// UCOMISD xmm1, xmm2/m64 ; ZF=0 || PF=1 + /// VUCOMISD xmm1, xmm2/m64 ; ZF=0 || PF=1 + /// VUCOMISD xmm1, xmm2/m64{sae} ; ZF=0 || PF=1 + /// + public static bool CompareScalarUnorderedNotEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + /// + /// __m128d _mm_cmpunord_pd (__m128d a, __m128d b) + /// CMPPD xmm1, xmm2/m128, imm8(3) + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(3) + /// + public static Vector128 CompareUnordered(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_cvtsi32_sd (__m128d a, int b) - /// CVTSI2SD xmm, reg/m32 + /// CVTSI2SD xmm1, r/m32 + /// VCVTSI2SD xmm1, xmm2, r/m32 /// public static Vector128 ConvertScalarToVector128Double(Vector128 upper, int value) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_cvtss_sd (__m128d a, __m128 b) - /// CVTSS2SD xmm, xmm/m32 + /// CVTSS2SD xmm1, xmm2/m32 + /// VCVTSS2SD xmm1, xmm2, xmm3/m32 /// public static Vector128 ConvertScalarToVector128Double(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cvtsi32_si128 (int a) - /// MOVD xmm, reg/m32 + /// MOVD xmm1, r/m32 + /// VMOVD xmm1, r/m32 /// public static Vector128 ConvertScalarToVector128Int32(int value) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_cvtsd_ss (__m128 a, __m128d b) - /// CVTSD2SS xmm, xmm/m64 + /// CVTSD2SS xmm1, xmm2/m64 + /// VCVTSD2SS xmm1, xmm2, xmm3/m64 /// public static Vector128 ConvertScalarToVector128Single(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cvtsi32_si128 (int a) - /// MOVD xmm, reg/m32 + /// MOVD xmm1, r/m32 + /// VMOVD xmm1, r/m32 /// public static Vector128 ConvertScalarToVector128UInt32(uint value) { throw new PlatformNotSupportedException(); } /// - /// __m128i _mm_cvttps_epi32 (__m128 a) - /// CVTTPS2DQ xmm, xmm/m128 + /// __m128d _mm_cvtepi32_pd (__m128i a) + /// CVTDQ2PD xmm1, xmm2/m64 + /// VCVTDQ2PD xmm1, xmm2/m64 + /// VCVTDQ2PD xmm1 {k1}{z}, xmm2/m64/m32bcst /// - public static Vector128 ConvertToVector128Int32WithTruncation(Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 ConvertToVector128Double(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128i _mm_cvttpd_epi32 (__m128d a) - /// CVTTPD2DQ xmm, xmm/m128 + /// __m128d _mm_cvtps_pd (__m128 a) + /// CVTPS2PD xmm1, xmm2/m64 + /// VCVTPS2PD xmm1, xmm2/m64 + /// VCVTPS2PD xmm1 {k1}{z}, xmm2/m64/m32bcst /// - public static Vector128 ConvertToVector128Int32WithTruncation(Vector128 value) { throw new PlatformNotSupportedException(); } - + public static Vector128 ConvertToVector128Double(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// int _mm_cvttsd_si32 (__m128d a) - /// CVTTSD2SI reg, xmm/m64 + /// __m128i _mm_cvtps_epi32 (__m128 a) + /// CVTPS2DQ xmm1, xmm2/m128 + /// VCVTPS2DQ xmm1, xmm2/m128 + /// VCVTPS2DQ xmm1 {k1}{z}, xmm2/m128/m32bcst /// - public static int ConvertToInt32WithTruncation(Vector128 value) { throw new PlatformNotSupportedException(); } - + public static Vector128 ConvertToVector128Int32(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_div_pd (__m128d a, __m128d b) - /// DIVPD xmm, xmm/m128 + /// __m128i _mm_cvtpd_epi32 (__m128d a) + /// CVTPD2DQ xmm1, xmm2/m128 + /// VCVTPD2DQ xmm1, xmm2/m128 
+ /// VCVTPD2DQ xmm1 {k1}{z}, xmm2/m128/m64bcst /// - public static Vector128 Divide(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 ConvertToVector128Int32(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_div_sd (__m128d a, __m128d b) - /// DIVSD xmm, xmm/m64 + /// __m128i _mm_cvttps_epi32 (__m128 a) + /// CVTTPS2DQ xmm1, xmm2/m128 + /// VCVTTPS2DQ xmm1, xmm2/m128 + /// VCVTTPS2DQ xmm1 {k1}{z}, xmm2/m128/m32bcst /// - public static Vector128 DivideScalar(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + public static Vector128 ConvertToVector128Int32WithTruncation(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// int _mm_extract_epi16 (__m128i a, int immediate) - /// PEXTRW reg, xmm, imm8 + /// __m128i _mm_cvttpd_epi32 (__m128d a) + /// CVTTPD2DQ xmm1, xmm2/m128 + /// VCVTTPD2DQ xmm1, xmm2/m128 + /// VCVTTPD2DQ xmm1 {k1}{z}, xmm2/m128/m64bcst /// - public static ushort Extract(Vector128 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } - + public static Vector128 ConvertToVector128Int32WithTruncation(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128i _mm_insert_epi16 (__m128i a, int i, int immediate) - /// PINSRW xmm, reg/m16, imm8 + /// __m128 _mm_cvtepi32_ps (__m128i a) + /// CVTDQ2PS xmm1, xmm2/m128 + /// VCVTDQ2PS xmm1, xmm2/m128 + /// VCVTDQ2PS xmm1 {k1}{z}, xmm2/m128/m32bcst /// - public static Vector128 Insert(Vector128 value, short data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } + public static Vector128 ConvertToVector128Single(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128i _mm_insert_epi16 (__m128i a, int i, int immediate) - /// PINSRW xmm, reg/m16, imm8 + /// __m128 _mm_cvtpd_ps (__m128d a) + /// CVTPD2PS xmm1, xmm2/m128 + /// VCVTPD2PS xmm1, xmm2/m128 + /// VCVTPD2PS xmm1 {k1}{z}, xmm2/m128/m64bcst /// - public static Vector128 Insert(Vector128 value, ushort data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } + public static Vector128 ConvertToVector128Single(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128i _mm_loadu_si128 (__m128i const* mem_address) - /// MOVDQU xmm, m128 - /// - public static unsafe Vector128 LoadVector128(sbyte* address) { throw new PlatformNotSupportedException(); } - /// - /// __m128i _mm_loadu_si128 (__m128i const* mem_address) - /// MOVDQU xmm, m128 - /// - public static unsafe Vector128 LoadVector128(byte* address) { throw new PlatformNotSupportedException(); } - /// - /// __m128i _mm_loadu_si128 (__m128i const* mem_address) - /// MOVDQU xmm, m128 + /// int _mm_cvtsi128_si32 (__m128i a) + /// MOVD r/m32, xmm1 + /// VMOVD r/m32, xmm1 /// - public static unsafe Vector128 LoadVector128(short* address) { throw new PlatformNotSupportedException(); } + public static int ConvertToInt32(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128i _mm_loadu_si128 (__m128i const* mem_address) - /// MOVDQU xmm, m128 + /// int _mm_cvtsd_si32 (__m128d a) + /// CVTSD2SI r32, xmm1/m64 + /// VCVTSD2SI r32, xmm1/m64 /// - public static unsafe Vector128 LoadVector128(ushort* address) { throw new PlatformNotSupportedException(); } + public static int ConvertToInt32(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128i _mm_loadu_si128 (__m128i const* mem_address) - /// 
MOVDQU xmm, m128 + /// int _mm_cvttsd_si32 (__m128d a) + /// CVTTSD2SI r32, xmm1/m64 + /// VCVTTSD2SI r32, xmm1/m64 /// - public static unsafe Vector128 LoadVector128(int* address) { throw new PlatformNotSupportedException(); } + public static int ConvertToInt32WithTruncation(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128i _mm_loadu_si128 (__m128i const* mem_address) - /// MOVDQU xmm, m128 + /// int _mm_cvtsi128_si32 (__m128i a) + /// MOVD r/m32, xmm1 + /// VMOVD r/m32, xmm1 /// - public static unsafe Vector128 LoadVector128(uint* address) { throw new PlatformNotSupportedException(); } + public static uint ConvertToUInt32(Vector128 value) { throw new PlatformNotSupportedException(); } + /// - /// __m128i _mm_loadu_si128 (__m128i const* mem_address) - /// MOVDQU xmm, m128 + /// __m128d _mm_div_pd (__m128d a, __m128d b) + /// DIVPD xmm1, xmm2/m128 + /// VDIVPD xmm1, xmm2, xmm3/m128 + /// VDIVPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// - public static unsafe Vector128 LoadVector128(long* address) { throw new PlatformNotSupportedException(); } + public static Vector128 Divide(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + /// - /// __m128i _mm_loadu_si128 (__m128i const* mem_address) - /// MOVDQU xmm, m128 + /// __m128d _mm_div_sd (__m128d a, __m128d b) + /// DIVSD xmm1, xmm2/m64 + /// VDIVSD xmm1, xmm2, xmm3/m64 /// - public static unsafe Vector128 LoadVector128(ulong* address) { throw new PlatformNotSupportedException(); } + public static Vector128 DivideScalar(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + /// - /// __m128d _mm_loadu_pd (double const* mem_address) - /// MOVUPD xmm, m128 + /// int _mm_extract_epi16 (__m128i a, int immediate) + /// PEXTRW r/m16, xmm1, imm8 + /// VPEXTRW r/m16, xmm1, imm8 /// - public static unsafe Vector128 LoadVector128(double* address) { throw new PlatformNotSupportedException(); } + public static ushort Extract(Vector128 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_load_sd (double const* mem_address) - /// MOVSD xmm, m64 + /// __m128i _mm_insert_epi16 (__m128i a, int i, int immediate) + /// PINSRW xmm1, r/m16, imm8 + /// VPINSRW xmm1, xmm2, r/m16, imm8 /// - public static unsafe Vector128 LoadScalarVector128(double* address) { throw new PlatformNotSupportedException(); } + public static Vector128 Insert(Vector128 value, short data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_insert_epi16 (__m128i a, int i, int immediate) + /// PINSRW xmm1, r/m16, imm8 + /// VPINSRW xmm1, xmm2, r/m16, imm8 + /// + public static Vector128 Insert(Vector128 value, ushort data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_load_si128 (__m128i const* mem_address) - /// MOVDQA xmm, m128 + /// MOVDQA xmm1, m128 + /// VMOVDQA xmm1, m128 + /// VMOVDQA32 xmm1 {k1}{z}, m128 /// public static unsafe Vector128 LoadAlignedVector128(sbyte* address) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_load_si128 (__m128i const* mem_address) - /// MOVDQA xmm, m128 + /// MOVDQA xmm1, m128 + /// VMOVDQA xmm1, m128 + /// VMOVDQA32 xmm1 {k1}{z}, m128 /// public static unsafe Vector128 LoadAlignedVector128(byte* address) { throw new PlatformNotSupportedException(); } /// @@ -726,32 +803,44 @@ internal X64() { } public static unsafe Vector128 LoadAlignedVector128(short* address) { throw new 
PlatformNotSupportedException(); } /// /// __m128i _mm_load_si128 (__m128i const* mem_address) - /// MOVDQA xmm, m128 + /// MOVDQA xmm1, m128 + /// VMOVDQA xmm1, m128 + /// VMOVDQA32 xmm1 {k1}{z}, m128 /// public static unsafe Vector128 LoadAlignedVector128(ushort* address) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_load_si128 (__m128i const* mem_address) - /// MOVDQA xmm, m128 + /// MOVDQA xmm1, m128 + /// VMOVDQA xmm1, m128 + /// VMOVDQA32 xmm1 {k1}{z}, m128 /// public static unsafe Vector128 LoadAlignedVector128(int* address) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_load_si128 (__m128i const* mem_address) - /// MOVDQA xmm, m128 + /// MOVDQA xmm1, m128 + /// VMOVDQA xmm1, m128 + /// VMOVDQA32 xmm1 {k1}{z}, m128 /// public static unsafe Vector128 LoadAlignedVector128(uint* address) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_load_si128 (__m128i const* mem_address) - /// MOVDQA xmm, m128 + /// MOVDQA xmm1, m128 + /// VMOVDQA xmm1, m128 + /// VMOVDQA64 xmm1 {k1}{z}, m128 /// public static unsafe Vector128 LoadAlignedVector128(long* address) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_load_si128 (__m128i const* mem_address) - /// MOVDQA xmm, m128 + /// MOVDQA xmm1, m128 + /// VMOVDQA xmm1, m128 + /// VMOVDQA64 xmm1 {k1}{z}, m128 /// public static unsafe Vector128 LoadAlignedVector128(ulong* address) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_load_pd (double const* mem_address) - /// MOVAPD xmm, m128 + /// MOVAPD xmm1, m128 + /// VMOVAPD xmm1, m128 + /// VMOVAPD xmm1 {k1}{z}, m128 /// public static unsafe Vector128 LoadAlignedVector128(double* address) { throw new PlatformNotSupportedException(); } @@ -760,70 +849,154 @@ internal X64() { } /// LFENCE /// public static void LoadFence() { throw new PlatformNotSupportedException(); } - /// /// __m128d _mm_loadh_pd (__m128d a, double const* mem_addr) - /// MOVHPD xmm, m64 + /// MOVHPD xmm1, m64 + /// VMOVHPD xmm1, xmm2, m64 /// public static unsafe Vector128 LoadHigh(Vector128 lower, double* address) { throw new PlatformNotSupportedException(); } - /// /// __m128d _mm_loadl_pd (__m128d a, double const* mem_addr) - /// MOVLPD xmm, m64 + /// MOVLPD xmm1, m64 + /// VMOVLPD xmm1, xmm2, m64 /// public static unsafe Vector128 LoadLow(Vector128 upper, double* address) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_loadu_si32 (void const* mem_addr) - /// MOVD xmm, reg/m32 + /// MOVD xmm1, m32 + /// VMOVD xmm1, m32 /// public static unsafe Vector128 LoadScalarVector128(int* address) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_loadu_si32 (void const* mem_addr) - /// MOVD xmm, reg/m32 + /// MOVD xmm1, m32 + /// VMOVD xmm1, m32 /// public static unsafe Vector128 LoadScalarVector128(uint* address) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_loadl_epi64 (__m128i const* mem_addr) - /// MOVQ xmm, reg/m64 + /// MOVQ xmm1, m64 + /// VMOVQ xmm1, m64 /// public static unsafe Vector128 LoadScalarVector128(long* address) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_loadl_epi64 (__m128i const* mem_addr) - /// MOVQ xmm, reg/m64 + /// MOVQ xmm1, m64 + /// VMOVQ xmm1, m64 /// public static unsafe Vector128 LoadScalarVector128(ulong* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128d _mm_load_sd (double const* mem_address) + /// MOVSD xmm1, m64 + /// VMOVSD xmm1, m64 + /// VMOVSD xmm1 {k1}, m64 + /// + public static unsafe Vector128 
LoadScalarVector128(double* address) { throw new PlatformNotSupportedException(); } + + /// + /// __m128i _mm_loadu_si128 (__m128i const* mem_address) + /// MOVDQU xmm1, m128 + /// VMOVDQU xmm1, m128 + /// VMOVDQU8 xmm1 {k1}{z}, m128 + /// + public static unsafe Vector128 LoadVector128(sbyte* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_loadu_si128 (__m128i const* mem_address) + /// MOVDQU xmm1, m128 + /// VMOVDQU xmm1, m128 + /// VMOVDQU8 xmm1 {k1}{z}, m128 + /// + public static unsafe Vector128 LoadVector128(byte* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_loadu_si128 (__m128i const* mem_address) + /// MOVDQU xmm1, m128 + /// VMOVDQU xmm1, m128 + /// VMOVDQU16 xmm1 {k1}{z}, m128 + /// + public static unsafe Vector128 LoadVector128(short* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_loadu_si128 (__m128i const* mem_address) + /// MOVDQU xmm1, m128 + /// VMOVDQU xmm1, m128 + /// VMOVDQU16 xmm1 {k1}{z}, m128 + /// + public static unsafe Vector128 LoadVector128(ushort* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_loadu_si128 (__m128i const* mem_address) + /// MOVDQU xmm1, m128 + /// VMOVDQU xmm1, m128 + /// VMOVDQU32 xmm1 {k1}{z}, m128 + /// + public static unsafe Vector128 LoadVector128(int* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_loadu_si128 (__m128i const* mem_address) + /// MOVDQU xmm1, m128 + /// VMOVDQU xmm1, m128 + /// VMOVDQU32 xmm1 {k1}{z}, m128 + /// + public static unsafe Vector128 LoadVector128(uint* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_loadu_si128 (__m128i const* mem_address) + /// MOVDQU xmm1, m128 + /// VMOVDQU xmm1, m128 + /// VMOVDQU64 xmm1 {k1}{z}, m128 + /// + public static unsafe Vector128 LoadVector128(long* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_loadu_si128 (__m128i const* mem_address) + /// MOVDQU xmm1, m128 + /// VMOVDQU xmm1, m128 + /// VMOVDQU64 xmm1 {k1}{z}, m128 + /// + public static unsafe Vector128 LoadVector128(ulong* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128d _mm_loadu_pd (double const* mem_address) + /// MOVUPD xmm1, m128 + /// VMOVUPD xmm1, m128 + /// VMOVUPD xmm1 {k1}{z}, m128 + /// + public static unsafe Vector128 LoadVector128(double* address) { throw new PlatformNotSupportedException(); } /// /// void _mm_maskmoveu_si128 (__m128i a, __m128i mask, char* mem_address) - /// MASKMOVDQU xmm, xmm + /// MASKMOVDQU xmm1, xmm2 ; Address: EDI/RDI + /// VMASKMOVDQU xmm1, xmm2 ; Address: EDI/RDI /// public static unsafe void MaskMove(Vector128 source, Vector128 mask, sbyte* address) { throw new PlatformNotSupportedException(); } /// /// void _mm_maskmoveu_si128 (__m128i a, __m128i mask, char* mem_address) - /// MASKMOVDQU xmm, xmm + /// MASKMOVDQU xmm1, xmm2 ; Address: EDI/RDI + /// VMASKMOVDQU xmm1, xmm2 ; Address: EDI/RDI /// public static unsafe void MaskMove(Vector128 source, Vector128 mask, byte* address) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_max_epu8 (__m128i a, __m128i b) - /// PMAXUB xmm, xmm/m128 + /// PMAXUB xmm1, xmm2/m128 + /// VPMAXUB xmm1, xmm2, xmm3/m128 + /// VPMAXUB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Max(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_max_epi16 (__m128i a, __m128i b) - /// PMAXSW xmm, xmm/m128 + /// PMAXSW xmm1, xmm2/m128 + /// VPMAXSW 
xmm1, xmm2, xmm3/m128 + /// VPMAXSW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Max(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_max_pd (__m128d a, __m128d b) - /// MAXPD xmm, xmm/m128 + /// MAXPD xmm1, xmm2/m128 + /// VMAXPD xmm1, xmm2, xmm3/m128 + /// VMAXPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Max(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_max_sd (__m128d a, __m128d b) - /// MAXSD xmm, xmm/m64 + /// MAXSD xmm1, xmm2/m64 + /// VMAXSD xmm1, xmm2, xmm3/m64 /// public static Vector128 MaxScalar(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } @@ -835,631 +1008,796 @@ internal X64() { } /// /// __m128i _mm_min_epu8 (__m128i a, __m128i b) - /// PMINUB xmm, xmm/m128 + /// PMINUB xmm1, xmm2/m128 + /// VPMINUB xmm1, xmm2, xmm3/m128 + /// VPMINUB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Min(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_min_epi16 (__m128i a, __m128i b) - /// PMINSW xmm, xmm/m128 + /// PMINSW xmm1, xmm2/m128 + /// VPMINSW xmm1, xmm2, xmm3/m128 + /// VPMINSW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Min(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_min_pd (__m128d a, __m128d b) - /// MINPD xmm, xmm/m128 + /// MINPD xmm1, xmm2/m128 + /// VMINPD xmm1, xmm2, xmm3/m128 + /// VMINPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Min(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_min_sd (__m128d a, __m128d b) - /// MINSD xmm, xmm/m64 + /// MINSD xmm1, xmm2/m64 + /// VMINSD xmm1, xmm2, xmm3/m64 /// public static Vector128 MinScalar(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// - /// __m128d _mm_move_sd (__m128d a, __m128d b) - /// MOVSD xmm, xmm - /// - public static Vector128 MoveScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// int _mm_movemask_epi8 (__m128i a) - /// PMOVMSKB reg, xmm + /// PMOVMSKB r32, xmm1 + /// VPMOVMSKB r32, xmm1 /// public static int MoveMask(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// int _mm_movemask_epi8 (__m128i a) - /// PMOVMSKB reg, xmm + /// PMOVMSKB r32, xmm1 + /// VPMOVMSKB r32, xmm1 /// public static int MoveMask(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// int _mm_movemask_pd (__m128d a) - /// MOVMSKPD reg, xmm + /// MOVMSKPD r32, xmm1 + /// VMOVMSKPD r32, xmm1 /// public static int MoveMask(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_move_epi64 (__m128i a) - /// MOVQ xmm, xmm + /// MOVQ xmm1, xmm2 + /// VMOVQ xmm1, xmm2 /// public static Vector128 MoveScalar(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_move_epi64 (__m128i a) - /// MOVQ xmm, xmm + /// MOVQ xmm1, xmm2 + /// VMOVQ xmm1, xmm2 /// public static Vector128 MoveScalar(Vector128 value) { throw new PlatformNotSupportedException(); } + /// + /// __m128d _mm_move_sd (__m128d a, __m128d b) + /// MOVSD xmm1, xmm2 + /// VMOVSD xmm1, xmm2, xmm3 + /// VMOVSD xmm1 {k1}{z}, xmm2, xmm3 + /// + public static Vector128 MoveScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_mul_epu32 (__m128i a, __m128i b) - /// PMULUDQ xmm, xmm/m128 + /// 
PMULUDQ xmm1, xmm2/m128 + /// VPMULUDQ xmm1, xmm2, xmm3/m128 + /// VPMULUDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Multiply(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_mul_pd (__m128d a, __m128d b) - /// MULPD xmm, xmm/m128 + /// MULPD xmm1, xmm2/m128 + /// VMULPD xmm1, xmm2, xmm3/m128 + /// VMULPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Multiply(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_mul_sd (__m128d a, __m128d b) - /// MULSD xmm, xmm/m64 + /// __m128i _mm_madd_epi16 (__m128i a, __m128i b) + /// PMADDWD xmm1, xmm2/m128 + /// VPMADDWD xmm1, xmm2, xmm3/m128 + /// VPMADDWD xmm1 {k1}{z}, xmm2, xmm3/m128 /// - public static Vector128 MultiplyScalar(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + public static Vector128 MultiplyAddAdjacent(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_mulhi_epi16 (__m128i a, __m128i b) - /// PMULHW xmm, xmm/m128 + /// PMULHW xmm1, xmm2/m128 + /// VPMULHW xmm1, xmm2, xmm3/m128 + /// VPMULHW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 MultiplyHigh(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_mulhi_epu16 (__m128i a, __m128i b) - /// PMULHUW xmm, xmm/m128 + /// PMULHUW xmm1, xmm2/m128 + /// VPMULHUW xmm1, xmm2, xmm3/m128 + /// VPMULHUW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 MultiplyHigh(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// - /// __m128i _mm_madd_epi16 (__m128i a, __m128i b) - /// PMADDWD xmm, xmm/m128 - /// - public static Vector128 MultiplyAddAdjacent(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_mullo_epi16 (__m128i a, __m128i b) - /// PMULLW xmm, xmm/m128 + /// PMULLW xmm1, xmm2/m128 + /// VPMULLW xmm1, xmm2, xmm3/m128 + /// VPMULLW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 MultiplyLow(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_mullo_epi16 (__m128i a, __m128i b) - /// PMULLW xmm, xmm/m128 + /// PMULLW xmm1, xmm2/m128 + /// VPMULLW xmm1, xmm2, xmm3/m128 + /// VPMULLW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 MultiplyLow(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + /// + /// __m128d _mm_mul_sd (__m128d a, __m128d b) + /// MULSD xmm1, xmm2/m64 + /// VMULSD xmm1, xmm2, xmm3/m64 + /// + public static Vector128 MultiplyScalar(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + /// /// __m128i _mm_or_si128 (__m128i a, __m128i b) - /// POR xmm, xmm/m128 + /// POR xmm1, xmm2/m128 + /// VPOR xmm1, xmm2, xmm3/m128 /// public static Vector128 Or(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_or_si128 (__m128i a, __m128i b) - /// POR xmm, xmm/m128 + /// POR xmm1, xmm2/m128 + /// VPOR xmm1, xmm2, xmm3/m128 /// public static Vector128 Or(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_or_si128 (__m128i a, __m128i b) - /// POR xmm, xmm/m128 + /// POR xmm1, xmm2/m128 + /// VPOR xmm1, xmm2, xmm3/m128 /// public static Vector128 Or(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_or_si128 (__m128i a, __m128i b) - /// POR xmm, 
xmm/m128 + /// POR xmm1, xmm2/m128 + /// VPOR xmm1, xmm2, xmm3/m128 /// public static Vector128 Or(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_or_si128 (__m128i a, __m128i b) - /// POR xmm, xmm/m128 + /// POR xmm1, xmm2/m128 + /// VPOR xmm1, xmm2, xmm3/m128 + /// VPORD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Or(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_or_si128 (__m128i a, __m128i b) - /// POR xmm, xmm/m128 + /// POR xmm1, xmm2/m128 + /// VPOR xmm1, xmm2, xmm3/m128 + /// VPORD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Or(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_or_si128 (__m128i a, __m128i b) - /// POR xmm, xmm/m128 + /// POR xmm1, xmm2/m128 + /// VPOR xmm1, xmm2, xmm3/m128 + /// VPORQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Or(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_or_si128 (__m128i a, __m128i b) - /// POR xmm, xmm/m128 + /// POR xmm1, xmm2/m128 + /// VPOR xmm1, xmm2, xmm3/m128 + /// VPORQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Or(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_or_pd (__m128d a, __m128d b) - /// ORPD xmm, xmm/m128 + /// ORPD xmm1, xmm2/m128 + /// VORPD xmm1, xmm2, xmm3/m128 + /// VORPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Or(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_packs_epi16 (__m128i a, __m128i b) - /// PACKSSWB xmm, xmm/m128 + /// PACKSSWB xmm1, xmm2/m128 + /// VPACKSSWB xmm1, xmm2, xmm3/m128 + /// VPACKSSWB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 PackSignedSaturate(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_packs_epi32 (__m128i a, __m128i b) - /// PACKSSDW xmm, xmm/m128 + /// PACKSSDW xmm1, xmm2/m128 + /// VPACKSSDW xmm1, xmm2, xmm3/m128 + /// VPACKSSDW xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 PackSignedSaturate(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_packus_epi16 (__m128i a, __m128i b) - /// PACKUSWB xmm, xmm/m128 + /// PACKUSWB xmm1, xmm2/m128 + /// VPACKUSWB xmm1, xmm2, xmm3/m128 + /// VPACKUSWB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 PackUnsignedSaturate(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// - /// __m128i _mm_sad_epu8 (__m128i a, __m128i b) - /// PSADBW xmm, xmm/m128 - /// - public static Vector128 SumAbsoluteDifferences(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - - /// - /// __m128i _mm_shuffle_epi32 (__m128i a, int immediate) - /// PSHUFD xmm, xmm/m128, imm8 - /// - public static Vector128 Shuffle(Vector128 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } - /// - /// __m128i _mm_shuffle_epi32 (__m128i a, int immediate) - /// PSHUFD xmm, xmm/m128, imm8 - /// - public static Vector128 Shuffle(Vector128 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } - /// - /// __m128d _mm_shuffle_pd (__m128d a, __m128d b, int immediate) - /// SHUFPD xmm, xmm/m128, imm8 - /// - public static Vector128 Shuffle(Vector128 left, Vector128 right, [ConstantExpected] byte control) { 
throw new PlatformNotSupportedException(); } - - /// - /// __m128i _mm_shufflehi_epi16 (__m128i a, int immediate) - /// PSHUFHW xmm, xmm/m128, imm8 - /// - public static Vector128 ShuffleHigh(Vector128 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } - /// - /// __m128i _mm_shufflehi_epi16 (__m128i a, int control) - /// PSHUFHW xmm, xmm/m128, imm8 - /// - public static Vector128 ShuffleHigh(Vector128 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } - - /// - /// __m128i _mm_shufflelo_epi16 (__m128i a, int control) - /// PSHUFLW xmm, xmm/m128, imm8 - /// - public static Vector128 ShuffleLow(Vector128 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } - /// - /// __m128i _mm_shufflelo_epi16 (__m128i a, int control) - /// PSHUFLW xmm, xmm/m128, imm8 - /// - public static Vector128 ShuffleLow(Vector128 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_sll_epi16 (__m128i a, __m128i count) - /// PSLLW xmm, xmm/m128 + /// PSLLW xmm1, xmm2/m128 + /// VPSLLW xmm1, xmm2, xmm3/m128 + /// VPSLLW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftLeftLogical(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_sll_epi16 (__m128i a, __m128i count) - /// PSLLW xmm, xmm/m128 + /// PSLLW xmm1, xmm2/m128 + /// VPSLLW xmm1, xmm2, xmm3/m128 + /// VPSLLW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftLeftLogical(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_sll_epi32 (__m128i a, __m128i count) - /// PSLLD xmm, xmm/m128 + /// PSLLD xmm1, xmm2/m128 + /// VPSLLD xmm1, xmm2, xmm3/m128 + /// VPSLLD xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftLeftLogical(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_sll_epi32 (__m128i a, __m128i count) - /// PSLLD xmm, xmm/m128 + /// PSLLD xmm1, xmm2/m128 + /// VPSLLD xmm1, xmm2, xmm3/m128 + /// VPSLLD xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftLeftLogical(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_sll_epi64 (__m128i a, __m128i count) - /// PSLLQ xmm, xmm/m128 + /// PSLLQ xmm1, xmm2/m128 + /// VPSLLQ xmm1, xmm2, xmm3/m128 + /// VPSLLQ xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftLeftLogical(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_sll_epi64 (__m128i a, __m128i count) - /// PSLLQ xmm, xmm/m128 + /// PSLLQ xmm1, xmm2/m128 + /// VPSLLQ xmm1, xmm2, xmm3/m128 + /// VPSLLQ xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftLeftLogical(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_slli_epi16 (__m128i a, int immediate) - /// PSLLW xmm, imm8 + /// PSLLW xmm1, imm8 + /// VPSLLW xmm1, xmm2, imm8 + /// VPSLLW xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftLeftLogical(Vector128 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_slli_epi16 (__m128i a, int immediate) - /// PSLLW xmm, imm8 + /// PSLLW xmm1, imm8 + /// VPSLLW xmm1, xmm2, imm8 + /// VPSLLW xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftLeftLogical(Vector128 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_slli_epi32 
(__m128i a, int immediate) - /// PSLLD xmm, imm8 + /// PSLLD xmm1, imm8 + /// VPSLLD xmm1, xmm2, imm8 + /// VPSLLD xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftLeftLogical(Vector128 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_slli_epi32 (__m128i a, int immediate) - /// PSLLD xmm, imm8 + /// PSLLD xmm1, imm8 + /// VPSLLD xmm1, xmm2, imm8 + /// VPSLLD xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftLeftLogical(Vector128 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_slli_epi64 (__m128i a, int immediate) - /// PSLLQ xmm, imm8 + /// PSLLQ xmm1, imm8 + /// VPSLLQ xmm1, xmm2, imm8 + /// VPSLLQ xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftLeftLogical(Vector128 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_slli_epi64 (__m128i a, int immediate) - /// PSLLQ xmm, imm8 + /// PSLLQ xmm1, imm8 + /// VPSLLQ xmm1, xmm2, imm8 + /// VPSLLQ xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftLeftLogical(Vector128 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_bslli_si128 (__m128i a, int imm8) - /// PSLLDQ xmm, imm8 + /// PSLLDQ xmm1, imm8 + /// VPSLLDQ xmm1, xmm2/m128, imm8 /// public static Vector128 ShiftLeftLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_bslli_si128 (__m128i a, int imm8) - /// PSLLDQ xmm, imm8 + /// PSLLDQ xmm1, imm8 + /// VPSLLDQ xmm1, xmm2/m128, imm8 /// public static Vector128 ShiftLeftLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_bslli_si128 (__m128i a, int imm8) - /// PSLLDQ xmm, imm8 + /// PSLLDQ xmm1, imm8 + /// VPSLLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. /// public static Vector128 ShiftLeftLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_bslli_si128 (__m128i a, int imm8) - /// PSLLDQ xmm, imm8 + /// PSLLDQ xmm1, imm8 + /// VPSLLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. /// public static Vector128 ShiftLeftLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_bslli_si128 (__m128i a, int imm8) - /// PSLLDQ xmm, imm8 + /// PSLLDQ xmm1, imm8 + /// VPSLLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. /// public static Vector128 ShiftLeftLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_bslli_si128 (__m128i a, int imm8) - /// PSLLDQ xmm, imm8 + /// PSLLDQ xmm1, imm8 + /// VPSLLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. 
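+ /// A hedged sketch of the byte-wise behavior (values illustrative; assumes the usual System.Runtime.Intrinsics usings):
+ ///   Vector128<int> v = Vector128.Create(0x11111111, 0x22222222, 0x33333333, 0x44444444);
+ ///   Vector128<int> r = Sse2.ShiftLeftLogical128BitLane(v, 4);
+ ///   // r is <0x00000000, 0x11111111, 0x22222222, 0x33333333>: the whole 128-bit lane moved
+ ///   // left by 4 bytes (one int), independent of the element type.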
/// public static Vector128 ShiftLeftLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_bslli_si128 (__m128i a, int imm8) - /// PSLLDQ xmm, imm8 + /// PSLLDQ xmm1, imm8 + /// VPSLLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. /// public static Vector128 ShiftLeftLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_bslli_si128 (__m128i a, int imm8) - /// PSLLDQ xmm, imm8 + /// PSLLDQ xmm1, imm8 + /// VPSLLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. /// public static Vector128 ShiftLeftLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_sra_epi16 (__m128i a, __m128i count) - /// PSRAW xmm, xmm/m128 + /// PSRAW xmm1, xmm2/m128 + /// VPSRAW xmm1, xmm2, xmm3/m128 + /// VPSRAW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftRightArithmetic(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_sra_epi32 (__m128i a, __m128i count) - /// PSRAD xmm, xmm/m128 + /// PSRAD xmm1, xmm2/m128 + /// VPSRAD xmm1, xmm2, xmm3/m128 + /// VPSRAD xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftRightArithmetic(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_srai_epi16 (__m128i a, int immediate) - /// PSRAW xmm, imm8 + /// PSRAW xmm1, imm8 + /// VPSRAW xmm1, xmm2, imm8 + /// VPSRAW xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftRightArithmetic(Vector128 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_srai_epi32 (__m128i a, int immediate) - /// PSRAD xmm, imm8 + /// PSRAD xmm1, imm8 + /// VPSRAD xmm1, xmm2, imm8 + /// VPSRAD xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftRightArithmetic(Vector128 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_srl_epi16 (__m128i a, __m128i count) - /// PSRLW xmm, xmm/m128 + /// PSRLW xmm1, xmm2/m128 + /// VPSRLW xmm1, xmm2, xmm3/m128 + /// VPSRLW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftRightLogical(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_srl_epi16 (__m128i a, __m128i count) - /// PSRLW xmm, xmm/m128 + /// PSRLW xmm1, xmm2/m128 + /// VPSRLW xmm1, xmm2, xmm3/m128 + /// VPSRLW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftRightLogical(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_srl_epi32 (__m128i a, __m128i count) - /// PSRLD xmm, xmm/m128 + /// PSRLD xmm1, xmm2/m128 + /// VPSRLD xmm1, xmm2, xmm3/m128 + /// VPSRLD xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftRightLogical(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_srl_epi32 (__m128i a, __m128i count) - /// PSRLD xmm, xmm/m128 + /// PSRLD xmm1, xmm2/m128 + /// VPSRLD xmm1, xmm2, xmm3/m128 + /// VPSRLD xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftRightLogical(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_srl_epi64 (__m128i a, __m128i count) - /// PSRLQ xmm, xmm/m128 + /// 
PSRLQ xmm1, xmm2/m128 + /// VPSRLQ xmm1, xmm2, xmm3/m128 + /// VPSRLQ xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftRightLogical(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_srl_epi64 (__m128i a, __m128i count) - /// PSRLQ xmm, xmm/m128 + /// PSRLQ xmm1, xmm2/m128 + /// VPSRLQ xmm1, xmm2, xmm3/m128 + /// VPSRLQ xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftRightLogical(Vector128 value, Vector128 count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_srli_epi16 (__m128i a, int immediate) - /// PSRLW xmm, imm8 + /// PSRLW xmm1, imm8 + /// VPSRLW xmm1, xmm2, imm8 + /// VPSRLW xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftRightLogical(Vector128 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_srli_epi16 (__m128i a, int immediate) - /// PSRLW xmm, imm8 + /// PSRLW xmm1, imm8 + /// VPSRLW xmm1, xmm2, imm8 + /// VPSRLW xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftRightLogical(Vector128 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_srli_epi32 (__m128i a, int immediate) - /// PSRLD xmm, imm8 + /// PSRLD xmm1, imm8 + /// VPSRLD xmm1, xmm2, imm8 + /// VPSRLD xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftRightLogical(Vector128 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_srli_epi32 (__m128i a, int immediate) - /// PSRLD xmm, imm8 + /// PSRLD xmm1, imm8 + /// VPSRLD xmm1, xmm2, imm8 + /// VPSRLD xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftRightLogical(Vector128 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_srli_epi64 (__m128i a, int immediate) - /// PSRLQ xmm, imm8 + /// PSRLQ xmm1, imm8 + /// VPSRLQ xmm1, xmm2, imm8 + /// VPSRLQ xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftRightLogical(Vector128 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_srli_epi64 (__m128i a, int immediate) - /// PSRLQ xmm, imm8 + /// PSRLQ xmm1, imm8 + /// VPSRLQ xmm1, xmm2, imm8 + /// VPSRLQ xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftRightLogical(Vector128 value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_bsrli_si128 (__m128i a, int imm8) - /// PSRLDQ xmm, imm8 + /// PSRLDQ xmm1, imm8 + /// VPSRLDQ xmm1, xmm2/m128, imm8 /// public static Vector128 ShiftRightLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_bsrli_si128 (__m128i a, int imm8) - /// PSRLDQ xmm, imm8 + /// PSRLDQ xmm1, imm8 + /// VPSRLDQ xmm1, xmm2/m128, imm8 /// public static Vector128 ShiftRightLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_bsrli_si128 (__m128i a, int imm8) - /// PSRLDQ xmm, imm8 + /// PSRLDQ xmm1, imm8 + /// VPSRLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors. 
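+ /// A hedged sketch of one common use, reducing toward a horizontal sum (illustrated with ints; assumes the System.Runtime.Intrinsics namespaces):
+ ///   Vector128<int> v = Vector128.Create(1, 2, 3, 4);
+ ///   v = Sse2.Add(v, Sse2.ShiftRightLogical128BitLane(v, 8)); // <4, 6, 3, 4>
+ ///   v = Sse2.Add(v, Sse2.ShiftRightLogical128BitLane(v, 4)); // sum (10) lands in element 0
+ ///   int sum = v.ToScalar();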
/// public static Vector128 ShiftRightLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_bsrli_si128 (__m128i a, int imm8) - /// PSRLDQ xmm, imm8 + /// PSRLDQ xmm1, imm8 + /// VPSRLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors. /// public static Vector128 ShiftRightLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_bsrli_si128 (__m128i a, int imm8) - /// PSRLDQ xmm, imm8 + /// PSRLDQ xmm1, imm8 + /// VPSRLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors. /// public static Vector128 ShiftRightLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_bsrli_si128 (__m128i a, int imm8) - /// PSRLDQ xmm, imm8 + /// PSRLDQ xmm1, imm8 + /// VPSRLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors. /// public static Vector128 ShiftRightLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_bsrli_si128 (__m128i a, int imm8) - /// PSRLDQ xmm, imm8 + /// PSRLDQ xmm1, imm8 + /// VPSRLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors. /// public static Vector128 ShiftRightLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_bsrli_si128 (__m128i a, int imm8) - /// PSRLDQ xmm, imm8 + /// PSRLDQ xmm1, imm8 + /// VPSRLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors. 
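+ /// Together with ShiftLeftLogical128BitLane and Or, this can stand in for SSSE3 PALIGNR on SSE2-only paths,
+ /// e.g. (a hedged sketch; lo and hi are hypothetical adjacent 128-bit loads, n a byte count known at compile time):
+ ///   Vector128<byte> joined = Sse2.Or(Sse2.ShiftRightLogical128BitLane(lo, n),
+ ///                                    Sse2.ShiftLeftLogical128BitLane(hi, (byte)(16 - n)));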
/// public static Vector128 ShiftRightLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_shuffle_epi32 (__m128i a, int immediate) + /// PSHUFD xmm1, xmm2/m128, imm8 + /// VPSHUFD xmm1, xmm2/m128, imm8 + /// VPSHUFD xmm1 {k1}{z}, xmm2/m128/m32bcst, imm8 + /// + public static Vector128 Shuffle(Vector128 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_shuffle_epi32 (__m128i a, int immediate) + /// PSHUFD xmm1, xmm2/m128, imm8 + /// VPSHUFD xmm1, xmm2/m128, imm8 + /// VPSHUFD xmm1 {k1}{z}, xmm2/m128/m32bcst, imm8 + /// + public static Vector128 Shuffle(Vector128 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } + /// + /// __m128d _mm_shuffle_pd (__m128d a, __m128d b, int immediate) + /// SHUFPD xmm1, xmm2/m128, imm8 + /// VSHUFPD xmm1, xmm2, xmm3/m128, imm8 + /// VSHUFPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst, imm8 + /// + public static Vector128 Shuffle(Vector128 left, Vector128 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } + + /// + /// __m128i _mm_shufflehi_epi16 (__m128i a, int immediate) + /// PSHUFHW xmm1, xmm2/m128, imm8 + /// VPSHUFHW xmm1, xmm2/m128, imm8 + /// VPSHUFHW xmm1 {k1}{z}, xmm2/m128, imm8 + /// + public static Vector128 ShuffleHigh(Vector128 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_shufflehi_epi16 (__m128i a, int control) + /// PSHUFHW xmm1, xmm2/m128, imm8 + /// VPSHUFHW xmm1, xmm2/m128, imm8 + /// VPSHUFHW xmm1 {k1}{z}, xmm2/m128, imm8 + /// + public static Vector128 ShuffleHigh(Vector128 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } + + /// + /// __m128i _mm_shufflelo_epi16 (__m128i a, int control) + /// PSHUFLW xmm1, xmm2/m128, imm8 + /// VPSHUFLW xmm1, xmm2/m128, imm8 + /// VPSHUFLW xmm1 {k1}{z}, xmm2/m128, imm8 + /// + public static Vector128 ShuffleLow(Vector128 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_shufflelo_epi16 (__m128i a, int control) + /// PSHUFLW xmm1, xmm2/m128, imm8 + /// VPSHUFLW xmm1, xmm2/m128, imm8 + /// VPSHUFLW xmm1 {k1}{z}, xmm2/m128, imm8 + /// + public static Vector128 ShuffleLow(Vector128 value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } + /// /// __m128d _mm_sqrt_pd (__m128d a) - /// SQRTPD xmm, xmm/m128 + /// SQRTPD xmm1, xmm2/m128 + /// VSQRTPD xmm1, xmm2/m128 + /// VSQRTPD xmm1 {k1}{z}, xmm2/m128/m64bcst /// public static Vector128 Sqrt(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_sqrt_sd (__m128d a) - /// SQRTSD xmm, xmm/64 + /// SQRTSD xmm1, xmm2/m64 + /// VSQRTSD xmm1, xmm2, xmm3/m64 + /// VSQRTSD xmm1 {k1}{z}, xmm2, xmm3/m64{er} /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. 
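+ /// A hedged illustration of the two overload shapes (values illustrative):
+ ///   Vector128<double> v = Vector128.Create(9.0, 5.0);
+ ///   Sse2.SqrtScalar(v);                              // <3.0, 5.0>: upper element carried over from v itself
+ ///   Sse2.SqrtScalar(Vector128.Create(7.0, 7.0), v);  // <3.0, 7.0>: upper element taken from the first argument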
/// public static Vector128 SqrtScalar(Vector128 value) { throw new PlatformNotSupportedException(); } - /// /// __m128d _mm_sqrt_sd (__m128d a, __m128d b) - /// SQRTSD xmm, xmm/64 + /// SQRTSD xmm1, xmm2/m64 + /// VSQRTSD xmm1, xmm2, xmm3/m64 + /// VSQRTSD xmm1 {k1}{z}, xmm2, xmm3/m64{er} /// public static Vector128 SqrtScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// void _mm_store_sd (double* mem_addr, __m128d a) - /// MOVSD m64, xmm + /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) + /// MOVDQU m128, xmm1 + /// VMOVDQU m128, xmm1 + /// VMOVDQU8 m128 {k1}{z}, xmm1 /// - public static unsafe void StoreScalar(double* address, Vector128 source) { throw new PlatformNotSupportedException(); } + public static unsafe void Store(sbyte* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// - /// void _mm_storeu_si32 (void* mem_addr, __m128i a) - /// MOVD m32, xmm + /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) + /// MOVDQU m128, xmm1 + /// VMOVDQU m128, xmm1 + /// VMOVDQU8 m128 {k1}{z}, xmm1 /// - public static unsafe void StoreScalar(int* address, Vector128 source) { throw new PlatformNotSupportedException(); } + public static unsafe void Store(byte* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// - /// void _mm_storel_epi64 (__m128i* mem_addr, __m128i a) - /// MOVQ m64, xmm + /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) + /// MOVDQU m128, xmm1 + /// VMOVDQU m128, xmm1 + /// VMOVDQU16 m128 {k1}{z}, xmm1 /// - public static unsafe void StoreScalar(long* address, Vector128 source) { throw new PlatformNotSupportedException(); } + public static unsafe void Store(short* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// - /// void _mm_storeu_si32 (void* mem_addr, __m128i a) - /// MOVD m32, xmm + /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) + /// MOVDQU m128, xmm1 + /// VMOVDQU m128, xmm1 + /// VMOVDQU16 m128 {k1}{z}, xmm1 /// - public static unsafe void StoreScalar(uint* address, Vector128 source) { throw new PlatformNotSupportedException(); } + public static unsafe void Store(ushort* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// - /// void _mm_storel_epi64 (__m128i* mem_addr, __m128i a) - /// MOVQ m64, xmm + /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) + /// MOVDQU m128, xmm1 + /// VMOVDQU m128, xmm1 + /// VMOVDQU32 m128 {k1}{z}, xmm1 /// - public static unsafe void StoreScalar(ulong* address, Vector128 source) { throw new PlatformNotSupportedException(); } + public static unsafe void Store(int* address, Vector128 source) { throw new PlatformNotSupportedException(); } + /// + /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) + /// MOVDQU m128, xmm1 + /// VMOVDQU m128, xmm1 + /// VMOVDQU32 m128 {k1}{z}, xmm1 + /// + public static unsafe void Store(uint* address, Vector128 source) { throw new PlatformNotSupportedException(); } + /// + /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) + /// MOVDQU m128, xmm1 + /// VMOVDQU m128, xmm1 + /// VMOVDQU64 m128 {k1}{z}, xmm1 + /// + public static unsafe void Store(long* address, Vector128 source) { throw new PlatformNotSupportedException(); } + /// + /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) + /// MOVDQU m128, xmm1 + /// VMOVDQU m128, xmm1 + /// VMOVDQU64 m128 {k1}{z}, xmm1 + /// + public static unsafe void Store(ulong* address, Vector128 source) { throw new PlatformNotSupportedException(); } + /// + /// void 
_mm_storeu_pd (double* mem_addr, __m128d a) + /// MOVUPD m128, xmm1 + /// VMOVUPD m128, xmm1 + /// VMOVUPD m128 {k1}{z}, xmm1 + /// + public static unsafe void Store(double* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_store_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQA m128, xmm + /// MOVDQA m128, xmm1 + /// VMOVDQA m128, xmm1 + /// VMOVDQA32 m128 {k1}{z}, xmm1 /// public static unsafe void StoreAligned(sbyte* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_store_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQA m128, xmm + /// MOVDQA m128, xmm1 + /// VMOVDQA m128, xmm1 + /// VMOVDQA32 m128 {k1}{z}, xmm1 /// public static unsafe void StoreAligned(byte* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_store_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQA m128, xmm + /// MOVDQA m128, xmm1 + /// VMOVDQA m128, xmm1 + /// VMOVDQA32 m128 {k1}{z}, xmm1 /// public static unsafe void StoreAligned(short* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_store_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQA m128, xmm + /// MOVDQA m128, xmm1 + /// VMOVDQA m128, xmm1 + /// VMOVDQA32 m128 {k1}{z}, xmm1 /// public static unsafe void StoreAligned(ushort* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_store_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQA m128, xmm + /// MOVDQA m128, xmm1 + /// VMOVDQA m128, xmm1 + /// VMOVDQA32 m128 {k1}{z}, xmm1 /// public static unsafe void StoreAligned(int* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_store_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQA m128, xmm + /// MOVDQA m128, xmm1 + /// VMOVDQA m128, xmm1 + /// VMOVDQA32 m128 {k1}{z}, xmm1 /// public static unsafe void StoreAligned(uint* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_store_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQA m128, xmm + /// MOVDQA m128, xmm1 + /// VMOVDQA m128, xmm1 + /// VMOVDQA64 m128 {k1}{z}, xmm1 /// public static unsafe void StoreAligned(long* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_store_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQA m128, xmm + /// MOVDQA m128, xmm1 + /// VMOVDQA m128, xmm1 + /// VMOVDQA64 m128 {k1}{z}, xmm1 /// public static unsafe void StoreAligned(ulong* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_store_pd (double* mem_addr, __m128d a) - /// MOVAPD m128, xmm + /// MOVAPD m128, xmm1 + /// VMOVAPD m128, xmm1 + /// VMOVAPD m128 {k1}{z}, xmm1 /// public static unsafe void StoreAligned(double* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a) - /// MOVNTDQ m128, xmm + /// MOVNTDQ m128, xmm1 + /// VMOVNTDQ m128, xmm1 /// public static unsafe void StoreAlignedNonTemporal(sbyte* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a) - /// MOVNTDQ m128, xmm + /// MOVNTDQ m128, xmm1 + /// VMOVNTDQ m128, xmm1 /// public static unsafe void StoreAlignedNonTemporal(byte* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a) - /// MOVNTDQ m128, xmm + /// MOVNTDQ m128, xmm1 + /// VMOVNTDQ m128, xmm1 /// public static
unsafe void StoreAlignedNonTemporal(short* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a) - /// MOVNTDQ m128, xmm + /// MOVNTDQ m128, xmm1 + /// VMOVNTDQ m128, xmm1 /// public static unsafe void StoreAlignedNonTemporal(ushort* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a) - /// MOVNTDQ m128, xmm + /// MOVNTDQ m128, xmm1 + /// VMOVNTDQ m128, xmm1 /// public static unsafe void StoreAlignedNonTemporal(int* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a) - /// MOVNTDQ m128, xmm + /// MOVNTDQ m128, xmm1 + /// VMOVNTDQ m128, xmm1 /// public static unsafe void StoreAlignedNonTemporal(uint* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a) - /// MOVNTDQ m128, xmm + /// MOVNTDQ m128, xmm1 + /// VMOVNTDQ m128, xmm1 /// public static unsafe void StoreAlignedNonTemporal(long* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a) - /// MOVNTDQ m128, xmm + /// MOVNTDQ m128, xmm1 + /// VMOVNTDQ m128, xmm1 /// public static unsafe void StoreAlignedNonTemporal(ulong* address, Vector128 source) { throw new PlatformNotSupportedException(); } /// /// void _mm_stream_pd (double* mem_addr, __m128d a) - /// MOVNTPD m128, xmm + /// MOVNTPD m128, xmm1 + /// VMOVNTPD m128, xmm1 /// public static unsafe void StoreAlignedNonTemporal(double* address, Vector128 source) { throw new PlatformNotSupportedException(); } - /// - /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQU m128, xmm - /// - public static unsafe void Store(sbyte* address, Vector128 source) { throw new PlatformNotSupportedException(); } - /// - /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQU m128, xmm - /// - public static unsafe void Store(byte* address, Vector128 source) { throw new PlatformNotSupportedException(); } - /// - /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQU m128, xmm - /// - public static unsafe void Store(short* address, Vector128 source) { throw new PlatformNotSupportedException(); } - /// - /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQU m128, xmm - /// - public static unsafe void Store(ushort* address, Vector128 source) { throw new PlatformNotSupportedException(); } - /// - /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQU m128, xmm - /// - public static unsafe void Store(int* address, Vector128 source) { throw new PlatformNotSupportedException(); } - /// - /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQU m128, xmm - /// - public static unsafe void Store(uint* address, Vector128 source) { throw new PlatformNotSupportedException(); } - /// - /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQU m128, xmm - /// - public static unsafe void Store(long* address, Vector128 source) { throw new PlatformNotSupportedException(); } - /// - /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQU m128, xmm - /// - public static unsafe void Store(ulong* address, Vector128 source) { throw new PlatformNotSupportedException(); } - /// - /// void _mm_storeu_pd (double* mem_addr, __m128d a) - /// MOVUPD m128, xmm - /// - public static unsafe void Store(double* address, 
Vector128 source) { throw new PlatformNotSupportedException(); } - /// /// void _mm_storeh_pd (double* mem_addr, __m128d a) - /// MOVHPD m64, xmm + /// MOVHPD m64, xmm1 + /// VMOVHPD m64, xmm1 /// public static unsafe void StoreHigh(double* address, Vector128 source) { throw new PlatformNotSupportedException(); } - /// /// void _mm_storel_pd (double* mem_addr, __m128d a) - /// MOVLPD m64, xmm + /// MOVLPD m64, xmm1 + /// VMOVLPD m64, xmm1 /// public static unsafe void StoreLow(double* address, Vector128 source) { throw new PlatformNotSupportedException(); } @@ -1474,214 +1812,332 @@ internal X64() { } /// public static unsafe void StoreNonTemporal(uint* address, uint value) { throw new PlatformNotSupportedException(); } + /// + /// void _mm_storeu_si32 (void* mem_addr, __m128i a) + /// MOVD m32, xmm1 + /// VMOVD m32, xmm1 + /// + public static unsafe void StoreScalar(int* address, Vector128 source) { throw new PlatformNotSupportedException(); } + /// + /// void _mm_storeu_si32 (void* mem_addr, __m128i a) + /// MOVD m32, xmm1 + /// VMOVD m32, xmm1 + /// + public static unsafe void StoreScalar(uint* address, Vector128 source) { throw new PlatformNotSupportedException(); } + /// + /// void _mm_storel_epi64 (__m128i* mem_addr, __m128i a) + /// MOVQ m64, xmm1 + /// VMOVQ m64, xmm1 + /// + public static unsafe void StoreScalar(long* address, Vector128 source) { throw new PlatformNotSupportedException(); } + /// + /// void _mm_storel_epi64 (__m128i* mem_addr, __m128i a) + /// MOVQ m64, xmm1 + /// VMOVQ m64, xmm1 + /// + public static unsafe void StoreScalar(ulong* address, Vector128 source) { throw new PlatformNotSupportedException(); } + /// + /// void _mm_store_sd (double* mem_addr, __m128d a) + /// MOVSD m64, xmm1 + /// VMOVSD m64, xmm1 + /// VMOVSD m64 {k1}, xmm1 + /// + public static unsafe void StoreScalar(double* address, Vector128 source) { throw new PlatformNotSupportedException(); } + /// /// __m128i _mm_sub_epi8 (__m128i a, __m128i b) - /// PSUBB xmm, xmm/m128 + /// PSUBB xmm1, xmm2/m128 + /// VPSUBB xmm1, xmm2, xmm3/m128 + /// VPSUBB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Subtract(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_sub_epi8 (__m128i a, __m128i b) - /// PSUBB xmm, xmm/m128 + /// PSUBB xmm1, xmm2/m128 + /// VPSUBB xmm1, xmm2, xmm3/m128 + /// VPSUBB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Subtract(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_sub_epi16 (__m128i a, __m128i b) - /// PSUBW xmm, xmm/m128 + /// PSUBW xmm1, xmm2/m128 + /// VPSUBW xmm1, xmm2, xmm3/m128 + /// VPSUBW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Subtract(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_sub_epi16 (__m128i a, __m128i b) - /// PSUBW xmm, xmm/m128 + /// PSUBW xmm1, xmm2/m128 + /// VPSUBW xmm1, xmm2, xmm3/m128 + /// VPSUBW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Subtract(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_sub_epi32 (__m128i a, __m128i b) - /// PSUBD xmm, xmm/m128 + /// PSUBD xmm1, xmm2/m128 + /// VPSUBD xmm1, xmm2, xmm3/m128 + /// VPSUBD xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Subtract(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_sub_epi32 (__m128i a, __m128i b) - /// PSUBD xmm, xmm/m128 + /// PSUBD xmm1, xmm2/m128 + /// VPSUBD xmm1, 
xmm2, xmm3/m128 + /// VPSUBD xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Subtract(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_sub_epi64 (__m128i a, __m128i b) - /// PSUBQ xmm, xmm/m128 + /// PSUBQ xmm1, xmm2/m128 + /// VPSUBQ xmm1, xmm2, xmm3/m128 + /// VPSUBQ xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Subtract(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_sub_epi64 (__m128i a, __m128i b) - /// PSUBQ xmm, xmm/m128 + /// PSUBQ xmm1, xmm2/m128 + /// VPSUBQ xmm1, xmm2, xmm3/m128 + /// VPSUBQ xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Subtract(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_sub_pd (__m128d a, __m128d b) - /// SUBPD xmm, xmm/m128 + /// SUBPD xmm1, xmm2/m128 + /// VSUBPD xmm1, xmm2, xmm3/m128 + /// VSUBPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Subtract(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_sub_sd (__m128d a, __m128d b) - /// SUBSD xmm, xmm/m64 + /// SUBSD xmm1, xmm2/m64 + /// VSUBSD xmm1, xmm2, xmm3/m64 + /// VSUBSD xmm1 {k1}{z}, xmm2, xmm3/m64{er} /// public static Vector128 SubtractScalar(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_subs_epi8 (__m128i a, __m128i b) - /// PSUBSB xmm, xmm/m128 + /// PSUBSB xmm1, xmm2/m128 + /// VPSUBSB xmm1, xmm2, xmm3/m128 + /// VPSUBSB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 SubtractSaturate(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_subs_epi16 (__m128i a, __m128i b) - /// PSUBSW xmm, xmm/m128 + /// PSUBSW xmm1, xmm2/m128 + /// VPSUBSW xmm1, xmm2, xmm3/m128 + /// VPSUBSW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 SubtractSaturate(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_subs_epu8 (__m128i a, __m128i b) - /// PSUBUSB xmm, xmm/m128 + /// PSUBUSB xmm1, xmm2/m128 + /// VPSUBUSB xmm1, xmm2, xmm3/m128 + /// VPSUBUSB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 SubtractSaturate(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_subs_epu16 (__m128i a, __m128i b) - /// PSUBUSW xmm, xmm/m128 + /// PSUBUSW xmm1, xmm2/m128 + /// VPSUBUSW xmm1, xmm2, xmm3/m128 + /// VPSUBUSW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 SubtractSaturate(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_sad_epu8 (__m128i a, __m128i b) + /// PSADBW xmm1, xmm2/m128 + /// VPSADBW xmm1, xmm2, xmm3/m128 + /// VPSADBW xmm1 {k1}{z}, xmm2, xmm3/m128 + /// + public static Vector128 SumAbsoluteDifferences(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + /// /// __m128i _mm_unpackhi_epi8 (__m128i a, __m128i b) - /// PUNPCKHBW xmm, xmm/m128 + /// PUNPCKHBW xmm1, xmm2/m128 + /// VPUNPCKHBW xmm1, xmm2, xmm3/m128 + /// VPUNPCKHBW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 UnpackHigh(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_unpackhi_epi8 (__m128i a, __m128i b) - /// PUNPCKHBW xmm, xmm/m128 + /// PUNPCKHBW xmm1, xmm2/m128 + /// VPUNPCKHBW xmm1, xmm2, xmm3/m128 + /// VPUNPCKHBW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 UnpackHigh(Vector128 left, Vector128 
right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_unpackhi_epi16 (__m128i a, __m128i b) - /// PUNPCKHWD xmm, xmm/m128 + /// PUNPCKHWD xmm1, xmm2/m128 + /// VPUNPCKHWD xmm1, xmm2, xmm3/m128 + /// VPUNPCKHWD xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 UnpackHigh(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_unpackhi_epi16 (__m128i a, __m128i b) - /// PUNPCKHWD xmm, xmm/m128 + /// PUNPCKHWD xmm1, xmm2/m128 + /// VPUNPCKHWD xmm1, xmm2, xmm3/m128 + /// VPUNPCKHWD xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 UnpackHigh(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_unpackhi_epi32 (__m128i a, __m128i b) - /// PUNPCKHDQ xmm, xmm/m128 + /// PUNPCKHDQ xmm1, xmm2/m128 + /// VPUNPCKHDQ xmm1, xmm2, xmm3/m128 + /// VPUNPCKHDQ xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 UnpackHigh(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_unpackhi_epi32 (__m128i a, __m128i b) - /// PUNPCKHDQ xmm, xmm/m128 + /// PUNPCKHDQ xmm1, xmm2/m128 + /// VPUNPCKHDQ xmm1, xmm2, xmm3/m128 + /// VPUNPCKHDQ xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 UnpackHigh(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_unpackhi_epi64 (__m128i a, __m128i b) - /// PUNPCKHQDQ xmm, xmm/m128 + /// PUNPCKHQDQ xmm1, xmm2/m128 + /// VPUNPCKHQDQ xmm1, xmm2, xmm3/m128 + /// VPUNPCKHQDQ xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 UnpackHigh(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_unpackhi_epi64 (__m128i a, __m128i b) - /// PUNPCKHQDQ xmm, xmm/m128 + /// PUNPCKHQDQ xmm1, xmm2/m128 + /// VPUNPCKHQDQ xmm1, xmm2, xmm3/m128 + /// VPUNPCKHQDQ xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 UnpackHigh(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_unpackhi_pd (__m128d a, __m128d b) - /// UNPCKHPD xmm, xmm/m128 + /// UNPCKHPD xmm1, xmm2/m128 + /// VUNPCKHPD xmm1, xmm2, xmm3/m128 + /// VUNPCKHPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 UnpackHigh(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_unpacklo_epi8 (__m128i a, __m128i b) - /// PUNPCKLBW xmm, xmm/m128 + /// PUNPCKLBW xmm1, xmm2/m128 + /// VPUNPCKLBW xmm1, xmm2, xmm3/m128 + /// VPUNPCKLBW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 UnpackLow(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_unpacklo_epi8 (__m128i a, __m128i b) - /// PUNPCKLBW xmm, xmm/m128 + /// PUNPCKLBW xmm1, xmm2/m128 + /// VPUNPCKLBW xmm1, xmm2, xmm3/m128 + /// VPUNPCKLBW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 UnpackLow(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_unpacklo_epi16 (__m128i a, __m128i b) - /// PUNPCKLWD xmm, xmm/m128 + /// PUNPCKLWD xmm1, xmm2/m128 + /// VPUNPCKLWD xmm1, xmm2, xmm3/m128 + /// VPUNPCKLWD xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 UnpackLow(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_unpacklo_epi16 (__m128i a, __m128i b) - /// PUNPCKLWD xmm, xmm/m128 + /// PUNPCKLWD xmm1, xmm2/m128 + /// VPUNPCKLWD xmm1, xmm2, xmm3/m128 + /// VPUNPCKLWD xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 
UnpackLow(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_unpacklo_epi32 (__m128i a, __m128i b) - /// PUNPCKLDQ xmm, xmm/m128 + /// PUNPCKLDQ xmm1, xmm2/m128 + /// VPUNPCKLDQ xmm1, xmm2, xmm3/m128 + /// VPUNPCKLDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 UnpackLow(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_unpacklo_epi32 (__m128i a, __m128i b) - /// PUNPCKLDQ xmm, xmm/m128 + /// PUNPCKLDQ xmm1, xmm2/m128 + /// VPUNPCKLDQ xmm1, xmm2, xmm3/m128 + /// VPUNPCKLDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 UnpackLow(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_unpacklo_epi64 (__m128i a, __m128i b) - /// PUNPCKLQDQ xmm, xmm/m128 + /// PUNPCKLQDQ xmm1, xmm2/m128 + /// VPUNPCKLQDQ xmm1, xmm2, xmm3/m128 + /// VPUNPCKLQDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 UnpackLow(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_unpacklo_epi64 (__m128i a, __m128i b) - /// PUNPCKLQDQ xmm, xmm/m128 + /// PUNPCKLQDQ xmm1, xmm2/m128 + /// VPUNPCKLQDQ xmm1, xmm2, xmm3/m128 + /// VPUNPCKLQDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 UnpackLow(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_unpacklo_pd (__m128d a, __m128d b) - /// UNPCKLPD xmm, xmm/m128 + /// UNPCKLPD xmm1, xmm2/m128 + /// VUNPCKLPD xmm1, xmm2, xmm3/m128 + /// VUNPCKLPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 UnpackLow(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_xor_si128 (__m128i a, __m128i b) - /// PXOR xmm, xmm/m128 + /// PXOR xmm1, xmm2/m128 + /// VPXOR xmm1, xmm2, xmm3/m128 /// public static Vector128 Xor(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_xor_si128 (__m128i a, __m128i b) - /// PXOR xmm, xmm/m128 + /// PXOR xmm1, xmm2/m128 + /// VPXOR xmm1, xmm2, xmm3/m128 /// public static Vector128 Xor(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_xor_si128 (__m128i a, __m128i b) - /// PXOR xmm, xmm/m128 + /// PXOR xmm1, xmm2/m128 + /// VPXOR xmm1, xmm2, xmm3/m128 /// public static Vector128 Xor(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_xor_si128 (__m128i a, __m128i b) - /// PXOR xmm, xmm/m128 + /// PXOR xmm1, xmm2/m128 + /// VPXOR xmm1, xmm2, xmm3/m128 /// public static Vector128 Xor(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_xor_si128 (__m128i a, __m128i b) - /// PXOR xmm, xmm/m128 + /// PXOR xmm1, xmm2/m128 + /// VPXOR xmm1, xmm2, xmm3/m128 + /// VPXORD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Xor(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_xor_si128 (__m128i a, __m128i b) - /// PXOR xmm, xmm/m128 + /// PXOR xmm1, xmm2/m128 + /// VPXOR xmm1, xmm2, xmm3/m128 + /// VPXORD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Xor(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_xor_si128 (__m128i a, __m128i b) - /// PXOR xmm, xmm/m128 + /// PXOR xmm1, xmm2/m128 + /// VPXOR xmm1, xmm2, xmm3/m128 + /// VPXORQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// 
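+ /// A hedged aside: xoring a register with itself is the classic zeroing idiom and is recognized as dependency-breaking by modern CPUs, e.g.:
+ ///   Vector128<long> zero = Sse2.Xor(v, v); // same result as Vector128<long>.Zero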
public static Vector128 Xor(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_xor_si128 (__m128i a, __m128i b) - /// PXOR xmm, xmm/m128 + /// PXOR xmm1, xmm2/m128 + /// VPXOR xmm1, xmm2, xmm3/m128 + /// VPXORQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Xor(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_xor_pd (__m128d a, __m128d b) - /// XORPD xmm, xmm/m128 + /// XORPD xmm1, xmm2/m128 + /// VXORPD xmm1, xmm2, xmm3/m128 + /// VXORPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Xor(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse2.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse2.cs index 9e4a8d673bcd75..072c7f9b64bdcb 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse2.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse2.cs @@ -25,52 +25,55 @@ internal X64() { } public static new bool IsSupported { get => IsSupported; } /// - /// __int64 _mm_cvtsd_si64 (__m128d a) - /// CVTSD2SI r64, xmm/m64 + /// __m128d _mm_cvtsi64_sd (__m128d a, __int64 b) + /// CVTSI2SD xmm1, r/m64 + /// VCVTSI2SD xmm1, xmm2, r/m64 /// This intrinsic is only available on 64-bit processes /// - public static long ConvertToInt64(Vector128 value) => ConvertToInt64(value); + public static Vector128 ConvertScalarToVector128Double(Vector128 upper, long value) => ConvertScalarToVector128Double(upper, value); /// - /// __int64 _mm_cvtsi128_si64 (__m128i a) - /// MOVQ reg/m64, xmm + /// __m128i _mm_cvtsi64_si128 (__int64 a) + /// MOVQ xmm1, r/m64 + /// VMOVQ xmm1, r/m64 /// This intrinsic is only available on 64-bit processes /// - public static long ConvertToInt64(Vector128 value) => ConvertToInt64(value); - + public static Vector128 ConvertScalarToVector128Int64(long value) => ConvertScalarToVector128Int64(value); /// - /// __int64 _mm_cvtsi128_si64 (__m128i a) - /// MOVQ reg/m64, xmm + /// __m128i _mm_cvtsi64_si128 (__int64 a) + /// MOVQ xmm1, r/m64 + /// VMOVQ xmm1, r/m64 /// This intrinsic is only available on 64-bit processes /// - public static ulong ConvertToUInt64(Vector128 value) => ConvertToUInt64(value); + public static Vector128 ConvertScalarToVector128UInt64(ulong value) => ConvertScalarToVector128UInt64(value); /// - /// __m128d _mm_cvtsi64_sd (__m128d a, __int64 b) - /// CVTSI2SD xmm, reg/m64 + /// __int64 _mm_cvtsi128_si64 (__m128i a) + /// MOVQ r/m64, xmm1 + /// VMOVQ r/m64, xmm1 /// This intrinsic is only available on 64-bit processes /// - public static Vector128 ConvertScalarToVector128Double(Vector128 upper, long value) => ConvertScalarToVector128Double(upper, value); - + public static long ConvertToInt64(Vector128 value) => ConvertToInt64(value); /// - /// __m128i _mm_cvtsi64_si128 (__int64 a) - /// MOVQ xmm, reg/m64 + /// __int64 _mm_cvtsd_si64 (__m128d a) + /// CVTSD2SI r64, xmm1/m64 + /// VCVTSD2SI r64, xmm1/m64 /// This intrinsic is only available on 64-bit processes /// - public static Vector128 ConvertScalarToVector128Int64(long value) => ConvertScalarToVector128Int64(value); - + public static long ConvertToInt64(Vector128 value) => ConvertToInt64(value); /// - /// __m128i _mm_cvtsi64_si128 (__int64 a) - /// MOVQ xmm, reg/m64 + /// __int64 _mm_cvttsd_si64 (__m128d a) + /// CVTTSD2SI r64, xmm1/m64 + /// VCVTTSD2SI r64, xmm1/m64 /// This 
intrinsic is only available on 64-bit processes /// - public static Vector128 ConvertScalarToVector128UInt64(ulong value) => ConvertScalarToVector128UInt64(value); - + public static long ConvertToInt64WithTruncation(Vector128 value) => ConvertToInt64WithTruncation(value); /// - /// __int64 _mm_cvttsd_si64 (__m128d a) - /// CVTTSD2SI reg, xmm/m64 + /// __int64 _mm_cvtsi128_si64 (__m128i a) + /// MOVQ r/m64, xmm1 + /// VMOVQ r/m64, xmm1 /// This intrinsic is only available on 64-bit processes /// - public static long ConvertToInt64WithTruncation(Vector128 value) => ConvertToInt64WithTruncation(value); + public static ulong ConvertToUInt64(Vector128 value) => ConvertToUInt64(value); /// /// void _mm_stream_si64(__int64 *p, __int64 a) @@ -88,674 +91,758 @@ internal X64() { } /// /// __m128i _mm_add_epi8 (__m128i a, __m128i b) - /// PADDB xmm, xmm/m128 + /// PADDB xmm1, xmm2/m128 + /// VPADDB xmm1, xmm2, xmm3/m128 + /// VPADDB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Add(Vector128 left, Vector128 right) => Add(left, right); /// /// __m128i _mm_add_epi8 (__m128i a, __m128i b) - /// PADDB xmm, xmm/m128 + /// PADDB xmm1, xmm2/m128 + /// VPADDB xmm1, xmm2, xmm3/m128 + /// VPADDB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Add(Vector128 left, Vector128 right) => Add(left, right); /// /// __m128i _mm_add_epi16 (__m128i a, __m128i b) - /// PADDW xmm, xmm/m128 + /// PADDW xmm1, xmm2/m128 + /// VPADDW xmm1, xmm2, xmm3/m128 + /// VPADDW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Add(Vector128 left, Vector128 right) => Add(left, right); /// /// __m128i _mm_add_epi16 (__m128i a, __m128i b) - /// PADDW xmm, xmm/m128 + /// PADDW xmm1, xmm2/m128 + /// VPADDW xmm1, xmm2, xmm3/m128 + /// VPADDW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Add(Vector128 left, Vector128 right) => Add(left, right); /// /// __m128i _mm_add_epi32 (__m128i a, __m128i b) - /// PADDD xmm, xmm/m128 + /// PADDD xmm1, xmm2/m128 + /// VPADDD xmm1, xmm2, xmm3/m128 + /// VPADDD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Add(Vector128 left, Vector128 right) => Add(left, right); /// /// __m128i _mm_add_epi32 (__m128i a, __m128i b) - /// PADDD xmm, xmm/m128 + /// PADDD xmm1, xmm2/m128 + /// VPADDD xmm1, xmm2, xmm3/m128 + /// VPADDD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Add(Vector128 left, Vector128 right) => Add(left, right); /// /// __m128i _mm_add_epi64 (__m128i a, __m128i b) - /// PADDQ xmm, xmm/m128 + /// PADDQ xmm1, xmm2/m128 + /// VPADDQ xmm1, xmm2, xmm3/m128 + /// VPADDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Add(Vector128 left, Vector128 right) => Add(left, right); /// /// __m128i _mm_add_epi64 (__m128i a, __m128i b) - /// PADDQ xmm, xmm/m128 + /// PADDQ xmm1, xmm2/m128 + /// VPADDQ xmm1, xmm2, xmm3/m128 + /// VPADDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Add(Vector128 left, Vector128 right) => Add(left, right); /// /// __m128d _mm_add_pd (__m128d a, __m128d b) - /// ADDPD xmm, xmm/m128 + /// ADDPD xmm1, xmm2/m128 + /// VADDPD xmm1, xmm2, xmm3/m128 + /// VADDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Add(Vector128 left, Vector128 right) => Add(left, right); /// /// __m128d _mm_add_sd (__m128d a, __m128d b) - /// ADDSD xmm, xmm/m64 + /// ADDSD xmm1, xmm2/m64 + /// VADDSD xmm1, xmm2, xmm3/m64 + /// VADDSD xmm1 {k1}{z}, xmm2, xmm3/m64{er} /// public static Vector128 AddScalar(Vector128 left, Vector128 right) => AddScalar(left, right); /// /// __m128i 
_mm_adds_epi8 (__m128i a, __m128i b) - /// PADDSB xmm, xmm/m128 + /// PADDSB xmm1, xmm2/m128 + /// VPADDSB xmm1, xmm2, xmm3/m128 + /// VPADDSB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 AddSaturate(Vector128 left, Vector128 right) => AddSaturate(left, right); /// /// __m128i _mm_adds_epu8 (__m128i a, __m128i b) - /// PADDUSB xmm, xmm/m128 + /// PADDUSB xmm1, xmm2/m128 + /// VPADDUSB xmm1, xmm2, xmm3/m128 + /// VPADDUSB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 AddSaturate(Vector128 left, Vector128 right) => AddSaturate(left, right); /// /// __m128i _mm_adds_epi16 (__m128i a, __m128i b) - /// PADDSW xmm, xmm/m128 + /// PADDSW xmm1, xmm2/m128 + /// VPADDSW xmm1, xmm2, xmm3/m128 + /// VPADDSW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 AddSaturate(Vector128 left, Vector128 right) => AddSaturate(left, right); /// /// __m128i _mm_adds_epu16 (__m128i a, __m128i b) - /// PADDUSW xmm, xmm/m128 + /// PADDUSW xmm1, xmm2/m128 + /// VPADDUSW xmm1, xmm2, xmm3/m128 + /// VPADDUSW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 AddSaturate(Vector128 left, Vector128 right) => AddSaturate(left, right); /// /// __m128i _mm_and_si128 (__m128i a, __m128i b) - /// PAND xmm, xmm/m128 + /// PAND xmm1, xmm2/m128 + /// VPAND xmm1, xmm2, xmm3/m128 /// public static Vector128 And(Vector128 left, Vector128 right) => And(left, right); /// /// __m128i _mm_and_si128 (__m128i a, __m128i b) - /// PAND xmm, xmm/m128 + /// PAND xmm1, xmm2/m128 + /// VPAND xmm1, xmm2, xmm3/m128 /// public static Vector128 And(Vector128 left, Vector128 right) => And(left, right); /// /// __m128i _mm_and_si128 (__m128i a, __m128i b) - /// PAND xmm, xmm/m128 + /// PAND xmm1, xmm2/m128 + /// VPAND xmm1, xmm2, xmm3/m128 /// public static Vector128 And(Vector128 left, Vector128 right) => And(left, right); /// /// __m128i _mm_and_si128 (__m128i a, __m128i b) - /// PAND xmm, xmm/m128 + /// PAND xmm1, xmm2/m128 + /// VPAND xmm1, xmm2, xmm3/m128 /// public static Vector128 And(Vector128 left, Vector128 right) => And(left, right); /// /// __m128i _mm_and_si128 (__m128i a, __m128i b) - /// PAND xmm, xmm/m128 + /// PAND xmm1, xmm2/m128 + /// VPAND xmm1, xmm2, xmm3/m128 + /// VPANDD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 And(Vector128 left, Vector128 right) => And(left, right); /// /// __m128i _mm_and_si128 (__m128i a, __m128i b) - /// PAND xmm, xmm/m128 + /// PAND xmm1, xmm2/m128 + /// VPAND xmm1, xmm2, xmm3/m128 + /// VPANDD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 And(Vector128 left, Vector128 right) => And(left, right); /// /// __m128i _mm_and_si128 (__m128i a, __m128i b) - /// PAND xmm, xmm/m128 + /// PAND xmm1, xmm2/m128 + /// VPAND xmm1, xmm2, xmm3/m128 + /// VPANDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 And(Vector128 left, Vector128 right) => And(left, right); /// /// __m128i _mm_and_si128 (__m128i a, __m128i b) - /// PAND xmm, xmm/m128 + /// PAND xmm1, xmm2/m128 + /// VPAND xmm1, xmm2, xmm3/m128 + /// VPANDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 And(Vector128 left, Vector128 right) => And(left, right); /// /// __m128d _mm_and_pd (__m128d a, __m128d b) - /// ANDPD xmm, xmm/m128 + /// ANDPD xmm1, xmm2/m128 + /// VANDPD xmm1, xmm2, xmm3/m128 + /// VANDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 And(Vector128 left, Vector128 right) => And(left, right); /// /// __m128i _mm_andnot_si128 (__m128i a, __m128i b) - /// PANDN xmm, xmm/m128 + /// PANDN xmm1, xmm2/m128 + /// 
VPANDN xmm1, xmm2, xmm3/m128 /// public static Vector128 AndNot(Vector128 left, Vector128 right) => AndNot(left, right); /// /// __m128i _mm_andnot_si128 (__m128i a, __m128i b) - /// PANDN xmm, xmm/m128 + /// PANDN xmm1, xmm2/m128 + /// VPANDN xmm1, xmm2, xmm3/m128 /// public static Vector128 AndNot(Vector128 left, Vector128 right) => AndNot(left, right); /// /// __m128i _mm_andnot_si128 (__m128i a, __m128i b) - /// PANDN xmm, xmm/m128 + /// PANDN xmm1, xmm2/m128 + /// VPANDN xmm1, xmm2, xmm3/m128 /// public static Vector128 AndNot(Vector128 left, Vector128 right) => AndNot(left, right); /// /// __m128i _mm_andnot_si128 (__m128i a, __m128i b) - /// PANDN xmm, xmm/m128 + /// PANDN xmm1, xmm2/m128 + /// VPANDN xmm1, xmm2, xmm3/m128 /// public static Vector128 AndNot(Vector128 left, Vector128 right) => AndNot(left, right); /// /// __m128i _mm_andnot_si128 (__m128i a, __m128i b) - /// PANDN xmm, xmm/m128 + /// PANDN xmm1, xmm2/m128 + /// VPANDN xmm1, xmm2, xmm3/m128 + /// VPANDND xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 AndNot(Vector128 left, Vector128 right) => AndNot(left, right); /// /// __m128i _mm_andnot_si128 (__m128i a, __m128i b) - /// PANDN xmm, xmm/m128 + /// PANDN xmm1, xmm2/m128 + /// VPANDN xmm1, xmm2, xmm3/m128 + /// VPANDND xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 AndNot(Vector128 left, Vector128 right) => AndNot(left, right); /// /// __m128i _mm_andnot_si128 (__m128i a, __m128i b) - /// PANDN xmm, xmm/m128 + /// PANDN xmm1, xmm2/m128 + /// VPANDN xmm1, xmm2, xmm3/m128 + /// VPANDNQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 AndNot(Vector128 left, Vector128 right) => AndNot(left, right); /// /// __m128i _mm_andnot_si128 (__m128i a, __m128i b) - /// PANDN xmm, xmm/m128 + /// PANDN xmm1, xmm2/m128 + /// VPANDN xmm1, xmm2, xmm3/m128 + /// VPANDNQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 AndNot(Vector128 left, Vector128 right) => AndNot(left, right); /// /// __m128d _mm_andnot_pd (__m128d a, __m128d b) - /// ADDNPD xmm, xmm/m128 + /// ANDNPD xmm1, xmm2/m128 + /// VANDNPD xmm1, xmm2, xmm3/m128 + /// VANDNPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 AndNot(Vector128 left, Vector128 right) => AndNot(left, right); /// /// __m128i _mm_avg_epu8 (__m128i a, __m128i b) - /// PAVGB xmm, xmm/m128 + /// PAVGB xmm1, xmm2/m128 + /// VPAVGB xmm1, xmm2, xmm3/m128 + /// VPAVGB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Average(Vector128 left, Vector128 right) => Average(left, right); /// /// __m128i _mm_avg_epu16 (__m128i a, __m128i b) - /// PAVGW xmm, xmm/m128 + /// PAVGW xmm1, xmm2/m128 + /// VPAVGW xmm1, xmm2, xmm3/m128 + /// VPAVGW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Average(Vector128 left, Vector128 right) => Average(left, right); /// /// __m128i _mm_cmpeq_epi8 (__m128i a, __m128i b) - /// PCMPEQB xmm, xmm/m128 + /// PCMPEQB xmm1, xmm2/m128 + /// VPCMPEQB xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareEqual(Vector128 left, Vector128 right) => CompareEqual(left, right); /// /// __m128i _mm_cmpeq_epi8 (__m128i a, __m128i b) - /// PCMPEQB xmm, xmm/m128 + /// PCMPEQB xmm1, xmm2/m128 + /// VPCMPEQB xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareEqual(Vector128 left, Vector128 right) => CompareEqual(left, right); /// /// __m128i _mm_cmpeq_epi16 (__m128i a, __m128i b) - /// PCMPEQW xmm, xmm/m128 + /// PCMPEQW xmm1, xmm2/m128 + /// VPCMPEQW xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareEqual(Vector128 left, 
Vector128 right) => CompareEqual(left, right); /// /// __m128i _mm_cmpeq_epi16 (__m128i a, __m128i b) - /// PCMPEQW xmm, xmm/m128 + /// PCMPEQW xmm1, xmm2/m128 + /// VPCMPEQW xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareEqual(Vector128 left, Vector128 right) => CompareEqual(left, right); /// /// __m128i _mm_cmpeq_epi32 (__m128i a, __m128i b) - /// PCMPEQD xmm, xmm/m128 + /// PCMPEQD xmm1, xmm2/m128 + /// VPCMPEQD xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareEqual(Vector128 left, Vector128 right) => CompareEqual(left, right); /// /// __m128i _mm_cmpeq_epi32 (__m128i a, __m128i b) - /// PCMPEQD xmm, xmm/m128 + /// PCMPEQD xmm1, xmm2/m128 + /// VPCMPEQD xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareEqual(Vector128 left, Vector128 right) => CompareEqual(left, right); /// /// __m128d _mm_cmpeq_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(0) + /// CMPPD xmm1, xmm2/m128, imm8(0) + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(0) /// public static Vector128 CompareEqual(Vector128 left, Vector128 right) => CompareEqual(left, right); - /// - /// int _mm_comieq_sd (__m128d a, __m128d b) - /// COMISD xmm, xmm/m64 - /// - public static bool CompareScalarOrderedEqual(Vector128 left, Vector128 right) => CompareScalarOrderedEqual(left, right); - - /// - /// int _mm_ucomieq_sd (__m128d a, __m128d b) - /// UCOMISD xmm, xmm/m64 - /// - public static bool CompareScalarUnorderedEqual(Vector128 left, Vector128 right) => CompareScalarUnorderedEqual(left, right); - - /// - /// __m128d _mm_cmpeq_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(0) - /// - public static Vector128 CompareScalarEqual(Vector128 left, Vector128 right) => CompareScalarEqual(left, right); - /// /// __m128i _mm_cmpgt_epi8 (__m128i a, __m128i b) - /// PCMPGTB xmm, xmm/m128 + /// PCMPGTB xmm1, xmm2/m128 + /// VPCMPGTB xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareGreaterThan(Vector128 left, Vector128 right) => CompareGreaterThan(left, right); /// /// __m128i _mm_cmpgt_epi16 (__m128i a, __m128i b) - /// PCMPGTW xmm, xmm/m128 + /// PCMPGTW xmm1, xmm2/m128 + /// VPCMPGTW xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareGreaterThan(Vector128 left, Vector128 right) => CompareGreaterThan(left, right); /// /// __m128i _mm_cmpgt_epi32 (__m128i a, __m128i b) - /// PCMPGTD xmm, xmm/m128 + /// PCMPGTD xmm1, xmm2/m128 + /// VPCMPGTD xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareGreaterThan(Vector128 left, Vector128 right) => CompareGreaterThan(left, right); /// /// __m128d _mm_cmpgt_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(1) with swapped operands + /// CMPPD xmm1, xmm2/m128, imm8(1) ; with swapped operands + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(1) ; with swapped operands /// public static Vector128 CompareGreaterThan(Vector128 left, Vector128 right) => CompareGreaterThan(left, right); - /// - /// int _mm_comigt_sd (__m128d a, __m128d b) - /// COMISD xmm, xmm/m64 - /// - public static bool CompareScalarOrderedGreaterThan(Vector128 left, Vector128 right) => CompareScalarOrderedGreaterThan(left, right); - - /// - /// int _mm_ucomigt_sd (__m128d a, __m128d b) - /// UCOMISD xmm, xmm/m64 - /// - public static bool CompareScalarUnorderedGreaterThan(Vector128 left, Vector128 right) => CompareScalarUnorderedGreaterThan(left, right); - - /// - /// __m128d _mm_cmpgt_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(1) with swapped operands - /// - public static Vector128 CompareScalarGreaterThan(Vector128 left, Vector128 right) => 
CompareScalarGreaterThan(left, right); - /// /// __m128d _mm_cmpge_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(2) with swapped operands + /// CMPPD xmm1, xmm2/m128, imm8(2) ; with swapped operands + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(2) ; with swapped operands /// public static Vector128 CompareGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareGreaterThanOrEqual(left, right); - /// - /// int _mm_comige_sd (__m128d a, __m128d b) - /// COMISD xmm, xmm/m64 - /// - public static bool CompareScalarOrderedGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareScalarOrderedGreaterThanOrEqual(left, right); - - /// - /// int _mm_ucomige_sd (__m128d a, __m128d b) - /// UCOMISD xmm, xmm/m64 - /// - public static bool CompareScalarUnorderedGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareScalarUnorderedGreaterThanOrEqual(left, right); - - /// - /// __m128d _mm_cmpge_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(2) with swapped operands - /// - public static Vector128 CompareScalarGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareScalarGreaterThanOrEqual(left, right); - /// /// __m128i _mm_cmplt_epi8 (__m128i a, __m128i b) - /// PCMPGTB xmm, xmm/m128 + /// PCMPGTB xmm1, xmm2/m128 ; with swapped operands + /// VPCMPGTB xmm1, xmm2, xmm3/m128 ; with swapped operands /// public static Vector128 CompareLessThan(Vector128 left, Vector128 right) => CompareLessThan(left, right); /// /// __m128i _mm_cmplt_epi16 (__m128i a, __m128i b) - /// PCMPGTW xmm, xmm/m128 + /// PCMPGTW xmm1, xmm2/m128 ; with swapped operands + /// VPCMPGTW xmm1, xmm2, xmm3/m128 ; with swapped operands /// public static Vector128 CompareLessThan(Vector128 left, Vector128 right) => CompareLessThan(left, right); /// /// __m128i _mm_cmplt_epi32 (__m128i a, __m128i b) - /// PCMPGTD xmm, xmm/m128 + /// PCMPGTD xmm1, xmm2/m128 ; with swapped operands + /// VPCMPGTD xmm1, xmm2, xmm3/m128 ; with swapped operands /// public static Vector128 CompareLessThan(Vector128 left, Vector128 right) => CompareLessThan(left, right); /// /// __m128d _mm_cmplt_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(1) + /// CMPPD xmm1, xmm2/m128, imm8(1) + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(1) /// public static Vector128 CompareLessThan(Vector128 left, Vector128 right) => CompareLessThan(left, right); /// - /// int _mm_comilt_sd (__m128d a, __m128d b) - /// COMISD xmm, xmm/m64 + /// __m128d _mm_cmple_pd (__m128d a, __m128d b) + /// CMPPD xmm1, xmm2/m128, imm8(2) + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(2) /// - public static bool CompareScalarOrderedLessThan(Vector128 left, Vector128 right) => CompareScalarOrderedLessThan(left, right); - + public static Vector128 CompareLessThanOrEqual(Vector128 left, Vector128 right) => CompareLessThanOrEqual(left, right); /// - /// int _mm_ucomilt_sd (__m128d a, __m128d b) - /// UCOMISD xmm, xmm/m64 + /// __m128d _mm_cmpneq_pd (__m128d a, __m128d b) + /// CMPPD xmm1, xmm2/m128, imm8(4) + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(4) /// - public static bool CompareScalarUnorderedLessThan(Vector128 left, Vector128 right) => CompareScalarUnorderedLessThan(left, right); - + public static Vector128 CompareNotEqual(Vector128 left, Vector128 right) => CompareNotEqual(left, right); /// - /// __m128d _mm_cmplt_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(1) + /// __m128d _mm_cmpngt_pd (__m128d a, __m128d b) + /// CMPPD xmm1, xmm2/m128, imm8(5) ; with swapped operands + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(5) ; with swapped operands /// - public 
static Vector128 CompareScalarLessThan(Vector128 left, Vector128 right) => CompareScalarLessThan(left, right); - + public static Vector128 CompareNotGreaterThan(Vector128 left, Vector128 right) => CompareNotGreaterThan(left, right); /// - /// __m128d _mm_cmple_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(2) + /// __m128d _mm_cmpnge_pd (__m128d a, __m128d b) + /// CMPPD xmm1, xmm2/m128, imm8(6) ; with swapped operands + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(6) ; with swapped operands /// - public static Vector128 CompareLessThanOrEqual(Vector128 left, Vector128 right) => CompareLessThanOrEqual(left, right); - + public static Vector128 CompareNotGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareNotGreaterThanOrEqual(left, right); /// - /// int _mm_comile_sd (__m128d a, __m128d b) - /// COMISD xmm, xmm/m64 + /// __m128d _mm_cmpnlt_pd (__m128d a, __m128d b) + /// CMPPD xmm1, xmm2/m128, imm8(5) + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(5) /// - public static bool CompareScalarOrderedLessThanOrEqual(Vector128 left, Vector128 right) => CompareScalarOrderedLessThanOrEqual(left, right); - + public static Vector128 CompareNotLessThan(Vector128 left, Vector128 right) => CompareNotLessThan(left, right); /// - /// int _mm_ucomile_sd (__m128d a, __m128d b) - /// UCOMISD xmm, xmm/m64 + /// __m128d _mm_cmpnle_pd (__m128d a, __m128d b) + /// CMPPD xmm1, xmm2/m128, imm8(6) + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(6) /// - public static bool CompareScalarUnorderedLessThanOrEqual(Vector128 left, Vector128 right) => CompareScalarUnorderedLessThanOrEqual(left, right); - + public static Vector128 CompareNotLessThanOrEqual(Vector128 left, Vector128 right) => CompareNotLessThanOrEqual(left, right); /// - /// __m128d _mm_cmple_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(2) + /// __m128d _mm_cmpord_pd (__m128d a, __m128d b) + /// CMPPD xmm1, xmm2/m128, imm8(7) + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(7) /// - public static Vector128 CompareScalarLessThanOrEqual(Vector128 left, Vector128 right) => CompareScalarLessThanOrEqual(left, right); + public static Vector128 CompareOrdered(Vector128 left, Vector128 right) => CompareOrdered(left, right); /// - /// __m128d _mm_cmpneq_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(4) + /// __m128d _mm_cmpeq_sd (__m128d a, __m128d b) + /// CMPSD xmm1, xmm2/m64, imm8(0) + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(0) /// - public static Vector128 CompareNotEqual(Vector128 left, Vector128 right) => CompareNotEqual(left, right); - + public static Vector128 CompareScalarEqual(Vector128 left, Vector128 right) => CompareScalarEqual(left, right); /// - /// int _mm_comineq_sd (__m128d a, __m128d b) - /// COMISD xmm, xmm/m64 + /// __m128d _mm_cmpgt_sd (__m128d a, __m128d b) + /// CMPSD xmm1, xmm2/m64, imm8(1) ; with swapped operands + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(1) ; with swapped operands /// - public static bool CompareScalarOrderedNotEqual(Vector128 left, Vector128 right) => CompareScalarOrderedNotEqual(left, right); - + public static Vector128 CompareScalarGreaterThan(Vector128 left, Vector128 right) => CompareScalarGreaterThan(left, right); /// - /// int _mm_ucomineq_sd (__m128d a, __m128d b) - /// UCOMISD xmm, xmm/m64 + /// __m128d _mm_cmpge_sd (__m128d a, __m128d b) + /// CMPSD xmm1, xmm2/m64, imm8(2) ; with swapped operands + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(2) ; with swapped operands /// - public static bool CompareScalarUnorderedNotEqual(Vector128 left, Vector128 right) => CompareScalarUnorderedNotEqual(left, right); - +
public static Vector128 CompareScalarGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareScalarGreaterThanOrEqual(left, right); /// - /// __m128d _mm_cmpneq_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(4) + /// __m128d _mm_cmplt_sd (__m128d a, __m128d b) + /// CMPSD xmm1, xmm2/m64, imm8(1) + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(1) /// - public static Vector128 CompareScalarNotEqual(Vector128 left, Vector128 right) => CompareScalarNotEqual(left, right); - + public static Vector128 CompareScalarLessThan(Vector128 left, Vector128 right) => CompareScalarLessThan(left, right); /// - /// __m128d _mm_cmpngt_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(5) with swapped operands + /// __m128d _mm_cmple_sd (__m128d a, __m128d b) + /// CMPSD xmm1, xmm2/m64, imm8(2) + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(2) /// - public static Vector128 CompareNotGreaterThan(Vector128 left, Vector128 right) => CompareNotGreaterThan(left, right); - + public static Vector128 CompareScalarLessThanOrEqual(Vector128 left, Vector128 right) => CompareScalarLessThanOrEqual(left, right); /// - /// __m128d _mm_cmpngt_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(5) with swapped operands + /// __m128d _mm_cmpneq_sd (__m128d a, __m128d b) + /// CMPSD xmm1, xmm2/m64, imm8(4) + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(4) /// - public static Vector128 CompareScalarNotGreaterThan(Vector128 left, Vector128 right) => CompareScalarNotGreaterThan(left, right); - + public static Vector128 CompareScalarNotEqual(Vector128 left, Vector128 right) => CompareScalarNotEqual(left, right); /// - /// __m128d _mm_cmpnge_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(6) with swapped operands + /// __m128d _mm_cmpngt_sd (__m128d a, __m128d b) + /// CMPSD xmm1, xmm2/m64, imm8(5) ; with swapped operands + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(5) ; with swapped operands /// - public static Vector128 CompareNotGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareNotGreaterThanOrEqual(left, right); - + public static Vector128 CompareScalarNotGreaterThan(Vector128 left, Vector128 right) => CompareScalarNotGreaterThan(left, right); /// /// __m128d _mm_cmpnge_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(6) with swapped operands + /// CMPSD xmm1, xmm2/m64, imm8(6) ; with swapped operands + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(6) ; with swapped operands /// public static Vector128 CompareScalarNotGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareScalarNotGreaterThanOrEqual(left, right); - - /// - /// __m128d _mm_cmpnlt_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(5) - /// - public static Vector128 CompareNotLessThan(Vector128 left, Vector128 right) => CompareNotLessThan(left, right); - /// /// __m128d _mm_cmpnlt_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(5) + /// CMPSD xmm1, xmm2/m64, imm8(5) + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(5) /// public static Vector128 CompareScalarNotLessThan(Vector128 left, Vector128 right) => CompareScalarNotLessThan(left, right); - - /// - /// __m128d _mm_cmpnle_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(6) - /// - public static Vector128 CompareNotLessThanOrEqual(Vector128 left, Vector128 right) => CompareNotLessThanOrEqual(left, right); - /// /// __m128d _mm_cmpnle_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(6) + /// CMPSD xmm1, xmm2/m64, imm8(6) + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(6) /// public static Vector128 CompareScalarNotLessThanOrEqual(Vector128 left, Vector128 right) =>
CompareScalarNotLessThanOrEqual(left, right); - /// - /// __m128d _mm_cmpord_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(7) - /// - public static Vector128 CompareOrdered(Vector128 left, Vector128 right) => CompareOrdered(left, right); - /// /// __m128d _mm_cmpord_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(7) + /// CMPSD xmm1, xmm2/m64, imm8(7) + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(7) /// public static Vector128 CompareScalarOrdered(Vector128 left, Vector128 right) => CompareScalarOrdered(left, right); - /// - /// __m128d _mm_cmpunord_pd (__m128d a, __m128d b) - /// CMPPD xmm, xmm/m128, imm8(3) + /// int _mm_comieq_sd (__m128d a, __m128d b) + /// COMISD xmm1, xmm2/m64 ; ZF=1 && PF=0 + /// VCOMISD xmm1, xmm2/m64 ; ZF=1 && PF=0 + /// VCOMISD xmm1, xmm2/m64{sae} ; ZF=1 && PF=0 /// - public static Vector128 CompareUnordered(Vector128 left, Vector128 right) => CompareUnordered(left, right); - + public static bool CompareScalarOrderedEqual(Vector128 left, Vector128 right) => CompareScalarOrderedEqual(left, right); /// - /// __m128d _mm_cmpunord_sd (__m128d a, __m128d b) - /// CMPSD xmm, xmm/m64, imm8(3) + /// int _mm_comigt_sd (__m128d a, __m128d b) + /// COMISD xmm1, xmm2/m64 ; ZF=0 && CF=0 + /// VCOMISD xmm1, xmm2/m64 ; ZF=0 && CF=0 + /// VCOMISD xmm1, xmm2/m64{sae} ; ZF=0 && CF=0 /// - public static Vector128 CompareScalarUnordered(Vector128 left, Vector128 right) => CompareScalarUnordered(left, right); - + public static bool CompareScalarOrderedGreaterThan(Vector128 left, Vector128 right) => CompareScalarOrderedGreaterThan(left, right); /// - /// __m128i _mm_cvtps_epi32 (__m128 a) - /// CVTPS2DQ xmm, xmm/m128 + /// int _mm_comige_sd (__m128d a, __m128d b) + /// COMISD xmm1, xmm2/m64 ; CF=0 + /// VCOMISD xmm1, xmm2/m64 ; CF=0 + /// VCOMISD xmm1, xmm2/m64{sae} ; CF=0 /// - public static Vector128 ConvertToVector128Int32(Vector128 value) => ConvertToVector128Int32(value); + public static bool CompareScalarOrderedGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareScalarOrderedGreaterThanOrEqual(left, right); /// - /// __m128i _mm_cvtpd_epi32 (__m128d a) - /// CVTPD2DQ xmm, xmm/m128 + /// int _mm_comilt_sd (__m128d a, __m128d b) + /// COMISD xmm1, xmm2/m64 ; PF=0 && CF=1 + /// VCOMISD xmm1, xmm2/m64 ; PF=0 && CF=1 + /// VCOMISD xmm1, xmm2/m64{sae} ; PF=0 && CF=1 /// - public static Vector128 ConvertToVector128Int32(Vector128 value) => ConvertToVector128Int32(value); + public static bool CompareScalarOrderedLessThan(Vector128 left, Vector128 right) => CompareScalarOrderedLessThan(left, right); /// - /// __m128 _mm_cvtepi32_ps (__m128i a) - /// CVTDQ2PS xmm, xmm/m128 + /// int _mm_comile_sd (__m128d a, __m128d b) + /// COMISD xmm1, xmm2/m64 ; PF=0 && (ZF=1 || CF=1) + /// VCOMISD xmm1, xmm2/m64 ; PF=0 && (ZF=1 || CF=1) + /// VCOMISD xmm1, xmm2/m64{sae} ; PF=0 && (ZF=1 || CF=1) /// - public static Vector128 ConvertToVector128Single(Vector128 value) => ConvertToVector128Single(value); + public static bool CompareScalarOrderedLessThanOrEqual(Vector128 left, Vector128 right) => CompareScalarOrderedLessThanOrEqual(left, right); /// - /// __m128 _mm_cvtpd_ps (__m128d a) - /// CVTPD2PS xmm, xmm/m128 + /// int _mm_comineq_sd (__m128d a, __m128d b) + /// COMISD xmm1, xmm2/m64 ; ZF=0 || PF=1 + /// VCOMISD xmm1, xmm2/m64 ; ZF=0 || PF=1 + /// VCOMISD xmm1, xmm2/m64{sae} ; ZF=0 || PF=1 /// - public static Vector128 ConvertToVector128Single(Vector128 value) => ConvertToVector128Single(value); + public static bool CompareScalarOrderedNotEqual(Vector128 left, Vector128
right) => CompareScalarOrderedNotEqual(left, right); + /// - /// __m128d _mm_cvtepi32_pd (__m128i a) - /// CVTDQ2PD xmm, xmm/m128 + /// __m128d _mm_cmpunord_sd (__m128d a, __m128d b) + /// CMPSD xmm1, xmm2/m64, imm8(3) + /// VCMPSD xmm1, xmm2, xmm3/m64, imm8(3) /// - public static Vector128 ConvertToVector128Double(Vector128 value) => ConvertToVector128Double(value); + public static Vector128 CompareScalarUnordered(Vector128 left, Vector128 right) => CompareScalarUnordered(left, right); /// - /// __m128d _mm_cvtps_pd (__m128 a) - /// CVTPS2PD xmm, xmm/m128 + /// int _mm_ucomieq_sd (__m128d a, __m128d b) + /// UCOMISD xmm1, xmm2/m64 ; ZF=1 && PF=0 + /// VUCOMISD xmm1, xmm2/m64 ; ZF=1 && PF=0 + /// VUCOMISD xmm1, xmm2/m64{sae} ; ZF=1 && PF=0 /// - public static Vector128 ConvertToVector128Double(Vector128 value) => ConvertToVector128Double(value); - + public static bool CompareScalarUnorderedEqual(Vector128 left, Vector128 right) => CompareScalarUnorderedEqual(left, right); /// - /// int _mm_cvtsd_si32 (__m128d a) - /// CVTSD2SI r32, xmm/m64 + /// int _mm_ucomigt_sd (__m128d a, __m128d b) + /// UCOMISD xmm1, xmm2/m64 ; ZF=0 && CF=0 + /// VUCOMISD xmm1, xmm2/m64 ; ZF=0 && CF=0 + /// VUCOMISD xmm1, xmm2/m64{sae} ; ZF=0 && CF=0 /// - public static int ConvertToInt32(Vector128 value) => ConvertToInt32(value); + public static bool CompareScalarUnorderedGreaterThan(Vector128 left, Vector128 right) => CompareScalarUnorderedGreaterThan(left, right); /// - /// int _mm_cvtsi128_si32 (__m128i a) - /// MOVD reg/m32, xmm + /// int _mm_ucomige_sd (__m128d a, __m128d b) + /// UCOMISD xmm1, xmm2/m64 ; CF=0 + /// VUCOMISD xmm1, xmm2/m64 ; CF=0 + /// VUCOMISD xmm1, xmm2/m64{sae} ; CF=0 /// - public static int ConvertToInt32(Vector128 value) => ConvertToInt32(value); + public static bool CompareScalarUnorderedGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareScalarUnorderedGreaterThanOrEqual(left, right); + /// + /// int _mm_ucomilt_sd (__m128d a, __m128d b) + /// UCOMISD xmm1, xmm2/m64 ; PF=0 && CF=1 + /// VUCOMISD xmm1, xmm2/m64 ; PF=0 && CF=1 + /// VUCOMISD xmm1, xmm2/m64{sae} ; PF=0 && CF=1 + /// + public static bool CompareScalarUnorderedLessThan(Vector128 left, Vector128 right) => CompareScalarUnorderedLessThan(left, right); + /// + /// int _mm_ucomile_sd (__m128d a, __m128d b) + /// UCOMISD xmm1, xmm2/m64 ; PF=0 && (ZF=1 || CF=1) + /// VUCOMISD xmm1, xmm2/m64 ; PF=0 && (ZF=1 || CF=1) + /// VUCOMISD xmm1, xmm2/m64{sae} ; PF=0 && (ZF=1 || CF=1) + /// + public static bool CompareScalarUnorderedLessThanOrEqual(Vector128 left, Vector128 right) => CompareScalarUnorderedLessThanOrEqual(left, right); + /// + /// int _mm_ucomineq_sd (__m128d a, __m128d b) + /// UCOMISD xmm1, xmm2/m64 ; ZF=0 || PF=1 + /// VUCOMISD xmm1, xmm2/m64 ; ZF=0 || PF=1 + /// VUCOMISD xmm1, xmm2/m64{sae} ; ZF=0 || PF=1 + /// + public static bool CompareScalarUnorderedNotEqual(Vector128 left, Vector128 right) => CompareScalarUnorderedNotEqual(left, right); /// - /// int _mm_cvtsi128_si32 (__m128i a) - /// MOVD reg/m32, xmm + /// __m128d _mm_cmpunord_pd (__m128d a, __m128d b) + /// CMPPD xmm1, xmm2/m128, imm8(3) + /// VCMPPD xmm1, xmm2, xmm3/m128, imm8(3) /// - public static uint ConvertToUInt32(Vector128 value) => ConvertToUInt32(value); + public static Vector128 CompareUnordered(Vector128 left, Vector128 right) => CompareUnordered(left, right); /// /// __m128d _mm_cvtsi32_sd (__m128d a, int b) - /// CVTSI2SD xmm, reg/m32 + /// CVTSI2SD xmm1, r/m32 + /// VCVTSI2SD xmm1, xmm2, r/m32 /// public static Vector128
ConvertScalarToVector128Double(Vector128 upper, int value) => ConvertScalarToVector128Double(upper, value); - /// /// __m128d _mm_cvtss_sd (__m128d a, __m128 b) - /// CVTSS2SD xmm, xmm/m32 + /// CVTSS2SD xmm1, xmm2/m32 + /// VCVTSS2SD xmm1, xmm2, xmm3/m32 /// public static Vector128 ConvertScalarToVector128Double(Vector128 upper, Vector128 value) => ConvertScalarToVector128Double(upper, value); - /// /// __m128i _mm_cvtsi32_si128 (int a) - /// MOVD xmm, reg/m32 + /// MOVD xmm1, r/m32 + /// VMOVD xmm1, r/m32 /// public static Vector128 ConvertScalarToVector128Int32(int value) => ConvertScalarToVector128Int32(value); - /// /// __m128 _mm_cvtsd_ss (__m128 a, __m128d b) - /// CVTSD2SS xmm, xmm/m64 + /// CVTSD2SS xmm1, xmm2/m64 + /// VCVTSD2SS xmm1, xmm2, xmm3/m64 /// public static Vector128 ConvertScalarToVector128Single(Vector128 upper, Vector128 value) => ConvertScalarToVector128Single(upper, value); /// /// __m128i _mm_cvtsi32_si128 (int a) - /// MOVD xmm, reg/m32 + /// MOVD xmm1, r/m32 + /// VMOVD xmm1, r/m32 /// public static Vector128 ConvertScalarToVector128UInt32(uint value) => ConvertScalarToVector128UInt32(value); /// - /// __m128i _mm_cvttps_epi32 (__m128 a) - /// CVTTPS2DQ xmm, xmm/m128 + /// int _mm_cvtsi128_si32 (__m128i a) + /// MOVD r/m32, xmm1 + /// VMOVD r/m32, xmm1 /// - public static Vector128 ConvertToVector128Int32WithTruncation(Vector128 value) => ConvertToVector128Int32WithTruncation(value); + public static int ConvertToInt32(Vector128 value) => ConvertToInt32(value); /// - /// __m128i _mm_cvttpd_epi32 (__m128d a) - /// CVTTPD2DQ xmm, xmm/m128 + /// int _mm_cvtsd_si32 (__m128d a) + /// CVTSD2SI r32, xmm1/m64 + /// VCVTSD2SI r32, xmm1/m64 /// - public static Vector128 ConvertToVector128Int32WithTruncation(Vector128 value) => ConvertToVector128Int32WithTruncation(value); - + public static int ConvertToInt32(Vector128 value) => ConvertToInt32(value); /// /// int _mm_cvttsd_si32 (__m128d a) - /// CVTTSD2SI reg, xmm/m64 + /// CVTTSD2SI r32, xmm1/m64 + /// VCVTTSD2SI r32, xmm1/m64 /// public static int ConvertToInt32WithTruncation(Vector128 value) => ConvertToInt32WithTruncation(value); - - /// - /// __m128d _mm_div_pd (__m128d a, __m128d b) - /// DIVPD xmm, xmm/m128 - /// - public static Vector128 Divide(Vector128 left, Vector128 right) => Divide(left, right); - /// - /// __m128d _mm_div_sd (__m128d a, __m128d b) - /// DIVSD xmm, xmm/m64 - /// - public static Vector128 DivideScalar(Vector128 left, Vector128 right) => DivideScalar(left, right); - - /// - /// int _mm_extract_epi16 (__m128i a, int immediate) - /// PEXTRW reg, xmm, imm8 + /// int _mm_cvtsi128_si32 (__m128i a) + /// MOVD r/m32, xmm1 + /// VMOVD r/m32, xmm1 /// - public static ushort Extract(Vector128 value, [ConstantExpected] byte index) => Extract(value, index); + public static uint ConvertToUInt32(Vector128 value) => ConvertToUInt32(value); /// - /// __m128i _mm_insert_epi16 (__m128i a, int i, int immediate) - /// PINSRW xmm, reg/m16, imm8 + /// __m128d _mm_cvtepi32_pd (__m128i a) + /// CVTDQ2PD xmm1, xmm2/m64 + /// VCVTDQ2PD xmm1, xmm2/m64 + /// VCVTDQ2PD xmm1 {k1}{z}, xmm2/m64/m32bcst /// - public static Vector128 Insert(Vector128 value, short data, [ConstantExpected] byte index) => Insert(value, data, index); + public static Vector128 ConvertToVector128Double(Vector128 value) => ConvertToVector128Double(value); /// - /// __m128i _mm_insert_epi16 (__m128i a, int i, int immediate) - /// PINSRW xmm, reg/m16, imm8 + /// __m128d _mm_cvtps_pd (__m128 a) + /// CVTPS2PD xmm1, xmm2/m64 + /// VCVTPS2PD xmm1, xmm2/m64 
+ /// VCVTPS2PD xmm1 {k1}{z}, xmm2/m64/m32bcst /// - public static Vector128 Insert(Vector128 value, ushort data, [ConstantExpected] byte index) => Insert(value, data, index); - + public static Vector128 ConvertToVector128Double(Vector128 value) => ConvertToVector128Double(value); /// - /// __m128i _mm_loadu_si128 (__m128i const* mem_address) - /// MOVDQU xmm, m128 + /// __m128i _mm_cvtps_epi32 (__m128 a) + /// CVTPS2DQ xmm1, xmm2/m128 + /// VCVTPS2DQ xmm1, xmm2/m128 + /// VCVTPS2DQ xmm1 {k1}{z}, xmm2/m128/m32bcst /// - public static unsafe Vector128 LoadVector128(sbyte* address) => LoadVector128(address); + public static Vector128 ConvertToVector128Int32(Vector128 value) => ConvertToVector128Int32(value); /// - /// __m128i _mm_loadu_si128 (__m128i const* mem_address) - /// MOVDQU xmm, m128 + /// __m128i _mm_cvtpd_epi32 (__m128d a) + /// CVTPD2DQ xmm1, xmm2/m128 + /// VCVTPD2DQ xmm1, xmm2/m128 + /// VCVTPD2DQ xmm1 {k1}{z}, xmm2/m128/m64bcst /// - public static unsafe Vector128 LoadVector128(byte* address) => LoadVector128(address); + public static Vector128 ConvertToVector128Int32(Vector128 value) => ConvertToVector128Int32(value); /// - /// __m128i _mm_loadu_si128 (__m128i const* mem_address) - /// MOVDQU xmm, m128 + /// __m128i _mm_cvttps_epi32 (__m128 a) + /// CVTTPS2DQ xmm1, xmm2/m128 + /// VCVTTPS2DQ xmm1, xmm2/m128 + /// VCVTTPS2DQ xmm1 {k1}{z}, xmm2/m128/m32bcst /// - public static unsafe Vector128 LoadVector128(short* address) => LoadVector128(address); + public static Vector128 ConvertToVector128Int32WithTruncation(Vector128 value) => ConvertToVector128Int32WithTruncation(value); /// - /// __m128i _mm_loadu_si128 (__m128i const* mem_address) - /// MOVDQU xmm, m128 + /// __m128i _mm_cvttpd_epi32 (__m128d a) + /// CVTTPD2DQ xmm1, xmm2/m128 + /// VCVTTPD2DQ xmm1, xmm2/m128 + /// VCVTTPD2DQ xmm1 {k1}{z}, xmm2/m128/m64bcst /// - public static unsafe Vector128 LoadVector128(ushort* address) => LoadVector128(address); + public static Vector128 ConvertToVector128Int32WithTruncation(Vector128 value) => ConvertToVector128Int32WithTruncation(value); /// - /// __m128i _mm_loadu_si128 (__m128i const* mem_address) - /// MOVDQU xmm, m128 + /// __m128 _mm_cvtepi32_ps (__m128i a) + /// CVTDQ2PS xmm1, xmm2/m128 + /// VCVTDQ2PS xmm1, xmm2/m128 + /// VCVTDQ2PS xmm1 {k1}{z}, xmm2/m128/m32bcst /// - public static unsafe Vector128 LoadVector128(int* address) => LoadVector128(address); + public static Vector128 ConvertToVector128Single(Vector128 value) => ConvertToVector128Single(value); /// - /// __m128i _mm_loadu_si128 (__m128i const* mem_address) - /// MOVDQU xmm, m128 + /// __m128 _mm_cvtpd_ps (__m128d a) + /// CVTPD2PS xmm1, xmm2/m128 + /// VCVTPD2PS xmm1, xmm2/m128 + /// VCVTPD2PS xmm1 {k1}{z}, xmm2/m128/m64bcst /// - public static unsafe Vector128 LoadVector128(uint* address) => LoadVector128(address); + public static Vector128 ConvertToVector128Single(Vector128 value) => ConvertToVector128Single(value); + /// - /// __m128i _mm_loadu_si128 (__m128i const* mem_address) - /// MOVDQU xmm, m128 + /// __m128d _mm_div_pd (__m128d a, __m128d b) + /// DIVPD xmm1, xmm2/m128 + /// VDIVPD xmm1, xmm2, xmm3/m128 + /// VDIVPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// - public static unsafe Vector128 LoadVector128(long* address) => LoadVector128(address); + public static Vector128 Divide(Vector128 left, Vector128 right) => Divide(left, right); + /// - /// __m128i _mm_loadu_si128 (__m128i const* mem_address) - /// MOVDQU xmm, m128 + /// __m128d _mm_div_sd (__m128d a, __m128d b) + /// DIVSD xmm1, xmm2/m64 + /// 
VDIVSD xmm1, xmm2, xmm3/m64 /// - public static unsafe Vector128 LoadVector128(ulong* address) => LoadVector128(address); + public static Vector128 DivideScalar(Vector128 left, Vector128 right) => DivideScalar(left, right); + /// - /// __m128d _mm_loadu_pd (double const* mem_address) - /// MOVUPD xmm, m128 + /// int _mm_extract_epi16 (__m128i a, int immediate) + /// PEXTRW r/m16, xmm1, imm8 + /// VPEXTRW r/m16, xmm1, imm8 /// - public static unsafe Vector128 LoadVector128(double* address) => LoadVector128(address); + public static ushort Extract(Vector128 value, [ConstantExpected] byte index) => Extract(value, index); /// - /// __m128d _mm_load_sd (double const* mem_address) - /// MOVSD xmm, m64 + /// __m128i _mm_insert_epi16 (__m128i a, int i, int immediate) + /// PINSRW xmm1, r/m16, imm8 + /// VPINSRW xmm1, xmm2, r/m16, imm8 /// - public static unsafe Vector128 LoadScalarVector128(double* address) => LoadScalarVector128(address); + public static Vector128 Insert(Vector128 value, short data, [ConstantExpected] byte index) => Insert(value, data, index); + /// + /// __m128i _mm_insert_epi16 (__m128i a, int i, int immediate) + /// PINSRW xmm1, r/m16, imm8 + /// VPINSRW xmm1, xmm2, r/m16, imm8 + /// + public static Vector128 Insert(Vector128 value, ushort data, [ConstantExpected] byte index) => Insert(value, data, index); /// /// __m128i _mm_load_si128 (__m128i const* mem_address) - /// MOVDQA xmm, m128 + /// MOVDQA xmm1, m128 + /// VMOVDQA xmm1, m128 + /// VMOVDQA32 xmm1 {k1}{z}, m128 /// public static unsafe Vector128 LoadAlignedVector128(sbyte* address) => LoadAlignedVector128(address); /// /// __m128i _mm_load_si128 (__m128i const* mem_address) - /// MOVDQA xmm, m128 + /// MOVDQA xmm1, m128 + /// VMOVDQA xmm1, m128 + /// VMOVDQA32 xmm1 {k1}{z}, m128 /// public static unsafe Vector128 LoadAlignedVector128(byte* address) => LoadAlignedVector128(address); /// /// __m128i _mm_load_si128 (__m128i const* mem_address) - /// MOVDQA xmm, m128 + /// MOVDQA xmm1, m128 + /// VMOVDQA xmm1, m128 + /// VMOVDQA32 xmm1 {k1}{z}, m128 /// public static unsafe Vector128 LoadAlignedVector128(short* address) => LoadAlignedVector128(address); /// /// __m128i _mm_load_si128 (__m128i const* mem_address) - /// MOVDQA xmm, m128 + /// MOVDQA xmm1, m128 + /// VMOVDQA xmm1, m128 + /// VMOVDQA32 xmm1 {k1}{z}, m128 /// public static unsafe Vector128 LoadAlignedVector128(ushort* address) => LoadAlignedVector128(address); /// /// __m128i _mm_load_si128 (__m128i const* mem_address) - /// MOVDQA xmm, m128 + /// MOVDQA xmm1, m128 + /// VMOVDQA xmm1, m128 + /// VMOVDQA32 xmm1 {k1}{z}, m128 /// public static unsafe Vector128 LoadAlignedVector128(int* address) => LoadAlignedVector128(address); /// /// __m128i _mm_load_si128 (__m128i const* mem_address) - /// MOVDQA xmm, m128 + /// MOVDQA xmm1, m128 + /// VMOVDQA xmm1, m128 + /// VMOVDQA32 xmm1 {k1}{z}, m128 /// public static unsafe Vector128 LoadAlignedVector128(uint* address) => LoadAlignedVector128(address); /// /// __m128i _mm_load_si128 (__m128i const* mem_address) - /// MOVDQA xmm, m128 + /// MOVDQA xmm1, m128 + /// VMOVDQA xmm1, m128 + /// VMOVDQA64 xmm1 {k1}{z}, m128 /// public static unsafe Vector128 LoadAlignedVector128(long* address) => LoadAlignedVector128(address); /// /// __m128i _mm_load_si128 (__m128i const* mem_address) - /// MOVDQA xmm, m128 + /// MOVDQA xmm1, m128 + /// VMOVDQA xmm1, m128 + /// VMOVDQA64 xmm1 {k1}{z}, m128 /// public static unsafe Vector128 LoadAlignedVector128(ulong* address) => LoadAlignedVector128(address); /// /// __m128d _mm_load_pd 
(double const* mem_address) - /// MOVAPD xmm, m128 + /// MOVAPD xmm1, m128 + /// VMOVAPD xmm1, m128 + /// VMOVAPD xmm1 {k1}{z}, m128 /// public static unsafe Vector128 LoadAlignedVector128(double* address) => LoadAlignedVector128(address); @@ -764,70 +851,154 @@ internal X64() { } /// LFENCE /// public static void LoadFence() => LoadFence(); - /// /// __m128d _mm_loadh_pd (__m128d a, double const* mem_addr) - /// MOVHPD xmm, m64 + /// MOVHPD xmm1, m64 + /// VMOVHPD xmm1, xmm2, m64 /// public static unsafe Vector128 LoadHigh(Vector128 lower, double* address) => LoadHigh(lower, address); - /// /// __m128d _mm_loadl_pd (__m128d a, double const* mem_addr) - /// MOVLPD xmm, m64 + /// MOVLPD xmm1, m64 + /// VMOVLPD xmm1, xmm2, m64 /// public static unsafe Vector128 LoadLow(Vector128 upper, double* address) => LoadLow(upper, address); /// /// __m128i _mm_loadu_si32 (void const* mem_addr) - /// MOVD xmm, reg/m32 + /// MOVD xmm1, m32 + /// VMOVD xmm1, m32 /// public static unsafe Vector128 LoadScalarVector128(int* address) => LoadScalarVector128(address); /// /// __m128i _mm_loadu_si32 (void const* mem_addr) - /// MOVD xmm, reg/m32 + /// MOVD xmm1, m32 + /// VMOVD xmm1, m32 /// public static unsafe Vector128 LoadScalarVector128(uint* address) => LoadScalarVector128(address); /// /// __m128i _mm_loadl_epi64 (__m128i const* mem_addr) - /// MOVQ xmm, reg/m64 + /// MOVQ xmm1, m64 + /// VMOVQ xmm1, m64 /// public static unsafe Vector128 LoadScalarVector128(long* address) => LoadScalarVector128(address); /// /// __m128i _mm_loadl_epi64 (__m128i const* mem_addr) - /// MOVQ xmm, reg/m64 + /// MOVQ xmm1, m64 + /// VMOVQ xmm1, m64 /// public static unsafe Vector128 LoadScalarVector128(ulong* address) => LoadScalarVector128(address); + /// + /// __m128d _mm_load_sd (double const* mem_address) + /// MOVSD xmm1, m64 + /// VMOVSD xmm1, m64 + /// VMOVSD xmm1 {k1}, m64 + /// + public static unsafe Vector128 LoadScalarVector128(double* address) => LoadScalarVector128(address); + + /// + /// __m128i _mm_loadu_si128 (__m128i const* mem_address) + /// MOVDQU xmm1, m128 + /// VMOVDQU xmm1, m128 + /// VMOVDQU8 xmm1 {k1}{z}, m128 + /// + public static unsafe Vector128 LoadVector128(sbyte* address) => LoadVector128(address); + /// + /// __m128i _mm_loadu_si128 (__m128i const* mem_address) + /// MOVDQU xmm1, m128 + /// VMOVDQU xmm1, m128 + /// VMOVDQU8 xmm1 {k1}{z}, m128 + /// + public static unsafe Vector128 LoadVector128(byte* address) => LoadVector128(address); + /// + /// __m128i _mm_loadu_si128 (__m128i const* mem_address) + /// MOVDQU xmm1, m128 + /// VMOVDQU xmm1, m128 + /// VMOVDQU16 xmm1 {k1}{z}, m128 + /// + public static unsafe Vector128 LoadVector128(short* address) => LoadVector128(address); + /// + /// __m128i _mm_loadu_si128 (__m128i const* mem_address) + /// MOVDQU xmm1, m128 + /// VMOVDQU xmm1, m128 + /// VMOVDQU16 xmm1 {k1}{z}, m128 + /// + public static unsafe Vector128 LoadVector128(ushort* address) => LoadVector128(address); + /// + /// __m128i _mm_loadu_si128 (__m128i const* mem_address) + /// MOVDQU xmm1, m128 + /// VMOVDQU xmm1, m128 + /// VMOVDQU32 xmm1 {k1}{z}, m128 + /// + public static unsafe Vector128 LoadVector128(int* address) => LoadVector128(address); + /// + /// __m128i _mm_loadu_si128 (__m128i const* mem_address) + /// MOVDQU xmm1, m128 + /// VMOVDQU xmm1, m128 + /// VMOVDQU32 xmm1 {k1}{z}, m128 + /// + public static unsafe Vector128 LoadVector128(uint* address) => LoadVector128(address); + /// + /// __m128i _mm_loadu_si128 (__m128i const* mem_address) + /// MOVDQU xmm1, m128 + /// 
VMOVDQU xmm1, m128 + /// VMOVDQU64 xmm1 {k1}{z}, m128 + /// + public static unsafe Vector128 LoadVector128(long* address) => LoadVector128(address); + /// + /// __m128i _mm_loadu_si128 (__m128i const* mem_address) + /// MOVDQU xmm1, m128 + /// VMOVDQU xmm1, m128 + /// VMOVDQU64 xmm1 {k1}{z}, m128 + /// + public static unsafe Vector128 LoadVector128(ulong* address) => LoadVector128(address); + /// + /// __m128d _mm_loadu_pd (double const* mem_address) + /// MOVUPD xmm1, m128 + /// VMOVUPD xmm1, m128 + /// VMOVUPD xmm1 {k1}{z}, m128 + /// + public static unsafe Vector128 LoadVector128(double* address) => LoadVector128(address); /// /// void _mm_maskmoveu_si128 (__m128i a, __m128i mask, char* mem_address) - /// MASKMOVDQU xmm, xmm + /// MASKMOVDQU xmm1, xmm2 ; Address: EDI/RDI + /// VMASKMOVDQU xmm1, xmm2 ; Address: EDI/RDI /// public static unsafe void MaskMove(Vector128 source, Vector128 mask, sbyte* address) => MaskMove(source, mask, address); /// /// void _mm_maskmoveu_si128 (__m128i a, __m128i mask, char* mem_address) - /// MASKMOVDQU xmm, xmm + /// MASKMOVDQU xmm1, xmm2 ; Address: EDI/RDI + /// VMASKMOVDQU xmm1, xmm2 ; Address: EDI/RDI /// public static unsafe void MaskMove(Vector128 source, Vector128 mask, byte* address) => MaskMove(source, mask, address); /// /// __m128i _mm_max_epu8 (__m128i a, __m128i b) - /// PMAXUB xmm, xmm/m128 + /// PMAXUB xmm1, xmm2/m128 + /// VPMAXUB xmm1, xmm2, xmm3/m128 + /// VPMAXUB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Max(Vector128 left, Vector128 right) => Max(left, right); /// /// __m128i _mm_max_epi16 (__m128i a, __m128i b) - /// PMAXSW xmm, xmm/m128 + /// PMAXSW xmm1, xmm2/m128 + /// VPMAXSW xmm1, xmm2, xmm3/m128 + /// VPMAXSW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Max(Vector128 left, Vector128 right) => Max(left, right); /// /// __m128d _mm_max_pd (__m128d a, __m128d b) - /// MAXPD xmm, xmm/m128 + /// MAXPD xmm1, xmm2/m128 + /// VMAXPD xmm1, xmm2, xmm3/m128 + /// VMAXPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Max(Vector128 left, Vector128 right) => Max(left, right); /// /// __m128d _mm_max_sd (__m128d a, __m128d b) - /// MAXSD xmm, xmm/m64 + /// MAXSD xmm1, xmm2/m64 + /// VMAXSD xmm1, xmm2, xmm3/m64 /// public static Vector128 MaxScalar(Vector128 left, Vector128 right) => MaxScalar(left, right); @@ -839,631 +1010,796 @@ internal X64() { } /// /// __m128i _mm_min_epu8 (__m128i a, __m128i b) - /// PMINUB xmm, xmm/m128 + /// PMINUB xmm1, xmm2/m128 + /// VPMINUB xmm1, xmm2, xmm3/m128 + /// VPMINUB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Min(Vector128 left, Vector128 right) => Min(left, right); /// /// __m128i _mm_min_epi16 (__m128i a, __m128i b) - /// PMINSW xmm, xmm/m128 + /// PMINSW xmm1, xmm2/m128 + /// VPMINSW xmm1, xmm2, xmm3/m128 + /// VPMINSW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Min(Vector128 left, Vector128 right) => Min(left, right); /// /// __m128d _mm_min_pd (__m128d a, __m128d b) - /// MINPD xmm, xmm/m128 + /// MINPD xmm1, xmm2/m128 + /// VMINPD xmm1, xmm2, xmm3/m128 + /// VMINPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Min(Vector128 left, Vector128 right) => Min(left, right); /// /// __m128d _mm_min_sd (__m128d a, __m128d b) - /// MINSD xmm, xmm/m64 + /// MINSD xmm1, xmm2/m64 + /// VMINSD xmm1, xmm2, xmm3/m64 /// public static Vector128 MinScalar(Vector128 left, Vector128 right) => MinScalar(left, right); - /// - /// __m128d _mm_move_sd (__m128d a, __m128d b) - /// MOVSD xmm, xmm - /// - public static Vector128 
MoveScalar(Vector128 upper, Vector128 value) => MoveScalar(upper, value); - /// /// int _mm_movemask_epi8 (__m128i a) - /// PMOVMSKB reg, xmm + /// PMOVMSKB r32, xmm1 + /// VPMOVMSKB r32, xmm1 /// public static int MoveMask(Vector128 value) => MoveMask(value); /// /// int _mm_movemask_epi8 (__m128i a) - /// PMOVMSKB reg, xmm + /// PMOVMSKB r32, xmm1 + /// VPMOVMSKB r32, xmm1 /// public static int MoveMask(Vector128 value) => MoveMask(value); /// /// int _mm_movemask_pd (__m128d a) - /// MOVMSKPD reg, xmm + /// MOVMSKPD r32, xmm1 + /// VMOVMSKPD r32, xmm1 /// public static int MoveMask(Vector128 value) => MoveMask(value); /// /// __m128i _mm_move_epi64 (__m128i a) - /// MOVQ xmm, xmm + /// MOVQ xmm1, xmm2 + /// VMOVQ xmm1, xmm2 /// public static Vector128 MoveScalar(Vector128 value) => MoveScalar(value); /// /// __m128i _mm_move_epi64 (__m128i a) - /// MOVQ xmm, xmm + /// MOVQ xmm1, xmm2 + /// VMOVQ xmm1, xmm2 /// public static Vector128 MoveScalar(Vector128 value) => MoveScalar(value); + /// + /// __m128d _mm_move_sd (__m128d a, __m128d b) + /// MOVSD xmm1, xmm2 + /// VMOVSD xmm1, xmm2, xmm3 + /// VMOVSD xmm1 {k1}{z}, xmm2, xmm3 + /// + public static Vector128 MoveScalar(Vector128 upper, Vector128 value) => MoveScalar(upper, value); /// /// __m128i _mm_mul_epu32 (__m128i a, __m128i b) - /// PMULUDQ xmm, xmm/m128 + /// PMULUDQ xmm1, xmm2/m128 + /// VPMULUDQ xmm1, xmm2, xmm3/m128 + /// VPMULUDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Multiply(Vector128 left, Vector128 right) => Multiply(left, right); /// /// __m128d _mm_mul_pd (__m128d a, __m128d b) - /// MULPD xmm, xmm/m128 + /// MULPD xmm1, xmm2/m128 + /// VMULPD xmm1, xmm2, xmm3/m128 + /// VMULPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Multiply(Vector128 left, Vector128 right) => Multiply(left, right); /// - /// __m128d _mm_mul_sd (__m128d a, __m128d b) - /// MULSD xmm, xmm/m64 + /// __m128i _mm_madd_epi16 (__m128i a, __m128i b) + /// PMADDWD xmm1, xmm2/m128 + /// VPMADDWD xmm1, xmm2, xmm3/m128 + /// VPMADDWD xmm1 {k1}{z}, xmm2, xmm3/m128 /// - public static Vector128 MultiplyScalar(Vector128 left, Vector128 right) => MultiplyScalar(left, right); + public static Vector128 MultiplyAddAdjacent(Vector128 left, Vector128 right) => MultiplyAddAdjacent(left, right); /// /// __m128i _mm_mulhi_epi16 (__m128i a, __m128i b) - /// PMULHW xmm, xmm/m128 + /// PMULHW xmm1, xmm2/m128 + /// VPMULHW xmm1, xmm2, xmm3/m128 + /// VPMULHW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 MultiplyHigh(Vector128 left, Vector128 right) => MultiplyHigh(left, right); /// /// __m128i _mm_mulhi_epu16 (__m128i a, __m128i b) - /// PMULHUW xmm, xmm/m128 + /// PMULHUW xmm1, xmm2/m128 + /// VPMULHUW xmm1, xmm2, xmm3/m128 + /// VPMULHUW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 MultiplyHigh(Vector128 left, Vector128 right) => MultiplyHigh(left, right); - /// - /// __m128i _mm_madd_epi16 (__m128i a, __m128i b) - /// PMADDWD xmm, xmm/m128 - /// - public static Vector128 MultiplyAddAdjacent(Vector128 left, Vector128 right) => MultiplyAddAdjacent(left, right); - /// /// __m128i _mm_mullo_epi16 (__m128i a, __m128i b) - /// PMULLW xmm, xmm/m128 + /// PMULLW xmm1, xmm2/m128 + /// VPMULLW xmm1, xmm2, xmm3/m128 + /// VPMULLW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 MultiplyLow(Vector128 left, Vector128 right) => MultiplyLow(left, right); /// /// __m128i _mm_mullo_epi16 (__m128i a, __m128i b) - /// PMULLW xmm, xmm/m128 + /// PMULLW xmm1, xmm2/m128 + /// VPMULLW xmm1, xmm2, xmm3/m128 + /// 
VPMULLW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 MultiplyLow(Vector128 left, Vector128 right) => MultiplyLow(left, right); + /// + /// __m128d _mm_mul_sd (__m128d a, __m128d b) + /// MULSD xmm1, xmm2/m64 + /// VMULSD xmm1, xmm2, xmm3/m64 + /// + public static Vector128 MultiplyScalar(Vector128 left, Vector128 right) => MultiplyScalar(left, right); + /// /// __m128i _mm_or_si128 (__m128i a, __m128i b) - /// POR xmm, xmm/m128 + /// POR xmm1, xmm2/m128 + /// VPOR xmm1, xmm2, xmm3/m128 /// public static Vector128 Or(Vector128 left, Vector128 right) => Or(left, right); /// /// __m128i _mm_or_si128 (__m128i a, __m128i b) - /// POR xmm, xmm/m128 + /// POR xmm1, xmm2/m128 + /// VPOR xmm1, xmm2, xmm3/m128 /// public static Vector128 Or(Vector128 left, Vector128 right) => Or(left, right); /// /// __m128i _mm_or_si128 (__m128i a, __m128i b) - /// POR xmm, xmm/m128 + /// POR xmm1, xmm2/m128 + /// VPOR xmm1, xmm2, xmm3/m128 /// public static Vector128 Or(Vector128 left, Vector128 right) => Or(left, right); /// /// __m128i _mm_or_si128 (__m128i a, __m128i b) - /// POR xmm, xmm/m128 + /// POR xmm1, xmm2/m128 + /// VPOR xmm1, xmm2, xmm3/m128 /// public static Vector128 Or(Vector128 left, Vector128 right) => Or(left, right); /// /// __m128i _mm_or_si128 (__m128i a, __m128i b) - /// POR xmm, xmm/m128 + /// POR xmm1, xmm2/m128 + /// VPOR xmm1, xmm2, xmm3/m128 + /// VPORD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Or(Vector128 left, Vector128 right) => Or(left, right); /// /// __m128i _mm_or_si128 (__m128i a, __m128i b) - /// POR xmm, xmm/m128 + /// POR xmm1, xmm2/m128 + /// VPOR xmm1, xmm2, xmm3/m128 + /// VPORD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Or(Vector128 left, Vector128 right) => Or(left, right); /// /// __m128i _mm_or_si128 (__m128i a, __m128i b) - /// POR xmm, xmm/m128 + /// POR xmm1, xmm2/m128 + /// VPOR xmm1, xmm2, xmm3/m128 + /// VPORQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Or(Vector128 left, Vector128 right) => Or(left, right); /// /// __m128i _mm_or_si128 (__m128i a, __m128i b) - /// POR xmm, xmm/m128 + /// POR xmm1, xmm2/m128 + /// VPOR xmm1, xmm2, xmm3/m128 + /// VPORQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Or(Vector128 left, Vector128 right) => Or(left, right); /// /// __m128d _mm_or_pd (__m128d a, __m128d b) - /// ORPD xmm, xmm/m128 + /// ORPD xmm1, xmm2/m128 + /// VORPD xmm1, xmm2, xmm3/m128 + /// VORPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Or(Vector128 left, Vector128 right) => Or(left, right); /// /// __m128i _mm_packs_epi16 (__m128i a, __m128i b) - /// PACKSSWB xmm, xmm/m128 + /// PACKSSWB xmm1, xmm2/m128 + /// VPACKSSWB xmm1, xmm2, xmm3/m128 + /// VPACKSSWB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 PackSignedSaturate(Vector128 left, Vector128 right) => PackSignedSaturate(left, right); /// /// __m128i _mm_packs_epi32 (__m128i a, __m128i b) - /// PACKSSDW xmm, xmm/m128 + /// PACKSSDW xmm1, xmm2/m128 + /// VPACKSSDW xmm1, xmm2, xmm3/m128 + /// VPACKSSDW xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 PackSignedSaturate(Vector128 left, Vector128 right) => PackSignedSaturate(left, right); /// /// __m128i _mm_packus_epi16 (__m128i a, __m128i b) - /// PACKUSWB xmm, xmm/m128 + /// PACKUSWB xmm1, xmm2/m128 + /// VPACKUSWB xmm1, xmm2, xmm3/m128 + /// VPACKUSWB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 PackUnsignedSaturate(Vector128 left, Vector128 right) => PackUnsignedSaturate(left, right); - /// 
- /// __m128i _mm_sad_epu8 (__m128i a, __m128i b) - /// PSADBW xmm, xmm/m128 - /// - public static Vector128 SumAbsoluteDifferences(Vector128 left, Vector128 right) => SumAbsoluteDifferences(left, right); - - /// - /// __m128i _mm_shuffle_epi32 (__m128i a, int immediate) - /// PSHUFD xmm, xmm/m128, imm8 - /// - public static Vector128 Shuffle(Vector128 value, [ConstantExpected] byte control) => Shuffle(value, control); - /// - /// __m128i _mm_shuffle_epi32 (__m128i a, int immediate) - /// PSHUFD xmm, xmm/m128, imm8 - /// - public static Vector128 Shuffle(Vector128 value, [ConstantExpected] byte control) => Shuffle(value, control); - /// - /// __m128d _mm_shuffle_pd (__m128d a, __m128d b, int immediate) - /// SHUFPD xmm, xmm/m128, imm8 - /// - public static Vector128 Shuffle(Vector128 left, Vector128 right, [ConstantExpected] byte control) => Shuffle(left, right, control); - - /// - /// __m128i _mm_shufflehi_epi16 (__m128i a, int immediate) - /// PSHUFHW xmm, xmm/m128, imm8 - /// - public static Vector128 ShuffleHigh(Vector128 value, [ConstantExpected] byte control) => ShuffleHigh(value, control); - /// - /// __m128i _mm_shufflehi_epi16 (__m128i a, int control) - /// PSHUFHW xmm, xmm/m128, imm8 - /// - public static Vector128 ShuffleHigh(Vector128 value, [ConstantExpected] byte control) => ShuffleHigh(value, control); - - /// - /// __m128i _mm_shufflelo_epi16 (__m128i a, int control) - /// PSHUFLW xmm, xmm/m128, imm8 - /// - public static Vector128 ShuffleLow(Vector128 value, [ConstantExpected] byte control) => ShuffleLow(value, control); - /// - /// __m128i _mm_shufflelo_epi16 (__m128i a, int control) - /// PSHUFLW xmm, xmm/m128, imm8 - /// - public static Vector128 ShuffleLow(Vector128 value, [ConstantExpected] byte control) => ShuffleLow(value, control); - /// /// __m128i _mm_sll_epi16 (__m128i a, __m128i count) - /// PSLLW xmm, xmm/m128 + /// PSLLW xmm1, xmm2/m128 + /// VPSLLW xmm1, xmm2, xmm3/m128 + /// VPSLLW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftLeftLogical(Vector128 value, Vector128 count) => ShiftLeftLogical(value, count); /// /// __m128i _mm_sll_epi16 (__m128i a, __m128i count) - /// PSLLW xmm, xmm/m128 + /// PSLLW xmm1, xmm2/m128 + /// VPSLLW xmm1, xmm2, xmm3/m128 + /// VPSLLW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftLeftLogical(Vector128 value, Vector128 count) => ShiftLeftLogical(value, count); /// /// __m128i _mm_sll_epi32 (__m128i a, __m128i count) - /// PSLLD xmm, xmm/m128 + /// PSLLD xmm1, xmm2/m128 + /// VPSLLD xmm1, xmm2, xmm3/m128 + /// VPSLLD xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftLeftLogical(Vector128 value, Vector128 count) => ShiftLeftLogical(value, count); /// /// __m128i _mm_sll_epi32 (__m128i a, __m128i count) - /// PSLLD xmm, xmm/m128 + /// PSLLD xmm1, xmm2/m128 + /// VPSLLD xmm1, xmm2, xmm3/m128 + /// VPSLLD xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftLeftLogical(Vector128 value, Vector128 count) => ShiftLeftLogical(value, count); /// /// __m128i _mm_sll_epi64 (__m128i a, __m128i count) - /// PSLLQ xmm, xmm/m128 + /// PSLLQ xmm1, xmm2/m128 + /// VPSLLQ xmm1, xmm2, xmm3/m128 + /// VPSLLQ xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftLeftLogical(Vector128 value, Vector128 count) => ShiftLeftLogical(value, count); /// /// __m128i _mm_sll_epi64 (__m128i a, __m128i count) - /// PSLLQ xmm, xmm/m128 + /// PSLLQ xmm1, xmm2/m128 + /// VPSLLQ xmm1, xmm2, xmm3/m128 + /// VPSLLQ xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftLeftLogical(Vector128 value, 
Vector128 count) => ShiftLeftLogical(value, count); /// /// __m128i _mm_slli_epi16 (__m128i a, int immediate) - /// PSLLW xmm, imm8 + /// PSLLW xmm1, imm8 + /// VPSLLW xmm1, xmm2, imm8 + /// VPSLLW xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftLeftLogical(Vector128 value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count); /// /// __m128i _mm_slli_epi16 (__m128i a, int immediate) - /// PSLLW xmm, imm8 + /// PSLLW xmm1, imm8 + /// VPSLLW xmm1, xmm2, imm8 + /// VPSLLW xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftLeftLogical(Vector128 value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count); /// /// __m128i _mm_slli_epi32 (__m128i a, int immediate) - /// PSLLD xmm, imm8 + /// PSLLD xmm1, imm8 + /// VPSLLD xmm1, xmm2, imm8 + /// VPSLLD xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftLeftLogical(Vector128 value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count); /// /// __m128i _mm_slli_epi32 (__m128i a, int immediate) - /// PSLLD xmm, imm8 + /// PSLLD xmm1, imm8 + /// VPSLLD xmm1, xmm2, imm8 + /// VPSLLD xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftLeftLogical(Vector128 value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count); /// /// __m128i _mm_slli_epi64 (__m128i a, int immediate) - /// PSLLQ xmm, imm8 + /// PSLLQ xmm1, imm8 + /// VPSLLQ xmm1, xmm2, imm8 + /// VPSLLQ xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftLeftLogical(Vector128 value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count); /// /// __m128i _mm_slli_epi64 (__m128i a, int immediate) - /// PSLLQ xmm, imm8 + /// PSLLQ xmm1, imm8 + /// VPSLLQ xmm1, xmm2, imm8 + /// VPSLLQ xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftLeftLogical(Vector128 value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count); /// /// __m128i _mm_bslli_si128 (__m128i a, int imm8) - /// PSLLDQ xmm, imm8 + /// PSLLDQ xmm1, imm8 + /// VPSLLDQ xmm1, xmm2/m128, imm8 /// public static Vector128 ShiftLeftLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes); /// /// __m128i _mm_bslli_si128 (__m128i a, int imm8) - /// PSLLDQ xmm, imm8 + /// PSLLDQ xmm1, imm8 + /// VPSLLDQ xmm1, xmm2/m128, imm8 /// public static Vector128 ShiftLeftLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes); /// /// __m128i _mm_bslli_si128 (__m128i a, int imm8) - /// PSLLDQ xmm, imm8 + /// PSLLDQ xmm1, imm8 + /// VPSLLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. /// public static Vector128 ShiftLeftLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes); /// /// __m128i _mm_bslli_si128 (__m128i a, int imm8) - /// PSLLDQ xmm, imm8 + /// PSLLDQ xmm1, imm8 + /// VPSLLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. /// public static Vector128 ShiftLeftLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes); /// /// __m128i _mm_bslli_si128 (__m128i a, int imm8) - /// PSLLDQ xmm, imm8 + /// PSLLDQ xmm1, imm8 + /// VPSLLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. 
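A minimal sketch of the byte-wise behavior noted above, assuming an SSE2-capable process; the element values and the ByteLaneShiftSketch helper are illustrative only, not part of this patch:

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    // Illustrative sketch; ByteLaneShiftSketch is a hypothetical helper, not dotnet/runtime code.
    internal static class ByteLaneShiftSketch
    {
        internal static void Demo()
        {
            if (Sse2.IsSupported)
            {
                Vector128<int> v = Vector128.Create(0x11111111, 0x22222222, 0x33333333, 0x44444444);

                // PSLLDQ shifts the whole 128-bit lane left by a *byte* count, so a
                // 4-byte shift moves each int element up one slot and zero-fills
                // element 0; it is not a shift by 4 int elements.
                Vector128<int> shifted = Sse2.ShiftLeftLogical128BitLane(v, 4);
                // shifted == <0x00000000, 0x11111111, 0x22222222, 0x33333333>
            }
        }
    }
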
/// public static Vector128 ShiftLeftLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes); /// /// __m128i _mm_bslli_si128 (__m128i a, int imm8) - /// PSLLDQ xmm, imm8 + /// PSLLDQ xmm1, imm8 + /// VPSLLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. /// public static Vector128 ShiftLeftLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes); /// /// __m128i _mm_bslli_si128 (__m128i a, int imm8) - /// PSLLDQ xmm, imm8 + /// PSLLDQ xmm1, imm8 + /// VPSLLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. /// public static Vector128 ShiftLeftLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes); /// /// __m128i _mm_bslli_si128 (__m128i a, int imm8) - /// PSLLDQ xmm, imm8 + /// PSLLDQ xmm1, imm8 + /// VPSLLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors. /// public static Vector128 ShiftLeftLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes); /// /// __m128i _mm_sra_epi16 (__m128i a, __m128i count) - /// PSRAW xmm, xmm/m128 + /// PSRAW xmm1, xmm2/m128 + /// VPSRAW xmm1, xmm2, xmm3/m128 + /// VPSRAW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftRightArithmetic(Vector128 value, Vector128 count) => ShiftRightArithmetic(value, count); /// /// __m128i _mm_sra_epi32 (__m128i a, __m128i count) - /// PSRAD xmm, xmm/m128 + /// PSRAD xmm1, xmm2/m128 + /// VPSRAD xmm1, xmm2, xmm3/m128 + /// VPSRAD xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftRightArithmetic(Vector128 value, Vector128 count) => ShiftRightArithmetic(value, count); /// /// __m128i _mm_srai_epi16 (__m128i a, int immediate) - /// PSRAW xmm, imm8 + /// PSRAW xmm1, imm8 + /// VPSRAW xmm1, xmm2, imm8 + /// VPSRAW xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftRightArithmetic(Vector128 value, [ConstantExpected] byte count) => ShiftRightArithmetic(value, count); /// /// __m128i _mm_srai_epi32 (__m128i a, int immediate) - /// PSRAD xmm, imm8 + /// PSRAD xmm1, imm8 + /// VPSRAD xmm1, xmm2, imm8 + /// VPSRAD xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftRightArithmetic(Vector128 value, [ConstantExpected] byte count) => ShiftRightArithmetic(value, count); /// /// __m128i _mm_srl_epi16 (__m128i a, __m128i count) - /// PSRLW xmm, xmm/m128 + /// PSRLW xmm1, xmm2/m128 + /// VPSRLW xmm1, xmm2, xmm3/m128 + /// VPSRLW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftRightLogical(Vector128 value, Vector128 count) => ShiftRightLogical(value, count); /// /// __m128i _mm_srl_epi16 (__m128i a, __m128i count) - /// PSRLW xmm, xmm/m128 + /// PSRLW xmm1, xmm2/m128 + /// VPSRLW xmm1, xmm2, xmm3/m128 + /// VPSRLW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftRightLogical(Vector128 value, Vector128 count) => ShiftRightLogical(value, count); /// /// __m128i _mm_srl_epi32 (__m128i a, __m128i count) - /// PSRLD xmm, xmm/m128 + /// PSRLD xmm1, xmm2/m128 + /// VPSRLD xmm1, xmm2, xmm3/m128 + /// VPSRLD xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftRightLogical(Vector128 value, Vector128 count) => ShiftRightLogical(value, count); /// /// __m128i _mm_srl_epi32 (__m128i a, __m128i count) - /// PSRLD xmm, 
xmm/m128 + /// PSRLD xmm1, xmm2/m128 + /// VPSRLD xmm1, xmm2, xmm3/m128 + /// VPSRLD xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftRightLogical(Vector128 value, Vector128 count) => ShiftRightLogical(value, count); /// /// __m128i _mm_srl_epi64 (__m128i a, __m128i count) - /// PSRLQ xmm, xmm/m128 + /// PSRLQ xmm1, xmm2/m128 + /// VPSRLQ xmm1, xmm2, xmm3/m128 + /// VPSRLQ xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftRightLogical(Vector128 value, Vector128 count) => ShiftRightLogical(value, count); /// /// __m128i _mm_srl_epi64 (__m128i a, __m128i count) - /// PSRLQ xmm, xmm/m128 + /// PSRLQ xmm1, xmm2/m128 + /// VPSRLQ xmm1, xmm2, xmm3/m128 + /// VPSRLQ xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 ShiftRightLogical(Vector128 value, Vector128 count) => ShiftRightLogical(value, count); /// /// __m128i _mm_srli_epi16 (__m128i a, int immediate) - /// PSRLW xmm, imm8 + /// PSRLW xmm1, imm8 + /// VPSRLW xmm1, xmm2, imm8 + /// VPSRLW xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftRightLogical(Vector128 value, [ConstantExpected] byte count) => ShiftRightLogical(value, count); /// /// __m128i _mm_srli_epi16 (__m128i a, int immediate) - /// PSRLW xmm, imm8 + /// PSRLW xmm1, imm8 + /// VPSRLW xmm1, xmm2, imm8 + /// VPSRLW xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftRightLogical(Vector128 value, [ConstantExpected] byte count) => ShiftRightLogical(value, count); /// /// __m128i _mm_srli_epi32 (__m128i a, int immediate) - /// PSRLD xmm, imm8 + /// PSRLD xmm1, imm8 + /// VPSRLD xmm1, xmm2, imm8 + /// VPSRLD xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftRightLogical(Vector128 value, [ConstantExpected] byte count) => ShiftRightLogical(value, count); /// /// __m128i _mm_srli_epi32 (__m128i a, int immediate) - /// PSRLD xmm, imm8 + /// PSRLD xmm1, imm8 + /// VPSRLD xmm1, xmm2, imm8 + /// VPSRLD xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftRightLogical(Vector128 value, [ConstantExpected] byte count) => ShiftRightLogical(value, count); /// /// __m128i _mm_srli_epi64 (__m128i a, int immediate) - /// PSRLQ xmm, imm8 + /// PSRLQ xmm1, imm8 + /// VPSRLQ xmm1, xmm2, imm8 + /// VPSRLQ xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftRightLogical(Vector128 value, [ConstantExpected] byte count) => ShiftRightLogical(value, count); /// /// __m128i _mm_srli_epi64 (__m128i a, int immediate) - /// PSRLQ xmm, imm8 + /// PSRLQ xmm1, imm8 + /// VPSRLQ xmm1, xmm2, imm8 + /// VPSRLQ xmm1 {k1}{z}, xmm2, imm8 /// public static Vector128 ShiftRightLogical(Vector128 value, [ConstantExpected] byte count) => ShiftRightLogical(value, count); /// /// __m128i _mm_bsrli_si128 (__m128i a, int imm8) - /// PSRLDQ xmm, imm8 + /// PSRLDQ xmm1, imm8 + /// VPSRLDQ xmm1, xmm2/m128, imm8 /// public static Vector128 ShiftRightLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes); /// /// __m128i _mm_bsrli_si128 (__m128i a, int imm8) - /// PSRLDQ xmm, imm8 + /// PSRLDQ xmm1, imm8 + /// VPSRLDQ xmm1, xmm2/m128, imm8 /// public static Vector128 ShiftRightLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes); /// /// __m128i _mm_bsrli_si128 (__m128i a, int imm8) - /// PSRLDQ xmm, imm8 + /// PSRLDQ xmm1, imm8 + /// VPSRLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors. 
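// --- Illustrative sketch (editor's addition, not part of the patch): the arithmetic
// forms above replicate the sign bit while the logical forms shift in zeros.
Vector128<int> v = Vector128.Create(-16);                          // each lane 0xFFFFFFF0
Vector128<int>  arith = Sse2.ShiftRightArithmetic(v, 2);           // PSRAD: each lane -4
Vector128<uint> logic = Sse2.ShiftRightLogical(v.AsUInt32(), 2);   // PSRLD: each lane 0x3FFFFFFC
// --- end sketch ---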
/// public static Vector128 ShiftRightLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes); /// /// __m128i _mm_bsrli_si128 (__m128i a, int imm8) - /// PSRLDQ xmm, imm8 + /// PSRLDQ xmm1, imm8 + /// VPSRLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors. /// public static Vector128 ShiftRightLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes); /// /// __m128i _mm_bsrli_si128 (__m128i a, int imm8) - /// PSRLDQ xmm, imm8 + /// PSRLDQ xmm1, imm8 + /// VPSRLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors. /// public static Vector128 ShiftRightLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes); /// /// __m128i _mm_bsrli_si128 (__m128i a, int imm8) - /// PSRLDQ xmm, imm8 + /// PSRLDQ xmm1, imm8 + /// VPSRLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors. /// public static Vector128 ShiftRightLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes); /// /// __m128i _mm_bsrli_si128 (__m128i a, int imm8) - /// PSRLDQ xmm, imm8 + /// PSRLDQ xmm1, imm8 + /// VPSRLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors. /// public static Vector128 ShiftRightLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes); /// /// __m128i _mm_bsrli_si128 (__m128i a, int imm8) - /// PSRLDQ xmm, imm8 + /// PSRLDQ xmm1, imm8 + /// VPSRLDQ xmm1, xmm2/m128, imm8 + /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors. 
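// --- Illustrative sketch (editor's addition, not part of the patch): PSRLDQ moves the
// whole 128-bit value right by N bytes, e.g. to bring the upper half down to element 0.
Vector128<ulong> v  = Vector128.Create(0x1111111111111111UL, 0x2222222222222222UL);
Vector128<ulong> hi = Sse2.ShiftRightLogical128BitLane(v, 8);  // <0x2222222222222222, 0>
// --- end sketch ---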
/// public static Vector128 ShiftRightLogical128BitLane(Vector128 value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes); + /// + /// __m128i _mm_shuffle_epi32 (__m128i a, int immediate) + /// PSHUFD xmm1, xmm2/m128, imm8 + /// VPSHUFD xmm1, xmm2/m128, imm8 + /// VPSHUFD xmm1 {k1}{z}, xmm2/m128/m32bcst, imm8 + /// + public static Vector128 Shuffle(Vector128 value, [ConstantExpected] byte control) => Shuffle(value, control); + /// + /// __m128i _mm_shuffle_epi32 (__m128i a, int immediate) + /// PSHUFD xmm1, xmm2/m128, imm8 + /// VPSHUFD xmm1, xmm2/m128, imm8 + /// VPSHUFD xmm1 {k1}{z}, xmm2/m128/m32bcst, imm8 + /// + public static Vector128 Shuffle(Vector128 value, [ConstantExpected] byte control) => Shuffle(value, control); + /// + /// __m128d _mm_shuffle_pd (__m128d a, __m128d b, int immediate) + /// SHUFPD xmm1, xmm2/m128, imm8 + /// VSHUFPD xmm1, xmm2, xmm3/m128, imm8 + /// VSHUFPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst, imm8 + /// + public static Vector128 Shuffle(Vector128 left, Vector128 right, [ConstantExpected] byte control) => Shuffle(left, right, control); + + /// + /// __m128i _mm_shufflehi_epi16 (__m128i a, int immediate) + /// PSHUFHW xmm1, xmm2/m128, imm8 + /// VPSHUFHW xmm1, xmm2/m128, imm8 + /// VPSHUFHW xmm1 {k1}{z}, xmm2/m128, imm8 + /// + public static Vector128 ShuffleHigh(Vector128 value, [ConstantExpected] byte control) => ShuffleHigh(value, control); + /// + /// __m128i _mm_shufflehi_epi16 (__m128i a, int control) + /// PSHUFHW xmm1, xmm2/m128, imm8 + /// VPSHUFHW xmm1, xmm2/m128, imm8 + /// VPSHUFHW xmm1 {k1}{z}, xmm2/m128, imm8 + /// + public static Vector128 ShuffleHigh(Vector128 value, [ConstantExpected] byte control) => ShuffleHigh(value, control); + + /// + /// __m128i _mm_shufflelo_epi16 (__m128i a, int control) + /// PSHUFLW xmm1, xmm2/m128, imm8 + /// VPSHUFLW xmm1, xmm2/m128, imm8 + /// VPSHUFLW xmm1 {k1}{z}, xmm2/m128, imm8 + /// + public static Vector128 ShuffleLow(Vector128 value, [ConstantExpected] byte control) => ShuffleLow(value, control); + /// + /// __m128i _mm_shufflelo_epi16 (__m128i a, int control) + /// PSHUFLW xmm1, xmm2/m128, imm8 + /// VPSHUFLW xmm1, xmm2/m128, imm8 + /// VPSHUFLW xmm1 {k1}{z}, xmm2/m128, imm8 + /// + public static Vector128 ShuffleLow(Vector128 value, [ConstantExpected] byte control) => ShuffleLow(value, control); + /// /// __m128d _mm_sqrt_pd (__m128d a) - /// SQRTPD xmm, xmm/m128 + /// SQRTPD xmm1, xmm2/m128 + /// VSQRTPD xmm1, xmm2/m128 + /// VSQRTPD xmm1 {k1}{z}, xmm2/m128/m64bcst /// public static Vector128 Sqrt(Vector128 value) => Sqrt(value); /// /// __m128d _mm_sqrt_sd (__m128d a) - /// SQRTSD xmm, xmm/64 + /// SQRTSD xmm1, xmm2/m64 + /// VSQRTSD xmm1, xmm2, xmm3/m64 + /// VSQRTSD xmm1 {k1}{z}, xmm2, xmm3/m64{er} /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. 
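// --- Illustrative sketch (editor's addition, not part of the patch): the PSHUFD imm8
// selects each destination element with two bits (bits 1:0 pick element 0, bits 3:2
// pick element 1, and so on), so 0b00_01_10_11 reverses the four 32-bit elements.
Vector128<int> v = Vector128.Create(10, 20, 30, 40);
Vector128<int> reversed = Sse2.Shuffle(v, 0b00_01_10_11);  // <40, 30, 20, 10>
// --- end sketch ---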
/// public static Vector128 SqrtScalar(Vector128 value) => SqrtScalar(value); - /// /// __m128d _mm_sqrt_sd (__m128d a, __m128d b) - /// SQRTSD xmm, xmm/64 + /// SQRTSD xmm1, xmm2/m64 + /// VSQRTSD xmm1, xmm2, xmm3/m64 + /// VSQRTSD xmm1 {k1}{z}, xmm2, xmm3/m64{er} /// public static Vector128 SqrtScalar(Vector128 upper, Vector128 value) => SqrtScalar(upper, value); /// - /// void _mm_store_sd (double* mem_addr, __m128d a) - /// MOVSD m64, xmm + /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) + /// MOVDQU m128, xmm1 + /// VMOVDQU m128, xmm1 + /// VMOVDQU8 m128 {k1}{z}, xmm1 /// - public static unsafe void StoreScalar(double* address, Vector128 source) => StoreScalar(address, source); + public static unsafe void Store(sbyte* address, Vector128 source) => Store(address, source); /// - /// void _mm_storeu_si32 (void* mem_addr, __m128i a) - /// MOVD m32, xmm + /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) + /// MOVDQU m128, xmm1 + /// VMOVDQU m128, xmm1 + /// VMOVDQU8 m128 {k1}{z}, xmm1 /// - public static unsafe void StoreScalar(int* address, Vector128 source) => StoreScalar(address, source); + public static unsafe void Store(byte* address, Vector128 source) => Store(address, source); /// - /// void _mm_storel_epi64 (__m128i* mem_addr, __m128i a) - /// MOVQ m64, xmm + /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) + /// MOVDQU m128, xmm1 + /// VMOVDQU m128, xmm1 + /// VMOVDQU16 m128 {k1}{z}, xmm1 /// - public static unsafe void StoreScalar(long* address, Vector128 source) => StoreScalar(address, source); + public static unsafe void Store(short* address, Vector128 source) => Store(address, source); /// - /// void _mm_storeu_si32 (void* mem_addr, __m128i a) - /// MOVD m32, xmm + /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) + /// MOVDQU m128, xmm1 + /// VMOVDQU m128, xmm1 + /// VMOVDQU16 m128 {k1}{z}, xmm1 /// - public static unsafe void StoreScalar(uint* address, Vector128 source) => StoreScalar(address, source); + public static unsafe void Store(ushort* address, Vector128 source) => Store(address, source); /// - /// void _mm_storel_epi64 (__m128i* mem_addr, __m128i a) - /// MOVQ m64, xmm + /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) + /// MOVDQU m128, xmm1 + /// VMOVDQU m128, xmm1 + /// VMOVDQU32 m128 {k1}{z}, xmm1 /// - public static unsafe void StoreScalar(ulong* address, Vector128 source) => StoreScalar(address, source); + public static unsafe void Store(int* address, Vector128 source) => Store(address, source); + /// + /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) + /// MOVDQU m128, xmm1 + /// VMOVDQU m128, xmm1 + /// VMOVDQU32 m128 {k1}{z}, xmm1 + /// + public static unsafe void Store(uint* address, Vector128 source) => Store(address, source); + /// + /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) + /// MOVDQU m128, xmm1 + /// VMOVDQU m128, xmm1 + /// VMOVDQU64 m128 {k1}{z}, xmm1 + /// + public static unsafe void Store(long* address, Vector128 source) => Store(address, source); + /// + /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) + /// MOVDQU m128, xmm1 + /// VMOVDQU m128, xmm1 + /// VMOVDQU64 m128 {k1}{z}, xmm1 + /// + public static unsafe void Store(ulong* address, Vector128 source) => Store(address, source); + /// + /// void _mm_storeu_pd (double* mem_addr, __m128d a) + /// MOVUPD m128, xmm1 + /// VMOVUPD m128, xmm1 + /// VMOVUPD m128 {k1}{z}, xmm1 + /// + public static unsafe void Store(double* address, Vector128 source) => Store(address, source); /// /// void _mm_store_si128 (__m128i* mem_addr, 
__m128i a) - /// MOVDQA m128, xmm + /// MOVDQA m128, xmm1 + /// VMOVDQA m128, xmm1 + /// VMOVDQA32 m128 {k1}{z}, xmm1 /// public static unsafe void StoreAligned(sbyte* address, Vector128 source) => StoreAligned(address, source); /// /// void _mm_store_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQA m128, xmm + /// MOVDQA m128, xmm1 + /// VMOVDQA m128, xmm1 + /// VMOVDQA32 m128 {k1}{z}, xmm1 /// public static unsafe void StoreAligned(byte* address, Vector128 source) => StoreAligned(address, source); /// /// void _mm_store_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQA m128, xmm + /// MOVDQA m128, xmm1 + /// VMOVDQA m128, xmm1 + /// VMOVDQA32 m128 {k1}{z}, xmm1 /// public static unsafe void StoreAligned(short* address, Vector128 source) => StoreAligned(address, source); /// /// void _mm_store_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQA m128, xmm + /// MOVDQA m128, xmm1 + /// VMOVDQA m128, xmm1 + /// VMOVDQA32 m128 {k1}{z}, xmm1 /// public static unsafe void StoreAligned(ushort* address, Vector128 source) => StoreAligned(address, source); /// /// void _mm_store_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQA m128, xmm + /// MOVDQA m128, xmm1 + /// VMOVDQA m128, xmm1 + /// VMOVDQA32 m128 {k1}{z}, xmm1 /// public static unsafe void StoreAligned(int* address, Vector128 source) => StoreAligned(address, source); /// /// void _mm_store_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQA m128, xmm + /// MOVDQA m128, xmm1 + /// VMOVDQA m128, xmm1 + /// VMOVDQA32 m128 {k1}{z}, xmm1 /// public static unsafe void StoreAligned(uint* address, Vector128 source) => StoreAligned(address, source); /// /// void _mm_store_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQA m128, xmm + /// MOVDQA m128, xmm1 + /// VMOVDQA m128, xmm1 + /// VMOVDQA64 m128 {k1}{z}, xmm1 /// public static unsafe void StoreAligned(long* address, Vector128 source) => StoreAligned(address, source); /// /// void _mm_store_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQA m128, xmm + /// MOVDQA m128, xmm1 + /// VMOVDQA m128, xmm1 + /// VMOVDQA64 m128 {k1}{z}, xmm1 /// public static unsafe void StoreAligned(ulong* address, Vector128 source) => StoreAligned(address, source); /// /// void _mm_store_pd (double* mem_addr, __m128d a) - /// MOVAPD m128, xmm + /// MOVAPD m128, xmm1 + /// VMOVAPD m128, xmm1 + /// VMOVAPD m128 {k1}{z}, xmm1 /// public static unsafe void StoreAligned(double* address, Vector128 source) => StoreAligned(address, source); /// /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a) - /// MOVNTDQ m128, xmm + /// MOVNTDQ m128, xmm1 + /// VMOVNTDQ m128, xmm1 /// public static unsafe void StoreAlignedNonTemporal(sbyte* address, Vector128 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a) - /// MOVNTDQ m128, xmm + /// MOVNTDQ m128, xmm1 + /// VMOVNTDQ m128, xmm1 /// public static unsafe void StoreAlignedNonTemporal(byte* address, Vector128 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a) - /// MOVNTDQ m128, xmm + /// MOVNTDQ m128, xmm1 + /// VMOVNTDQ m128, xmm1 /// public static unsafe void StoreAlignedNonTemporal(short* address, Vector128 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a) - /// MOVNTDQ m128, xmm + /// MOVNTDQ m128, xmm1 + /// VMOVNTDQ m128, xmm1 /// public static unsafe void StoreAlignedNonTemporal(ushort* address, Vector128 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm_stream_si128 
(__m128i* mem_addr, __m128i a) - /// MOVNTDQ m128, xmm + /// MOVNTDQ m128, xmm1 + /// VMOVNTDQ m128, xmm1 /// public static unsafe void StoreAlignedNonTemporal(int* address, Vector128 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a) - /// MOVNTDQ m128, xmm + /// MOVNTDQ m128, xmm1 + /// VMOVNTDQ m128, xmm1 /// public static unsafe void StoreAlignedNonTemporal(uint* address, Vector128 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a) - /// MOVNTDQ m128, xmm + /// MOVNTDQ m128, xmm1 + /// VMOVNTDQ m128, xmm1 /// public static unsafe void StoreAlignedNonTemporal(long* address, Vector128 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a) - /// MOVNTDQ m128, xmm + /// MOVNTDQ m128, xmm1 + /// VMOVNTDQ m128, xmm1 /// public static unsafe void StoreAlignedNonTemporal(ulong* address, Vector128 source) => StoreAlignedNonTemporal(address, source); /// /// void _mm_stream_pd (double* mem_addr, __m128d a) - /// MOVNTPD m128, xmm + /// MOVNTPD m128, xmm1 + /// VMOVNTPD m128, xmm1 /// public static unsafe void StoreAlignedNonTemporal(double* address, Vector128 source) => StoreAlignedNonTemporal(address, source); - /// - /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQU m128, xmm - /// - public static unsafe void Store(sbyte* address, Vector128 source) => Store(address, source); - /// - /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQU m128, xmm - /// - public static unsafe void Store(byte* address, Vector128 source) => Store(address, source); - /// - /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQU m128, xmm - /// - public static unsafe void Store(short* address, Vector128 source) => Store(address, source); - /// - /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQU m128, xmm - /// - public static unsafe void Store(ushort* address, Vector128 source) => Store(address, source); - /// - /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQU m128, xmm - /// - public static unsafe void Store(int* address, Vector128 source) => Store(address, source); - /// - /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQU m128, xmm - /// - public static unsafe void Store(uint* address, Vector128 source) => Store(address, source); - /// - /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQU m128, xmm - /// - public static unsafe void Store(long* address, Vector128 source) => Store(address, source); - /// - /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) - /// MOVDQU m128, xmm - /// - public static unsafe void Store(ulong* address, Vector128 source) => Store(address, source); - /// - /// void _mm_storeu_pd (double* mem_addr, __m128d a) - /// MOVUPD m128, xmm - /// - public static unsafe void Store(double* address, Vector128 source) => Store(address, source); - /// /// void _mm_storeh_pd (double* mem_addr, __m128d a) - /// MOVHPD m64, xmm + /// MOVHPD m64, xmm1 + /// VMOVHPD m64, xmm1 /// public static unsafe void StoreHigh(double* address, Vector128 source) => StoreHigh(address, source); - /// /// void _mm_storel_pd (double* mem_addr, __m128d a) - /// MOVLPD m64, xmm + /// MOVLPD m64, xmm1 + /// VMOVLPD m64, xmm1 /// public static unsafe void StoreLow(double* address, Vector128 source) => StoreLow(address, source); @@ -1478,214 +1814,332 @@ internal X64() { } /// public static unsafe void 
StoreNonTemporal(uint* address, uint value) => StoreNonTemporal(address, value); + /// + /// void _mm_storeu_si32 (void* mem_addr, __m128i a) + /// MOVD m32, xmm1 + /// VMOVD m32, xmm1 + /// + public static unsafe void StoreScalar(int* address, Vector128 source) => StoreScalar(address, source); + /// + /// void _mm_storeu_si32 (void* mem_addr, __m128i a) + /// MOVD m32, xmm1 + /// VMOVD m32, xmm1 + /// + public static unsafe void StoreScalar(uint* address, Vector128 source) => StoreScalar(address, source); + /// + /// void _mm_storel_epi64 (__m128i* mem_addr, __m128i a) + /// MOVQ m64, xmm1 + /// VMOVQ m64, xmm1 + /// + public static unsafe void StoreScalar(long* address, Vector128 source) => StoreScalar(address, source); + /// + /// void _mm_storel_epi64 (__m128i* mem_addr, __m128i a) + /// MOVQ m64, xmm1 + /// VMOVQ m64, xmm1 + /// + public static unsafe void StoreScalar(ulong* address, Vector128 source) => StoreScalar(address, source); + /// + /// void _mm_store_sd (double* mem_addr, __m128d a) + /// MOVSD m64, xmm1 + /// VMOVSD m64, xmm1 + /// VMOVSD m64 {k1}, xmm1 + /// + public static unsafe void StoreScalar(double* address, Vector128 source) => StoreScalar(address, source); + /// /// __m128i _mm_sub_epi8 (__m128i a, __m128i b) - /// PSUBB xmm, xmm/m128 + /// PSUBB xmm1, xmm2/m128 + /// VPSUBB xmm1, xmm2, xmm3/m128 + /// VPSUBB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Subtract(Vector128 left, Vector128 right) => Subtract(left, right); /// /// __m128i _mm_sub_epi8 (__m128i a, __m128i b) - /// PSUBB xmm, xmm/m128 + /// PSUBB xmm1, xmm2/m128 + /// VPSUBB xmm1, xmm2, xmm3/m128 + /// VPSUBB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Subtract(Vector128 left, Vector128 right) => Subtract(left, right); /// /// __m128i _mm_sub_epi16 (__m128i a, __m128i b) - /// PSUBW xmm, xmm/m128 + /// PSUBW xmm1, xmm2/m128 + /// VPSUBW xmm1, xmm2, xmm3/m128 + /// VPSUBW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Subtract(Vector128 left, Vector128 right) => Subtract(left, right); /// /// __m128i _mm_sub_epi16 (__m128i a, __m128i b) - /// PSUBW xmm, xmm/m128 + /// PSUBW xmm1, xmm2/m128 + /// VPSUBW xmm1, xmm2, xmm3/m128 + /// VPSUBW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Subtract(Vector128 left, Vector128 right) => Subtract(left, right); /// /// __m128i _mm_sub_epi32 (__m128i a, __m128i b) - /// PSUBD xmm, xmm/m128 + /// PSUBD xmm1, xmm2/m128 + /// VPSUBD xmm1, xmm2, xmm3/m128 + /// VPSUBD xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Subtract(Vector128 left, Vector128 right) => Subtract(left, right); /// /// __m128i _mm_sub_epi32 (__m128i a, __m128i b) - /// PSUBD xmm, xmm/m128 + /// PSUBD xmm1, xmm2/m128 + /// VPSUBD xmm1, xmm2, xmm3/m128 + /// VPSUBD xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Subtract(Vector128 left, Vector128 right) => Subtract(left, right); /// /// __m128i _mm_sub_epi64 (__m128i a, __m128i b) - /// PSUBQ xmm, xmm/m128 + /// PSUBQ xmm1, xmm2/m128 + /// VPSUBQ xmm1, xmm2, xmm3/m128 + /// VPSUBQ xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Subtract(Vector128 left, Vector128 right) => Subtract(left, right); /// /// __m128i _mm_sub_epi64 (__m128i a, __m128i b) - /// PSUBQ xmm, xmm/m128 + /// PSUBQ xmm1, xmm2/m128 + /// VPSUBQ xmm1, xmm2, xmm3/m128 + /// VPSUBQ xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Subtract(Vector128 left, Vector128 right) => Subtract(left, right); /// /// __m128d _mm_sub_pd (__m128d a, __m128d b) - /// SUBPD xmm, xmm/m128 + /// SUBPD xmm1, xmm2/m128 + 
/// VSUBPD xmm1, xmm2, xmm3/m128 + /// VSUBPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Subtract(Vector128 left, Vector128 right) => Subtract(left, right); /// /// __m128d _mm_sub_sd (__m128d a, __m128d b) - /// SUBSD xmm, xmm/m64 + /// SUBSD xmm1, xmm2/m64 + /// VSUBSD xmm1, xmm2, xmm3/m64 + /// VSUBSD xmm1 {k1}{z}, xmm2, xmm3/m64{er} /// public static Vector128 SubtractScalar(Vector128 left, Vector128 right) => SubtractScalar(left, right); /// /// __m128i _mm_subs_epi8 (__m128i a, __m128i b) - /// PSUBSB xmm, xmm/m128 + /// PSUBSB xmm1, xmm2/m128 + /// VPSUBSB xmm1, xmm2, xmm3/m128 + /// VPSUBSB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 SubtractSaturate(Vector128 left, Vector128 right) => SubtractSaturate(left, right); /// /// __m128i _mm_subs_epi16 (__m128i a, __m128i b) - /// PSUBSW xmm, xmm/m128 + /// PSUBSW xmm1, xmm2/m128 + /// VPSUBSW xmm1, xmm2, xmm3/m128 + /// VPSUBSW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 SubtractSaturate(Vector128 left, Vector128 right) => SubtractSaturate(left, right); /// /// __m128i _mm_subs_epu8 (__m128i a, __m128i b) - /// PSUBUSB xmm, xmm/m128 + /// PSUBUSB xmm1, xmm2/m128 + /// VPSUBUSB xmm1, xmm2, xmm3/m128 + /// VPSUBUSB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 SubtractSaturate(Vector128 left, Vector128 right) => SubtractSaturate(left, right); /// /// __m128i _mm_subs_epu16 (__m128i a, __m128i b) - /// PSUBUSW xmm, xmm/m128 + /// PSUBUSW xmm1, xmm2/m128 + /// VPSUBUSW xmm1, xmm2, xmm3/m128 + /// VPSUBUSW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 SubtractSaturate(Vector128 left, Vector128 right) => SubtractSaturate(left, right); + /// + /// __m128i _mm_sad_epu8 (__m128i a, __m128i b) + /// PSADBW xmm1, xmm2/m128 + /// VPSADBW xmm1, xmm2, xmm3/m128 + /// VPSADBW xmm1 {k1}{z}, xmm2, xmm3/m128 + /// + public static Vector128 SumAbsoluteDifferences(Vector128 left, Vector128 right) => SumAbsoluteDifferences(left, right); + /// /// __m128i _mm_unpackhi_epi8 (__m128i a, __m128i b) - /// PUNPCKHBW xmm, xmm/m128 + /// PUNPCKHBW xmm1, xmm2/m128 + /// VPUNPCKHBW xmm1, xmm2, xmm3/m128 + /// VPUNPCKHBW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 UnpackHigh(Vector128 left, Vector128 right) => UnpackHigh(left, right); /// /// __m128i _mm_unpackhi_epi8 (__m128i a, __m128i b) - /// PUNPCKHBW xmm, xmm/m128 + /// PUNPCKHBW xmm1, xmm2/m128 + /// VPUNPCKHBW xmm1, xmm2, xmm3/m128 + /// VPUNPCKHBW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 UnpackHigh(Vector128 left, Vector128 right) => UnpackHigh(left, right); /// /// __m128i _mm_unpackhi_epi16 (__m128i a, __m128i b) - /// PUNPCKHWD xmm, xmm/m128 + /// PUNPCKHWD xmm1, xmm2/m128 + /// VPUNPCKHWD xmm1, xmm2, xmm3/m128 + /// VPUNPCKHWD xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 UnpackHigh(Vector128 left, Vector128 right) => UnpackHigh(left, right); /// /// __m128i _mm_unpackhi_epi16 (__m128i a, __m128i b) - /// PUNPCKHWD xmm, xmm/m128 + /// PUNPCKHWD xmm1, xmm2/m128 + /// VPUNPCKHWD xmm1, xmm2, xmm3/m128 + /// VPUNPCKHWD xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 UnpackHigh(Vector128 left, Vector128 right) => UnpackHigh(left, right); /// /// __m128i _mm_unpackhi_epi32 (__m128i a, __m128i b) - /// PUNPCKHDQ xmm, xmm/m128 + /// PUNPCKHDQ xmm1, xmm2/m128 + /// VPUNPCKHDQ xmm1, xmm2, xmm3/m128 + /// VPUNPCKHDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 UnpackHigh(Vector128 left, Vector128 right) => UnpackHigh(left, right); /// /// __m128i _mm_unpackhi_epi32 
(__m128i a, __m128i b) - /// PUNPCKHDQ xmm, xmm/m128 + /// PUNPCKHDQ xmm1, xmm2/m128 + /// VPUNPCKHDQ xmm1, xmm2, xmm3/m128 + /// VPUNPCKHDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 UnpackHigh(Vector128 left, Vector128 right) => UnpackHigh(left, right); /// /// __m128i _mm_unpackhi_epi64 (__m128i a, __m128i b) - /// PUNPCKHQDQ xmm, xmm/m128 + /// PUNPCKHQDQ xmm1, xmm2/m128 + /// VPUNPCKHQDQ xmm1, xmm2, xmm3/m128 + /// VPUNPCKHQDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 UnpackHigh(Vector128 left, Vector128 right) => UnpackHigh(left, right); /// /// __m128i _mm_unpackhi_epi64 (__m128i a, __m128i b) - /// PUNPCKHQDQ xmm, xmm/m128 + /// PUNPCKHQDQ xmm1, xmm2/m128 + /// VPUNPCKHQDQ xmm1, xmm2, xmm3/m128 + /// VPUNPCKHQDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 UnpackHigh(Vector128 left, Vector128 right) => UnpackHigh(left, right); /// /// __m128d _mm_unpackhi_pd (__m128d a, __m128d b) - /// UNPCKHPD xmm, xmm/m128 + /// UNPCKHPD xmm1, xmm2/m128 + /// VUNPCKHPD xmm1, xmm2, xmm3/m128 + /// VUNPCKHPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 UnpackHigh(Vector128 left, Vector128 right) => UnpackHigh(left, right); /// /// __m128i _mm_unpacklo_epi8 (__m128i a, __m128i b) - /// PUNPCKLBW xmm, xmm/m128 + /// PUNPCKLBW xmm1, xmm2/m128 + /// VPUNPCKLBW xmm1, xmm2, xmm3/m128 + /// VPUNPCKLBW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 UnpackLow(Vector128 left, Vector128 right) => UnpackLow(left, right); /// /// __m128i _mm_unpacklo_epi8 (__m128i a, __m128i b) - /// PUNPCKLBW xmm, xmm/m128 + /// PUNPCKLBW xmm1, xmm2/m128 + /// VPUNPCKLBW xmm1, xmm2, xmm3/m128 + /// VPUNPCKLBW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 UnpackLow(Vector128 left, Vector128 right) => UnpackLow(left, right); /// /// __m128i _mm_unpacklo_epi16 (__m128i a, __m128i b) - /// PUNPCKLWD xmm, xmm/m128 + /// PUNPCKLWD xmm1, xmm2/m128 + /// VPUNPCKLWD xmm1, xmm2, xmm3/m128 + /// VPUNPCKLWD xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 UnpackLow(Vector128 left, Vector128 right) => UnpackLow(left, right); /// /// __m128i _mm_unpacklo_epi16 (__m128i a, __m128i b) - /// PUNPCKLWD xmm, xmm/m128 + /// PUNPCKLWD xmm1, xmm2/m128 + /// VPUNPCKLWD xmm1, xmm2, xmm3/m128 + /// VPUNPCKLWD xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 UnpackLow(Vector128 left, Vector128 right) => UnpackLow(left, right); /// /// __m128i _mm_unpacklo_epi32 (__m128i a, __m128i b) - /// PUNPCKLDQ xmm, xmm/m128 + /// PUNPCKLDQ xmm1, xmm2/m128 + /// VPUNPCKLDQ xmm1, xmm2, xmm3/m128 + /// VPUNPCKLDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 UnpackLow(Vector128 left, Vector128 right) => UnpackLow(left, right); /// /// __m128i _mm_unpacklo_epi32 (__m128i a, __m128i b) - /// PUNPCKLDQ xmm, xmm/m128 + /// PUNPCKLDQ xmm1, xmm2/m128 + /// VPUNPCKLDQ xmm1, xmm2, xmm3/m128 + /// VPUNPCKLDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 UnpackLow(Vector128 left, Vector128 right) => UnpackLow(left, right); /// /// __m128i _mm_unpacklo_epi64 (__m128i a, __m128i b) - /// PUNPCKLQDQ xmm, xmm/m128 + /// PUNPCKLQDQ xmm1, xmm2/m128 + /// VPUNPCKLQDQ xmm1, xmm2, xmm3/m128 + /// VPUNPCKLQDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 UnpackLow(Vector128 left, Vector128 right) => UnpackLow(left, right); /// /// __m128i _mm_unpacklo_epi64 (__m128i a, __m128i b) - /// PUNPCKLQDQ xmm, xmm/m128 + /// PUNPCKLQDQ xmm1, xmm2/m128 + /// VPUNPCKLQDQ xmm1, xmm2, xmm3/m128 + /// 
VPUNPCKLQDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 UnpackLow(Vector128 left, Vector128 right) => UnpackLow(left, right); /// /// __m128d _mm_unpacklo_pd (__m128d a, __m128d b) - /// UNPCKLPD xmm, xmm/m128 + /// UNPCKLPD xmm1, xmm2/m128 + /// VUNPCKLPD xmm1, xmm2, xmm3/m128 + /// VUNPCKLPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 UnpackLow(Vector128 left, Vector128 right) => UnpackLow(left, right); /// /// __m128i _mm_xor_si128 (__m128i a, __m128i b) - /// PXOR xmm, xmm/m128 + /// PXOR xmm1, xmm2/m128 + /// VPXOR xmm1, xmm2, xmm3/m128 /// public static Vector128 Xor(Vector128 left, Vector128 right) => Xor(left, right); /// /// __m128i _mm_xor_si128 (__m128i a, __m128i b) - /// PXOR xmm, xmm/m128 + /// PXOR xmm1, xmm2/m128 + /// VPXOR xmm1, xmm2, xmm3/m128 /// public static Vector128 Xor(Vector128 left, Vector128 right) => Xor(left, right); /// /// __m128i _mm_xor_si128 (__m128i a, __m128i b) - /// PXOR xmm, xmm/m128 + /// PXOR xmm1, xmm2/m128 + /// VPXOR xmm1, xmm2, xmm3/m128 /// public static Vector128 Xor(Vector128 left, Vector128 right) => Xor(left, right); /// /// __m128i _mm_xor_si128 (__m128i a, __m128i b) - /// PXOR xmm, xmm/m128 + /// PXOR xmm1, xmm2/m128 + /// VPXOR xmm1, xmm2, xmm3/m128 /// public static Vector128 Xor(Vector128 left, Vector128 right) => Xor(left, right); /// /// __m128i _mm_xor_si128 (__m128i a, __m128i b) - /// PXOR xmm, xmm/m128 + /// PXOR xmm1, xmm2/m128 + /// VPXOR xmm1, xmm2, xmm3/m128 + /// VPXORD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Xor(Vector128 left, Vector128 right) => Xor(left, right); /// /// __m128i _mm_xor_si128 (__m128i a, __m128i b) - /// PXOR xmm, xmm/m128 + /// PXOR xmm1, xmm2/m128 + /// VPXOR xmm1, xmm2, xmm3/m128 + /// VPXORD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Xor(Vector128 left, Vector128 right) => Xor(left, right); /// /// __m128i _mm_xor_si128 (__m128i a, __m128i b) - /// PXOR xmm, xmm/m128 + /// PXOR xmm1, xmm2/m128 + /// VPXOR xmm1, xmm2, xmm3/m128 + /// VPXORQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Xor(Vector128 left, Vector128 right) => Xor(left, right); /// /// __m128i _mm_xor_si128 (__m128i a, __m128i b) - /// PXOR xmm, xmm/m128 + /// PXOR xmm1, xmm2/m128 + /// VPXOR xmm1, xmm2, xmm3/m128 + /// VPXORQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Xor(Vector128 left, Vector128 right) => Xor(left, right); /// /// __m128d _mm_xor_pd (__m128d a, __m128d b) - /// XORPD xmm, xmm/m128 + /// XORPD xmm1, xmm2/m128 + /// VXORPD xmm1, xmm2, xmm3/m128 + /// VXORPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Xor(Vector128 left, Vector128 right) => Xor(left, right); } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse3.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse3.PlatformNotSupported.cs index 368557dcba4731..ffc067b654bb56 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse3.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse3.PlatformNotSupported.cs @@ -26,71 +26,119 @@ internal X64() { } /// /// __m128 _mm_addsub_ps (__m128 a, __m128 b) - /// ADDSUBPS xmm, xmm/m128 + /// ADDSUBPS xmm1, xmm2/m128 + /// VADDSUBPS xmm1, xmm2, xmm3/m128 /// public static Vector128 AddSubtract(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_addsub_pd (__m128d 
a, __m128d b) - /// ADDSUBPD xmm, xmm/m128 + /// ADDSUBPD xmm1, xmm2/m128 + /// VADDSUBPD xmm1, xmm2, xmm3/m128 /// public static Vector128 AddSubtract(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_hadd_ps (__m128 a, __m128 b) - /// HADDPS xmm, xmm/m128 + /// HADDPS xmm1, xmm2/m128 + /// VHADDPS xmm1, xmm2, xmm3/m128 /// public static Vector128 HorizontalAdd(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_hadd_pd (__m128d a, __m128d b) - /// HADDPD xmm, xmm/m128 + /// HADDPD xmm1, xmm2/m128 + /// VHADDPD xmm1, xmm2, xmm3/m128 /// public static Vector128 HorizontalAdd(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_hsub_ps (__m128 a, __m128 b) - /// HSUBPS xmm, xmm/m128 + /// HSUBPS xmm1, xmm2/m128 + /// VHSUBPS xmm1, xmm2, xmm3/m128 /// public static Vector128 HorizontalSubtract(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_hsub_pd (__m128d a, __m128d b) - /// HSUBPD xmm, xmm/m128 + /// HSUBPD xmm1, xmm2/m128 + /// VHSUBPD xmm1, xmm2, xmm3/m128 /// public static Vector128 HorizontalSubtract(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_loaddup_pd (double const* mem_addr) - /// MOVDDUP xmm, m64 + /// MOVDDUP xmm1, m64 + /// VMOVDDUP xmm1, m64 + /// VMOVDDUP xmm1 {k1}{z}, m64 /// public static unsafe Vector128 LoadAndDuplicateToVector128(double* address) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr) - /// LDDQU xmm, m128 + /// LDDQU xmm1, m128 + /// VLDDQU xmm1, m128 /// public static unsafe Vector128 LoadDquVector128(sbyte* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr) + /// LDDQU xmm1, m128 + /// VLDDQU xmm1, m128 + /// public static unsafe Vector128 LoadDquVector128(byte* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr) + /// LDDQU xmm1, m128 + /// VLDDQU xmm1, m128 + /// public static unsafe Vector128 LoadDquVector128(short* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr) + /// LDDQU xmm1, m128 + /// VLDDQU xmm1, m128 + /// public static unsafe Vector128 LoadDquVector128(ushort* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr) + /// LDDQU xmm1, m128 + /// VLDDQU xmm1, m128 + /// public static unsafe Vector128 LoadDquVector128(int* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr) + /// LDDQU xmm1, m128 + /// VLDDQU xmm1, m128 + /// public static unsafe Vector128 LoadDquVector128(uint* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr) + /// LDDQU xmm1, m128 + /// VLDDQU xmm1, m128 + /// public static unsafe Vector128 LoadDquVector128(long* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr) + /// LDDQU xmm1, m128 + /// VLDDQU xmm1, m128 + /// public static unsafe Vector128 LoadDquVector128(ulong* address) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_movedup_pd (__m128d a) - /// MOVDDUP xmm, xmm/m64 + /// MOVDDUP xmm1, xmm2/m64 + /// VMOVDDUP xmm1, xmm2/m64 + /// VMOVDDUP xmm1 
{k1}{z}, xmm2/m64 /// public static Vector128 MoveAndDuplicate(Vector128 source) { throw new PlatformNotSupportedException(); } - /// /// __m128 _mm_movehdup_ps (__m128 a) - /// MOVSHDUP xmm, xmm/m128 + /// MOVSHDUP xmm1, xmm2/m128 + /// VMOVSHDUP xmm1, xmm2/m128 + /// VMOVSHDUP xmm1 {k1}{z}, xmm2/m128 /// public static Vector128 MoveHighAndDuplicate(Vector128 source) { throw new PlatformNotSupportedException(); } - /// /// __m128 _mm_moveldup_ps (__m128 a) - /// MOVSLDUP xmm, xmm/m128 + /// MOVSLDUP xmm1, xmm2/m128 + /// VMOVSLDUP xmm1, xmm2/m128 + /// VMOVSLDUP xmm1 {k1}{z}, xmm2/m128 /// public static Vector128 MoveLowAndDuplicate(Vector128 source) { throw new PlatformNotSupportedException(); } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse3.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse3.cs index 2ec3f9b04d4f14..85fff23f4cd9cc 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse3.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse3.cs @@ -26,71 +26,119 @@ internal X64() { } /// /// __m128 _mm_addsub_ps (__m128 a, __m128 b) - /// ADDSUBPS xmm, xmm/m128 + /// ADDSUBPS xmm1, xmm2/m128 + /// VADDSUBPS xmm1, xmm2, xmm3/m128 /// public static Vector128 AddSubtract(Vector128 left, Vector128 right) => AddSubtract(left, right); /// /// __m128d _mm_addsub_pd (__m128d a, __m128d b) - /// ADDSUBPD xmm, xmm/m128 + /// ADDSUBPD xmm1, xmm2/m128 + /// VADDSUBPD xmm1, xmm2, xmm3/m128 /// public static Vector128 AddSubtract(Vector128 left, Vector128 right) => AddSubtract(left, right); /// /// __m128 _mm_hadd_ps (__m128 a, __m128 b) - /// HADDPS xmm, xmm/m128 + /// HADDPS xmm1, xmm2/m128 + /// VHADDPS xmm1, xmm2, xmm3/m128 /// public static Vector128 HorizontalAdd(Vector128 left, Vector128 right) => HorizontalAdd(left, right); /// /// __m128d _mm_hadd_pd (__m128d a, __m128d b) - /// HADDPD xmm, xmm/m128 + /// HADDPD xmm1, xmm2/m128 + /// VHADDPD xmm1, xmm2, xmm3/m128 /// public static Vector128 HorizontalAdd(Vector128 left, Vector128 right) => HorizontalAdd(left, right); /// /// __m128 _mm_hsub_ps (__m128 a, __m128 b) - /// HSUBPS xmm, xmm/m128 + /// HSUBPS xmm1, xmm2/m128 + /// VHSUBPS xmm1, xmm2, xmm3/m128 /// public static Vector128 HorizontalSubtract(Vector128 left, Vector128 right) => HorizontalSubtract(left, right); /// /// __m128d _mm_hsub_pd (__m128d a, __m128d b) - /// HSUBPD xmm, xmm/m128 + /// HSUBPD xmm1, xmm2/m128 + /// VHSUBPD xmm1, xmm2, xmm3/m128 /// public static Vector128 HorizontalSubtract(Vector128 left, Vector128 right) => HorizontalSubtract(left, right); /// /// __m128d _mm_loaddup_pd (double const* mem_addr) - /// MOVDDUP xmm, m64 + /// MOVDDUP xmm1, m64 + /// VMOVDDUP xmm1, m64 + /// VMOVDDUP xmm1 {k1}{z}, m64 /// public static unsafe Vector128 LoadAndDuplicateToVector128(double* address) => LoadAndDuplicateToVector128(address); /// /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr) - /// LDDQU xmm, m128 + /// LDDQU xmm1, m128 + /// VLDDQU xmm1, m128 /// public static unsafe Vector128 LoadDquVector128(sbyte* address) => LoadDquVector128(address); + /// + /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr) + /// LDDQU xmm1, m128 + /// VLDDQU xmm1, m128 + /// public static unsafe Vector128 LoadDquVector128(byte* address) => LoadDquVector128(address); + /// + /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr) + /// LDDQU xmm1, m128 + /// VLDDQU xmm1, m128 + /// public static unsafe Vector128 LoadDquVector128(short* address) => 
LoadDquVector128(address); + /// + /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr) + /// LDDQU xmm1, m128 + /// VLDDQU xmm1, m128 + /// public static unsafe Vector128 LoadDquVector128(ushort* address) => LoadDquVector128(address); + /// + /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr) + /// LDDQU xmm1, m128 + /// VLDDQU xmm1, m128 + /// public static unsafe Vector128 LoadDquVector128(int* address) => LoadDquVector128(address); + /// + /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr) + /// LDDQU xmm1, m128 + /// VLDDQU xmm1, m128 + /// public static unsafe Vector128 LoadDquVector128(uint* address) => LoadDquVector128(address); + /// + /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr) + /// LDDQU xmm1, m128 + /// VLDDQU xmm1, m128 + /// public static unsafe Vector128 LoadDquVector128(long* address) => LoadDquVector128(address); + /// + /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr) + /// LDDQU xmm1, m128 + /// VLDDQU xmm1, m128 + /// public static unsafe Vector128 LoadDquVector128(ulong* address) => LoadDquVector128(address); /// /// __m128d _mm_movedup_pd (__m128d a) - /// MOVDDUP xmm, xmm/m64 + /// MOVDDUP xmm1, xmm2/m64 + /// VMOVDDUP xmm1, xmm2/m64 + /// VMOVDDUP xmm1 {k1}{z}, xmm2/m64 /// public static Vector128 MoveAndDuplicate(Vector128 source) => MoveAndDuplicate(source); - /// /// __m128 _mm_movehdup_ps (__m128 a) - /// MOVSHDUP xmm, xmm/m128 + /// MOVSHDUP xmm1, xmm2/m128 + /// VMOVSHDUP xmm1, xmm2/m128 + /// VMOVSHDUP xmm1 {k1}{z}, xmm2/m128 /// public static Vector128 MoveHighAndDuplicate(Vector128 source) => MoveHighAndDuplicate(source); - /// /// __m128 _mm_moveldup_ps (__m128 a) - /// MOVSLDUP xmm, xmm/m128 + /// MOVSLDUP xmm1, xmm2/m128 + /// VMOVSLDUP xmm1, xmm2/m128 + /// VMOVSLDUP xmm1 {k1}{z}, xmm2/m128 /// public static Vector128 MoveLowAndDuplicate(Vector128 source) => MoveLowAndDuplicate(source); } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse41.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse41.PlatformNotSupported.cs index c3a8d1216d477e..2f5dd6a2edaac7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse41.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse41.PlatformNotSupported.cs @@ -26,26 +26,30 @@ internal X64() { } /// /// __int64 _mm_extract_epi64 (__m128i a, const int imm8) - /// PEXTRQ reg/m64, xmm, imm8 + /// PEXTRQ r/m64, xmm1, imm8 + /// VPEXTRQ r/m64, xmm1, imm8 /// This intrinsic is only available on 64-bit processes /// public static long Extract(Vector128 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } /// /// __int64 _mm_extract_epi64 (__m128i a, const int imm8) - /// PEXTRQ reg/m64, xmm, imm8 + /// PEXTRQ r/m64, xmm1, imm8 + /// VPEXTRQ r/m64, xmm1, imm8 /// This intrinsic is only available on 64-bit processes /// public static ulong Extract(Vector128 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_insert_epi64 (__m128i a, __int64 i, const int imm8) - /// PINSRQ xmm, reg/m64, imm8 + /// PINSRQ xmm1, r/m64, imm8 + /// VPINSRQ xmm1, xmm2, r/m64, imm8 /// This intrinsic is only available on 64-bit processes /// public static Vector128 Insert(Vector128 value, long data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_insert_epi64 (__m128i a, __int64 i, const int imm8) - /// PINSRQ xmm, reg/m64, imm8 + 
/// PINSRQ xmm1, r/m64, imm8 + /// VPINSRQ xmm1, xmm2, r/m64, imm8 /// This intrinsic is only available on 64-bit processes /// public static Vector128 Insert(Vector128 value, ulong data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } @@ -53,744 +57,944 @@ internal X64() { } /// /// __m128i _mm_blend_epi16 (__m128i a, __m128i b, const int imm8) - /// PBLENDW xmm, xmm/m128 imm8 + /// PBLENDW xmm1, xmm2/m128, imm8 + /// VPBLENDW xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 Blend(Vector128 left, Vector128 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_blend_epi16 (__m128i a, __m128i b, const int imm8) - /// PBLENDW xmm, xmm/m128 imm8 + /// PBLENDW xmm1, xmm2/m128, imm8 + /// VPBLENDW xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 Blend(Vector128 left, Vector128 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } - /// /// __m128 _mm_blend_ps (__m128 a, __m128 b, const int imm8) - /// BLENDPS xmm, xmm/m128, imm8 + /// BLENDPS xmm1, xmm2/m128, imm8 + /// VBLENDPS xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 Blend(Vector128 left, Vector128 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } - /// /// __m128d _mm_blend_pd (__m128d a, __m128d b, const int imm8) - /// BLENDPD xmm, xmm/m128, imm8 + /// BLENDPD xmm1, xmm2/m128, imm8 + /// VBLENDPD xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 Blend(Vector128 left, Vector128 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask) - /// PBLENDVB xmm, xmm/m128, xmm + /// PBLENDVB xmm1, xmm2/m128, <XMM0> + /// VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4 /// public static Vector128 BlendVariable(Vector128 left, Vector128 right, Vector128 mask) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask) - /// PBLENDVB xmm, xmm/m128, xmm + /// PBLENDVB xmm1, xmm2/m128, <XMM0> + /// VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4 /// public static Vector128 BlendVariable(Vector128 left, Vector128 right, Vector128 mask) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask) - /// PBLENDVB xmm, xmm/m128, xmm + /// PBLENDVB xmm1, xmm2/m128, <XMM0> + /// VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4 /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements. /// public static Vector128 BlendVariable(Vector128 left, Vector128 right, Vector128 mask) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask) - /// PBLENDVB xmm, xmm/m128, xmm + /// PBLENDVB xmm1, xmm2/m128, <XMM0> + /// VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4 /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements. /// public static Vector128 BlendVariable(Vector128 left, Vector128 right, Vector128 mask) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask) - /// PBLENDVB xmm, xmm/m128, xmm + /// PBLENDVB xmm1, xmm2/m128, <XMM0> + /// VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4 /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
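// --- Illustrative sketch (editor's addition, not part of the patch): PBLENDVB selects
// by the sign bit of every mask BYTE, so a comparison result makes a well-formed mask
// for wider element types.
Vector128<short> a = Vector128.Create((short)1, 5, 3, 7, 2, 8, 4, 6);
Vector128<short> b = Vector128.Create((short)4, 4, 4, 4, 4, 4, 4, 4);
Vector128<short> mask = Sse2.CompareGreaterThan(a, b);   // all-ones lanes where a > b
Vector128<short> max  = Sse41.BlendVariable(b, a, mask); // takes 'a' where the mask is set: element-wise max
// --- end sketch ---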
/// public static Vector128 BlendVariable(Vector128 left, Vector128 right, Vector128 mask) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask) - /// PBLENDVB xmm, xmm/m128, xmm + /// PBLENDVB xmm1, xmm2/m128, <XMM0> + /// VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4 /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements. /// public static Vector128 BlendVariable(Vector128 left, Vector128 right, Vector128 mask) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask) - /// PBLENDVB xmm, xmm/m128, xmm + /// PBLENDVB xmm1, xmm2/m128, <XMM0> + /// VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4 /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements. /// public static Vector128 BlendVariable(Vector128 left, Vector128 right, Vector128 mask) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask) - /// PBLENDVB xmm, xmm/m128, xmm + /// PBLENDVB xmm1, xmm2/m128, <XMM0> + /// VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4 /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements. /// public static Vector128 BlendVariable(Vector128 left, Vector128 right, Vector128 mask) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_blendv_ps (__m128 a, __m128 b, __m128 mask) - /// BLENDVPS xmm, xmm/m128, xmm0 + /// BLENDVPS xmm1, xmm2/m128, <XMM0> + /// VBLENDVPS xmm1, xmm2, xmm3/m128, xmm4 /// public static Vector128 BlendVariable(Vector128 left, Vector128 right, Vector128 mask) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_blendv_pd (__m128d a, __m128d b, __m128d mask) - /// BLENDVPD xmm, xmm/m128, xmm0 + /// BLENDVPD xmm1, xmm2/m128, <XMM0> + /// VBLENDVPD xmm1, xmm2, xmm3/m128, xmm4 /// public static Vector128 BlendVariable(Vector128 left, Vector128 right, Vector128 mask) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_ceil_ps (__m128 a) - /// ROUNDPS xmm, xmm/m128, imm8(10) + /// ROUNDPS xmm1, xmm2/m128, imm8(10) + /// VROUNDPS xmm1, xmm2/m128, imm8(10) /// public static Vector128 Ceiling(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_ceil_pd (__m128d a) - /// ROUNDPD xmm, xmm/m128, imm8(10) + /// ROUNDPD xmm1, xmm2/m128, imm8(10) + /// VROUNDPD xmm1, xmm2/m128, imm8(10) /// public static Vector128 Ceiling(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_ceil_sd (__m128d a) - /// ROUNDSD xmm, xmm/m128, imm8(10) + /// __m128 _mm_ceil_ss (__m128 a) + /// ROUNDSS xmm1, xmm2/m128, imm8(10) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(10) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. 
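// --- Illustrative sketch (editor's addition, not part of the patch): Ceiling rounds every
// element up, while the scalar form rounds only element 0 and copies the rest from 'upper'.
Vector128<float>  c = Sse41.Ceiling(Vector128.Create(1.2f, -1.2f, 2.5f, -2.5f)); // <2, -1, 3, -2>
Vector128<double> s = Sse41.CeilingScalar(Vector128.Create(9.0, 9.0),   // upper
                                          Vector128.Create(1.2, 0.0));  // value
// s = <2, 9>
// --- end sketch ---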
/// - public static Vector128 CeilingScalar(Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 CeilingScalar(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_ceil_ss (__m128 a) - /// ROUNDSD xmm, xmm/m128, imm8(10) + /// __m128 _mm_ceil_ss (__m128 a, __m128 b) + /// ROUNDSS xmm1, xmm2/m128, imm8(10) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(10) + /// + public static Vector128 CeilingScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } + /// + /// __m128d _mm_ceil_sd (__m128d a) + /// ROUNDSD xmm1, xmm2/m128, imm8(10) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(10) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 CeilingScalar(Vector128 value) { throw new PlatformNotSupportedException(); } - + public static Vector128 CeilingScalar(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_ceil_sd (__m128d a, __m128d b) - /// ROUNDSD xmm, xmm/m128, imm8(10) + /// ROUNDSD xmm1, xmm2/m128, imm8(10) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(10) /// public static Vector128 CeilingScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } - /// - /// __m128 _mm_ceil_ss (__m128 a, __m128 b) - /// ROUNDSS xmm, xmm/m128, imm8(10) - /// - public static Vector128 CeilingScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cmpeq_epi64 (__m128i a, __m128i b) - /// PCMPEQQ xmm, xmm/m128 + /// PCMPEQQ xmm1, xmm2/m128 + /// VPCMPEQQ xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cmpeq_epi64 (__m128i a, __m128i b) - /// PCMPEQQ xmm, xmm/m128 + /// PCMPEQQ xmm1, xmm2/m128 + /// VPCMPEQQ xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cvtepi8_epi16 (__m128i a) - /// PMOVSXBW xmm, xmm + /// PMOVSXBW xmm1, xmm2/m64 + /// VPMOVSXBW xmm1, xmm2/m64 + /// VPMOVSXBW xmm1 {k1}{z}, xmm2/m64 /// public static Vector128 ConvertToVector128Int16(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cvtepu8_epi16 (__m128i a) - /// PMOVZXBW xmm, xmm + /// PMOVZXBW xmm1, xmm2/m64 + /// VPMOVZXBW xmm1, xmm2/m64 + /// VPMOVZXBW xmm1 {k1}{z}, xmm2/m64 /// public static Vector128 ConvertToVector128Int16(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cvtepi8_epi32 (__m128i a) - /// PMOVSXBD xmm, xmm + /// PMOVSXBD xmm1, xmm2/m32 + /// VPMOVSXBD xmm1, xmm2/m32 + /// VPMOVSXBD xmm1 {k1}{z}, xmm2/m32 /// public static Vector128 ConvertToVector128Int32(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cvtepu8_epi32 (__m128i a) - /// PMOVZXBD xmm, xmm + /// PMOVZXBD xmm1, xmm2/m32 + /// VPMOVZXBD xmm1, xmm2/m32 + /// VPMOVZXBD xmm1 {k1}{z}, xmm2/m32 /// public static Vector128 ConvertToVector128Int32(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cvtepi16_epi32 (__m128i a) - /// PMOVSXWD xmm, xmm + /// PMOVSXWD xmm1, xmm2/m64 + /// VPMOVSXWD xmm1, xmm2/m64 + /// VPMOVSXWD xmm1 {k1}{z}, xmm2/m64 /// public static Vector128 ConvertToVector128Int32(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cvtepu16_epi32 
(__m128i a) - /// PMOVZXWD xmm, xmm + /// PMOVZXWD xmm1, xmm2/m64 + /// VPMOVZXWD xmm1, xmm2/m64 + /// VPMOVZXWD xmm1 {k1}{z}, xmm2/m64 /// public static Vector128 ConvertToVector128Int32(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cvtepi8_epi64 (__m128i a) - /// PMOVSXBQ xmm, xmm + /// PMOVSXBQ xmm1, xmm2/m16 + /// VPMOVSXBQ xmm1, xmm2/m16 + /// VPMOVSXBQ xmm1 {k1}{z}, xmm2/m16 /// public static Vector128 ConvertToVector128Int64(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cvtepu8_epi64 (__m128i a) - /// PMOVZXBQ xmm, xmm + /// PMOVZXBQ xmm1, xmm2/m16 + /// VPMOVZXBQ xmm1, xmm2/m16 + /// VPMOVZXBQ xmm1 {k1}{z}, xmm2/m16 /// public static Vector128 ConvertToVector128Int64(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cvtepi16_epi64 (__m128i a) - /// PMOVSXWQ xmm, xmm + /// PMOVSXWQ xmm1, xmm2/m32 + /// VPMOVSXWQ xmm1, xmm2/m32 + /// VPMOVSXWQ xmm1 {k1}{z}, xmm2/m32 /// public static Vector128 ConvertToVector128Int64(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cvtepu16_epi64 (__m128i a) - /// PMOVZXWQ xmm, xmm + /// PMOVZXWQ xmm1, xmm2/m32 + /// VPMOVZXWQ xmm1, xmm2/m32 + /// VPMOVZXWQ xmm1 {k1}{z}, xmm2/m32 /// public static Vector128 ConvertToVector128Int64(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cvtepi32_epi64 (__m128i a) - /// PMOVSXDQ xmm, xmm + /// PMOVSXDQ xmm1, xmm2/m64 + /// VPMOVSXDQ xmm1, xmm2/m64 + /// VPMOVSXDQ xmm1 {k1}{z}, xmm2/m64 /// public static Vector128 ConvertToVector128Int64(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_cvtepu32_epi64 (__m128i a) - /// PMOVZXDQ xmm, xmm + /// PMOVZXDQ xmm1, xmm2/m64 + /// VPMOVZXDQ xmm1, xmm2/m64 + /// VPMOVZXDQ xmm1 {k1}{z}, xmm2/m64 /// public static Vector128 ConvertToVector128Int64(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// PMOVSXBW xmm, m64 + /// PMOVSXBW xmm1, m64 + /// VPMOVSXBW xmm1, m64 + /// VPMOVSXBW xmm1 {k1}{z}, m64 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector128 ConvertToVector128Int16(sbyte* address) { throw new PlatformNotSupportedException(); } /// - /// PMOVZXBW xmm, m64 + /// PMOVZXBW xmm1, m64 + /// VPMOVZXBW xmm1, m64 + /// VPMOVZXBW xmm1 {k1}{z}, m64 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector128 ConvertToVector128Int16(byte* address) { throw new PlatformNotSupportedException(); } /// - /// PMOVSXBD xmm, m32 + /// PMOVSXBD xmm1, m32 + /// VPMOVSXBD xmm1, m32 + /// VPMOVSXBD xmm1 {k1}{z}, m32 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector128 ConvertToVector128Int32(sbyte* address) { throw new PlatformNotSupportedException(); } /// - /// PMOVZXBD xmm, m32 + /// PMOVZXBD xmm1, m32 + /// VPMOVZXBD xmm1, m32 + /// VPMOVZXBD xmm1 {k1}{z}, m32 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector128 ConvertToVector128Int32(byte* address) { throw new PlatformNotSupportedException(); } /// - /// PMOVSXWD xmm, m64 + /// PMOVSXWD xmm1, m64 + /// VPMOVSXWD xmm1, m64 + /// VPMOVSXWD xmm1 {k1}{z}, m64 /// The native signature does not exist. We provide this additional overload for completeness. 
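The PMOVSX/PMOVZX pairing above is worth spelling out: both forms read only the low half (or less) of the source, but the sign-extending forms replicate each element's sign bit while the zero-extending forms fill with zeros. A minimal sketch (helper name hypothetical, Sse41.IsSupported assumed):

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    // Widens the low 8 bytes of each source to 8 words.
    static (Vector128<short> SignExtended, Vector128<short> ZeroExtended) WidenBytes(
        Vector128<sbyte> signedBytes, Vector128<byte> unsignedBytes)
        => (Sse41.ConvertToVector128Int16(signedBytes),    // PMOVSXBW
            Sse41.ConvertToVector128Int16(unsignedBytes)); // PMOVZXBW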
/// public static unsafe Vector128 ConvertToVector128Int32(short* address) { throw new PlatformNotSupportedException(); } /// - /// PMOVZXWD xmm, m64 + /// PMOVZXWD xmm1, m64 + /// VPMOVZXWD xmm1, m64 + /// VPMOVZXWD xmm1 {k1}{z}, m64 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector128 ConvertToVector128Int32(ushort* address) { throw new PlatformNotSupportedException(); } /// - /// PMOVSXBQ xmm, m16 + /// PMOVSXBQ xmm1, m16 + /// VPMOVSXBQ xmm1, m16 + /// VPMOVSXBQ xmm1 {k1}{z}, m16 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector128 ConvertToVector128Int64(sbyte* address) { throw new PlatformNotSupportedException(); } /// - /// PMOVZXBQ xmm, m16 + /// PMOVZXBQ xmm1, m16 + /// VPMOVZXBQ xmm1, m16 + /// VPMOVZXBQ xmm1 {k1}{z}, m16 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector128 ConvertToVector128Int64(byte* address) { throw new PlatformNotSupportedException(); } /// - /// PMOVSXWQ xmm, m32 + /// PMOVSXWQ xmm1, m32 + /// VPMOVSXWQ xmm1, m32 + /// VPMOVSXWQ xmm1 {k1}{z}, m32 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector128 ConvertToVector128Int64(short* address) { throw new PlatformNotSupportedException(); } /// - /// PMOVZXWQ xmm, m32 + /// PMOVZXWQ xmm1, m32 + /// VPMOVZXWQ xmm1, m32 + /// VPMOVZXWQ xmm1 {k1}{z}, m32 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector128 ConvertToVector128Int64(ushort* address) { throw new PlatformNotSupportedException(); } /// - /// PMOVSXDQ xmm, m64 + /// PMOVSXDQ xmm1, m64 + /// VPMOVSXDQ xmm1, m64 + /// VPMOVSXDQ xmm1 {k1}{z}, m64 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector128 ConvertToVector128Int64(int* address) { throw new PlatformNotSupportedException(); } /// - /// PMOVZXDQ xmm, m64 + /// PMOVZXDQ xmm1, m64 + /// VPMOVZXDQ xmm1, m64 + /// VPMOVZXDQ xmm1 {k1}{z}, m64 /// The native signature does not exist. We provide this additional overload for completeness. 
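The pointer overloads above fold the load into the widening instruction itself. A minimal sketch (helper name hypothetical; assumes an unsafe context and Sse41.IsSupported). The memory form reads only as many bytes as the narrow elements occupy (m64 here) and does not require 16-byte alignment:

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    // PMOVSXWD xmm1, m64: sign-extends four Int16 values straight from memory.
    static unsafe Vector128<int> LoadWidened(short* address)
        => Sse41.ConvertToVector128Int32(address);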
/// public static unsafe Vector128 ConvertToVector128Int64(uint* address) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_dp_ps (__m128 a, __m128 b, const int imm8) - /// DPPS xmm, xmm/m128, imm8 + /// DPPS xmm1, xmm2/m128, imm8 + /// VDPPS xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 DotProduct(Vector128 left, Vector128 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_dp_pd (__m128d a, __m128d b, const int imm8) - /// DPPD xmm, xmm/m128, imm8 + /// DPPD xmm1, xmm2/m128, imm8 + /// VDPPD xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 DotProduct(Vector128 left, Vector128 right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); } /// /// int _mm_extract_epi8 (__m128i a, const int imm8) - /// PEXTRB reg/m8, xmm, imm8 + /// PEXTRB r/m8, xmm1, imm8 + /// VPEXTRB r/m8, xmm1, imm8 /// public static byte Extract(Vector128 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } /// /// int _mm_extract_epi32 (__m128i a, const int imm8) - /// PEXTRD reg/m32, xmm, imm8 + /// PEXTRD r/m32, xmm1, imm8 + /// VPEXTRD r/m32, xmm1, imm8 /// public static int Extract(Vector128 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } /// /// int _mm_extract_epi32 (__m128i a, const int imm8) - /// PEXTRD reg/m32, xmm, imm8 + /// PEXTRD r/m32, xmm1, imm8 + /// VPEXTRD r/m32, xmm1, imm8 /// public static uint Extract(Vector128 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } /// /// int _mm_extract_ps (__m128 a, const int imm8) - /// EXTRACTPS xmm, xmm/m32, imm8 + /// EXTRACTPS r/m32, xmm1, imm8 + /// VEXTRACTPS r/m32, xmm1, imm8 /// public static float Extract(Vector128 value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_floor_ps (__m128 a) - /// ROUNDPS xmm, xmm/m128, imm8(9) + /// ROUNDPS xmm1, xmm2/m128, imm8(9) + /// VROUNDPS xmm1, xmm2/m128, imm8(9) /// public static Vector128 Floor(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_floor_pd (__m128d a) - /// ROUNDPD xmm, xmm/m128, imm8(9) + /// ROUNDPD xmm1, xmm2/m128, imm8(9) + /// VROUNDPD xmm1, xmm2/m128, imm8(9) /// public static Vector128 Floor(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_floor_sd (__m128d a) - /// ROUNDSD xmm, xmm/m128, imm8(9) + /// __m128 _mm_floor_ss (__m128 a) + /// ROUNDSS xmm1, xmm2/m128, imm8(9) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(9) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 FloorScalar(Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 FloorScalar(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_floor_ss (__m128 a) - /// ROUNDSS xmm, xmm/m128, imm8(9) + /// __m128 _mm_floor_ss (__m128 a, __m128 b) + /// ROUNDSS xmm1, xmm2/m128, imm8(9) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(9) + /// + public static Vector128 FloorScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } + /// + /// __m128d _mm_floor_sd (__m128d a) + /// ROUNDSD xmm1, xmm2/m128, imm8(9) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(9) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. 
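For DPPS the immediate does double duty: the high nibble selects which element products enter the sum, and the low nibble selects which result lanes receive it. A minimal sketch (helper name hypothetical, Sse41.IsSupported assumed):

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    // 0xF1: multiply all four float pairs, write the sum to lane 0 only.
    static float Dot4(Vector128<float> a, Vector128<float> b)
        => Sse41.DotProduct(a, b, 0xF1).ToScalar();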
/// - public static Vector128 FloorScalar(Vector128 value) { throw new PlatformNotSupportedException(); } - + public static Vector128 FloorScalar(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128d _mm_floor_sd (__m128d a, __m128d b) - /// ROUNDSD xmm, xmm/m128, imm8(9) + /// ROUNDSD xmm1, xmm2/m128, imm8(9) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(9) /// public static Vector128 FloorScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } - /// - /// __m128 _mm_floor_ss (__m128 a, __m128 b) - /// ROUNDSS xmm, xmm/m128, imm8(9) - /// - public static Vector128 FloorScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_insert_epi8 (__m128i a, int i, const int imm8) - /// PINSRB xmm, reg/m8, imm8 + /// PINSRB xmm1, r/m8, imm8 + /// VPINSRB xmm1, xmm2, r/m8, imm8 /// public static Vector128 Insert(Vector128 value, sbyte data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_insert_epi8 (__m128i a, int i, const int imm8) - /// PINSRB xmm, reg/m8, imm8 + /// PINSRB xmm1, r/m8, imm8 + /// VPINSRB xmm1, xmm2, r/m8, imm8 /// public static Vector128 Insert(Vector128 value, byte data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_insert_epi32 (__m128i a, int i, const int imm8) - /// PINSRD xmm, reg/m32, imm8 + /// PINSRD xmm1, r/m32, imm8 + /// VPINSRD xmm1, xmm2, r/m32, imm8 /// public static Vector128 Insert(Vector128 value, int data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_insert_epi32 (__m128i a, int i, const int imm8) - /// PINSRD xmm, reg/m32, imm8 + /// PINSRD xmm1, r/m32, imm8 + /// VPINSRD xmm1, xmm2, r/m32, imm8 /// public static Vector128 Insert(Vector128 value, uint data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_insert_ps (__m128 a, __m128 b, const int imm8) - /// INSERTPS xmm, xmm/m32, imm8 + /// INSERTPS xmm1, xmm2/m32, imm8 + /// VINSERTPS xmm1, xmm2, xmm3/m32, imm8 /// public static Vector128 Insert(Vector128 value, Vector128 data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) + /// MOVNTDQA xmm1, m128 + /// VMOVNTDQA xmm1, m128 + /// + public static unsafe Vector128 LoadAlignedVector128NonTemporal(sbyte* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) + /// MOVNTDQA xmm1, m128 + /// VMOVNTDQA xmm1, m128 + /// + public static unsafe Vector128 LoadAlignedVector128NonTemporal(byte* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) + /// MOVNTDQA xmm1, m128 + /// VMOVNTDQA xmm1, m128 + /// + public static unsafe Vector128 LoadAlignedVector128NonTemporal(short* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) + /// MOVNTDQA xmm1, m128 + /// VMOVNTDQA xmm1, m128 + /// + public static unsafe Vector128 LoadAlignedVector128NonTemporal(ushort* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) + /// MOVNTDQA xmm1, m128 + /// VMOVNTDQA xmm1, m128 + /// + public static unsafe Vector128 LoadAlignedVector128NonTemporal(int* address) { throw new PlatformNotSupportedException(); } + 
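Since the PINSRB/PINSRD and PEXTRB/PEXTRD forms above encode the lane number in imm8, the managed index argument must be a compile-time constant (hence [ConstantExpected]). A minimal sketch (helper names hypothetical, Sse41.IsSupported assumed):

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    // PINSRD: returns a copy of 'value' with lane 2 replaced by 'data'.
    static Vector128<int> WithLane2(Vector128<int> value, int data)
        => Sse41.Insert(value, data, 2);

    // PEXTRD: reads lane 2 out into a general-purpose register.
    static int Lane2(Vector128<int> value)
        => Sse41.Extract(value, 2);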
/// + /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) + /// MOVNTDQA xmm1, m128 + /// VMOVNTDQA xmm1, m128 + /// + public static unsafe Vector128 LoadAlignedVector128NonTemporal(uint* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) + /// MOVNTDQA xmm1, m128 + /// VMOVNTDQA xmm1, m128 + /// + public static unsafe Vector128 LoadAlignedVector128NonTemporal(long* address) { throw new PlatformNotSupportedException(); } + /// + /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) + /// MOVNTDQA xmm1, m128 + /// VMOVNTDQA xmm1, m128 + /// + public static unsafe Vector128 LoadAlignedVector128NonTemporal(ulong* address) { throw new PlatformNotSupportedException(); } + /// /// __m128i _mm_max_epi8 (__m128i a, __m128i b) - /// PMAXSB xmm, xmm/m128 + /// PMAXSB xmm1, xmm2/m128 + /// VPMAXSB xmm1, xmm2, xmm3/m128 + /// VPMAXSB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Max(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_max_epu16 (__m128i a, __m128i b) - /// PMAXUW xmm, xmm/m128 + /// PMAXUW xmm1, xmm2/m128 + /// VPMAXUW xmm1, xmm2, xmm3/m128 + /// VPMAXUW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Max(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_max_epi32 (__m128i a, __m128i b) - /// PMAXSD xmm, xmm/m128 + /// PMAXSD xmm1, xmm2/m128 + /// VPMAXSD xmm1, xmm2, xmm3/m128 + /// VPMAXSD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Max(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_max_epu32 (__m128i a, __m128i b) - /// PMAXUD xmm, xmm/m128 + /// PMAXUD xmm1, xmm2/m128 + /// VPMAXUD xmm1, xmm2, xmm3/m128 + /// VPMAXUD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Max(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_min_epi8 (__m128i a, __m128i b) - /// PMINSB xmm, xmm/m128 + /// PMINSB xmm1, xmm2/m128 + /// VPMINSB xmm1, xmm2, xmm3/m128 + /// VPMINSB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Min(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_min_epu16 (__m128i a, __m128i b) - /// PMINUW xmm, xmm/m128 + /// PMINUW xmm1, xmm2/m128 + /// VPMINUW xmm1, xmm2, xmm3/m128 + /// VPMINUW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Min(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_min_epi32 (__m128i a, __m128i b) - /// PMINSD xmm, xmm/m128 + /// PMINSD xmm1, xmm2/m128 + /// VPMINSD xmm1, xmm2, xmm3/m128 + /// VPMINSD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Min(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_min_epu32 (__m128i a, __m128i b) - /// PMINUD xmm, xmm/m128 + /// PMINUD xmm1, xmm2/m128 + /// VPMINUD xmm1, xmm2, xmm3/m128 + /// VPMINUD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Min(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_minpos_epu16 (__m128i a) - /// PHMINPOSUW xmm, xmm/m128 + /// PHMINPOSUW xmm1, xmm2/m128 + /// VPHMINPOSUW xmm1, xmm2/m128 /// public static Vector128 MinHorizontal(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_mpsadbw_epu8 (__m128i a, __m128i b, const int imm8) - /// MPSADBW xmm, xmm/m128, 
imm8 + /// MPSADBW xmm1, xmm2/m128, imm8 + /// VMPSADBW xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 MultipleSumAbsoluteDifferences(Vector128 left, Vector128 right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_mul_epi32 (__m128i a, __m128i b) - /// PMULDQ xmm, xmm/m128 + /// PMULDQ xmm1, xmm2/m128 + /// VPMULDQ xmm1, xmm2, xmm3/m128 + /// VPMULDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Multiply(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_mullo_epi32 (__m128i a, __m128i b) - /// PMULLD xmm, xmm/m128 + /// PMULLD xmm1, xmm2/m128 + /// VPMULLD xmm1, xmm2, xmm3/m128 + /// VPMULLD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 MultiplyLow(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_mullo_epi32 (__m128i a, __m128i b) - /// PMULLD xmm, xmm/m128 + /// PMULLD xmm1, xmm2/m128 + /// VPMULLD xmm1, xmm2, xmm3/m128 + /// VPMULLD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 MultiplyLow(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_packus_epi32 (__m128i a, __m128i b) - /// PACKUSDW xmm, xmm/m128 + /// PACKUSDW xmm1, xmm2/m128 + /// VPACKUSDW xmm1, xmm2, xmm3/m128 + /// VPACKUSDW xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 PackUnsignedSaturate(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_round_ps (__m128 a, int rounding) - /// ROUNDPS xmm, xmm/m128, imm8(8) - /// _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC - /// - public static Vector128 RoundToNearestInteger(Vector128 value) { throw new PlatformNotSupportedException(); } - /// - /// _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC; ROUNDPS xmm, xmm/m128, imm8(9) - /// - public static Vector128 RoundToNegativeInfinity(Vector128 value) { throw new PlatformNotSupportedException(); } - /// - /// _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC; ROUNDPS xmm, xmm/m128, imm8(10) - /// - public static Vector128 RoundToPositiveInfinity(Vector128 value) { throw new PlatformNotSupportedException(); } - /// - /// _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC; ROUNDPS xmm, xmm/m128, imm8(11) - /// - public static Vector128 RoundToZero(Vector128 value) { throw new PlatformNotSupportedException(); } - /// - /// _MM_FROUND_CUR_DIRECTION; ROUNDPS xmm, xmm/m128, imm8(4) + /// __m128 _mm_round_ps (__m128 a, _MM_FROUND_CUR_DIRECTION) + /// ROUNDPS xmm1, xmm2/m128, imm8(4) + /// VROUNDPS xmm1, xmm2/m128, imm8(4) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. 
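The two multiplies documented above differ in shape, not just precision: PMULDQ consumes only the even-indexed Int32 lanes and widens each product to 64 bits, while PMULLD multiplies every lane and truncates each product to its low 32 bits. A minimal sketch (helper name hypothetical, Sse41.IsSupported assumed):

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    static (Vector128<long> Widening, Vector128<int> Low) Multiplies(
        Vector128<int> a, Vector128<int> b)
        => (Sse41.Multiply(a, b),      // PMULDQ: lanes 0 and 2, full 64-bit products
            Sse41.MultiplyLow(a, b));  // PMULLD: all lanes, low 32 bits of each product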
/// public static Vector128 RoundCurrentDirection(Vector128 value) { throw new PlatformNotSupportedException(); } - - /// - /// __m128d _mm_round_pd (__m128d a, int rounding) - /// ROUNDPD xmm, xmm/m128, imm8(8) - /// _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC - /// - public static Vector128 RoundToNearestInteger(Vector128 value) { throw new PlatformNotSupportedException(); } - /// - /// _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC; ROUNDPD xmm, xmm/m128, imm8(9) - /// - public static Vector128 RoundToNegativeInfinity(Vector128 value) { throw new PlatformNotSupportedException(); } - /// - /// _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC; ROUNDPD xmm, xmm/m128, imm8(10) - /// - public static Vector128 RoundToPositiveInfinity(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC; ROUNDPD xmm, xmm/m128, imm8(11) - /// - public static Vector128 RoundToZero(Vector128 value) { throw new PlatformNotSupportedException(); } - /// - /// _MM_FROUND_CUR_DIRECTION; ROUNDPD xmm, xmm/m128, imm8(4) + /// __m128d _mm_round_pd (__m128d a, _MM_FROUND_CUR_DIRECTION) + /// ROUNDPD xmm1, xmm2/m128, imm8(4) + /// VROUNDPD xmm1, xmm2/m128, imm8(4) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// public static Vector128 RoundCurrentDirection(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_CUR_DIRECTION) - /// ROUNDSD xmm, xmm/m128, imm8(4) + /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_CUR_DIRECTION) + /// ROUNDSS xmm1, xmm2/m128, imm8(4) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(4) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundCurrentDirectionScalar(Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundCurrentDirectionScalar(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) - /// ROUNDSD xmm, xmm/m128, imm8(8) + /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_CUR_DIRECTION) + /// ROUNDSS xmm1, xmm2/m128, imm8(4) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(4) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundToNearestIntegerScalar(Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundCurrentDirectionScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) - /// ROUNDSD xmm, xmm/m128, imm8(9) + /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_CUR_DIRECTION) + /// ROUNDSD xmm1, xmm2/m128, imm8(4) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(4) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. 
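imm8(4) is _MM_FROUND_CUR_DIRECTION, so these overloads round according to whatever mode MXCSR currently holds (round-to-nearest-even by default) rather than a mode fixed at the call site. A minimal sketch (helper name hypothetical, Sse41.IsSupported assumed):

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    // Result depends on the ambient MXCSR rounding mode.
    static Vector128<float> RoundWithAmbientMode(Vector128<float> value)
        => Sse41.RoundCurrentDirection(value);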
/// - public static Vector128 RoundToNegativeInfinityScalar(Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundCurrentDirectionScalar(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) - /// ROUNDSD xmm, xmm/m128, imm8(10) + /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_CUR_DIRECTION) + /// ROUNDSD xmm1, xmm2/m128, imm8(4) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(4) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundToPositiveInfinityScalar(Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundCurrentDirectionScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } + /// - /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) - /// ROUNDSD xmm, xmm/m128, imm8(11) + /// __m128 _mm_round_ps (__m128 a, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) + /// ROUNDPS xmm1, xmm2/m128, imm8(8) + /// VROUNDPS xmm1, xmm2/m128, imm8(8) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundToZeroScalar(Vector128 value) { throw new PlatformNotSupportedException(); } - + public static Vector128 RoundToNearestInteger(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_CUR_DIRECTION) - /// ROUNDSD xmm, xmm/m128, imm8(4) + /// __m128d _mm_round_pd (__m128d a, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) + /// ROUNDPD xmm1, xmm2/m128, imm8(8) + /// VROUNDPD xmm1, xmm2/m128, imm8(8) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundCurrentDirectionScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundToNearestInteger(Vector128 value) { throw new PlatformNotSupportedException(); } + /// - /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) - /// ROUNDSD xmm, xmm/m128, imm8(8) + /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) + /// ROUNDSS xmm1, xmm2/m128, imm8(8) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(8) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundToNearestIntegerScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundToNearestIntegerScalar(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) - /// ROUNDSD xmm, xmm/m128, imm8(9) + /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) + /// ROUNDSS xmm1, xmm2/m128, imm8(8) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(8) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. 
/// - public static Vector128 RoundToNegativeInfinityScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundToNearestIntegerScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) - /// ROUNDSD xmm, xmm/m128, imm8(10) + /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) + /// ROUNDSD xmm1, xmm2/m128, imm8(8) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(8) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundToPositiveInfinityScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundToNearestIntegerScalar(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) - /// ROUNDSD xmm, xmm/m128, imm8(11) + /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) + /// ROUNDSD xmm1, xmm2/m128, imm8(8) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(8) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundToZeroScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundToNearestIntegerScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_CUR_DIRECTION) - /// ROUNDSS xmm, xmm/m128, imm8(4) + /// __m128 _mm_round_ps (__m128 a, _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) + /// ROUNDPS xmm1, xmm2/m128, imm8(9) + /// VROUNDPS xmm1, xmm2/m128, imm8(9) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundCurrentDirectionScalar(Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundToNegativeInfinity(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) - /// ROUNDSS xmm, xmm/m128, imm8(8) + /// __m128d _mm_round_pd (__m128d a, _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) + /// ROUNDPD xmm1, xmm2/m128, imm8(9) + /// VROUNDPD xmm1, xmm2/m128, imm8(9) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundToNearestIntegerScalar(Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundToNegativeInfinity(Vector128 value) { throw new PlatformNotSupportedException(); } + /// /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC) - /// ROUNDSS xmm, xmm/m128, imm8(9) + /// ROUNDSS xmm1, xmm2/m128, imm8(9) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(9) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. 
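In the two-argument scalar overloads only element 0 of 'value' is rounded; the remaining elements of the result come from 'upper', mirroring how ROUNDSD/ROUNDSS merge destination and source. A minimal sketch (helper name hypothetical, Sse41.IsSupported assumed):

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    // Lane 0: round(value[0]); lane 1: upper[1].
    static Vector128<double> RoundLowLane(Vector128<double> upper, Vector128<double> value)
        => Sse41.RoundToNearestIntegerScalar(upper, value);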
/// public static Vector128 RoundToNegativeInfinityScalar(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC) - /// ROUNDSS xmm, xmm/m128, imm8(10) + /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC) + /// ROUNDSS xmm1, xmm2/m128, imm8(9) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(9) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundToPositiveInfinityScalar(Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundToNegativeInfinityScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) - /// ROUNDSS xmm, xmm/m128, imm8(11) + /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC) + /// ROUNDSD xmm1, xmm2/m128, imm8(9) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(9) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundToZeroScalar(Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundToNegativeInfinityScalar(Vector128 value) { throw new PlatformNotSupportedException(); } + /// + /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC) + /// ROUNDSD xmm1, xmm2/m128, imm8(9) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(9) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. + /// + public static Vector128 RoundToNegativeInfinityScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_CUR_DIRECTION) - /// ROUNDSS xmm, xmm/m128, imm8(4) + /// __m128 _mm_round_ps (__m128 a, _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) + /// ROUNDPS xmm1, xmm2/m128, imm8(10) + /// VROUNDPS xmm1, xmm2/m128, imm8(10) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundCurrentDirectionScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundToPositiveInfinity(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) - /// ROUNDSS xmm, xmm/m128, imm8(8) + /// __m128d _mm_round_pd (__m128d a, _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) + /// ROUNDPD xmm1, xmm2/m128, imm8(10) + /// VROUNDPD xmm1, xmm2/m128, imm8(10) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. 
/// - public static Vector128 RoundToNearestIntegerScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundToPositiveInfinity(Vector128 value) { throw new PlatformNotSupportedException(); } + /// - /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC) - /// ROUNDSS xmm, xmm/m128, imm8(9) + /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC) + /// ROUNDSS xmm1, xmm2/m128, imm8(10) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(10) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundToNegativeInfinityScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundToPositiveInfinityScalar(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC) - /// ROUNDSS xmm, xmm/m128, imm8(10) + /// ROUNDSS xmm1, xmm2/m128, imm8(10) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(10) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// public static Vector128 RoundToPositiveInfinityScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) - /// ROUNDSS xmm, xmm/m128, imm8(11) - /// - public static Vector128 RoundToZeroScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } - - /// - /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) - /// MOVNTDQA xmm, m128 + /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC) + /// ROUNDSD xmm1, xmm2/m128, imm8(10) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(10) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static unsafe Vector128 LoadAlignedVector128NonTemporal(sbyte* address) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundToPositiveInfinityScalar(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) - /// MOVNTDQA xmm, m128 + /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC) + /// ROUNDSD xmm1, xmm2/m128, imm8(10) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(10) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static unsafe Vector128 LoadAlignedVector128NonTemporal(byte* address) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundToPositiveInfinityScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } + /// - /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) - /// MOVNTDQA xmm, m128 + /// __m128 _mm_round_ps (__m128 a, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) + /// ROUNDPS xmm1, xmm2/m128, imm8(11) + /// VROUNDPS xmm1, xmm2/m128, imm8(11) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. 
/// - public static unsafe Vector128 LoadAlignedVector128NonTemporal(short* address) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundToZero(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) - /// MOVNTDQA xmm, m128 + /// __m128d _mm_round_pd (__m128d a, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) + /// ROUNDPD xmm1, xmm2/m128, imm8(11) + /// VROUNDPD xmm1, xmm2/m128, imm8(11) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static unsafe Vector128 LoadAlignedVector128NonTemporal(ushort* address) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundToZero(Vector128 value) { throw new PlatformNotSupportedException(); } + /// - /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) - /// MOVNTDQA xmm, m128 + /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) + /// ROUNDSS xmm1, xmm2/m128, imm8(11) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(11) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static unsafe Vector128 LoadAlignedVector128NonTemporal(int* address) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundToZeroScalar(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) - /// MOVNTDQA xmm, m128 + /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) + /// ROUNDSS xmm1, xmm2/m128, imm8(11) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(11) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static unsafe Vector128 LoadAlignedVector128NonTemporal(uint* address) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundToZeroScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) - /// MOVNTDQA xmm, m128 + /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) + /// ROUNDSD xmm1, xmm2/m128, imm8(11) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(11) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static unsafe Vector128 LoadAlignedVector128NonTemporal(long* address) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundToZeroScalar(Vector128 value) { throw new PlatformNotSupportedException(); } /// - /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) - /// MOVNTDQA xmm, m128 + /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) + /// ROUNDSD xmm1, xmm2/m128, imm8(11) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(11) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. 
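imm8(11) rounds toward zero, i.e. truncation, which only coincides with Floor for non-negative inputs. A minimal sketch (helper name hypothetical, Sse41.IsSupported assumed):

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    // RoundToZero(-1.5f) yields -1.0f, where Floor(-1.5f) yields -2.0f.
    static Vector128<float> Truncate(Vector128<float> value)
        => Sse41.RoundToZero(value);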
/// - public static unsafe Vector128 LoadAlignedVector128NonTemporal(ulong* address) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundToZeroScalar(Vector128 upper, Vector128 value) { throw new PlatformNotSupportedException(); } /// /// int _mm_testc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; CF=1 + /// VPTEST xmm1, xmm2/m128 ; CF=1 /// public static bool TestC(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; CF=1 + /// VPTEST xmm1, xmm2/m128 ; CF=1 /// public static bool TestC(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; CF=1 + /// VPTEST xmm1, xmm2/m128 ; CF=1 /// public static bool TestC(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; CF=1 + /// VPTEST xmm1, xmm2/m128 ; CF=1 /// public static bool TestC(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; CF=1 + /// VPTEST xmm1, xmm2/m128 ; CF=1 /// public static bool TestC(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; CF=1 + /// VPTEST xmm1, xmm2/m128 ; CF=1 /// public static bool TestC(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; CF=1 + /// VPTEST xmm1, xmm2/m128 ; CF=1 /// public static bool TestC(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; CF=1 + /// VPTEST xmm1, xmm2/m128 ; CF=1 /// public static bool TestC(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testnzc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 + /// VPTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testnzc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 + /// VPTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testnzc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 + /// VPTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testnzc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 + /// VPTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testnzc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, 
xmm2/m128 ; ZF=0 && CF=0 + /// VPTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testnzc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 + /// VPTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testnzc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 + /// VPTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testnzc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 + /// VPTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testz_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=1 + /// VPTEST xmm1, xmm2/m128 ; ZF=1 /// public static bool TestZ(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testz_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=1 + /// VPTEST xmm1, xmm2/m128 ; ZF=1 /// public static bool TestZ(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testz_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=1 + /// VPTEST xmm1, xmm2/m128 ; ZF=1 /// public static bool TestZ(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testz_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=1 + /// VPTEST xmm1, xmm2/m128 ; ZF=1 /// public static bool TestZ(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testz_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=1 + /// VPTEST xmm1, xmm2/m128 ; ZF=1 /// public static bool TestZ(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testz_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=1 + /// VPTEST xmm1, xmm2/m128 ; ZF=1 /// public static bool TestZ(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testz_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=1 + /// VPTEST xmm1, xmm2/m128 ; ZF=1 /// public static bool TestZ(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// int _mm_testz_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=1 + /// VPTEST xmm1, xmm2/m128 ; ZF=1 /// public static bool TestZ(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse41.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse41.cs index 46e8b7aeb94698..b69727e61c76bc 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse41.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse41.cs @@ -26,26 +26,30 @@ internal X64() { } /// /// __int64 
_mm_extract_epi64 (__m128i a, const int imm8) - /// PEXTRQ reg/m64, xmm, imm8 + /// PEXTRQ r/m64, xmm1, imm8 + /// VPEXTRQ r/m64, xmm1, imm8 /// This intrinsic is only available on 64-bit processes /// public static long Extract(Vector128 value, [ConstantExpected] byte index) => Extract(value, index); /// /// __int64 _mm_extract_epi64 (__m128i a, const int imm8) - /// PEXTRQ reg/m64, xmm, imm8 + /// PEXTRQ r/m64, xmm1, imm8 + /// VPEXTRQ r/m64, xmm1, imm8 /// This intrinsic is only available on 64-bit processes /// public static ulong Extract(Vector128 value, [ConstantExpected] byte index) => Extract(value, index); /// /// __m128i _mm_insert_epi64 (__m128i a, __int64 i, const int imm8) - /// PINSRQ xmm, reg/m64, imm8 + /// PINSRQ xmm1, r/m64, imm8 + /// VPINSRQ xmm1, xmm2, r/m64, imm8 /// This intrinsic is only available on 64-bit processes /// public static Vector128 Insert(Vector128 value, long data, [ConstantExpected] byte index) => Insert(value, data, index); /// /// __m128i _mm_insert_epi64 (__m128i a, __int64 i, const int imm8) - /// PINSRQ xmm, reg/m64, imm8 + /// PINSRQ xmm1, r/m64, imm8 + /// VPINSRQ xmm1, xmm2, r/m64, imm8 /// This intrinsic is only available on 64-bit processes /// public static Vector128 Insert(Vector128 value, ulong data, [ConstantExpected] byte index) => Insert(value, data, index); @@ -53,744 +57,944 @@ internal X64() { } /// /// __m128i _mm_blend_epi16 (__m128i a, __m128i b, const int imm8) - /// PBLENDW xmm, xmm/m128 imm8 + /// PBLENDW xmm1, xmm2/m128, imm8 + /// VPBLENDW xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 Blend(Vector128 left, Vector128 right, [ConstantExpected] byte control) => Blend(left, right, control); - /// /// __m128i _mm_blend_epi16 (__m128i a, __m128i b, const int imm8) - /// PBLENDW xmm, xmm/m128 imm8 + /// PBLENDW xmm1, xmm2/m128, imm8 + /// VPBLENDW xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 Blend(Vector128 left, Vector128 right, [ConstantExpected] byte control) => Blend(left, right, control); - /// /// __m128 _mm_blend_ps (__m128 a, __m128 b, const int imm8) - /// BLENDPS xmm, xmm/m128, imm8 + /// BLENDPS xmm1, xmm2/m128, imm8 + /// VBLENDPS xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 Blend(Vector128 left, Vector128 right, [ConstantExpected] byte control) => Blend(left, right, control); - /// /// __m128d _mm_blend_pd (__m128d a, __m128d b, const int imm8) - /// BLENDPD xmm, xmm/m128, imm8 + /// BLENDPD xmm1, xmm2/m128, imm8 + /// VBLENDPD xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 Blend(Vector128 left, Vector128 right, [ConstantExpected] byte control) => Blend(left, right, control); /// /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask) - /// PBLENDVB xmm, xmm/m128, xmm + /// PBLENDVB xmm1, xmm2/m128, <XMM0> + /// VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4 /// public static Vector128 BlendVariable(Vector128 left, Vector128 right, Vector128 mask) => BlendVariable(left, right, mask); /// /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask) - /// PBLENDVB xmm, xmm/m128, xmm + /// PBLENDVB xmm1, xmm2/m128, <XMM0> + /// VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4 /// public static Vector128 BlendVariable(Vector128 left, Vector128 right, Vector128 mask) => BlendVariable(left, right, mask); /// /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask) - /// PBLENDVB xmm, xmm/m128, xmm + /// PBLENDVB xmm1, xmm2/m128, <XMM0> + /// VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4 /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask 
byte for the selected elements. /// public static Vector128 BlendVariable(Vector128 left, Vector128 right, Vector128 mask) => BlendVariable(left, right, mask); /// /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask) - /// PBLENDVB xmm, xmm/m128, xmm + /// PBLENDVB xmm1, xmm2/m128, <XMM0> + /// VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4 /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements. /// public static Vector128 BlendVariable(Vector128 left, Vector128 right, Vector128 mask) => BlendVariable(left, right, mask); /// /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask) - /// PBLENDVB xmm, xmm/m128, xmm + /// PBLENDVB xmm1, xmm2/m128, <XMM0> + /// VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4 /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements. /// public static Vector128 BlendVariable(Vector128 left, Vector128 right, Vector128 mask) => BlendVariable(left, right, mask); /// /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask) - /// PBLENDVB xmm, xmm/m128, xmm + /// PBLENDVB xmm1, xmm2/m128, <XMM0> + /// VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4 /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements. /// public static Vector128 BlendVariable(Vector128 left, Vector128 right, Vector128 mask) => BlendVariable(left, right, mask); /// /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask) - /// PBLENDVB xmm, xmm/m128, xmm + /// PBLENDVB xmm1, xmm2/m128, <XMM0> + /// VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4 /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements. /// public static Vector128 BlendVariable(Vector128 left, Vector128 right, Vector128 mask) => BlendVariable(left, right, mask); /// /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask) - /// PBLENDVB xmm, xmm/m128, xmm + /// PBLENDVB xmm1, xmm2/m128, <XMM0> + /// VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4 /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements. 
/// public static Vector128 BlendVariable(Vector128 left, Vector128 right, Vector128 mask) => BlendVariable(left, right, mask); /// /// __m128 _mm_blendv_ps (__m128 a, __m128 b, __m128 mask) - /// BLENDVPS xmm, xmm/m128, xmm0 + /// BLENDVPS xmm1, xmm2/m128, <XMM0> + /// VBLENDVPS xmm1, xmm2, xmm3/m128, xmm4 /// public static Vector128 BlendVariable(Vector128 left, Vector128 right, Vector128 mask) => BlendVariable(left, right, mask); /// /// __m128d _mm_blendv_pd (__m128d a, __m128d b, __m128d mask) - /// BLENDVPD xmm, xmm/m128, xmm0 + /// BLENDVPD xmm1, xmm2/m128, <XMM0> + /// VBLENDVPD xmm1, xmm2, xmm3/m128, xmm4 /// public static Vector128 BlendVariable(Vector128 left, Vector128 right, Vector128 mask) => BlendVariable(left, right, mask); /// /// __m128 _mm_ceil_ps (__m128 a) - /// ROUNDPS xmm, xmm/m128, imm8(10) + /// ROUNDPS xmm1, xmm2/m128, imm8(10) + /// VROUNDPS xmm1, xmm2/m128, imm8(10) /// public static Vector128 Ceiling(Vector128 value) => Ceiling(value); /// /// __m128d _mm_ceil_pd (__m128d a) - /// ROUNDPD xmm, xmm/m128, imm8(10) + /// ROUNDPD xmm1, xmm2/m128, imm8(10) + /// VROUNDPD xmm1, xmm2/m128, imm8(10) /// public static Vector128 Ceiling(Vector128 value) => Ceiling(value); /// - /// __m128d _mm_ceil_sd (__m128d a) - /// ROUNDSD xmm, xmm/m128, imm8(10) + /// __m128 _mm_ceil_ss (__m128 a) + /// ROUNDSS xmm1, xmm2/m128, imm8(10) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(10) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 CeilingScalar(Vector128 value) => CeilingScalar(value); + public static Vector128 CeilingScalar(Vector128 value) => CeilingScalar(value); /// - /// __m128 _mm_ceil_ss (__m128 a) - /// ROUNDSD xmm, xmm/m128, imm8(10) + /// __m128 _mm_ceil_ss (__m128 a, __m128 b) + /// ROUNDSS xmm1, xmm2/m128, imm8(10) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(10) + /// + public static Vector128 CeilingScalar(Vector128 upper, Vector128 value) => CeilingScalar(upper, value); + /// + /// __m128d _mm_ceil_sd (__m128d a) + /// ROUNDSD xmm1, xmm2/m128, imm8(10) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(10) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. 
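Unlike PBLENDVB, BLENDVPS consults only the sign bit of each 32-bit mask lane, so an all-ones/all-zeros comparison result is a ready-made mask. A minimal sketch (helper name hypothetical, Sse41.IsSupported assumed; NaN handling follows the comparison rather than MathF.Max):

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    // Lanes where a < b take 'b', giving a per-lane maximum.
    static Vector128<float> Max4(Vector128<float> a, Vector128<float> b)
        => Sse41.BlendVariable(a, b, Sse.CompareLessThan(a, b));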
/// - public static Vector128 CeilingScalar(Vector128 value) => CeilingScalar(value); - + public static Vector128 CeilingScalar(Vector128 value) => CeilingScalar(value); /// /// __m128d _mm_ceil_sd (__m128d a, __m128d b) - /// ROUNDSD xmm, xmm/m128, imm8(10) + /// ROUNDSD xmm1, xmm2/m128, imm8(10) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(10) /// public static Vector128 CeilingScalar(Vector128 upper, Vector128 value) => CeilingScalar(upper, value); - /// - /// __m128 _mm_ceil_ss (__m128 a, __m128 b) - /// ROUNDSS xmm, xmm/m128, imm8(10) - /// - public static Vector128 CeilingScalar(Vector128 upper, Vector128 value) => CeilingScalar(upper, value); /// /// __m128i _mm_cmpeq_epi64 (__m128i a, __m128i b) - /// PCMPEQQ xmm, xmm/m128 + /// PCMPEQQ xmm1, xmm2/m128 + /// VPCMPEQQ xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareEqual(Vector128 left, Vector128 right) => CompareEqual(left, right); /// /// __m128i _mm_cmpeq_epi64 (__m128i a, __m128i b) - /// PCMPEQQ xmm, xmm/m128 + /// PCMPEQQ xmm1, xmm2/m128 + /// VPCMPEQQ xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareEqual(Vector128 left, Vector128 right) => CompareEqual(left, right); /// /// __m128i _mm_cvtepi8_epi16 (__m128i a) - /// PMOVSXBW xmm, xmm + /// PMOVSXBW xmm1, xmm2/m64 + /// VPMOVSXBW xmm1, xmm2/m64 + /// VPMOVSXBW xmm1 {k1}{z}, xmm2/m64 /// public static Vector128 ConvertToVector128Int16(Vector128 value) => ConvertToVector128Int16(value); /// /// __m128i _mm_cvtepu8_epi16 (__m128i a) - /// PMOVZXBW xmm, xmm + /// PMOVZXBW xmm1, xmm2/m64 + /// VPMOVZXBW xmm1, xmm2/m64 + /// VPMOVZXBW xmm1 {k1}{z}, xmm2/m64 /// public static Vector128 ConvertToVector128Int16(Vector128 value) => ConvertToVector128Int16(value); /// /// __m128i _mm_cvtepi8_epi32 (__m128i a) - /// PMOVSXBD xmm, xmm + /// PMOVSXBD xmm1, xmm2/m32 + /// VPMOVSXBD xmm1, xmm2/m32 + /// VPMOVSXBD xmm1 {k1}{z}, xmm2/m32 /// public static Vector128 ConvertToVector128Int32(Vector128 value) => ConvertToVector128Int32(value); /// /// __m128i _mm_cvtepu8_epi32 (__m128i a) - /// PMOVZXBD xmm, xmm + /// PMOVZXBD xmm1, xmm2/m32 + /// VPMOVZXBD xmm1, xmm2/m32 + /// VPMOVZXBD xmm1 {k1}{z}, xmm2/m32 /// public static Vector128 ConvertToVector128Int32(Vector128 value) => ConvertToVector128Int32(value); /// /// __m128i _mm_cvtepi16_epi32 (__m128i a) - /// PMOVSXWD xmm, xmm + /// PMOVSXWD xmm1, xmm2/m64 + /// VPMOVSXWD xmm1, xmm2/m64 + /// VPMOVSXWD xmm1 {k1}{z}, xmm2/m64 /// public static Vector128 ConvertToVector128Int32(Vector128 value) => ConvertToVector128Int32(value); /// /// __m128i _mm_cvtepu16_epi32 (__m128i a) - /// PMOVZXWD xmm, xmm + /// PMOVZXWD xmm1, xmm2/m64 + /// VPMOVZXWD xmm1, xmm2/m64 + /// VPMOVZXWD xmm1 {k1}{z}, xmm2/m64 /// public static Vector128 ConvertToVector128Int32(Vector128 value) => ConvertToVector128Int32(value); /// /// __m128i _mm_cvtepi8_epi64 (__m128i a) - /// PMOVSXBQ xmm, xmm + /// PMOVSXBQ xmm1, xmm2/m16 + /// VPMOVSXBQ xmm1, xmm2/m16 + /// VPMOVSXBQ xmm1 {k1}{z}, xmm2/m16 /// public static Vector128 ConvertToVector128Int64(Vector128 value) => ConvertToVector128Int64(value); /// /// __m128i _mm_cvtepu8_epi64 (__m128i a) - /// PMOVZXBQ xmm, xmm + /// PMOVZXBQ xmm1, xmm2/m16 + /// VPMOVZXBQ xmm1, xmm2/m16 + /// VPMOVZXBQ xmm1 {k1}{z}, xmm2/m16 /// public static Vector128 ConvertToVector128Int64(Vector128 value) => ConvertToVector128Int64(value); /// /// __m128i _mm_cvtepi16_epi64 (__m128i a) - /// PMOVSXWQ xmm, xmm + /// PMOVSXWQ xmm1, xmm2/m32 + /// VPMOVSXWQ xmm1, xmm2/m32 + /// VPMOVSXWQ xmm1 {k1}{z}, xmm2/m32 /// public static 
Vector128 ConvertToVector128Int64(Vector128 value) => ConvertToVector128Int64(value); /// /// __m128i _mm_cvtepu16_epi64 (__m128i a) - /// PMOVZXWQ xmm, xmm + /// PMOVZXWQ xmm1, xmm2/m32 + /// VPMOVZXWQ xmm1, xmm2/m32 + /// VPMOVZXWQ xmm1 {k1}{z}, xmm2/m32 /// public static Vector128 ConvertToVector128Int64(Vector128 value) => ConvertToVector128Int64(value); /// /// __m128i _mm_cvtepi32_epi64 (__m128i a) - /// PMOVSXDQ xmm, xmm + /// PMOVSXDQ xmm1, xmm2/m64 + /// VPMOVSXDQ xmm1, xmm2/m64 + /// VPMOVSXDQ xmm1 {k1}{z}, xmm2/m64 /// public static Vector128 ConvertToVector128Int64(Vector128 value) => ConvertToVector128Int64(value); /// /// __m128i _mm_cvtepu32_epi64 (__m128i a) - /// PMOVZXDQ xmm, xmm + /// PMOVZXDQ xmm1, xmm2/m64 + /// VPMOVZXDQ xmm1, xmm2/m64 + /// VPMOVZXDQ xmm1 {k1}{z}, xmm2/m64 /// public static Vector128 ConvertToVector128Int64(Vector128 value) => ConvertToVector128Int64(value); /// - /// PMOVSXBW xmm, m64 + /// PMOVSXBW xmm1, m64 + /// VPMOVSXBW xmm1, m64 + /// VPMOVSXBW xmm1 {k1}{z}, m64 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector128 ConvertToVector128Int16(sbyte* address) => ConvertToVector128Int16(address); /// - /// PMOVZXBW xmm, m64 + /// PMOVZXBW xmm1, m64 + /// VPMOVZXBW xmm1, m64 + /// VPMOVZXBW xmm1 {k1}{z}, m64 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector128 ConvertToVector128Int16(byte* address) => ConvertToVector128Int16(address); /// - /// PMOVSXBD xmm, m32 + /// PMOVSXBD xmm1, m32 + /// VPMOVSXBD xmm1, m32 + /// VPMOVSXBD xmm1 {k1}{z}, m32 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector128 ConvertToVector128Int32(sbyte* address) => ConvertToVector128Int32(address); /// - /// PMOVZXBD xmm, m32 + /// PMOVZXBD xmm1, m32 + /// VPMOVZXBD xmm1, m32 + /// VPMOVZXBD xmm1 {k1}{z}, m32 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector128 ConvertToVector128Int32(byte* address) => ConvertToVector128Int32(address); /// - /// PMOVSXWD xmm, m64 + /// PMOVSXWD xmm1, m64 + /// VPMOVSXWD xmm1, m64 + /// VPMOVSXWD xmm1 {k1}{z}, m64 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector128 ConvertToVector128Int32(short* address) => ConvertToVector128Int32(address); /// - /// PMOVZXWD xmm, m64 + /// PMOVZXWD xmm1, m64 + /// VPMOVZXWD xmm1, m64 + /// VPMOVZXWD xmm1 {k1}{z}, m64 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector128 ConvertToVector128Int32(ushort* address) => ConvertToVector128Int32(address); /// - /// PMOVSXBQ xmm, m16 + /// PMOVSXBQ xmm1, m16 + /// VPMOVSXBQ xmm1, m16 + /// VPMOVSXBQ xmm1 {k1}{z}, m16 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector128 ConvertToVector128Int64(sbyte* address) => ConvertToVector128Int64(address); /// - /// PMOVZXBQ xmm, m16 + /// PMOVZXBQ xmm1, m16 + /// VPMOVZXBQ xmm1, m16 + /// VPMOVZXBQ xmm1 {k1}{z}, m16 /// The native signature does not exist. We provide this additional overload for completeness. 
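// A minimal sketch of the PMOVSX/PMOVZX widenings documented above, assuming SSE4.1
// support: the sign-extending forms replicate the sign bit, the zero-extending forms
// pad with zeros; the pointer overloads perform the same widening straight from memory.
Vector128<sbyte> narrowSigned   = Vector128.Create((sbyte)-1);
Vector128<byte>  narrowUnsigned = Vector128.Create((byte)0xFF);
Vector128<short> widenedSigned   = Sse41.ConvertToVector128Int16(narrowSigned);   // every element -1 (0xFFFF)
Vector128<short> widenedUnsigned = Sse41.ConvertToVector128Int16(narrowUnsigned); // every element 255 (0x00FF)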
/// public static unsafe Vector128 ConvertToVector128Int64(byte* address) => ConvertToVector128Int64(address); /// - /// PMOVSXWQ xmm, m32 + /// PMOVSXWQ xmm1, m32 + /// VPMOVSXWQ xmm1, m32 + /// VPMOVSXWQ xmm1 {k1}{z}, m32 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector128 ConvertToVector128Int64(short* address) => ConvertToVector128Int64(address); /// - /// PMOVZXWQ xmm, m32 + /// PMOVZXWQ xmm1, m32 + /// VPMOVZXWQ xmm1, m32 + /// VPMOVZXWQ xmm1 {k1}{z}, m32 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector128 ConvertToVector128Int64(ushort* address) => ConvertToVector128Int64(address); /// - /// PMOVSXDQ xmm, m64 + /// PMOVSXDQ xmm1, m64 + /// VPMOVSXDQ xmm1, m64 + /// VPMOVSXDQ xmm1 {k1}{z}, m64 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector128 ConvertToVector128Int64(int* address) => ConvertToVector128Int64(address); /// - /// PMOVZXDQ xmm, m64 + /// PMOVZXDQ xmm1, m64 + /// VPMOVZXDQ xmm1, m64 + /// VPMOVZXDQ xmm1 {k1}{z}, m64 /// The native signature does not exist. We provide this additional overload for completeness. /// public static unsafe Vector128 ConvertToVector128Int64(uint* address) => ConvertToVector128Int64(address); /// /// __m128 _mm_dp_ps (__m128 a, __m128 b, const int imm8) - /// DPPS xmm, xmm/m128, imm8 + /// DPPS xmm1, xmm2/m128, imm8 + /// VDPPS xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 DotProduct(Vector128 left, Vector128 right, [ConstantExpected] byte control) => DotProduct(left, right, control); /// /// __m128d _mm_dp_pd (__m128d a, __m128d b, const int imm8) - /// DPPD xmm, xmm/m128, imm8 + /// DPPD xmm1, xmm2/m128, imm8 + /// VDPPD xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 DotProduct(Vector128 left, Vector128 right, [ConstantExpected] byte control) => DotProduct(left, right, control); /// /// int _mm_extract_epi8 (__m128i a, const int imm8) - /// PEXTRB reg/m8, xmm, imm8 + /// PEXTRB r/m8, xmm1, imm8 + /// VPEXTRB r/m8, xmm1, imm8 /// public static byte Extract(Vector128 value, [ConstantExpected] byte index) => Extract(value, index); /// /// int _mm_extract_epi32 (__m128i a, const int imm8) - /// PEXTRD reg/m32, xmm, imm8 + /// PEXTRD r/m32, xmm1, imm8 + /// VPEXTRD r/m32, xmm1, imm8 /// public static int Extract(Vector128 value, [ConstantExpected] byte index) => Extract(value, index); /// /// int _mm_extract_epi32 (__m128i a, const int imm8) - /// PEXTRD reg/m32, xmm, imm8 + /// PEXTRD r/m32, xmm1, imm8 + /// VPEXTRD r/m32, xmm1, imm8 /// public static uint Extract(Vector128 value, [ConstantExpected] byte index) => Extract(value, index); /// /// int _mm_extract_ps (__m128 a, const int imm8) - /// EXTRACTPS xmm, xmm/m32, imm8 + /// EXTRACTPS r/m32, xmm1, imm8 + /// VEXTRACTPS r/m32, xmm1, imm8 /// public static float Extract(Vector128 value, [ConstantExpected] byte index) => Extract(value, index); /// /// __m128 _mm_floor_ps (__m128 a) - /// ROUNDPS xmm, xmm/m128, imm8(9) + /// ROUNDPS xmm1, xmm2/m128, imm8(9) + /// VROUNDPS xmm1, xmm2/m128, imm8(9) /// public static Vector128 Floor(Vector128 value) => Floor(value); /// /// __m128d _mm_floor_pd (__m128d a) - /// ROUNDPD xmm, xmm/m128, imm8(9) + /// ROUNDPD xmm1, xmm2/m128, imm8(9) + /// VROUNDPD xmm1, xmm2/m128, imm8(9) /// public static Vector128 Floor(Vector128 value) => Floor(value); /// - /// __m128d _mm_floor_sd (__m128d a) - /// ROUNDSD xmm, 
xmm/m128, imm8(9) + /// __m128 _mm_floor_ss (__m128 a) + /// ROUNDSS xmm1, xmm2/m128, imm8(9) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(9) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 FloorScalar(Vector128 value) => FloorScalar(value); + public static Vector128 FloorScalar(Vector128 value) => FloorScalar(value); /// - /// __m128 _mm_floor_ss (__m128 a) - /// ROUNDSS xmm, xmm/m128, imm8(9) + /// __m128 _mm_floor_ss (__m128 a, __m128 b) + /// ROUNDSS xmm1, xmm2/m128, imm8(9) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(9) + /// + public static Vector128 FloorScalar(Vector128 upper, Vector128 value) => FloorScalar(upper, value); + /// + /// __m128d _mm_floor_sd (__m128d a) + /// ROUNDSD xmm1, xmm2/m128, imm8(9) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(9) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 FloorScalar(Vector128 value) => FloorScalar(value); - + public static Vector128 FloorScalar(Vector128 value) => FloorScalar(value); /// /// __m128d _mm_floor_sd (__m128d a, __m128d b) - /// ROUNDSD xmm, xmm/m128, imm8(9) + /// ROUNDSD xmm1, xmm2/m128, imm8(9) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(9) /// public static Vector128 FloorScalar(Vector128 upper, Vector128 value) => FloorScalar(upper, value); - /// - /// __m128 _mm_floor_ss (__m128 a, __m128 b) - /// ROUNDSS xmm, xmm/m128, imm8(9) - /// - public static Vector128 FloorScalar(Vector128 upper, Vector128 value) => FloorScalar(upper, value); /// /// __m128i _mm_insert_epi8 (__m128i a, int i, const int imm8) - /// PINSRB xmm, reg/m8, imm8 + /// PINSRB xmm1, r/m8, imm8 + /// VPINSRB xmm1, xmm2, r/m8, imm8 /// public static Vector128 Insert(Vector128 value, sbyte data, [ConstantExpected] byte index) => Insert(value, data, index); /// /// __m128i _mm_insert_epi8 (__m128i a, int i, const int imm8) - /// PINSRB xmm, reg/m8, imm8 + /// PINSRB xmm1, r/m8, imm8 + /// VPINSRB xmm1, xmm2, r/m8, imm8 /// public static Vector128 Insert(Vector128 value, byte data, [ConstantExpected] byte index) => Insert(value, data, index); /// /// __m128i _mm_insert_epi32 (__m128i a, int i, const int imm8) - /// PINSRD xmm, reg/m32, imm8 + /// PINSRD xmm1, r/m32, imm8 + /// VPINSRD xmm1, xmm2, r/m32, imm8 /// public static Vector128 Insert(Vector128 value, int data, [ConstantExpected] byte index) => Insert(value, data, index); /// /// __m128i _mm_insert_epi32 (__m128i a, int i, const int imm8) - /// PINSRD xmm, reg/m32, imm8 + /// PINSRD xmm1, r/m32, imm8 + /// VPINSRD xmm1, xmm2, r/m32, imm8 /// public static Vector128 Insert(Vector128 value, uint data, [ConstantExpected] byte index) => Insert(value, data, index); /// /// __m128 _mm_insert_ps (__m128 a, __m128 b, const int imm8) - /// INSERTPS xmm, xmm/m32, imm8 + /// INSERTPS xmm1, xmm2/m32, imm8 + /// VINSERTPS xmm1, xmm2, xmm3/m32, imm8 /// public static Vector128 Insert(Vector128 value, Vector128 data, [ConstantExpected] byte index) => Insert(value, data, index); + /// + /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) + /// MOVNTDQA xmm1, m128 + /// VMOVNTDQA xmm1, m128 + /// + public static unsafe Vector128 LoadAlignedVector128NonTemporal(sbyte* address) => LoadAlignedVector128NonTemporal(address); + /// + /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) + /// MOVNTDQA xmm1, m128 + /// VMOVNTDQA xmm1, m128 + /// + public static unsafe Vector128 
LoadAlignedVector128NonTemporal(byte* address) => LoadAlignedVector128NonTemporal(address); + /// + /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) + /// MOVNTDQA xmm1, m128 + /// VMOVNTDQA xmm1, m128 + /// + public static unsafe Vector128 LoadAlignedVector128NonTemporal(short* address) => LoadAlignedVector128NonTemporal(address); + /// + /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) + /// MOVNTDQA xmm1, m128 + /// VMOVNTDQA xmm1, m128 + /// + public static unsafe Vector128 LoadAlignedVector128NonTemporal(ushort* address) => LoadAlignedVector128NonTemporal(address); + /// + /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) + /// MOVNTDQA xmm1, m128 + /// VMOVNTDQA xmm1, m128 + /// + public static unsafe Vector128 LoadAlignedVector128NonTemporal(int* address) => LoadAlignedVector128NonTemporal(address); + /// + /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) + /// MOVNTDQA xmm1, m128 + /// VMOVNTDQA xmm1, m128 + /// + public static unsafe Vector128 LoadAlignedVector128NonTemporal(uint* address) => LoadAlignedVector128NonTemporal(address); + /// + /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) + /// MOVNTDQA xmm1, m128 + /// VMOVNTDQA xmm1, m128 + /// + public static unsafe Vector128 LoadAlignedVector128NonTemporal(long* address) => LoadAlignedVector128NonTemporal(address); + /// + /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) + /// MOVNTDQA xmm1, m128 + /// VMOVNTDQA xmm1, m128 + /// + public static unsafe Vector128 LoadAlignedVector128NonTemporal(ulong* address) => LoadAlignedVector128NonTemporal(address); + /// /// __m128i _mm_max_epi8 (__m128i a, __m128i b) - /// PMAXSB xmm, xmm/m128 + /// PMAXSB xmm1, xmm2/m128 + /// VPMAXSB xmm1, xmm2, xmm3/m128 + /// VPMAXSB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Max(Vector128 left, Vector128 right) => Max(left, right); /// /// __m128i _mm_max_epu16 (__m128i a, __m128i b) - /// PMAXUW xmm, xmm/m128 + /// PMAXUW xmm1, xmm2/m128 + /// VPMAXUW xmm1, xmm2, xmm3/m128 + /// VPMAXUW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Max(Vector128 left, Vector128 right) => Max(left, right); /// /// __m128i _mm_max_epi32 (__m128i a, __m128i b) - /// PMAXSD xmm, xmm/m128 + /// PMAXSD xmm1, xmm2/m128 + /// VPMAXSD xmm1, xmm2, xmm3/m128 + /// VPMAXSD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Max(Vector128 left, Vector128 right) => Max(left, right); /// /// __m128i _mm_max_epu32 (__m128i a, __m128i b) - /// PMAXUD xmm, xmm/m128 + /// PMAXUD xmm1, xmm2/m128 + /// VPMAXUD xmm1, xmm2, xmm3/m128 + /// VPMAXUD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Max(Vector128 left, Vector128 right) => Max(left, right); /// /// __m128i _mm_min_epi8 (__m128i a, __m128i b) - /// PMINSB xmm, xmm/m128 + /// PMINSB xmm1, xmm2/m128 + /// VPMINSB xmm1, xmm2, xmm3/m128 + /// VPMINSB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Min(Vector128 left, Vector128 right) => Min(left, right); /// /// __m128i _mm_min_epu16 (__m128i a, __m128i b) - /// PMINUW xmm, xmm/m128 + /// PMINUW xmm1, xmm2/m128 + /// VPMINUW xmm1, xmm2, xmm3/m128 + /// VPMINUW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Min(Vector128 left, Vector128 right) => Min(left, right); /// /// __m128i _mm_min_epi32 (__m128i a, __m128i b) - /// PMINSD xmm, xmm/m128 + /// PMINSD xmm1, xmm2/m128 + /// VPMINSD xmm1, xmm2, xmm3/m128 + /// VPMINSD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Min(Vector128 left, Vector128 right) => Min(left, 
right); /// /// __m128i _mm_min_epu32 (__m128i a, __m128i b) - /// PMINUD xmm, xmm/m128 + /// PMINUD xmm1, xmm2/m128 + /// VPMINUD xmm1, xmm2, xmm3/m128 + /// VPMINUD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 Min(Vector128 left, Vector128 right) => Min(left, right); /// /// __m128i _mm_minpos_epu16 (__m128i a) - /// PHMINPOSUW xmm, xmm/m128 + /// PHMINPOSUW xmm1, xmm2/m128 + /// VPHMINPOSUW xmm1, xmm2/m128 /// public static Vector128 MinHorizontal(Vector128 value) => MinHorizontal(value); /// /// __m128i _mm_mpsadbw_epu8 (__m128i a, __m128i b, const int imm8) - /// MPSADBW xmm, xmm/m128, imm8 + /// MPSADBW xmm1, xmm2/m128, imm8 + /// VMPSADBW xmm1, xmm2, xmm3/m128, imm8 /// public static Vector128 MultipleSumAbsoluteDifferences(Vector128 left, Vector128 right, [ConstantExpected] byte mask) => MultipleSumAbsoluteDifferences(left, right, mask); /// /// __m128i _mm_mul_epi32 (__m128i a, __m128i b) - /// PMULDQ xmm, xmm/m128 + /// PMULDQ xmm1, xmm2/m128 + /// VPMULDQ xmm1, xmm2, xmm3/m128 + /// VPMULDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst /// public static Vector128 Multiply(Vector128 left, Vector128 right) => Multiply(left, right); /// /// __m128i _mm_mullo_epi32 (__m128i a, __m128i b) - /// PMULLD xmm, xmm/m128 + /// PMULLD xmm1, xmm2/m128 + /// VPMULLD xmm1, xmm2, xmm3/m128 + /// VPMULLD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 MultiplyLow(Vector128 left, Vector128 right) => MultiplyLow(left, right); /// /// __m128i _mm_mullo_epi32 (__m128i a, __m128i b) - /// PMULLD xmm, xmm/m128 + /// PMULLD xmm1, xmm2/m128 + /// VPMULLD xmm1, xmm2, xmm3/m128 + /// VPMULLD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 MultiplyLow(Vector128 left, Vector128 right) => MultiplyLow(left, right); /// /// __m128i _mm_packus_epi32 (__m128i a, __m128i b) - /// PACKUSDW xmm, xmm/m128 + /// PACKUSDW xmm1, xmm2/m128 + /// VPACKUSDW xmm1, xmm2, xmm3/m128 + /// VPACKUSDW xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst /// public static Vector128 PackUnsignedSaturate(Vector128 left, Vector128 right) => PackUnsignedSaturate(left, right); /// - /// __m128 _mm_round_ps (__m128 a, int rounding) - /// ROUNDPS xmm, xmm/m128, imm8(8) - /// _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC - /// - public static Vector128 RoundToNearestInteger(Vector128 value) => RoundToNearestInteger(value); - /// - /// _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC; ROUNDPS xmm, xmm/m128, imm8(9) - /// - public static Vector128 RoundToNegativeInfinity(Vector128 value) => RoundToNegativeInfinity(value); - /// - /// _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC; ROUNDPS xmm, xmm/m128, imm8(10) - /// - public static Vector128 RoundToPositiveInfinity(Vector128 value) => RoundToPositiveInfinity(value); - /// - /// _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC; ROUNDPS xmm, xmm/m128, imm8(11) - /// - public static Vector128 RoundToZero(Vector128 value) => RoundToZero(value); - /// - /// _MM_FROUND_CUR_DIRECTION; ROUNDPS xmm, xmm/m128, imm8(4) + /// __m128 _mm_round_ps (__m128 a, _MM_FROUND_CUR_DIRECTION) + /// ROUNDPS xmm1, xmm2/m128, imm8(4) + /// VROUNDPS xmm1, xmm2/m128, imm8(4) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. 
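// A minimal sketch contrasting the two 32-bit multiplies documented above, assuming
// SSE4.1 support: PMULLD keeps the low 32 bits of all four products, while PMULDQ
// multiplies elements 0 and 2 into full 64-bit results.
Vector128<int>  mulA    = Vector128.Create(100000, 2, 300000, 4);
Vector128<int>  mulB    = Vector128.Create(100000, 3, 300000, 5);
Vector128<int>  mulLow  = Sse41.MultiplyLow(mulA, mulB); // low 32 bits of each product
Vector128<long> mulFull = Sse41.Multiply(mulA, mulB);    // { 10000000000, 90000000000 }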
/// public static Vector128 RoundCurrentDirection(Vector128 value) => RoundCurrentDirection(value); - - /// - /// __m128d _mm_round_pd (__m128d a, int rounding) - /// ROUNDPD xmm, xmm/m128, imm8(8) - /// _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC - /// - public static Vector128 RoundToNearestInteger(Vector128 value) => RoundToNearestInteger(value); - /// - /// _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC; ROUNDPD xmm, xmm/m128, imm8(9) - /// - public static Vector128 RoundToNegativeInfinity(Vector128 value) => RoundToNegativeInfinity(value); - /// - /// _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC; ROUNDPD xmm, xmm/m128, imm8(10) - /// - public static Vector128 RoundToPositiveInfinity(Vector128 value) => RoundToPositiveInfinity(value); /// - /// _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC; ROUNDPD xmm, xmm/m128, imm8(11) - /// - public static Vector128 RoundToZero(Vector128 value) => RoundToZero(value); - /// - /// _MM_FROUND_CUR_DIRECTION; ROUNDPD xmm, xmm/m128, imm8(4) + /// __m128d _mm_round_pd (__m128d a, _MM_FROUND_CUR_DIRECTION) + /// ROUNDPD xmm1, xmm2/m128, imm8(4) + /// VROUNDPD xmm1, xmm2/m128, imm8(4) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// public static Vector128 RoundCurrentDirection(Vector128 value) => RoundCurrentDirection(value); /// - /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_CUR_DIRECTION) - /// ROUNDSD xmm, xmm/m128, imm8(4) + /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_CUR_DIRECTION) + /// ROUNDSS xmm1, xmm2/m128, imm8(4) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(4) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundCurrentDirectionScalar(Vector128 value) => RoundCurrentDirectionScalar(value); + public static Vector128 RoundCurrentDirectionScalar(Vector128 value) => RoundCurrentDirectionScalar(value); /// - /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) - /// ROUNDSD xmm, xmm/m128, imm8(8) + /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_CUR_DIRECTION) + /// ROUNDSS xmm1, xmm2/m128, imm8(4) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(4) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundToNearestIntegerScalar(Vector128 value) => RoundToNearestIntegerScalar(value); + public static Vector128 RoundCurrentDirectionScalar(Vector128 upper, Vector128 value) => RoundCurrentDirectionScalar(upper, value); /// - /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) - /// ROUNDSD xmm, xmm/m128, imm8(9) + /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_CUR_DIRECTION) + /// ROUNDSD xmm1, xmm2/m128, imm8(4) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(4) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. 
/// - public static Vector128 RoundToNegativeInfinityScalar(Vector128 value) => RoundToNegativeInfinityScalar(value); + public static Vector128 RoundCurrentDirectionScalar(Vector128 value) => RoundCurrentDirectionScalar(value); /// - /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) - /// ROUNDSD xmm, xmm/m128, imm8(10) + /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_CUR_DIRECTION) + /// ROUNDSD xmm1, xmm2/m128, imm8(4) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(4) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundToPositiveInfinityScalar(Vector128 value) => RoundToPositiveInfinityScalar(value); + public static Vector128 RoundCurrentDirectionScalar(Vector128 upper, Vector128 value) => RoundCurrentDirectionScalar(upper, value); + /// - /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) - /// ROUNDSD xmm, xmm/m128, imm8(11) + /// __m128 _mm_round_ps (__m128 a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) + /// ROUNDPS xmm1, xmm2/m128, imm8(8) + /// VROUNDPS xmm1, xmm2/m128, imm8(8) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundToZeroScalar(Vector128 value) => RoundToZeroScalar(value); - + public static Vector128 RoundToNearestInteger(Vector128 value) => RoundToNearestInteger(value); /// - /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_CUR_DIRECTION) - /// ROUNDSD xmm, xmm/m128, imm8(4) + /// __m128d _mm_round_pd (__m128d a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) + /// ROUNDPD xmm1, xmm2/m128, imm8(8) + /// VROUNDPD xmm1, xmm2/m128, imm8(8) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundCurrentDirectionScalar(Vector128 upper, Vector128 value) => RoundCurrentDirectionScalar(upper, value); + public static Vector128 RoundToNearestInteger(Vector128 value) => RoundToNearestInteger(value); + /// - /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) - /// ROUNDSD xmm, xmm/m128, imm8(8) + /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) + /// ROUNDSS xmm1, xmm2/m128, imm8(8) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(8) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundToNearestIntegerScalar(Vector128 upper, Vector128 value) => RoundToNearestIntegerScalar(upper, value); + public static Vector128 RoundToNearestIntegerScalar(Vector128 value) => RoundToNearestIntegerScalar(value); /// - /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) - /// ROUNDSD xmm, xmm/m128, imm8(9) + /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) + /// ROUNDSS xmm1, xmm2/m128, imm8(8) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(8) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. 
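// A minimal sketch of the scalar rounding convention used by the overloads above,
// assuming SSE4.1 support: the (upper, value) forms round element 0 of 'value' and
// copy the remaining elements from 'upper', mirroring how ROUNDSD merges registers.
Vector128<double> roundUpper = Vector128.Create(99.0, 99.0);
Vector128<double> roundValue = Vector128.Create(1.7, 2.7);
Vector128<double> merged     = Sse41.RoundToNearestIntegerScalar(roundUpper, roundValue); // { 2.0, 99.0 }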
/// - public static Vector128 RoundToNegativeInfinityScalar(Vector128 upper, Vector128 value) => RoundToNegativeInfinityScalar(upper, value); + public static Vector128 RoundToNearestIntegerScalar(Vector128 upper, Vector128 value) => RoundToNearestIntegerScalar(upper, value); /// - /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) - /// ROUNDSD xmm, xmm/m128, imm8(10) + /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) + /// ROUNDSD xmm1, xmm2/m128, imm8(8) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(8) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundToPositiveInfinityScalar(Vector128 upper, Vector128 value) => RoundToPositiveInfinityScalar(upper, value); + public static Vector128 RoundToNearestIntegerScalar(Vector128 value) => RoundToNearestIntegerScalar(value); /// - /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) - /// ROUNDSD xmm, xmm/m128, imm8(11) + /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) + /// ROUNDSD xmm1, xmm2/m128, imm8(8) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(8) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundToZeroScalar(Vector128 upper, Vector128 value) => RoundToZeroScalar(upper, value); + public static Vector128 RoundToNearestIntegerScalar(Vector128 upper, Vector128 value) => RoundToNearestIntegerScalar(upper, value); /// - /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_CUR_DIRECTION) - /// ROUNDSS xmm, xmm/m128, imm8(4) + /// __m128 _mm_round_ps (__m128 a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC) + /// ROUNDPS xmm1, xmm2/m128, imm8(9) + /// VROUNDPS xmm1, xmm2/m128, imm8(9) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundCurrentDirectionScalar(Vector128 value) => RoundCurrentDirectionScalar(value); + public static Vector128 RoundToNegativeInfinity(Vector128 value) => RoundToNegativeInfinity(value); /// - /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) - /// ROUNDSS xmm, xmm/m128, imm8(8) + /// __m128d _mm_round_pd (__m128d a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC) + /// ROUNDPD xmm1, xmm2/m128, imm8(9) + /// VROUNDPD xmm1, xmm2/m128, imm8(9) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundToNearestIntegerScalar(Vector128 value) => RoundToNearestIntegerScalar(value); + public static Vector128 RoundToNegativeInfinity(Vector128 value) => RoundToNegativeInfinity(value); + /// /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC) - /// ROUNDSS xmm, xmm/m128, imm8(9) + /// ROUNDSS xmm1, xmm2/m128, imm8(9) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(9) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. 
/// public static Vector128 RoundToNegativeInfinityScalar(Vector128 value) => RoundToNegativeInfinityScalar(value); /// - /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC) - /// ROUNDSS xmm, xmm/m128, imm8(10) + /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC) + /// ROUNDSS xmm1, xmm2/m128, imm8(9) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(9) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundToPositiveInfinityScalar(Vector128 value) => RoundToPositiveInfinityScalar(value); + public static Vector128 RoundToNegativeInfinityScalar(Vector128 upper, Vector128 value) => RoundToNegativeInfinityScalar(upper, value); /// - /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) - /// ROUNDSS xmm, xmm/m128, imm8(11) + /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC) + /// ROUNDSD xmm1, xmm2/m128, imm8(9) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(9) /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundToZeroScalar(Vector128 value) => RoundToZeroScalar(value); + public static Vector128 RoundToNegativeInfinityScalar(Vector128 value) => RoundToNegativeInfinityScalar(value); + /// + /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC) + /// ROUNDSD xmm1, xmm2/m128, imm8(9) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(9) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. + /// + public static Vector128 RoundToNegativeInfinityScalar(Vector128 upper, Vector128 value) => RoundToNegativeInfinityScalar(upper, value); /// - /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_CUR_DIRECTION) - /// ROUNDSS xmm, xmm/m128, imm8(4) + /// __m128 _mm_round_ps (__m128 a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC) + /// ROUNDPS xmm1, xmm2/m128, imm8(10) + /// VROUNDPS xmm1, xmm2/m128, imm8(10) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundCurrentDirectionScalar(Vector128 upper, Vector128 value) => RoundCurrentDirectionScalar(upper, value); + public static Vector128 RoundToPositiveInfinity(Vector128 value) => RoundToPositiveInfinity(value); /// - /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) - /// ROUNDSS xmm, xmm/m128, imm8(8) + /// __m128d _mm_round_pd (__m128d a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC) + /// ROUNDPD xmm1, xmm2/m128, imm8(10) + /// VROUNDPD xmm1, xmm2/m128, imm8(10) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundToNearestIntegerScalar(Vector128 upper, Vector128 value) => RoundToNearestIntegerScalar(upper, value); + public static Vector128 RoundToPositiveInfinity(Vector128 value) => RoundToPositiveInfinity(value); + /// - /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC) - /// ROUNDSS xmm, xmm/m128, imm8(9) + /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC) + /// ROUNDSS xmm1, xmm2/m128, imm8(10) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(10) + /// The above native signature does not exist. 
We provide this additional overload for the recommended use case of this intrinsic. /// - public static Vector128 RoundToNegativeInfinityScalar(Vector128 upper, Vector128 value) => RoundToNegativeInfinityScalar(upper, value); + public static Vector128 RoundToPositiveInfinityScalar(Vector128 value) => RoundToPositiveInfinityScalar(value); /// /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC) - /// ROUNDSS xmm, xmm/m128, imm8(10) + /// ROUNDSS xmm1, xmm2/m128, imm8(10) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(10) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// public static Vector128 RoundToPositiveInfinityScalar(Vector128 upper, Vector128 value) => RoundToPositiveInfinityScalar(upper, value); /// - /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) - /// ROUNDSS xmm, xmm/m128, imm8(11) - /// - public static Vector128 RoundToZeroScalar(Vector128 upper, Vector128 value) => RoundToZeroScalar(upper, value); - - /// - /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) - /// MOVNTDQA xmm, m128 + /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC) + /// ROUNDSD xmm1, xmm2/m128, imm8(10) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(10) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static unsafe Vector128 LoadAlignedVector128NonTemporal(sbyte* address) => LoadAlignedVector128NonTemporal(address); + public static Vector128 RoundToPositiveInfinityScalar(Vector128 value) => RoundToPositiveInfinityScalar(value); /// - /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) - /// MOVNTDQA xmm, m128 + /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC) + /// ROUNDSD xmm1, xmm2/m128, imm8(10) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(10) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static unsafe Vector128 LoadAlignedVector128NonTemporal(byte* address) => LoadAlignedVector128NonTemporal(address); + public static Vector128 RoundToPositiveInfinityScalar(Vector128 upper, Vector128 value) => RoundToPositiveInfinityScalar(upper, value); + /// - /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) - /// MOVNTDQA xmm, m128 + /// __m128 _mm_round_ps (__m128 a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) + /// ROUNDPS xmm1, xmm2/m128, imm8(11) + /// VROUNDPS xmm1, xmm2/m128, imm8(11) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static unsafe Vector128 LoadAlignedVector128NonTemporal(short* address) => LoadAlignedVector128NonTemporal(address); + public static Vector128 RoundToZero(Vector128 value) => RoundToZero(value); /// - /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) - /// MOVNTDQA xmm, m128 + /// __m128d _mm_round_pd (__m128d a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) + /// ROUNDPD xmm1, xmm2/m128, imm8(11) + /// VROUNDPD xmm1, xmm2/m128, imm8(11) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. 
/// - public static unsafe Vector128 LoadAlignedVector128NonTemporal(ushort* address) => LoadAlignedVector128NonTemporal(address); + public static Vector128 RoundToZero(Vector128 value) => RoundToZero(value); + /// - /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) - /// MOVNTDQA xmm, m128 + /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) + /// ROUNDSS xmm1, xmm2/m128, imm8(11) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(11) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static unsafe Vector128 LoadAlignedVector128NonTemporal(int* address) => LoadAlignedVector128NonTemporal(address); + public static Vector128 RoundToZeroScalar(Vector128 value) => RoundToZeroScalar(value); /// - /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) - /// MOVNTDQA xmm, m128 + /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) + /// ROUNDSS xmm1, xmm2/m128, imm8(11) + /// VROUNDSS xmm1, xmm2, xmm3/m128, imm8(11) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static unsafe Vector128 LoadAlignedVector128NonTemporal(uint* address) => LoadAlignedVector128NonTemporal(address); + public static Vector128 RoundToZeroScalar(Vector128 upper, Vector128 value) => RoundToZeroScalar(upper, value); /// - /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) - /// MOVNTDQA xmm, m128 + /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) + /// ROUNDSD xmm1, xmm2/m128, imm8(11) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(11) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. /// - public static unsafe Vector128 LoadAlignedVector128NonTemporal(long* address) => LoadAlignedVector128NonTemporal(address); + public static Vector128 RoundToZeroScalar(Vector128 value) => RoundToZeroScalar(value); /// - /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr) - /// MOVNTDQA xmm, m128 + /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) + /// ROUNDSD xmm1, xmm2/m128, imm8(11) + /// VROUNDSD xmm1, xmm2, xmm3/m128, imm8(11) + /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic. 
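// A minimal sketch of the packed rounding helpers above, assuming SSE4.1 support;
// each call maps to ROUNDPS with the fixed imm8 shown in its doc comment.
Vector128<float> toRound   = Vector128.Create(1.5f, -1.5f, 2.5f, -2.5f);
Vector128<float> nearest   = Sse41.RoundToNearestInteger(toRound);   // imm8(8):  { 2, -2, 2, -2 } (ties to even)
Vector128<float> downward  = Sse41.RoundToNegativeInfinity(toRound); // imm8(9):  { 1, -2, 2, -3 }
Vector128<float> upward    = Sse41.RoundToPositiveInfinity(toRound); // imm8(10): { 2, -1, 3, -2 }
Vector128<float> truncated = Sse41.RoundToZero(toRound);             // imm8(11): { 1, -1, 2, -2 }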
/// - public static unsafe Vector128 LoadAlignedVector128NonTemporal(ulong* address) => LoadAlignedVector128NonTemporal(address); + public static Vector128 RoundToZeroScalar(Vector128 upper, Vector128 value) => RoundToZeroScalar(upper, value); /// /// int _mm_testc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; CF=1 + /// VPTEST xmm1, xmm2/m128 ; CF=1 /// public static bool TestC(Vector128 left, Vector128 right) => TestC(left, right); /// /// int _mm_testc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; CF=1 + /// VPTEST xmm1, xmm2/m128 ; CF=1 /// public static bool TestC(Vector128 left, Vector128 right) => TestC(left, right); /// /// int _mm_testc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; CF=1 + /// VPTEST xmm1, xmm2/m128 ; CF=1 /// public static bool TestC(Vector128 left, Vector128 right) => TestC(left, right); /// /// int _mm_testc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; CF=1 + /// VPTEST xmm1, xmm2/m128 ; CF=1 /// public static bool TestC(Vector128 left, Vector128 right) => TestC(left, right); /// /// int _mm_testc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; CF=1 + /// VPTEST xmm1, xmm2/m128 ; CF=1 /// public static bool TestC(Vector128 left, Vector128 right) => TestC(left, right); /// /// int _mm_testc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; CF=1 + /// VPTEST xmm1, xmm2/m128 ; CF=1 /// public static bool TestC(Vector128 left, Vector128 right) => TestC(left, right); /// /// int _mm_testc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; CF=1 + /// VPTEST xmm1, xmm2/m128 ; CF=1 /// public static bool TestC(Vector128 left, Vector128 right) => TestC(left, right); /// /// int _mm_testc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; CF=1 + /// VPTEST xmm1, xmm2/m128 ; CF=1 /// public static bool TestC(Vector128 left, Vector128 right) => TestC(left, right); /// /// int _mm_testnzc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 + /// VPTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector128 left, Vector128 right) => TestNotZAndNotC(left, right); /// /// int _mm_testnzc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 + /// VPTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector128 left, Vector128 right) => TestNotZAndNotC(left, right); /// /// int _mm_testnzc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 + /// VPTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector128 left, Vector128 right) => TestNotZAndNotC(left, right); /// /// int _mm_testnzc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 + /// VPTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector128 left, Vector128 right) => TestNotZAndNotC(left, right); /// /// int _mm_testnzc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 + /// VPTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector128 left, Vector128 right) => TestNotZAndNotC(left, right); /// /// int _mm_testnzc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST 
xmm1, xmm2/m128 ; ZF=0 && CF=0 + /// VPTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector128 left, Vector128 right) => TestNotZAndNotC(left, right); /// /// int _mm_testnzc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 + /// VPTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector128 left, Vector128 right) => TestNotZAndNotC(left, right); /// /// int _mm_testnzc_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 + /// VPTEST xmm1, xmm2/m128 ; ZF=0 && CF=0 /// public static bool TestNotZAndNotC(Vector128 left, Vector128 right) => TestNotZAndNotC(left, right); /// /// int _mm_testz_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=1 + /// VPTEST xmm1, xmm2/m128 ; ZF=1 /// public static bool TestZ(Vector128 left, Vector128 right) => TestZ(left, right); /// /// int _mm_testz_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=1 + /// VPTEST xmm1, xmm2/m128 ; ZF=1 /// public static bool TestZ(Vector128 left, Vector128 right) => TestZ(left, right); /// /// int _mm_testz_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=1 + /// VPTEST xmm1, xmm2/m128 ; ZF=1 /// public static bool TestZ(Vector128 left, Vector128 right) => TestZ(left, right); /// /// int _mm_testz_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=1 + /// VPTEST xmm1, xmm2/m128 ; ZF=1 /// public static bool TestZ(Vector128 left, Vector128 right) => TestZ(left, right); /// /// int _mm_testz_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=1 + /// VPTEST xmm1, xmm2/m128 ; ZF=1 /// public static bool TestZ(Vector128 left, Vector128 right) => TestZ(left, right); /// /// int _mm_testz_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=1 + /// VPTEST xmm1, xmm2/m128 ; ZF=1 /// public static bool TestZ(Vector128 left, Vector128 right) => TestZ(left, right); /// /// int _mm_testz_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=1 + /// VPTEST xmm1, xmm2/m128 ; ZF=1 /// public static bool TestZ(Vector128 left, Vector128 right) => TestZ(left, right); /// /// int _mm_testz_si128 (__m128i a, __m128i b) - /// PTEST xmm, xmm/m128 + /// PTEST xmm1, xmm2/m128 ; ZF=1 + /// VPTEST xmm1, xmm2/m128 ; ZF=1 /// public static bool TestZ(Vector128 left, Vector128 right) => TestZ(left, right); } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse42.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse42.PlatformNotSupported.cs index e06b3545be9bf6..a92a80e1ae1c0d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse42.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse42.PlatformNotSupported.cs @@ -25,7 +25,7 @@ internal X64() { } /// /// unsigned __int64 _mm_crc32_u64 (unsigned __int64 crc, unsigned __int64 v) - /// CRC32 reg, reg/m64 + /// CRC32 r64, r/m64 /// This intrinsic is only available on 64-bit processes /// public static ulong Crc32(ulong crc, ulong data) { throw new PlatformNotSupportedException(); } @@ -33,23 +33,24 @@ internal X64() { } /// /// __m128i _mm_cmpgt_epi64 (__m128i a, __m128i b) - /// PCMPGTQ xmm, xmm/m128 + /// PCMPGTQ xmm1, xmm2/m128 + /// VPCMPGTQ 
xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareGreaterThan(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// unsigned int _mm_crc32_u8 (unsigned int crc, unsigned char v) - /// CRC32 reg, reg/m8 + /// CRC32 r32, r/m8 /// public static uint Crc32(uint crc, byte data) { throw new PlatformNotSupportedException(); } /// /// unsigned int _mm_crc32_u16 (unsigned int crc, unsigned short v) - /// CRC32 reg, reg/m16 + /// CRC32 r32, r/m16 /// public static uint Crc32(uint crc, ushort data) { throw new PlatformNotSupportedException(); } /// /// unsigned int _mm_crc32_u32 (unsigned int crc, unsigned int v) - /// CRC32 reg, reg/m32 + /// CRC32 r32, r/m32 /// public static uint Crc32(uint crc, uint data) { throw new PlatformNotSupportedException(); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse42.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse42.cs index 83ec7c0a536d59..7eb1c84f5913e0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse42.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse42.cs @@ -25,7 +25,7 @@ internal X64() { } /// /// unsigned __int64 _mm_crc32_u64 (unsigned __int64 crc, unsigned __int64 v) - /// CRC32 reg, reg/m64 + /// CRC32 r64, r/m64 /// This intrinsic is only available on 64-bit processes /// public static ulong Crc32(ulong crc, ulong data) => Crc32(crc, data); @@ -33,23 +33,24 @@ internal X64() { } /// /// __m128i _mm_cmpgt_epi64 (__m128i a, __m128i b) - /// PCMPGTQ xmm, xmm/m128 + /// PCMPGTQ xmm1, xmm2/m128 + /// VPCMPGTQ xmm1, xmm2, xmm3/m128 /// public static Vector128 CompareGreaterThan(Vector128 left, Vector128 right) => CompareGreaterThan(left, right); /// /// unsigned int _mm_crc32_u8 (unsigned int crc, unsigned char v) - /// CRC32 reg, reg/m8 + /// CRC32 r32, r/m8 /// public static uint Crc32(uint crc, byte data) => Crc32(crc, data); /// /// unsigned int _mm_crc32_u16 (unsigned int crc, unsigned short v) - /// CRC32 reg, reg/m16 + /// CRC32 r32, r/m16 /// public static uint Crc32(uint crc, ushort data) => Crc32(crc, data); /// /// unsigned int _mm_crc32_u32 (unsigned int crc, unsigned int v) - /// CRC32 reg, reg/m32 + /// CRC32 r32, r/m32 /// public static uint Crc32(uint crc, uint data) => Crc32(crc, data); } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Ssse3.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Ssse3.PlatformNotSupported.cs index 8ea7840fd6f0f7..d5a1abc545a16a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Ssse3.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Ssse3.PlatformNotSupported.cs @@ -27,145 +27,175 @@ internal X64() { } /// /// __m128i _mm_abs_epi8 (__m128i a) - /// PABSB xmm, xmm/m128 + /// PABSB xmm1, xmm2/m128 + /// VPABSB xmm1, xmm2/m128 + /// VPABSB xmm1 {k1}{z}, xmm2/m128 /// public static Vector128 Abs(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_abs_epi16 (__m128i a) - /// PABSW xmm, xmm/m128 + /// PABSW xmm1, xmm2/m128 + /// VPABSW xmm1, xmm2/m128 + /// VPABSW xmm1 {k1}{z}, xmm2/m128 /// public static Vector128 Abs(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_abs_epi32 (__m128i a) - /// PABSD xmm, xmm/m128 + /// PABSD xmm1, xmm2/m128 + /// VPABSD xmm1, xmm2/m128 + /// VPABSD xmm1 {k1}{z}, 
xmm2/m128/m32bcst /// public static Vector128 Abs(Vector128 value) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count) - /// PALIGNR xmm, xmm/m128, imm8 + /// PALIGNR xmm1, xmm2/m128, imm8 + /// VPALIGNR xmm1, xmm2, xmm3/m128, imm8 + /// VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8 /// public static Vector128 AlignRight(Vector128 left, Vector128 right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count) - /// PALIGNR xmm, xmm/m128, imm8 - /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors. + /// PALIGNR xmm1, xmm2/m128, imm8 + /// VPALIGNR xmm1, xmm2, xmm3/m128, imm8 + /// VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8 /// public static Vector128 AlignRight(Vector128 left, Vector128 right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count) - /// PALIGNR xmm, xmm/m128, imm8 + /// PALIGNR xmm1, xmm2/m128, imm8 + /// VPALIGNR xmm1, xmm2, xmm3/m128, imm8 + /// VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8 /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors. /// public static Vector128 AlignRight(Vector128 left, Vector128 right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count) - /// PALIGNR xmm, xmm/m128, imm8 + /// PALIGNR xmm1, xmm2/m128, imm8 + /// VPALIGNR xmm1, xmm2, xmm3/m128, imm8 + /// VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8 /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors. /// public static Vector128 AlignRight(Vector128 left, Vector128 right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count) - /// PALIGNR xmm, xmm/m128, imm8 + /// PALIGNR xmm1, xmm2/m128, imm8 + /// VPALIGNR xmm1, xmm2, xmm3/m128, imm8 + /// VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8 /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors. /// public static Vector128 AlignRight(Vector128 left, Vector128 right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count) - /// PALIGNR xmm, xmm/m128, imm8 + /// PALIGNR xmm1, xmm2/m128, imm8 + /// VPALIGNR xmm1, xmm2, xmm3/m128, imm8 + /// VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8 /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors. /// public static Vector128 AlignRight(Vector128 left, Vector128 right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count) - /// PALIGNR xmm, xmm/m128, imm8 + /// PALIGNR xmm1, xmm2/m128, imm8 + /// VPALIGNR xmm1, xmm2, xmm3/m128, imm8 + /// VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8 /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors. 
/// public static Vector128 AlignRight(Vector128 left, Vector128 right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count) - /// PALIGNR xmm, xmm/m128, imm8 + /// PALIGNR xmm1, xmm2/m128, imm8 + /// VPALIGNR xmm1, xmm2, xmm3/m128, imm8 + /// VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8 /// public static Vector128 AlignRight(Vector128 left, Vector128 right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_hadd_epi16 (__m128i a, __m128i b) - /// PHADDW xmm, xmm/m128 + /// PHADDW xmm1, xmm2/m128 + /// VPHADDW xmm1, xmm2, xmm3/m128 /// public static Vector128 HorizontalAdd(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_hadd_epi32 (__m128i a, __m128i b) - /// PHADDD xmm, xmm/m128 + /// PHADDD xmm1, xmm2/m128 + /// VPHADDD xmm1, xmm2, xmm3/m128 /// public static Vector128 HorizontalAdd(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_hadds_epi16 (__m128i a, __m128i b) - /// PHADDSW xmm, xmm/m128 + /// PHADDSW xmm1, xmm2/m128 + /// VPHADDSW xmm1, xmm2, xmm3/m128 /// public static Vector128 HorizontalAddSaturate(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_hsub_epi16 (__m128i a, __m128i b) - /// PHSUBW xmm, xmm/m128 + /// PHSUBW xmm1, xmm2/m128 + /// VPHSUBW xmm1, xmm2, xmm3/m128 /// public static Vector128 HorizontalSubtract(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_hsub_epi32 (__m128i a, __m128i b) - /// PHSUBD xmm, xmm/m128 + /// PHSUBD xmm1, xmm2/m128 + /// VPHSUBD xmm1, xmm2, xmm3/m128 /// public static Vector128 HorizontalSubtract(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_hsubs_epi16 (__m128i a, __m128i b) - /// PHSUBSW xmm, xmm/m128 + /// PHSUBSW xmm1, xmm2/m128 + /// VPHSUBSW xmm1, xmm2, xmm3/m128 /// public static Vector128 HorizontalSubtractSaturate(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_maddubs_epi16 (__m128i a, __m128i b) - /// PMADDUBSW xmm, xmm/m128 + /// PMADDUBSW xmm1, xmm2/m128 + /// VPMADDUBSW xmm1, xmm2, xmm3/m128 + /// VPMADDUBSW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 MultiplyAddAdjacent(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_mulhrs_epi16 (__m128i a, __m128i b) - /// PMULHRSW xmm, xmm/m128 + /// PMULHRSW xmm1, xmm2/m128 + /// VPMULHRSW xmm1, xmm2, xmm3/m128 + /// VPMULHRSW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 MultiplyHighRoundScale(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_shuffle_epi8 (__m128i a, __m128i b) - /// PSHUFB xmm, xmm/m128 + /// PSHUFB xmm1, xmm2/m128 + /// VPSHUFB xmm1, xmm2, xmm3/m128 + /// VPSHUFB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Shuffle(Vector128 value, Vector128 mask) { throw new PlatformNotSupportedException(); } - /// /// __m128i _mm_shuffle_epi8 (__m128i a, __m128i b) - /// PSHUFB xmm, xmm/m128 + /// PSHUFB xmm1, xmm2/m128 + /// VPSHUFB xmm1, xmm2, xmm3/m128 + /// VPSHUFB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Shuffle(Vector128 value, Vector128 mask) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_sign_epi8 (__m128i a, __m128i b) - /// PSIGNB xmm, xmm/m128 + /// PSIGNB 
xmm1, xmm2/m128 + /// VPSIGNB xmm1, xmm2, xmm3/m128 /// public static Vector128 Sign(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_sign_epi16 (__m128i a, __m128i b) - /// PSIGNW xmm, xmm/m128 + /// PSIGNW xmm1, xmm2/m128 + /// VPSIGNW xmm1, xmm2, xmm3/m128 /// public static Vector128 Sign(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_sign_epi32 (__m128i a, __m128i b) - /// PSIGND xmm, xmm/m128 + /// PSIGND xmm1, xmm2/m128 + /// VPSIGND xmm1, xmm2, xmm3/m128 /// public static Vector128 Sign(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Ssse3.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Ssse3.cs index 5ea6532cc33890..30acb5e323328b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Ssse3.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Ssse3.cs @@ -27,146 +27,176 @@ internal X64() { } /// /// __m128i _mm_abs_epi8 (__m128i a) - /// PABSB xmm, xmm/m128 + /// PABSB xmm1, xmm2/m128 + /// VPABSB xmm1, xmm2/m128 + /// VPABSB xmm1 {k1}{z}, xmm2/m128 /// public static Vector128 Abs(Vector128 value) => Abs(value); /// /// __m128i _mm_abs_epi16 (__m128i a) - /// PABSW xmm, xmm/m128 + /// PABSW xmm1, xmm2/m128 + /// VPABSW xmm1, xmm2/m128 + /// VPABSW xmm1 {k1}{z}, xmm2/m128 /// public static Vector128 Abs(Vector128 value) => Abs(value); /// /// __m128i _mm_abs_epi32 (__m128i a) - /// PABSD xmm, xmm/m128 + /// PABSD xmm1, xmm2/m128 + /// VPABSD xmm1, xmm2/m128 + /// VPABSD xmm1 {k1}{z}, xmm2/m128/m32bcst /// public static Vector128 Abs(Vector128 value) => Abs(value); /// /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count) - /// PALIGNR xmm, xmm/m128, imm8 + /// PALIGNR xmm1, xmm2/m128, imm8 + /// VPALIGNR xmm1, xmm2, xmm3/m128, imm8 + /// VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8 /// public static Vector128 AlignRight(Vector128 left, Vector128 right, [ConstantExpected] byte mask) => AlignRight(left, right, mask); - /// /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count) - /// PALIGNR xmm, xmm/m128, imm8 - /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors. + /// PALIGNR xmm1, xmm2/m128, imm8 + /// VPALIGNR xmm1, xmm2, xmm3/m128, imm8 + /// VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8 /// public static Vector128 AlignRight(Vector128 left, Vector128 right, [ConstantExpected] byte mask) => AlignRight(left, right, mask); - /// /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count) - /// PALIGNR xmm, xmm/m128, imm8 + /// PALIGNR xmm1, xmm2/m128, imm8 + /// VPALIGNR xmm1, xmm2, xmm3/m128, imm8 + /// VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8 /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors. /// public static Vector128 AlignRight(Vector128 left, Vector128 right, [ConstantExpected] byte mask) => AlignRight(left, right, mask); - /// /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count) - /// PALIGNR xmm, xmm/m128, imm8 + /// PALIGNR xmm1, xmm2/m128, imm8 + /// VPALIGNR xmm1, xmm2, xmm3/m128, imm8 + /// VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8 /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors. 
/// public static Vector128 AlignRight(Vector128 left, Vector128 right, [ConstantExpected] byte mask) => AlignRight(left, right, mask); - /// /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count) - /// PALIGNR xmm, xmm/m128, imm8 + /// PALIGNR xmm1, xmm2/m128, imm8 + /// VPALIGNR xmm1, xmm2, xmm3/m128, imm8 + /// VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8 /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors. /// public static Vector128 AlignRight(Vector128 left, Vector128 right, [ConstantExpected] byte mask) => AlignRight(left, right, mask); - /// /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count) - /// PALIGNR xmm, xmm/m128, imm8 + /// PALIGNR xmm1, xmm2/m128, imm8 + /// VPALIGNR xmm1, xmm2, xmm3/m128, imm8 + /// VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8 /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors. /// public static Vector128 AlignRight(Vector128 left, Vector128 right, [ConstantExpected] byte mask) => AlignRight(left, right, mask); - /// /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count) - /// PALIGNR xmm, xmm/m128, imm8 + /// PALIGNR xmm1, xmm2/m128, imm8 + /// VPALIGNR xmm1, xmm2, xmm3/m128, imm8 + /// VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8 /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors. /// public static Vector128 AlignRight(Vector128 left, Vector128 right, [ConstantExpected] byte mask) => AlignRight(left, right, mask); - /// /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count) - /// PALIGNR xmm, xmm/m128, imm8 + /// PALIGNR xmm1, xmm2/m128, imm8 + /// VPALIGNR xmm1, xmm2, xmm3/m128, imm8 + /// VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8 /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors. 
/// public static Vector128 AlignRight(Vector128 left, Vector128 right, [ConstantExpected] byte mask) => AlignRight(left, right, mask); /// /// __m128i _mm_hadd_epi16 (__m128i a, __m128i b) - /// PHADDW xmm, xmm/m128 + /// PHADDW xmm1, xmm2/m128 + /// VPHADDW xmm1, xmm2, xmm3/m128 /// public static Vector128 HorizontalAdd(Vector128 left, Vector128 right) => HorizontalAdd(left, right); /// /// __m128i _mm_hadd_epi32 (__m128i a, __m128i b) - /// PHADDD xmm, xmm/m128 + /// PHADDD xmm1, xmm2/m128 + /// VPHADDD xmm1, xmm2, xmm3/m128 /// public static Vector128 HorizontalAdd(Vector128 left, Vector128 right) => HorizontalAdd(left, right); /// /// __m128i _mm_hadds_epi16 (__m128i a, __m128i b) - /// PHADDSW xmm, xmm/m128 + /// PHADDSW xmm1, xmm2/m128 + /// VPHADDSW xmm1, xmm2, xmm3/m128 /// public static Vector128 HorizontalAddSaturate(Vector128 left, Vector128 right) => HorizontalAddSaturate(left, right); /// /// __m128i _mm_hsub_epi16 (__m128i a, __m128i b) - /// PHSUBW xmm, xmm/m128 + /// PHSUBW xmm1, xmm2/m128 + /// VPHSUBW xmm1, xmm2, xmm3/m128 /// public static Vector128 HorizontalSubtract(Vector128 left, Vector128 right) => HorizontalSubtract(left, right); /// /// __m128i _mm_hsub_epi32 (__m128i a, __m128i b) - /// PHSUBD xmm, xmm/m128 + /// PHSUBD xmm1, xmm2/m128 + /// VPHSUBD xmm1, xmm2, xmm3/m128 /// public static Vector128 HorizontalSubtract(Vector128 left, Vector128 right) => HorizontalSubtract(left, right); /// /// __m128i _mm_hsubs_epi16 (__m128i a, __m128i b) - /// PHSUBSW xmm, xmm/m128 + /// PHSUBSW xmm1, xmm2/m128 + /// VPHSUBSW xmm1, xmm2, xmm3/m128 /// public static Vector128 HorizontalSubtractSaturate(Vector128 left, Vector128 right) => HorizontalSubtractSaturate(left, right); /// /// __m128i _mm_maddubs_epi16 (__m128i a, __m128i b) - /// PMADDUBSW xmm, xmm/m128 + /// PMADDUBSW xmm1, xmm2/m128 + /// VPMADDUBSW xmm1, xmm2, xmm3/m128 + /// VPMADDUBSW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 MultiplyAddAdjacent(Vector128 left, Vector128 right) => MultiplyAddAdjacent(left, right); /// /// __m128i _mm_mulhrs_epi16 (__m128i a, __m128i b) - /// PMULHRSW xmm, xmm/m128 + /// PMULHRSW xmm1, xmm2/m128 + /// VPMULHRSW xmm1, xmm2, xmm3/m128 + /// VPMULHRSW xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 MultiplyHighRoundScale(Vector128 left, Vector128 right) => MultiplyHighRoundScale(left, right); /// /// __m128i _mm_shuffle_epi8 (__m128i a, __m128i b) - /// PSHUFB xmm, xmm/m128 + /// PSHUFB xmm1, xmm2/m128 + /// VPSHUFB xmm1, xmm2, xmm3/m128 + /// VPSHUFB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Shuffle(Vector128 value, Vector128 mask) => Shuffle(value, mask); - /// /// __m128i _mm_shuffle_epi8 (__m128i a, __m128i b) - /// PSHUFB xmm, xmm/m128 + /// PSHUFB xmm1, xmm2/m128 + /// VPSHUFB xmm1, xmm2, xmm3/m128 + /// VPSHUFB xmm1 {k1}{z}, xmm2, xmm3/m128 /// public static Vector128 Shuffle(Vector128 value, Vector128 mask) => Shuffle(value, mask); /// /// __m128i _mm_sign_epi8 (__m128i a, __m128i b) - /// PSIGNB xmm, xmm/m128 + /// PSIGNB xmm1, xmm2/m128 + /// VPSIGNB xmm1, xmm2, xmm3/m128 /// public static Vector128 Sign(Vector128 left, Vector128 right) => Sign(left, right); /// /// __m128i _mm_sign_epi16 (__m128i a, __m128i b) - /// PSIGNW xmm, xmm/m128 + /// PSIGNW xmm1, xmm2/m128 + /// VPSIGNW xmm1, xmm2, xmm3/m128 /// public static Vector128 Sign(Vector128 left, Vector128 right) => Sign(left, right); /// /// __m128i _mm_sign_epi32 (__m128i a, __m128i b) - /// PSIGND xmm, xmm/m128 + /// PSIGND xmm1, xmm2/m128 + /// VPSIGND xmm1, xmm2, xmm3/m128 
/// public static Vector128 Sign(Vector128 left, Vector128 right) => Sign(left, right); } From 2a6e66c019d32d782684ea18af3272dbcf3d101b Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 15 Apr 2023 19:20:32 -0700 Subject: [PATCH 2/5] Fixing a formatting issue --- .../Runtime/Intrinsics/X86/Sse.PlatformNotSupported.cs | 7 ------- .../Runtime/Intrinsics/X86/Sse2.PlatformNotSupported.cs | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse.PlatformNotSupported.cs index 7541f8e61a7699..c512bc4246f1a0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse.PlatformNotSupported.cs @@ -80,13 +80,6 @@ internal X64() { } /// public static Vector128 AndNot(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// - /// __m128 _mm_cmpeq_ps (__m128 a, __m128 b) - /// CMPPS xmm1, xmm2/m128, imm8(0) - /// VCMPPS xmm1, xmm2, xmm3/m128, imm8(0) - /// - public static Vector128 CompareEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - /// /// __m128 _mm_cmpeq_ps (__m128 a, __m128 b) /// CMPPS xmm1, xmm2/m128, imm8(0) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse2.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse2.PlatformNotSupported.cs index b19e56740ca2a0..a52b19af01dc96 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse2.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse2.PlatformNotSupported.cs @@ -577,7 +577,7 @@ internal X64() { } /// VCOMISD xmm1, xmm2/m64{sae} ; ZF=0 || PF=1 /// public static bool CompareScalarOrderedNotEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } - + /// /// __m128d _mm_cmpunord_sd (__m128d a, __m128d b) /// CMPDS xmm1, xmm2/m64, imm8(3) From cea04312e905224446bb9e976f446cd8dcf603ae Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 16 Apr 2023 05:56:43 -0700 Subject: [PATCH 3/5] Ensure all changes are actually committed --- .../src/System/Runtime/Intrinsics/X86/Fma.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Fma.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Fma.cs index 5f94aa053be3e5..f1549932e8a36a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Fma.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Fma.cs @@ -221,8 +221,8 @@ internal X64() { } public static Vector128 MultiplySubtractNegatedScalar(Vector128 a, Vector128 b, Vector128 c) => MultiplySubtractNegatedScalar(a, b, c); /// /// __m128d _mm_fnmsub_sd (__m128d a, __m128d b, __m128d c) - /// VFNMSUBSD xmm1, xmm2, xmm3/m32 - /// VFNMSUBSD xmm1 {k1}{z}, xmm2, xmm3/m32{er} + /// VFNMSUBSD xmm1, xmm2, xmm3/m64 + /// VFNMSUBSD xmm1 {k1}{z}, xmm2, xmm3/m64{er} /// public static Vector128 MultiplySubtractNegatedScalar(Vector128 a, Vector128 b, Vector128 c) => MultiplySubtractNegatedScalar(a, b, c); } From a733a0b92e34628a1a1fe274f10dd61f37665f7c Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 16 Apr 2023 14:10:22 
-0700 Subject: [PATCH 4/5] Apply suggestions from code review Co-authored-by: Clinton Ingram --- .../System/Runtime/Intrinsics/X86/Avx.PlatformNotSupported.cs | 4 ++-- .../src/System/Runtime/Intrinsics/X86/Avx.cs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.PlatformNotSupported.cs index 0f2b983cfd2745..663071e37e6698 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.PlatformNotSupported.cs @@ -219,7 +219,7 @@ internal X64() { } public static Vector256 CompareOrdered(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256 _mm256_cmpunord_ps (__m256 a, __m256 b) - /// CMPPS ymm1, ymm2/m256, imm8(3) + /// VCMPPS ymm1, ymm2, ymm3/m256, imm8(3) /// The above native signature does not exist. We provide this additional overload for completeness. /// public static Vector256 CompareUnordered(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } @@ -302,7 +302,7 @@ internal X64() { } public static Vector256 CompareOrdered(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256d _mm256_cmpunord_pd (__m256d a, __m256d b) - /// CMPPD ymm1, ymm2/m256, imm8(3) + /// VCMPPD ymm1, ymm2, ymm3/m256, imm8(3) /// The above native signature does not exist. We provide this additional overload for completeness. /// public static Vector256 CompareUnordered(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.cs index 7a65fb5f7b1e1e..c096d893ae019f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.cs @@ -220,7 +220,7 @@ internal X64() { } public static Vector256 CompareOrdered(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedNonSignaling); /// /// __m256 _mm256_cmpunord_ps (__m256 a, __m256 b) - /// CMPPS ymm1, ymm2/m256, imm8(3) + /// VCMPPS ymm1, ymm2, ymm3/m256, imm8(3) /// The above native signature does not exist. We provide this additional overload for completeness. /// public static Vector256 CompareUnordered(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNonSignaling); @@ -303,7 +303,7 @@ internal X64() { } public static Vector256 CompareOrdered(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.OrderedNonSignaling); /// /// __m256d _mm256_cmpunord_pd (__m256d a, __m256d b) - /// CMPPD ymm1, ymm2/m256, imm8(3) + /// VCMPPD ymm1, ymm2, ymm3/m256, imm8(3) /// The above native signature does not exist. We provide this additional overload for completeness.
/// public static Vector256 CompareUnordered(Vector256 left, Vector256 right) => Compare(left, right, FloatComparisonMode.UnorderedNonSignaling); From abeb1177cae24c5e0264e765a4e67ba4bcc54114 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 18 Apr 2023 15:42:44 -0700 Subject: [PATCH 5/5] Add back a using that got removed in the merge --- .../src/System/Runtime/Intrinsics/X86/Avx512BW.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.cs index a7a0c084fe5d24..01811c5a108dbf 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512BW.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; namespace System.Runtime.Intrinsics.X86
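// ------------------------------------------------------------------------------------------
// The sketches below are usage examples for intrinsics whose instruction forms the patch
// series above documents; they are illustrative only and not part of the patches themselves.
// All class and method names here are invented for the examples, and the usings below cover
// every sketch that follows. On SSE-only hardware the JIT emits the legacy encodings (e.g.
// PSHUFB, PALIGNR); on AVX/AVX-512 hardware it emits the VEX/EVEX forms (VPSHUFB, VPALIGNR)
// that the updated doc comments list.
using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

internal static class Ssse3Sample
{
    // Reverses the 16 bytes of 'value' with a single PSHUFB/VPSHUFB: each destination byte
    // is selected by the low four bits of the corresponding mask byte (a set high bit in the
    // mask byte zeroes that lane instead).
    public static Vector128<byte> ReverseBytes(Vector128<byte> value)
    {
        if (!Ssse3.IsSupported)
        {
            throw new PlatformNotSupportedException();
        }

        Vector128<byte> indices = Vector128.Create(
            (byte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        return Ssse3.Shuffle(value, indices);
    }

    // Returns bytes 3..18 of the 32-byte concatenation upper:lower, the PALIGNR/VPALIGNR
    // idiom commonly used to realign data after a misaligned streaming load.
    public static Vector128<byte> WindowAtByte3(Vector128<byte> lower, Vector128<byte> upper)
    {
        if (!Ssse3.IsSupported)
        {
            throw new PlatformNotSupportedException();
        }

        return Ssse3.AlignRight(upper, lower, 3);
    }
}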
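// Ssse3.MultiplyAddAdjacent (PMADDUBSW/VPMADDUBSW), documented above, multiplies unsigned
// bytes from 'left' by the corresponding signed bytes from 'right' and adds horizontal pairs
// into saturated 16-bit lanes, a common first step of a fixed-point dot product. Sketch only.
internal static class DotProductSample
{
    // Produces eight 16-bit lanes where lane i = sat16(left[2i]*right[2i] + left[2i+1]*right[2i+1]).
    public static Vector128<short> PairwiseDot(Vector128<byte> left, Vector128<sbyte> right)
    {
        if (!Ssse3.IsSupported)
        {
            throw new PlatformNotSupportedException();
        }

        return Ssse3.MultiplyAddAdjacent(left, right);
    }
}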
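// CompareUnordered, touched by the code-review fixups in the fourth commit, lowers to
// VCMPPS/VCMPPD with imm8(3): a lane compares "unordered" exactly when at least one operand
// is NaN, so comparing a vector against itself yields a NaN mask. Sketch only.
internal static class AvxSample
{
    // Returns all-ones 32-bit lanes where 'value' is NaN and zero lanes elsewhere.
    public static Vector256<float> NaNMask(Vector256<float> value)
    {
        if (!Avx.IsSupported)
        {
            throw new PlatformNotSupportedException();
        }

        return Avx.CompareUnordered(value, value);
    }
}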
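// The scalar FMA form whose memory-operand size the third commit corrects (xmm3/m64, since
// the operand is a double) computes -(a * b) - c in the low element and copies the upper
// element from 'a'. The wrapper below is an assumed convenience helper, not a dotnet API.
internal static class FmaSample
{
    public static double NegatedMultiplySubtract(double a, double b, double c)
    {
        if (!Fma.IsSupported)
        {
            throw new PlatformNotSupportedException();
        }

        // VFNMSUBSD: low lane = -(a * b) - c; ToScalar() extracts that lane.
        return Fma.MultiplySubtractNegatedScalar(
            Vector128.CreateScalar(a),
            Vector128.CreateScalar(b),
            Vector128.CreateScalar(c)).ToScalar();
    }
}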