From f27618fc5ddeb694feebd48b689f2624c98b6dd2 Mon Sep 17 00:00:00 2001
From: "Canino, Anthony"
Date: Tue, 26 Mar 2024 09:41:37 -0700
Subject: [PATCH 01/13] Adding an APX doc.

---
 apx-doc/README.md | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 apx-doc/README.md

diff --git a/apx-doc/README.md b/apx-doc/README.md
new file mode 100644
index 00000000000000..8997da56fbdb8a
--- /dev/null
+++ b/apx-doc/README.md
@@ -0,0 +1,3 @@
+# APX Integration in .NET
+
+Let's keep documentation on APX integration and related notes here. I will evolve this as necessary.
\ No newline at end of file

From c7db95cee6e8ce5830944901db00bc23ada362b5 Mon Sep 17 00:00:00 2001
From: Ruihan-Yin
Date: Mon, 6 May 2024 17:58:57 -0700
Subject: [PATCH 02/13] Update XSAVE logic in the VM and GC to accommodate APX updates

Update the asm code in context2.S: the memory offsets are hard-coded with the
assumption that we are using the standard form. The logic in context2.S seems
to be for a custom stack, so we may not need to follow the standard XSAVE
buffer layout. Directly add the APX section after AVX512.

Updates in threadsuspend.cpp and script-gen changes.

Extend the CPUID feature flags from int to long to hold more ISAs. Update the
CPUID check logic to make sure CR4.OSXSAVE and XCR0.APX_F are checked.

Resolve review comments: improve the logic in isa_detection in the GC, add the
missing method definitions for the Unix context, and clean up the code.
---
 src/coreclr/gc/vxsort/do_vxsort.h | 1 +
 src/coreclr/gc/vxsort/isa_detection.cpp | 44 +++--
 src/coreclr/inc/corinfoinstructionset.h | 158 ++++++++--------
 src/coreclr/inc/jiteeversionguid.h | 10 +-
 src/coreclr/inc/readytoruninstructionset.h | 1 +
 src/coreclr/nativeaot/Runtime/startup.cpp | 4 +-
 .../Runtime/windows/PalRedhawkMinWin.cpp | 8 +-
 src/coreclr/pal/inc/pal.h | 23 +++
 src/coreclr/pal/src/arch/amd64/asmconstants.h | 5 +-
 src/coreclr/pal/src/arch/amd64/context2.S | 20 +++
 src/coreclr/pal/src/include/pal/context.h | 28 ++-
 src/coreclr/pal/src/thread/context.cpp | 71 ++++++++
 .../Compiler/HardwareIntrinsicHelpers.cs | 12 +-
 .../tools/Common/InstructionSetHelpers.cs | 2 +-
 .../Runtime/ReadyToRunInstructionSet.cs | 1 +
 .../Runtime/ReadyToRunInstructionSetHelper.cs | 4 +
 .../JitInterface/CorInfoInstructionSet.cs | 169 ++++++++++--------
 .../ThunkGenerator/InstructionSetDesc.txt | 2 +
 .../ExpectedIsaFeaturesRootProvider.cs | 4 +-
 .../Compiler/HardwareIntrinsicHelpers.Aot.cs | 9 +-
 .../tools/aot/jitinterface/jitwrapper.cpp | 2 +-
 src/coreclr/vm/codeman.cpp | 7 +-
 src/coreclr/vm/threadsuspend.cpp | 9 +-
 src/native/minipal/cpufeatures.c | 55 +++++-
 24 files changed, 460 insertions(+), 189 deletions(-)

diff --git a/src/coreclr/gc/vxsort/do_vxsort.h b/src/coreclr/gc/vxsort/do_vxsort.h
index edd803f310f492..9cb89136472286 100644
--- a/src/coreclr/gc/vxsort/do_vxsort.h
+++ b/src/coreclr/gc/vxsort/do_vxsort.h
@@ -6,6 +6,7 @@ enum class InstructionSet
 {
     AVX2 = 0,
     AVX512F = 1,
+    APX = 2,
 };

 void InitSupportedInstructionSet (int32_t configSetting);
diff --git a/src/coreclr/gc/vxsort/isa_detection.cpp b/src/coreclr/gc/vxsort/isa_detection.cpp
index 93c7288663c42f..9fde70d4813c63 100644
--- a/src/coreclr/gc/vxsort/isa_detection.cpp
+++ b/src/coreclr/gc/vxsort/isa_detection.cpp
@@ -14,7 +14,8 @@ enum class SupportedISA
 {
     None = 0,
     AVX2 = 1 << (int)InstructionSet::AVX2,
-    AVX512F = 1 << (int)InstructionSet::AVX512F
+    AVX512F = 1 << (int)InstructionSet::AVX512F,
+    APX = 1 << (int)InstructionSet::APX
 };

 #if defined(TARGET_AMD64) && defined(TARGET_WINDOWS)
@@ -39,6 +40,7 @@ SupportedISA DetermineSupportedISA()
         AVX2 = 1<< 5,
         AVX512F = 1<<16,
         AVX512DQ = 1<<17,
+        APX = 1<<21,
     };
     int reg[COUNT];

@@ -58,6 +60,16 @@ SupportedISA DetermineSupportedISA()
     // get OS XState info
     DWORD64 FeatureMask = GetEnabledXStateFeatures();

+    int IsaFlags = (int)SupportedISA::None;
+
+    __cpuidex(reg, 7, 1);
+    if((reg[EDX] & APX) &&
+        (xcr0 & 0x80000) &&
+        (FeatureMask & (XSTATE_MASK_APX)) == (XSTATE_MASK_APX))
+    {
+        IsaFlags |= (int)SupportedISA::APX;
+    }
+
     // get processor extended feature flag info
     __cpuidex(reg, 7, 0);

@@ -66,7 +78,7 @@
         (xcr0 & 0xe6) == 0xe6 &&
         (FeatureMask & (XSTATE_MASK_AVX | XSTATE_MASK_AVX512)) == (XSTATE_MASK_AVX | XSTATE_MASK_AVX512))
     {
-        return (SupportedISA)((int)SupportedISA::AVX2 | (int)SupportedISA::AVX512F);
+        IsaFlags |= (int)SupportedISA::AVX512F;
     }

     // check if AVX2 is supported by both processor and OS
@@ -74,10 +86,10 @@
         (xcr0 & 0x06) == 0x06 &&
         (FeatureMask & XSTATE_MASK_AVX) == XSTATE_MASK_AVX)
     {
-        return SupportedISA::AVX2;
+        IsaFlags |= (int)SupportedISA::AVX2;
     }

-    return SupportedISA::None;
+    return (SupportedISA)IsaFlags;
 }

 #elif defined(TARGET_UNIX)
@@ -85,17 +97,25 @@
 SupportedISA DetermineSupportedISA()
 {
     __builtin_cpu_init();
-    if (__builtin_cpu_supports("avx2"))
+
+    int IsaFlags = 0;
+
+    if (__builtin_cpu_supports("apx"))
     {
-        if (__builtin_cpu_supports("avx512f"))
-            return (SupportedISA)((int)SupportedISA::AVX2 | (int)SupportedISA::AVX512F);
-        else
-            return SupportedISA::AVX2;
+        IsaFlags |= (int)SupportedISA::APX;
     }
-    else
+
+    if (__builtin_cpu_supports("avx512f"))
     {
-        return SupportedISA::None;
+        IsaFlags |= (int)SupportedISA::AVX512F;
     }
+
+    if (__builtin_cpu_supports("avx2"))
+    {
+        IsaFlags |= (int)SupportedISA::AVX2;
+    }
+
+    return (SupportedISA)IsaFlags;
 }

 #endif // defined(TARGET_UNIX)
@@ -106,7 +126,7 @@ static SupportedISA s_supportedISA;
 bool IsSupportedInstructionSet (InstructionSet instructionSet)
 {
     assert(s_initialized);
-    assert(instructionSet == InstructionSet::AVX2 || instructionSet == InstructionSet::AVX512F);
+    assert(instructionSet == InstructionSet::AVX2 || instructionSet == InstructionSet::AVX512F || instructionSet == InstructionSet::APX);
     return ((int)s_supportedISA & (1 << (int)instructionSet)) != 0;
 }
diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h
index e3b57b6a1e043b..1e17865d9e2f3d 100644
--- a/src/coreclr/inc/corinfoinstructionset.h
+++ b/src/coreclr/inc/corinfoinstructionset.h
@@ -78,41 +78,43 @@ enum CORINFO_InstructionSet
     InstructionSet_AVX10v1=33,
     InstructionSet_AVX10v1_V256=34,
     InstructionSet_AVX10v1_V512=35,
-    InstructionSet_VectorT128=36,
-    InstructionSet_VectorT256=37,
-    InstructionSet_VectorT512=38,
-    InstructionSet_X86Base_X64=39,
-    InstructionSet_SSE_X64=40,
-    InstructionSet_SSE2_X64=41,
-    InstructionSet_SSE3_X64=42,
-    InstructionSet_SSSE3_X64=43,
-    InstructionSet_SSE41_X64=44,
-    InstructionSet_SSE42_X64=45,
-    InstructionSet_AVX_X64=46,
-    InstructionSet_AVX2_X64=47,
-    InstructionSet_AES_X64=48,
-    InstructionSet_BMI1_X64=49,
-    InstructionSet_BMI2_X64=50,
-    InstructionSet_FMA_X64=51,
-    InstructionSet_LZCNT_X64=52,
-    InstructionSet_PCLMULQDQ_X64=53,
-    InstructionSet_POPCNT_X64=54,
-    InstructionSet_AVXVNNI_X64=55,
-    InstructionSet_MOVBE_X64=56,
-    InstructionSet_X86Serialize_X64=57,
-    InstructionSet_AVX512F_X64=58,
-    InstructionSet_AVX512F_VL_X64=59,
-    InstructionSet_AVX512BW_X64=60,
-    InstructionSet_AVX512BW_VL_X64=61,
-    InstructionSet_AVX512CD_X64=62,
-    InstructionSet_AVX512CD_VL_X64=63,
-    InstructionSet_AVX512DQ_X64=64,
- InstructionSet_AVX512DQ_VL_X64=65, - InstructionSet_AVX512VBMI_X64=66, - InstructionSet_AVX512VBMI_VL_X64=67, - InstructionSet_AVX10v1_X64=68, - InstructionSet_AVX10v1_V256_X64=69, - InstructionSet_AVX10v1_V512_X64=70, + InstructionSet_APX=36, + InstructionSet_VectorT128=37, + InstructionSet_VectorT256=38, + InstructionSet_VectorT512=39, + InstructionSet_X86Base_X64=40, + InstructionSet_SSE_X64=41, + InstructionSet_SSE2_X64=42, + InstructionSet_SSE3_X64=43, + InstructionSet_SSSE3_X64=44, + InstructionSet_SSE41_X64=45, + InstructionSet_SSE42_X64=46, + InstructionSet_AVX_X64=47, + InstructionSet_AVX2_X64=48, + InstructionSet_AES_X64=49, + InstructionSet_BMI1_X64=50, + InstructionSet_BMI2_X64=51, + InstructionSet_FMA_X64=52, + InstructionSet_LZCNT_X64=53, + InstructionSet_PCLMULQDQ_X64=54, + InstructionSet_POPCNT_X64=55, + InstructionSet_AVXVNNI_X64=56, + InstructionSet_MOVBE_X64=57, + InstructionSet_X86Serialize_X64=58, + InstructionSet_AVX512F_X64=59, + InstructionSet_AVX512F_VL_X64=60, + InstructionSet_AVX512BW_X64=61, + InstructionSet_AVX512BW_VL_X64=62, + InstructionSet_AVX512CD_X64=63, + InstructionSet_AVX512CD_VL_X64=64, + InstructionSet_AVX512DQ_X64=65, + InstructionSet_AVX512DQ_VL_X64=66, + InstructionSet_AVX512VBMI_X64=67, + InstructionSet_AVX512VBMI_VL_X64=68, + InstructionSet_AVX10v1_X64=69, + InstructionSet_AVX10v1_V256_X64=70, + InstructionSet_AVX10v1_V512_X64=71, + InstructionSet_APX_X64=72, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -150,41 +152,43 @@ enum CORINFO_InstructionSet InstructionSet_AVX10v1=33, InstructionSet_AVX10v1_V256=34, InstructionSet_AVX10v1_V512=35, - InstructionSet_VectorT128=36, - InstructionSet_VectorT256=37, - InstructionSet_VectorT512=38, - InstructionSet_X86Base_X64=39, - InstructionSet_SSE_X64=40, - InstructionSet_SSE2_X64=41, - InstructionSet_SSE3_X64=42, - InstructionSet_SSSE3_X64=43, - InstructionSet_SSE41_X64=44, - InstructionSet_SSE42_X64=45, - InstructionSet_AVX_X64=46, - InstructionSet_AVX2_X64=47, - InstructionSet_AES_X64=48, - InstructionSet_BMI1_X64=49, - InstructionSet_BMI2_X64=50, - InstructionSet_FMA_X64=51, - InstructionSet_LZCNT_X64=52, - InstructionSet_PCLMULQDQ_X64=53, - InstructionSet_POPCNT_X64=54, - InstructionSet_AVXVNNI_X64=55, - InstructionSet_MOVBE_X64=56, - InstructionSet_X86Serialize_X64=57, - InstructionSet_AVX512F_X64=58, - InstructionSet_AVX512F_VL_X64=59, - InstructionSet_AVX512BW_X64=60, - InstructionSet_AVX512BW_VL_X64=61, - InstructionSet_AVX512CD_X64=62, - InstructionSet_AVX512CD_VL_X64=63, - InstructionSet_AVX512DQ_X64=64, - InstructionSet_AVX512DQ_VL_X64=65, - InstructionSet_AVX512VBMI_X64=66, - InstructionSet_AVX512VBMI_VL_X64=67, - InstructionSet_AVX10v1_X64=68, - InstructionSet_AVX10v1_V256_X64=69, - InstructionSet_AVX10v1_V512_X64=70, + InstructionSet_APX=36, + InstructionSet_VectorT128=37, + InstructionSet_VectorT256=38, + InstructionSet_VectorT512=39, + InstructionSet_X86Base_X64=40, + InstructionSet_SSE_X64=41, + InstructionSet_SSE2_X64=42, + InstructionSet_SSE3_X64=43, + InstructionSet_SSSE3_X64=44, + InstructionSet_SSE41_X64=45, + InstructionSet_SSE42_X64=46, + InstructionSet_AVX_X64=47, + InstructionSet_AVX2_X64=48, + InstructionSet_AES_X64=49, + InstructionSet_BMI1_X64=50, + InstructionSet_BMI2_X64=51, + InstructionSet_FMA_X64=52, + InstructionSet_LZCNT_X64=53, + InstructionSet_PCLMULQDQ_X64=54, + InstructionSet_POPCNT_X64=55, + InstructionSet_AVXVNNI_X64=56, + InstructionSet_MOVBE_X64=57, + InstructionSet_X86Serialize_X64=58, + InstructionSet_AVX512F_X64=59, + 
InstructionSet_AVX512F_VL_X64=60, + InstructionSet_AVX512BW_X64=61, + InstructionSet_AVX512BW_VL_X64=62, + InstructionSet_AVX512CD_X64=63, + InstructionSet_AVX512CD_VL_X64=64, + InstructionSet_AVX512DQ_X64=65, + InstructionSet_AVX512DQ_VL_X64=66, + InstructionSet_AVX512VBMI_X64=67, + InstructionSet_AVX512VBMI_VL_X64=68, + InstructionSet_AVX10v1_X64=69, + InstructionSet_AVX10v1_V256_X64=70, + InstructionSet_AVX10v1_V512_X64=71, + InstructionSet_APX_X64=72, #endif // TARGET_X86 }; @@ -364,6 +368,8 @@ struct CORINFO_InstructionSetFlags AddInstructionSet(InstructionSet_AVX10v1_V256_X64); if (HasInstructionSet(InstructionSet_AVX10v1_V512)) AddInstructionSet(InstructionSet_AVX10v1_V512_X64); + if (HasInstructionSet(InstructionSet_APX)) + AddInstructionSet(InstructionSet_APX_X64); #endif // TARGET_AMD64 #ifdef TARGET_X86 #endif // TARGET_X86 @@ -572,6 +578,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512_X64); + if (resultflags.HasInstructionSet(InstructionSet_APX) && !resultflags.HasInstructionSet(InstructionSet_APX_X64)) + resultflags.RemoveInstructionSet(InstructionSet_APX); + if (resultflags.HasInstructionSet(InstructionSet_APX_X64) && !resultflags.HasInstructionSet(InstructionSet_APX)) + resultflags.RemoveInstructionSet(InstructionSet_APX_X64); if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_SSE); if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE)) @@ -1000,6 +1010,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "AVX10v1_V512"; case InstructionSet_AVX10v1_V512_X64 : return "AVX10v1_V512_X64"; + case InstructionSet_APX : + return "APX"; + case InstructionSet_APX_X64 : + return "APX_X64"; case InstructionSet_VectorT128 : return "VectorT128"; case InstructionSet_VectorT256 : @@ -1078,6 +1092,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "AVX10v1_V256"; case InstructionSet_AVX10v1_V512 : return "AVX10v1_V512"; + case InstructionSet_APX : + return "APX"; case InstructionSet_VectorT128 : return "VectorT128"; case InstructionSet_VectorT256 : @@ -1151,6 +1167,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Avx10v1: return InstructionSet_AVX10v1; case READYTORUN_INSTRUCTION_Avx10v1_V256: return InstructionSet_AVX10v1_V256; case READYTORUN_INSTRUCTION_Avx10v1_V512: return InstructionSet_AVX10v1_V512; + case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX; case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256; case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512; @@ -1188,6 +1205,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Avx10v1: return InstructionSet_AVX10v1; case READYTORUN_INSTRUCTION_Avx10v1_V256: return InstructionSet_AVX10v1_V256; case READYTORUN_INSTRUCTION_Avx10v1_V512: return InstructionSet_AVX10v1_V512; + case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX; case 
READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256; case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512; diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 46b50f5410ce46..2a4362049af8d4 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* 227e46fa-1be3-4770-b613-4a239e7c28aa */ - 0x227e46fa, - 0x1be3, - 0x4770, - {0xb6, 0x13, 0x4a, 0x23, 0x9e, 0x7c, 0x28, 0xaa} +constexpr GUID JITEEVersionIdentifier = { /* deed5db4-371c-4b2d-904d-9cd39cb48764 */ + 0xdeed5db4, + 0x371c, + 0x4b2d, + {0x90, 0x4d, 0x9c, 0xd3, 0x9c, 0xb4, 0x87, 0x64} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index fe388c04a60fc5..1db4e4e6e15791 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -55,6 +55,7 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Avx10v1=44, READYTORUN_INSTRUCTION_Avx10v1_V256=45, READYTORUN_INSTRUCTION_Avx10v1_V512=46, + READYTORUN_INSTRUCTION_Apx=47, }; diff --git a/src/coreclr/nativeaot/Runtime/startup.cpp b/src/coreclr/nativeaot/Runtime/startup.cpp index f87bc947d970ac..fd3cfa57756b0b 100644 --- a/src/coreclr/nativeaot/Runtime/startup.cpp +++ b/src/coreclr/nativeaot/Runtime/startup.cpp @@ -50,8 +50,8 @@ extern RhConfig * g_pRhConfig; #if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64) // This field is inspected from the generated code to determine what intrinsics are available. -EXTERN_C int g_cpuFeatures; -int g_cpuFeatures = 0; +EXTERN_C long long g_cpuFeatures; +long long g_cpuFeatures = 0; // This field is defined in the generated code and sets the ISA expectations. EXTERN_C int g_requiredCpuFeatures; diff --git a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp index 94424b17562be9..9040a44e01f3a6 100644 --- a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp @@ -482,14 +482,14 @@ REDHAWK_PALEXPORT CONTEXT* PalAllocateCompleteOSContext(_Out_ uint8_t** contextB // Determine if the processor supports AVX or AVX512 so we could // retrieve extended registers DWORD64 FeatureMask = GetEnabledXStateFeatures(); - if ((FeatureMask & (XSTATE_MASK_AVX | XSTATE_MASK_AVX512)) != 0) + if ((FeatureMask & (XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX)) != 0) { context = context | CONTEXT_XSTATE; } // Retrieve contextSize by passing NULL for Buffer DWORD contextSize = 0; - ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | XSTATE_MASK_AVX | XSTATE_MASK_MPX | XSTATE_MASK_AVX512; + ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | XSTATE_MASK_AVX | XSTATE_MASK_MPX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX; // The initialize call should fail but return contextSize BOOL success = pfnInitializeContext2 ? pfnInitializeContext2(NULL, context, NULL, &contextSize, xStateCompactionMask) : @@ -540,9 +540,9 @@ REDHAWK_PALEXPORT _Success_(return) bool REDHAWK_PALAPI PalGetCompleteThreadCont #if defined(TARGET_X86) || defined(TARGET_AMD64) // Make sure that AVX feature mask is set, if supported. 
This should not normally fail. // The system silently ignores any feature specified in the FeatureMask which is not enabled on the processor. - if (!SetXStateFeaturesMask(pCtx, XSTATE_MASK_AVX | XSTATE_MASK_AVX512)) + if (!SetXStateFeaturesMask(pCtx, XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX)) { - _ASSERTE(!"Could not apply XSTATE_MASK_AVX | XSTATE_MASK_AVX512"); + _ASSERTE(!"Could not apply XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX"); return FALSE; } #endif //defined(TARGET_X86) || defined(TARGET_AMD64) diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index c64cf8ee020adc..cf660dd10211ac 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -1398,12 +1398,14 @@ typedef struct _KNONVOLATILE_CONTEXT_POINTERS { #define XSTATE_AVX512_KMASK (5) #define XSTATE_AVX512_ZMM_H (6) #define XSTATE_AVX512_ZMM (7) +#define XSTATE_APX (19) #define XSTATE_MASK_GSSE (UI64(1) << (XSTATE_GSSE)) #define XSTATE_MASK_AVX (XSTATE_MASK_GSSE) #define XSTATE_MASK_AVX512 ((UI64(1) << (XSTATE_AVX512_KMASK)) | \ (UI64(1) << (XSTATE_AVX512_ZMM_H)) | \ (UI64(1) << (XSTATE_AVX512_ZMM))) +#define XSTATE_MASK_APX (UI64(1) << (XSTATE_APX)) typedef struct DECLSPEC_ALIGN(16) _M128A { ULONGLONG Low; @@ -1640,6 +1642,27 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { M512 Zmm30; M512 Zmm31; }; + + struct + { + DWORD Egpr16; + DWORD Egpr17; + DWORD Egpr18; + DWORD Egpr19; + DWORD Egpr20; + DWORD Egpr21; + DWORD Egpr22; + DWORD Egpr23; + DWORD Egpr24; + DWORD Egpr25; + DWORD Egpr26; + DWORD Egpr27; + DWORD Egpr28; + DWORD Egpr29; + DWORD Egpr30; + DWORD Egpr31; + }; + } CONTEXT, *PCONTEXT, *LPCONTEXT; // diff --git a/src/coreclr/pal/src/arch/amd64/asmconstants.h b/src/coreclr/pal/src/arch/amd64/asmconstants.h index d5a72cf6eda23a..1e0951922c5e3c 100644 --- a/src/coreclr/pal/src/arch/amd64/asmconstants.h +++ b/src/coreclr/pal/src/arch/amd64/asmconstants.h @@ -8,12 +8,14 @@ #define XSTATE_AVX512_KMASK (5) #define XSTATE_AVX512_ZMM_H (6) #define XSTATE_AVX512_ZMM (7) +#define XSTATE_APX (19) #define XSTATE_MASK_GSSE (1 << (XSTATE_GSSE)) #define XSTATE_MASK_AVX (XSTATE_MASK_GSSE) #define XSTATE_MASK_AVX512 ((1 << (XSTATE_AVX512_KMASK)) | \ (1 << (XSTATE_AVX512_ZMM_H)) | \ (1 << (XSTATE_AVX512_ZMM))) +#define XSTATE_MASK_APX (1 << (XSTATE_APX)) // The arch bit is normally set in the flag constants below. Since this is already arch-specific code and the arch bit is not // relevant, the arch bit is excluded from the flag constants below for simpler tests. @@ -91,7 +93,8 @@ #define CONTEXT_KMask0 CONTEXT_Ymm0H+(16*16) #define CONTEXT_Zmm0H CONTEXT_KMask0+(8*8) #define CONTEXT_Zmm16 CONTEXT_Zmm0H+(32*16) -#define CONTEXT_Size CONTEXT_Zmm16+(64*16) +#define CONTEXT_Egpr CONTEXT_Zmm16+(16*8) +#define CONTEXT_Size CONTEXT_Egpr+(8*16) #else // HOST_64BIT diff --git a/src/coreclr/pal/src/arch/amd64/context2.S b/src/coreclr/pal/src/arch/amd64/context2.S index dba772f9dbbf5e..ac6f548f47f65b 100644 --- a/src/coreclr/pal/src/arch/amd64/context2.S +++ b/src/coreclr/pal/src/arch/amd64/context2.S @@ -183,6 +183,26 @@ LOCAL_LABEL(Done_Restore_CONTEXT_FLOATING_POINT): kmovq k6, qword ptr [rdi + (CONTEXT_KMask0 + 6 * 8)] kmovq k7, qword ptr [rdi + (CONTEXT_KMask0 + 7 * 8)] + test BYTE PTR [rdi + CONTEXT_XStateFeaturesMask], XSTATE_MASK_APX + je LOCAL_LABEL(Done_Restore_CONTEXT_XSTATE) + + // Restore the EGPR state, EGPR use previous MPX field, need to add an offset. 
+ mov r16, qword ptr [rdi + CONTEXT_Egpr + 0 * 8] + mov r17, qword ptr [rdi + CONTEXT_Egpr + 1 * 8] + mov r18, qword ptr [rdi + CONTEXT_Egpr + 2 * 8] + mov r19, qword ptr [rdi + CONTEXT_Egpr + 3 * 8] + mov r20, qword ptr [rdi + CONTEXT_Egpr + 4 * 8] + mov r21, qword ptr [rdi + CONTEXT_Egpr + 5 * 8] + mov r22, qword ptr [rdi + CONTEXT_Egpr + 6 * 8] + mov r23, qword ptr [rdi + CONTEXT_Egpr + 7 * 8] + mov r24, qword ptr [rdi + CONTEXT_Egpr + 8 * 8] + mov r25, qword ptr [rdi + CONTEXT_Egpr + 9 * 8] + mov r26, qword ptr [rdi + CONTEXT_Egpr + 10 * 8] + mov r27, qword ptr [rdi + CONTEXT_Egpr + 11 * 8] + mov r28, qword ptr [rdi + CONTEXT_Egpr + 12 * 8] + mov r29, qword ptr [rdi + CONTEXT_Egpr + 13 * 8] + mov r30, qword ptr [rdi + CONTEXT_Egpr + 14 * 8] + mov r31, qword ptr [rdi + CONTEXT_Egpr + 15 * 8] LOCAL_LABEL(Done_Restore_CONTEXT_XSTATE): test BYTE PTR [rdi + CONTEXT_ContextFlags], CONTEXT_CONTROL diff --git a/src/coreclr/pal/src/include/pal/context.h b/src/coreclr/pal/src/include/pal/context.h index 6eeeaa6fed7453..bc50951a4c8741 100644 --- a/src/coreclr/pal/src/include/pal/context.h +++ b/src/coreclr/pal/src/include/pal/context.h @@ -58,6 +58,7 @@ using asm_sigcontext::_xstate; #if defined(XSTATE_SUPPORTED) || (defined(HOST_AMD64) && defined(HAVE_MACH_EXCEPTIONS)) bool Xstate_IsAvx512Supported(); +bool Xstate_IsApxSupported(); #endif // XSTATE_SUPPORTED || (HOST_AMD64 && HAVE_MACH_EXCEPTIONS) #ifdef HOST_S390X @@ -383,6 +384,10 @@ bool Xstate_IsAvx512Supported(); #define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK | XFEATURE_MASK_ZMM_Hi256 | XFEATURE_MASK_Hi16_ZMM) #endif // XFEATURE_MASK_AVX512 +#ifndef XFEATURE_MASK_APX +#define XFEATURE_MASK_APX (1 << XSTATE_APX) +#endif // XFEATURE_MASK_APX + #if HAVE__FPX_SW_BYTES_WITH_XSTATE_BV #define FPREG_FpxSwBytes_xfeatures(uc) FPREG_FpxSwBytes(uc)->xstate_bv #else @@ -405,7 +410,7 @@ struct Xstate_ExtendedFeature uint32_t size; }; -#define Xstate_ExtendedFeatures_Count (XSTATE_AVX512_ZMM + 1) +#define Xstate_ExtendedFeatures_Count (XSTATE_APX + 1) extern Xstate_ExtendedFeature Xstate_ExtendedFeatures[Xstate_ExtendedFeatures_Count]; inline _fpx_sw_bytes *FPREG_FpxSwBytes(const ucontext_t *uc) @@ -542,6 +547,27 @@ inline void *FPREG_Xstate_Hi16Zmm(const ucontext_t *uc, uint32_t *featureSize) _ASSERTE(FPREG_HasAvx512Registers(uc)); return FPREG_Xstate_ExtendedFeature(uc, featureSize, XSTATE_AVX512_ZMM); } + +inline bool FPREG_HasApxRegisters(const ucontext_t *uc) +{ + if (!FPREG_HasExtendedState(uc)) + { + return false; + } + + if ((FPREG_FpxSwBytes_xfeatures(uc) & XFEATURE_MASK_APX) != XFEATURE_MASK_APX) + { + return false; + } + + return Xstate_IsApxSupported(); +} + +inline void *FPREG_Xstate_Egpr(const ucontext_t *uc, uint32_t *featureSize) +{ + _ASSERTE(FPREG_HasApxRegisters(uc)); + return FPREG_Xstate_ExtendedFeature(uc, featureSize, XSTATE_APX); +} #endif // XSTATE_SUPPORTED ///////////////////// diff --git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index 04fabab0e7253e..4c5a701adf98c4 100644 --- a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -380,6 +380,59 @@ bool Xstate_IsAvx512Supported() return Xstate_Avx512Supported == 1; #endif } + +bool Xstate_IsApxSupported() +{ +#if defined(HAVE_MACH_EXCEPTIONS) + // Ruihan TODO: I assume OSX will never support APX + return false; +#else + static int Xstate_ApxSupported = -1; + + if (Xstate_ApxSupported == -1) + { + int cpuidInfo[4]; + + const int CPUID_EAX = 0; + const int CPUID_EBX = 1; + const int CPUID_ECX = 2; + 
const int CPUID_EDX = 3; + +#ifdef _DEBUG + // We should only be calling this function if we know the extended feature exists + __cpuid(cpuidInfo, 0x00000000); + _ASSERTE(static_cast(cpuidInfo[CPUID_EAX]) >= 0x0D); +#endif // _DEBUG + + __cpuidex(cpuidInfo, 0x0000000D, 0x00000000); + + if ((cpuidInfo[CPUID_EAX] & XSTATE_MASK_APX) == XSTATE_MASK_APX) + { + // Knight's Landing and Knight's Mill shipped without all 5 of the "baseline" + // AVX-512 ISAs that are required by x86-64-v4. Specifically they do not include + // BW, DQ, or VL. RyuJIT currently requires all 5 ISAs to be present so we will + // only enable Avx512 context save/restore when all exist. This requires us to + // query which ISAs are actually supported to ensure they're all present. + + __cpuidex(cpuidInfo, 0x00000007, 0x00000001); + + const int requiredApxFlags = (1 << 21); + + if ((cpuidInfo[CPUID_EDX] & requiredApxFlags) == requiredApxFlags) + { + Xstate_ApxSupported = 1; + } + } + + if (Xstate_ApxSupported == -1) + { + Xstate_ApxSupported = 0; + } + } + + return Xstate_ApxSupported == 1; +#endif +} #endif // XSTATE_SUPPORTED || defined(HOST_AMD64) && defined(HAVE_MACH_EXCEPTIONS) #if !HAVE_MACH_EXCEPTIONS @@ -800,6 +853,15 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) _ASSERT(size == (sizeof(M512) * 16)); memcpy_s(dest, sizeof(M512) * 16, &lpContext->Zmm16, sizeof(M512) * 16); } + + if (FPREG_HasApxRegisters(native)) + { + _ASSERT((lpContext->XStateFeaturesMask & XSTATE_MASK_APX) == XSTATE_MASK_APX); + + dest = FPREG_Xstate_Egpr(native, &size); + _ASSERT(size == (sizeof(DWORD64) * 16)); + memcpy_s(dest, sizeof(DWORD64) * 16, &lpContext->Egpr16, sizeof(DWORD64) * 16); + } } } #endif //HOST_AMD64 && XSTATE_SUPPORTED @@ -1017,6 +1079,15 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex lpContext->XStateFeaturesMask |= XSTATE_MASK_AVX512; } + + if (FPREG_HasApxRegisters(native)) + { + src = FPREG_Xstate_Egpr(native, &size); + _ASSERT(size == (sizeof(DWORD64) * 16)); + memcpy_s(&lpContext->Egpr16, sizeof(DWORD64) * 16, src, sizeof(DWORD64) * 16); + + lpContext->XStateFeaturesMask |= XSTATE_MASK_APX; + } } else #endif // XSTATE_SUPPORTED diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index bef78e07ac7f06..fd9fd8cdea142b 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -34,7 +34,7 @@ public static bool IsHardwareIntrinsic(MethodDesc method) return false; } - public static void AddRuntimeRequiredIsaFlagsToBuilder(InstructionSetSupportBuilder builder, int flags) + public static void AddRuntimeRequiredIsaFlagsToBuilder(InstructionSetSupportBuilder builder, long flags) { switch (builder.Architecture) { @@ -85,7 +85,7 @@ private static class XArchIntrinsicConstants public const int Avx10v1_v256 = 0x8000000; public const int Avx10v1_v512 = 0x10000000; - public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) + public static void AddToBuilder(InstructionSetSupportBuilder builder, long flags) { if ((flags & Aes) != 0) builder.AddSupportedInstructionSet("aes"); @@ -145,9 +145,11 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) builder.AddSupportedInstructionSet("avx10v1_v256"); if ((flags & Avx10v1_v512) != 0) builder.AddSupportedInstructionSet("avx10v1_v512"); + if ((flags & Apx) != 0) + 
builder.AddSupportedInstructionSet("apx"); } - public static int FromInstructionSet(InstructionSet instructionSet) + public static long FromInstructionSet(InstructionSet instructionSet) { Debug.Assert(InstructionSet.X64_AES == InstructionSet.X86_AES); Debug.Assert(InstructionSet.X64_SSE41 == InstructionSet.X86_SSE41); @@ -249,7 +251,7 @@ private static class Arm64IntrinsicConstants public const int Rcpc2 = 0x0200; public const int Sve = 0x0400; - public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) + public static void AddToBuilder(InstructionSetSupportBuilder builder, long flags) { if ((flags & AdvSimd) != 0) builder.AddSupportedInstructionSet("neon"); @@ -275,7 +277,7 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) builder.AddSupportedInstructionSet("sve"); } - public static int FromInstructionSet(InstructionSet instructionSet) + public static long FromInstructionSet(InstructionSet instructionSet) { return instructionSet switch { diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs index 8a7303f15f3711..227ac21a310039 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -57,7 +57,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru string jitInterfaceLibrary = "jitinterface_" + RuntimeInformation.ProcessArchitecture.ToString().ToLowerInvariant(); nint libHandle = NativeLibrary.Load(jitInterfaceLibrary, System.Reflection.Assembly.GetExecutingAssembly(), DllImportSearchPath.ApplicationDirectory); - int cpuFeatures; + long cpuFeatures; unsafe { var getCpuFeatures = (delegate* unmanaged)NativeLibrary.GetExport(libHandle, "JitGetProcessorFeatures"); diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index 0e2fec09e19d57..2b4efb809a18da 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -58,6 +58,7 @@ public enum ReadyToRunInstructionSet Avx10v1=44, Avx10v1_V256=45, Avx10v1_V512=46, + Apx=47, } } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 533b017e2bae85..3745a0707949e4 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -124,6 +124,8 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_AVX10v1_V256_X64: return ReadyToRunInstructionSet.Avx10v1_V256; case InstructionSet.X64_AVX10v1_V512: return ReadyToRunInstructionSet.Avx10v1_V512; case InstructionSet.X64_AVX10v1_V512_X64: return ReadyToRunInstructionSet.Avx10v1_V512; + case InstructionSet.X64_APX: return ReadyToRunInstructionSet.Apx; + case InstructionSet.X64_APX_X64: return ReadyToRunInstructionSet.Apx; case InstructionSet.X64_VectorT128: return ReadyToRunInstructionSet.VectorT128; case InstructionSet.X64_VectorT256: return ReadyToRunInstructionSet.VectorT256; case InstructionSet.X64_VectorT512: return ReadyToRunInstructionSet.VectorT512; @@ -203,6 +205,8 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_AVX10v1_V256_X64: return null; case InstructionSet.X86_AVX10v1_V512: return 
ReadyToRunInstructionSet.Avx10v1_V512; case InstructionSet.X86_AVX10v1_V512_X64: return null; + case InstructionSet.X86_APX: return ReadyToRunInstructionSet.Apx; + case InstructionSet.X86_APX_X64: return null; case InstructionSet.X86_VectorT128: return ReadyToRunInstructionSet.VectorT128; case InstructionSet.X86_VectorT256: return ReadyToRunInstructionSet.VectorT256; case InstructionSet.X86_VectorT512: return ReadyToRunInstructionSet.VectorT512; diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 270d4834a9c2b2..fd751fbbaf3534 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -76,6 +76,7 @@ public enum InstructionSet X64_AVX10v1 = InstructionSet_X64.AVX10v1, X64_AVX10v1_V256 = InstructionSet_X64.AVX10v1_V256, X64_AVX10v1_V512 = InstructionSet_X64.AVX10v1_V512, + X64_APX = InstructionSet_X64.APX, X64_VectorT128 = InstructionSet_X64.VectorT128, X64_VectorT256 = InstructionSet_X64.VectorT256, X64_VectorT512 = InstructionSet_X64.VectorT512, @@ -111,6 +112,7 @@ public enum InstructionSet X64_AVX10v1_X64 = InstructionSet_X64.AVX10v1_X64, X64_AVX10v1_V256_X64 = InstructionSet_X64.AVX10v1_V256_X64, X64_AVX10v1_V512_X64 = InstructionSet_X64.AVX10v1_V512_X64, + X64_APX_X64 = InstructionSet_X64.APX_X64, X86_X86Base = InstructionSet_X86.X86Base, X86_SSE = InstructionSet_X86.SSE, X86_SSE2 = InstructionSet_X86.SSE2, @@ -146,6 +148,7 @@ public enum InstructionSet X86_AVX10v1 = InstructionSet_X86.AVX10v1, X86_AVX10v1_V256 = InstructionSet_X86.AVX10v1_V256, X86_AVX10v1_V512 = InstructionSet_X86.AVX10v1_V512, + X86_APX = InstructionSet_X86.APX, X86_VectorT128 = InstructionSet_X86.VectorT128, X86_VectorT256 = InstructionSet_X86.VectorT256, X86_VectorT512 = InstructionSet_X86.VectorT512, @@ -181,6 +184,7 @@ public enum InstructionSet X86_AVX10v1_X64 = InstructionSet_X86.AVX10v1_X64, X86_AVX10v1_V256_X64 = InstructionSet_X86.AVX10v1_V256_X64, X86_AVX10v1_V512_X64 = InstructionSet_X86.AVX10v1_V512_X64, + X86_APX_X64 = InstructionSet_X86.APX_X64, } public enum InstructionSet_ARM64 { @@ -252,41 +256,43 @@ public enum InstructionSet_X64 AVX10v1 = 33, AVX10v1_V256 = 34, AVX10v1_V512 = 35, - VectorT128 = 36, - VectorT256 = 37, - VectorT512 = 38, - X86Base_X64 = 39, - SSE_X64 = 40, - SSE2_X64 = 41, - SSE3_X64 = 42, - SSSE3_X64 = 43, - SSE41_X64 = 44, - SSE42_X64 = 45, - AVX_X64 = 46, - AVX2_X64 = 47, - AES_X64 = 48, - BMI1_X64 = 49, - BMI2_X64 = 50, - FMA_X64 = 51, - LZCNT_X64 = 52, - PCLMULQDQ_X64 = 53, - POPCNT_X64 = 54, - AVXVNNI_X64 = 55, - MOVBE_X64 = 56, - X86Serialize_X64 = 57, - AVX512F_X64 = 58, - AVX512F_VL_X64 = 59, - AVX512BW_X64 = 60, - AVX512BW_VL_X64 = 61, - AVX512CD_X64 = 62, - AVX512CD_VL_X64 = 63, - AVX512DQ_X64 = 64, - AVX512DQ_VL_X64 = 65, - AVX512VBMI_X64 = 66, - AVX512VBMI_VL_X64 = 67, - AVX10v1_X64 = 68, - AVX10v1_V256_X64 = 69, - AVX10v1_V512_X64 = 70, + APX = 36, + VectorT128 = 37, + VectorT256 = 38, + VectorT512 = 39, + X86Base_X64 = 40, + SSE_X64 = 41, + SSE2_X64 = 42, + SSE3_X64 = 43, + SSSE3_X64 = 44, + SSE41_X64 = 45, + SSE42_X64 = 46, + AVX_X64 = 47, + AVX2_X64 = 48, + AES_X64 = 49, + BMI1_X64 = 50, + BMI2_X64 = 51, + FMA_X64 = 52, + LZCNT_X64 = 53, + PCLMULQDQ_X64 = 54, + POPCNT_X64 = 55, + AVXVNNI_X64 = 56, + MOVBE_X64 = 57, + X86Serialize_X64 = 58, + AVX512F_X64 = 59, + AVX512F_VL_X64 = 60, + AVX512BW_X64 = 61, + AVX512BW_VL_X64 = 62, + AVX512CD_X64 = 63, + AVX512CD_VL_X64 = 64, + AVX512DQ_X64 
= 65, + AVX512DQ_VL_X64 = 66, + AVX512VBMI_X64 = 67, + AVX512VBMI_VL_X64 = 68, + AVX10v1_X64 = 69, + AVX10v1_V256_X64 = 70, + AVX10v1_V512_X64 = 71, + APX_X64 = 72, } public enum InstructionSet_X86 @@ -328,41 +334,43 @@ public enum InstructionSet_X86 AVX10v1 = 33, AVX10v1_V256 = 34, AVX10v1_V512 = 35, - VectorT128 = 36, - VectorT256 = 37, - VectorT512 = 38, - X86Base_X64 = 39, - SSE_X64 = 40, - SSE2_X64 = 41, - SSE3_X64 = 42, - SSSE3_X64 = 43, - SSE41_X64 = 44, - SSE42_X64 = 45, - AVX_X64 = 46, - AVX2_X64 = 47, - AES_X64 = 48, - BMI1_X64 = 49, - BMI2_X64 = 50, - FMA_X64 = 51, - LZCNT_X64 = 52, - PCLMULQDQ_X64 = 53, - POPCNT_X64 = 54, - AVXVNNI_X64 = 55, - MOVBE_X64 = 56, - X86Serialize_X64 = 57, - AVX512F_X64 = 58, - AVX512F_VL_X64 = 59, - AVX512BW_X64 = 60, - AVX512BW_VL_X64 = 61, - AVX512CD_X64 = 62, - AVX512CD_VL_X64 = 63, - AVX512DQ_X64 = 64, - AVX512DQ_VL_X64 = 65, - AVX512VBMI_X64 = 66, - AVX512VBMI_VL_X64 = 67, - AVX10v1_X64 = 68, - AVX10v1_V256_X64 = 69, - AVX10v1_V512_X64 = 70, + APX = 36, + VectorT128 = 37, + VectorT256 = 38, + VectorT512 = 39, + X86Base_X64 = 40, + SSE_X64 = 41, + SSE2_X64 = 42, + SSE3_X64 = 43, + SSSE3_X64 = 44, + SSE41_X64 = 45, + SSE42_X64 = 46, + AVX_X64 = 47, + AVX2_X64 = 48, + AES_X64 = 49, + BMI1_X64 = 50, + BMI2_X64 = 51, + FMA_X64 = 52, + LZCNT_X64 = 53, + PCLMULQDQ_X64 = 54, + POPCNT_X64 = 55, + AVXVNNI_X64 = 56, + MOVBE_X64 = 57, + X86Serialize_X64 = 58, + AVX512F_X64 = 59, + AVX512F_VL_X64 = 60, + AVX512BW_X64 = 61, + AVX512BW_VL_X64 = 62, + AVX512CD_X64 = 63, + AVX512CD_VL_X64 = 64, + AVX512DQ_X64 = 65, + AVX512DQ_VL_X64 = 66, + AVX512VBMI_X64 = 67, + AVX512VBMI_VL_X64 = 68, + AVX10v1_X64 = 69, + AVX10v1_V256_X64 = 70, + AVX10v1_V512_X64 = 71, + APX_X64 = 72, } public unsafe struct InstructionSetFlags : IEnumerable @@ -710,6 +718,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512_X64)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_APX)) + resultflags.AddInstructionSet(InstructionSet.X64_APX_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_APX_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_APX); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) @@ -1067,6 +1079,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V256); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512_X64)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_APX_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_APX); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_SSE); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) @@ -1405,6 +1419,7 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("avx10v1", "Avx10v1", InstructionSet.X64_AVX10v1, true); yield return new InstructionSetInfo("avx10v1_v256", "Avx10v1_V256", InstructionSet.X64_AVX10v1_V256, true); yield return new InstructionSetInfo("avx10v1_v512", "Avx10v1_V512", InstructionSet.X64_AVX10v1_V512, true); + yield return new InstructionSetInfo("apx", 
"Apx", InstructionSet.X64_APX, true); yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.X64_VectorT128, true); yield return new InstructionSetInfo("vectort256", "VectorT256", InstructionSet.X64_VectorT256, true); yield return new InstructionSetInfo("vectort512", "VectorT512", InstructionSet.X64_VectorT512, true); @@ -1446,6 +1461,7 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("avx10v1", "Avx10v1", InstructionSet.X86_AVX10v1, true); yield return new InstructionSetInfo("avx10v1_v256", "Avx10v1_V256", InstructionSet.X86_AVX10v1_V256, true); yield return new InstructionSetInfo("avx10v1_v512", "Avx10v1_V512", InstructionSet.X86_AVX10v1_V512, true); + yield return new InstructionSetInfo("apx", "Apx", InstructionSet.X86_APX, true); yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.X86_VectorT128, true); yield return new InstructionSetInfo("vectort256", "VectorT256", InstructionSet.X86_VectorT256, true); yield return new InstructionSetInfo("vectort512", "VectorT512", InstructionSet.X86_VectorT512, true); @@ -1544,6 +1560,8 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture) AddInstructionSet(InstructionSet.X64_AVX10v1_V256_X64); if (HasInstructionSet(InstructionSet.X64_AVX10v1_V512)) AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64); + if (HasInstructionSet(InstructionSet.X64_APX)) + AddInstructionSet(InstructionSet.X64_APX_X64); break; case TargetArchitecture.X86: @@ -1601,6 +1619,7 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X64_AVX10v1_X64); AddInstructionSet(InstructionSet.X64_AVX10v1_V256_X64); AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64); + AddInstructionSet(InstructionSet.X64_APX_X64); break; case TargetArchitecture.X86: @@ -1636,6 +1655,7 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X86_AVX10v1_X64); AddInstructionSet(InstructionSet.X86_AVX10v1_V256_X64); AddInstructionSet(InstructionSet.X86_AVX10v1_V512_X64); + AddInstructionSet(InstructionSet.X86_APX_X64); break; } } @@ -1931,6 +1951,12 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite else { return InstructionSet.X64_AVX10v1_V512; } + case "Apx": + if (nestedTypeName == "X64") + { return InstructionSet.X64_APX_X64; } + else + { return InstructionSet.X64_APX; } + case "VectorT128": { return InstructionSet.X64_VectorT128; } @@ -2043,6 +2069,9 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite case "Avx10v1_V512": { return InstructionSet.X86_AVX10v1_V512; } + case "Apx": + { return InstructionSet.X86_APX; } + case "VectorT128": { return InstructionSet.X86_VectorT128; } diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 5f1953f71e9190..e4952d10ed18cb 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -60,6 +60,7 @@ instructionset ,X86 ,Avx512Vbmi_VL , ,38 ,AVX512VBMI_VL instructionset ,X86 ,Avx10v1 , ,44 ,AVX10v1 ,avx10v1 instructionset ,X86 ,Avx10v1_V256 , ,45 ,AVX10v1_V256 ,avx10v1_v256 instructionset ,X86 ,Avx10v1_V512 , ,46 ,AVX10v1_V512 ,avx10v1_v512 +instructionset ,X86 ,Apx , ,47 ,APX ,apx instructionset ,X86 ,VectorT128 , ,39 
,VectorT128 ,vectort128 instructionset ,X86 ,VectorT256 , ,40 ,VectorT256 ,vectort256 instructionset ,X86 ,VectorT512 , ,41 ,VectorT512 ,vectort512 @@ -96,6 +97,7 @@ instructionset64bit,X86 ,AVX512VBMI_VL instructionset64bit,X86 ,AVX10v1 instructionset64bit,X86 ,AVX10v1_V256 instructionset64bit,X86 ,AVX10v1_V512 +instructionset64bit,X86 ,APX vectorinstructionset,X86 ,Vector128 vectorinstructionset,X86 ,Vector256 diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ExpectedIsaFeaturesRootProvider.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ExpectedIsaFeaturesRootProvider.cs index 90f7b77a4f76fb..161664bbf9fcb9 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ExpectedIsaFeaturesRootProvider.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ExpectedIsaFeaturesRootProvider.cs @@ -22,9 +22,9 @@ void ICompilationRootProvider.AddCompilationRoots(IRootingServiceProvider rootPr || _isaSupport.Architecture == TargetArchitecture.X86 || _isaSupport.Architecture == TargetArchitecture.ARM64) { - int isaFlags = HardwareIntrinsicHelpers.GetRuntimeRequiredIsaFlags(_isaSupport); + long isaFlags = HardwareIntrinsicHelpers.GetRuntimeRequiredIsaFlags(_isaSupport); byte[] bytes = BitConverter.GetBytes(isaFlags); - rootProvider.RootReadOnlyDataBlob(bytes, 4, "ISA support flags", "g_requiredCpuFeatures"); + rootProvider.RootReadOnlyDataBlob(bytes, 8, "ISA support flags", "g_requiredCpuFeatures"); } } } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs index ee355b964404d7..6a03ddf8ae74f9 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs @@ -28,7 +28,7 @@ public static MethodIL EmitIsSupportedIL(MethodDesc method, FieldDesc isSupporte Debug.Assert(IsIsSupportedMethod(method)); Debug.Assert(isSupportedField.IsStatic && isSupportedField.FieldType.IsWellKnownType(WellKnownType.Int32)); - int flag = 0; + long flag = 0; switch (method.Context.Target.Architecture) { @@ -50,7 +50,8 @@ public static MethodIL EmitIsSupportedIL(MethodDesc method, FieldDesc isSupporte ILCodeStream codeStream = emit.NewCodeStream(); codeStream.Emit(ILOpcode.ldsfld, emit.NewToken(isSupportedField)); - codeStream.EmitLdc(flag); + codeStream.EmitLdc((int)flag); + codeStream.EmitLdc((int)(flag >> 32)); codeStream.Emit(ILOpcode.and); codeStream.EmitLdc(0); codeStream.Emit(ILOpcode.cgt_un); @@ -59,9 +60,9 @@ public static MethodIL EmitIsSupportedIL(MethodDesc method, FieldDesc isSupporte return emit.Link(method); } - public static int GetRuntimeRequiredIsaFlags(InstructionSetSupport instructionSetSupport) + public static long GetRuntimeRequiredIsaFlags(InstructionSetSupport instructionSetSupport) { - int result = 0; + long result = 0; switch (instructionSetSupport.Architecture) { case TargetArchitecture.X86: diff --git a/src/coreclr/tools/aot/jitinterface/jitwrapper.cpp b/src/coreclr/tools/aot/jitinterface/jitwrapper.cpp index 8fd38d192f84e9..dc0945967a93d7 100644 --- a/src/coreclr/tools/aot/jitinterface/jitwrapper.cpp +++ b/src/coreclr/tools/aot/jitinterface/jitwrapper.cpp @@ -53,7 +53,7 @@ DLL_EXPORT void JitProcessShutdownWork(ICorJitCompiler * pJit) return pJit->ProcessShutdownWork(nullptr); } -DLL_EXPORT int JitGetProcessorFeatures() +DLL_EXPORT long long JitGetProcessorFeatures() { return minipal_getcpufeatures(); } diff --git 
a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index d115a22850a742..3377f0004faa0a 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1255,7 +1255,7 @@ void EEJitManager::SetCpuInfo() CORJIT_FLAGS CPUCompileFlags; - int cpuFeatures = minipal_getcpufeatures(); + long long cpuFeatures = minipal_getcpufeatures(); #if defined(TARGET_X86) || defined(TARGET_AMD64) CPUCompileFlags.Set(InstructionSet_VectorT128); @@ -1443,6 +1443,11 @@ void EEJitManager::SetCpuInfo() { CPUCompileFlags.Set(InstructionSet_AVX10v1_V512); } + + if (((cpuFeatures & ((long long)XArchIntrinsicConstants_Apx << 32)) != 0)) + { + CPUCompileFlags.Set(InstructionSet_APX); + } #elif defined(TARGET_ARM64) #if !defined(TARGET_WINDOWS) diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 40ae02264804fd..c795403b7aa19c 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -1961,14 +1961,15 @@ CONTEXT* AllocateOSContextHelper(BYTE** contextBuffer) // Determine if the processor supports AVX so we could // retrieve extended registers DWORD64 FeatureMask = GetEnabledXStateFeatures(); - if ((FeatureMask & (XSTATE_MASK_AVX | XSTATE_MASK_AVX512)) != 0) + if ((FeatureMask & (XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX)) != 0) { context = context | CONTEXT_XSTATE; } // Retrieve contextSize by passing NULL for Buffer DWORD contextSize = 0; - ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | XSTATE_MASK_AVX | XSTATE_MASK_MPX | XSTATE_MASK_AVX512; + // Ruihan TODO: MPX and APX cannot and will not co-exist, will setting the mask in this way be an issue? + ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | XSTATE_MASK_AVX | XSTATE_MASK_MPX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX; // The initialize call should fail but return contextSize BOOL success = g_pfnInitializeContext2 ? g_pfnInitializeContext2(NULL, context, NULL, &contextSize, xStateCompactionMask) : @@ -2902,7 +2903,7 @@ BOOL Thread::RedirectThreadAtHandledJITCase(PFN_REDIRECTTARGET pTgt) // This should not normally fail. // The system silently ignores any feature specified in the FeatureMask // which is not enabled on the processor. - SetXStateFeaturesMask(pCtx, (XSTATE_MASK_AVX | XSTATE_MASK_AVX512)); + SetXStateFeaturesMask(pCtx, (XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX)); #endif //defined(TARGET_X86) || defined(TARGET_AMD64) // Make sure we specify CONTEXT_EXCEPTION_REQUEST to detect "trap frame reporting". @@ -3038,7 +3039,7 @@ BOOL Thread::RedirectCurrentThreadAtHandledJITCase(PFN_REDIRECTTARGET pTgt, CONT // Get may return 0 if no XState is set, which Set would not accept. 
if (srcFeatures != 0) { - success = SetXStateFeaturesMask(pCurrentThreadCtx, srcFeatures & (XSTATE_MASK_AVX | XSTATE_MASK_AVX512)); + success = SetXStateFeaturesMask(pCurrentThreadCtx, srcFeatures & (XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX)); _ASSERTE(success); if (!success) return FALSE; diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index 1af0f86f18243a..fbf2469fa32423 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -72,6 +72,10 @@ static uint32_t xmmYmmStateSupport() #define XSTATE_MASK_AVX512 (0xE0) /* 0b1110_0000 */ #endif // XSTATE_MASK_AVX512 +#ifndef XSTATE_MASK_APX +#define XSTATE_MASK_APX (0x80000) +#endif // XSTATE_MASK_APX + static uint32_t avx512StateSupport() { #if defined(HOST_APPLE) @@ -99,6 +103,21 @@ static uint32_t avx512StateSupport() #endif } +static uint32_t apxStateSupport() +{ +#if defined(HOST_APPLE) + return false; +#else + uint32_t eax; + __asm(" xgetbv\n" \ + : "=a"(eax) /*output in eax*/\ + : "c"(0) /*inputs - 0 in ecx*/\ + : "edx" /* registers that are clobbered*/ + ); + return ((eax & 0x80000) == 0x80000) ? 1 : 0; +#endif +} + static bool IsAvxEnabled() { return true; @@ -108,6 +127,11 @@ static bool IsAvx512Enabled() { return true; } + +static bool IsApxEnabled() +{ + return true; +} #endif // defined(HOST_X86) || defined(HOST_AMD64) #endif // HOST_UNIX @@ -137,12 +161,23 @@ static bool IsAvx512Enabled() return ((FeatureMask & XSTATE_MASK_AVX512) != 0); } +static bool IsApxEnabled() +{ + DWORD64 FeatureMask = GetEnabledXStateFeatures(); + return ((FeatureMask & XSTATE_MASK_APX) != 0); +} + +static uint32_t apxStateSupport() +{ + return ((_xgetbv(0) & 0x80000) == 0x80000) ? 1 : 0; +} + #endif // defined(HOST_X86) || defined(HOST_AMD64) #endif // HOST_WINDOWS -int minipal_getcpufeatures(void) +long long minipal_getcpufeatures(void) { - int result = 0; + long long result = 0; #if defined(HOST_X86) || defined(HOST_AMD64) @@ -273,10 +308,18 @@ int minipal_getcpufeatures(void) __cpuidex(cpuidInfo, 0x00000007, 0x00000001); - if ((cpuidInfo[CPUID_EAX] & (1 << 4)) != 0) // AVX-VNNI - { - result |= XArchIntrinsicConstants_AvxVnni; - } + if ((cpuidInfo[CPUID_EAX] & (1 << 4)) != 0) // AVX-VNNI + { + result |= XArchIntrinsicConstants_AvxVnni; + } + + if (IsApxEnabled() && apxStateSupport()) + { + if ((cpuidInfo[CPUID_EDX] & (1 << 21)) != 0) // APX_F + { + result |= XArchIntrinsicConstants_Apx; + } + } if ((cpuidInfo[CPUID_EDX] & (1 << 19)) != 0) // Avx10 { From 38c53662a403b0db7a8f1952b240b0f35157631d Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Fri, 31 May 2024 14:01:45 -0700 Subject: [PATCH 03/13] 1. revert the int-to-long changes 2. merge Avx512F, BW, CD, DQ to a converged ISA flag, Avx512. 3. introduce APX cpuid detection. 
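For context on item 3: the CPUID detection added across these patches reduces to two checks that appear in isa_detection.cpp and src/native/minipal/cpufeatures.c. CPUID.(EAX=07H,ECX=01H):EDX bit 21 reports APX_F support in the processor, and XCR0 bit 19 reports that the OS has enabled the APX extended-GPR XSAVE state. The standalone sketch below only illustrates that sequence; it assumes a GCC/Clang x86-64 toolchain, the helper name IsApxSupported is invented for the example, and it is not code from these patches.

#include <cpuid.h>
#include <stdbool.h>
#include <stdint.h>

/* Illustrative sketch only: mirrors the checks the patches add, not patch code. */
static bool IsApxSupported(void)
{
    unsigned int eax, ebx, ecx, edx;

    /* CPUID.01H:ECX[27] (OSXSAVE) must be set before XGETBV can be used. */
    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx) || (ecx & (1u << 27)) == 0)
        return false;

    /* CPUID.(EAX=07H,ECX=01H):EDX[21] reports APX_F support in the processor. */
    if (!__get_cpuid_count(7, 1, &eax, &ebx, &ecx, &edx) || (edx & (1u << 21)) == 0)
        return false;

    /* XCR0[19] reports that the OS saves/restores the extended GPRs (r16-r31),
       matching the xcr0 & 0x80000 and XSTATE_APX = 19 checks in the patches. */
    uint32_t xcr0_lo, xcr0_hi;
    __asm__ volatile("xgetbv" : "=a"(xcr0_lo), "=d"(xcr0_hi) : "c"(0));
    (void)xcr0_hi;
    return (xcr0_lo & (1u << 19)) != 0;
}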
--- src/coreclr/nativeaot/Runtime/startup.cpp | 4 +- .../Compiler/HardwareIntrinsicHelpers.cs | 80 +++++++++---------- .../tools/Common/InstructionSetHelpers.cs | 2 +- .../ExpectedIsaFeaturesRootProvider.cs | 4 +- .../Compiler/HardwareIntrinsicHelpers.Aot.cs | 9 +-- .../tools/aot/jitinterface/jitwrapper.cpp | 2 +- src/coreclr/vm/codeman.cpp | 20 ++--- src/native/minipal/cpufeatures.c | 56 ++++--------- src/native/minipal/cpufeatures.h | 22 ++--- 9 files changed, 82 insertions(+), 117 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/startup.cpp b/src/coreclr/nativeaot/Runtime/startup.cpp index fd3cfa57756b0b..f87bc947d970ac 100644 --- a/src/coreclr/nativeaot/Runtime/startup.cpp +++ b/src/coreclr/nativeaot/Runtime/startup.cpp @@ -50,8 +50,8 @@ extern RhConfig * g_pRhConfig; #if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64) // This field is inspected from the generated code to determine what intrinsics are available. -EXTERN_C long long g_cpuFeatures; -long long g_cpuFeatures = 0; +EXTERN_C int g_cpuFeatures; +int g_cpuFeatures = 0; // This field is defined in the generated code and sets the ISA expectations. EXTERN_C int g_requiredCpuFeatures; diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index fd9fd8cdea142b..fdeb8307285259 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -34,7 +34,7 @@ public static bool IsHardwareIntrinsic(MethodDesc method) return false; } - public static void AddRuntimeRequiredIsaFlagsToBuilder(InstructionSetSupportBuilder builder, long flags) + public static void AddRuntimeRequiredIsaFlagsToBuilder(InstructionSetSupportBuilder builder, int flags) { switch (builder.Architecture) { @@ -70,22 +70,16 @@ private static class XArchIntrinsicConstants public const int Lzcnt = 0x1000; public const int AvxVnni = 0x2000; public const int Movbe = 0x4000; - public const int Avx512f = 0x8000; - public const int Avx512f_vl = 0x10000; - public const int Avx512bw = 0x20000; - public const int Avx512bw_vl = 0x40000; - public const int Avx512cd = 0x80000; - public const int Avx512cd_vl = 0x100000; - public const int Avx512dq = 0x200000; - public const int Avx512dq_vl = 0x400000; - public const int Avx512Vbmi = 0x800000; - public const int Avx512Vbmi_vl = 0x1000000; - public const int Serialize = 0x2000000; - public const int Avx10v1 = 0x4000000; - public const int Avx10v1_v256 = 0x8000000; - public const int Avx10v1_v512 = 0x10000000; + public const int Avx512 = 0x8000; + public const int Avx512Vbmi = 0x10000; + public const int Avx512Vbmi_vl = 0x20000; + public const int Serialize = 0x40000; + public const int Avx10v1 = 0x80000; + public const int Avx10v1_v256 = 0x100000; + public const int Avx10v1_v512 = 0x200000; + public const int Apx = 0x400000; - public static void AddToBuilder(InstructionSetSupportBuilder builder, long flags) + public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) { if ((flags & Aes) != 0) builder.AddSupportedInstructionSet("aes"); @@ -117,21 +111,21 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, long flags builder.AddSupportedInstructionSet("avxvnni"); if ((flags & Movbe) != 0) builder.AddSupportedInstructionSet("movbe"); - if ((flags & Avx512f) != 0) + if ((flags & Avx512) != 0) builder.AddSupportedInstructionSet("avx512f"); - if ((flags & Avx512f_vl) != 0) + if ((flags & Avx512) != 0) 
builder.AddSupportedInstructionSet("avx512f_vl"); - if ((flags & Avx512bw) != 0) + if ((flags & Avx512) != 0) builder.AddSupportedInstructionSet("avx512bw"); - if ((flags & Avx512bw_vl) != 0) + if ((flags & Avx512) != 0) builder.AddSupportedInstructionSet("avx512bw_vl"); - if ((flags & Avx512cd) != 0) + if ((flags & Avx512) != 0) builder.AddSupportedInstructionSet("avx512cd"); - if ((flags & Avx512cd_vl) != 0) + if ((flags & Avx512) != 0) builder.AddSupportedInstructionSet("avx512cd_vl"); - if ((flags & Avx512dq) != 0) + if ((flags & Avx512) != 0) builder.AddSupportedInstructionSet("avx512dq"); - if ((flags & Avx512dq_vl) != 0) + if ((flags & Avx512) != 0) builder.AddSupportedInstructionSet("avx512dq_vl"); if ((flags & Avx512Vbmi) != 0) builder.AddSupportedInstructionSet("avx512vbmi"); @@ -149,7 +143,7 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, long flags builder.AddSupportedInstructionSet("apx"); } - public static long FromInstructionSet(InstructionSet instructionSet) + public static int FromInstructionSet(InstructionSet instructionSet) { Debug.Assert(InstructionSet.X64_AES == InstructionSet.X86_AES); Debug.Assert(InstructionSet.X64_SSE41 == InstructionSet.X86_SSE41); @@ -188,22 +182,22 @@ public static long FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_AVXVNNI_X64 => AvxVnni, InstructionSet.X64_MOVBE => Movbe, InstructionSet.X64_MOVBE_X64 => Movbe, - InstructionSet.X64_AVX512F => Avx512f, - InstructionSet.X64_AVX512F_X64 => Avx512f, - InstructionSet.X64_AVX512F_VL => Avx512f_vl, - InstructionSet.X64_AVX512F_VL_X64 => Avx512f_vl, - InstructionSet.X64_AVX512BW => Avx512bw, - InstructionSet.X64_AVX512BW_X64 => Avx512bw, - InstructionSet.X64_AVX512BW_VL => Avx512bw_vl, - InstructionSet.X64_AVX512BW_VL_X64 => Avx512bw_vl, - InstructionSet.X64_AVX512CD => Avx512cd, - InstructionSet.X64_AVX512CD_X64 => Avx512cd, - InstructionSet.X64_AVX512CD_VL => Avx512cd_vl, - InstructionSet.X64_AVX512CD_VL_X64 => Avx512cd_vl, - InstructionSet.X64_AVX512DQ => Avx512dq, - InstructionSet.X64_AVX512DQ_X64 => Avx512dq, - InstructionSet.X64_AVX512DQ_VL => Avx512dq_vl, - InstructionSet.X64_AVX512DQ_VL_X64 => Avx512dq_vl, + InstructionSet.X64_AVX512F => Avx512, + InstructionSet.X64_AVX512F_X64 => Avx512, + InstructionSet.X64_AVX512F_VL => Avx512, + InstructionSet.X64_AVX512F_VL_X64 => Avx512, + InstructionSet.X64_AVX512BW => Avx512, + InstructionSet.X64_AVX512BW_X64 => Avx512, + InstructionSet.X64_AVX512BW_VL => Avx512, + InstructionSet.X64_AVX512BW_VL_X64 => Avx512, + InstructionSet.X64_AVX512CD => Avx512, + InstructionSet.X64_AVX512CD_X64 => Avx512, + InstructionSet.X64_AVX512CD_VL => Avx512, + InstructionSet.X64_AVX512CD_VL_X64 => Avx512, + InstructionSet.X64_AVX512DQ => Avx512, + InstructionSet.X64_AVX512DQ_X64 => Avx512, + InstructionSet.X64_AVX512DQ_VL => Avx512, + InstructionSet.X64_AVX512DQ_VL_X64 => Avx512, InstructionSet.X64_AVX512VBMI => Avx512Vbmi, InstructionSet.X64_AVX512VBMI_X64 => Avx512Vbmi, InstructionSet.X64_AVX512VBMI_VL => Avx512Vbmi_vl, @@ -251,7 +245,7 @@ private static class Arm64IntrinsicConstants public const int Rcpc2 = 0x0200; public const int Sve = 0x0400; - public static void AddToBuilder(InstructionSetSupportBuilder builder, long flags) + public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) { if ((flags & AdvSimd) != 0) builder.AddSupportedInstructionSet("neon"); @@ -277,7 +271,7 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, long flags builder.AddSupportedInstructionSet("sve"); } 
- public static long FromInstructionSet(InstructionSet instructionSet) + public static int FromInstructionSet(InstructionSet instructionSet) { return instructionSet switch { diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs index 227ac21a310039..8a7303f15f3711 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -57,7 +57,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru string jitInterfaceLibrary = "jitinterface_" + RuntimeInformation.ProcessArchitecture.ToString().ToLowerInvariant(); nint libHandle = NativeLibrary.Load(jitInterfaceLibrary, System.Reflection.Assembly.GetExecutingAssembly(), DllImportSearchPath.ApplicationDirectory); - long cpuFeatures; + int cpuFeatures; unsafe { var getCpuFeatures = (delegate* unmanaged)NativeLibrary.GetExport(libHandle, "JitGetProcessorFeatures"); diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ExpectedIsaFeaturesRootProvider.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ExpectedIsaFeaturesRootProvider.cs index 161664bbf9fcb9..90f7b77a4f76fb 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ExpectedIsaFeaturesRootProvider.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ExpectedIsaFeaturesRootProvider.cs @@ -22,9 +22,9 @@ void ICompilationRootProvider.AddCompilationRoots(IRootingServiceProvider rootPr || _isaSupport.Architecture == TargetArchitecture.X86 || _isaSupport.Architecture == TargetArchitecture.ARM64) { - long isaFlags = HardwareIntrinsicHelpers.GetRuntimeRequiredIsaFlags(_isaSupport); + int isaFlags = HardwareIntrinsicHelpers.GetRuntimeRequiredIsaFlags(_isaSupport); byte[] bytes = BitConverter.GetBytes(isaFlags); - rootProvider.RootReadOnlyDataBlob(bytes, 8, "ISA support flags", "g_requiredCpuFeatures"); + rootProvider.RootReadOnlyDataBlob(bytes, 4, "ISA support flags", "g_requiredCpuFeatures"); } } } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs index 6a03ddf8ae74f9..ee355b964404d7 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs @@ -28,7 +28,7 @@ public static MethodIL EmitIsSupportedIL(MethodDesc method, FieldDesc isSupporte Debug.Assert(IsIsSupportedMethod(method)); Debug.Assert(isSupportedField.IsStatic && isSupportedField.FieldType.IsWellKnownType(WellKnownType.Int32)); - long flag = 0; + int flag = 0; switch (method.Context.Target.Architecture) { @@ -50,8 +50,7 @@ public static MethodIL EmitIsSupportedIL(MethodDesc method, FieldDesc isSupporte ILCodeStream codeStream = emit.NewCodeStream(); codeStream.Emit(ILOpcode.ldsfld, emit.NewToken(isSupportedField)); - codeStream.EmitLdc((int)flag); - codeStream.EmitLdc((int)(flag >> 32)); + codeStream.EmitLdc(flag); codeStream.Emit(ILOpcode.and); codeStream.EmitLdc(0); codeStream.Emit(ILOpcode.cgt_un); @@ -60,9 +59,9 @@ public static MethodIL EmitIsSupportedIL(MethodDesc method, FieldDesc isSupporte return emit.Link(method); } - public static long GetRuntimeRequiredIsaFlags(InstructionSetSupport instructionSetSupport) + public static int GetRuntimeRequiredIsaFlags(InstructionSetSupport instructionSetSupport) { - long result = 0; + int result = 0; switch (instructionSetSupport.Architecture) { case 
TargetArchitecture.X86: diff --git a/src/coreclr/tools/aot/jitinterface/jitwrapper.cpp b/src/coreclr/tools/aot/jitinterface/jitwrapper.cpp index dc0945967a93d7..8fd38d192f84e9 100644 --- a/src/coreclr/tools/aot/jitinterface/jitwrapper.cpp +++ b/src/coreclr/tools/aot/jitinterface/jitwrapper.cpp @@ -53,7 +53,7 @@ DLL_EXPORT void JitProcessShutdownWork(ICorJitCompiler * pJit) return pJit->ProcessShutdownWork(nullptr); } -DLL_EXPORT long long JitGetProcessorFeatures() +DLL_EXPORT int JitGetProcessorFeatures() { return minipal_getcpufeatures(); } diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 3377f0004faa0a..226739ec282bd2 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1255,7 +1255,7 @@ void EEJitManager::SetCpuInfo() CORJIT_FLAGS CPUCompileFlags; - long long cpuFeatures = minipal_getcpufeatures(); + int cpuFeatures = minipal_getcpufeatures(); #if defined(TARGET_X86) || defined(TARGET_AMD64) CPUCompileFlags.Set(InstructionSet_VectorT128); @@ -1305,42 +1305,42 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.Set(InstructionSet_AVX2); } - if (((cpuFeatures & XArchIntrinsicConstants_Avx512f) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F)) { CPUCompileFlags.Set(InstructionSet_AVX512F); } - if (((cpuFeatures & XArchIntrinsicConstants_Avx512f_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F_VL)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F_VL)) { CPUCompileFlags.Set(InstructionSet_AVX512F_VL); } - if (((cpuFeatures & XArchIntrinsicConstants_Avx512bw) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW)) { CPUCompileFlags.Set(InstructionSet_AVX512BW); } - if (((cpuFeatures & XArchIntrinsicConstants_Avx512bw_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW_VL)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW_VL)) { CPUCompileFlags.Set(InstructionSet_AVX512BW_VL); } - if (((cpuFeatures & XArchIntrinsicConstants_Avx512cd) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD)) { CPUCompileFlags.Set(InstructionSet_AVX512CD); } - if (((cpuFeatures & XArchIntrinsicConstants_Avx512cd_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD_VL)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD_VL)) { CPUCompileFlags.Set(InstructionSet_AVX512CD_VL); } - if (((cpuFeatures & XArchIntrinsicConstants_Avx512dq) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ)) { CPUCompileFlags.Set(InstructionSet_AVX512DQ); } - if (((cpuFeatures & XArchIntrinsicConstants_Avx512dq_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ_VL)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ_VL)) { 
CPUCompileFlags.Set(InstructionSet_AVX512DQ_VL); } @@ -1444,7 +1444,7 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.Set(InstructionSet_AVX10v1_V512); } - if (((cpuFeatures & ((long long)XArchIntrinsicConstants_Apx << 32)) != 0)) + if (((cpuFeatures & XArchIntrinsicConstants_Apx) != 0)) { CPUCompileFlags.Set(InstructionSet_APX); } diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index fbf2469fa32423..5b983b39926079 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -175,9 +175,9 @@ static uint32_t apxStateSupport() #endif // defined(HOST_X86) || defined(HOST_AMD64) #endif // HOST_WINDOWS -long long minipal_getcpufeatures(void) +int minipal_getcpufeatures(void) { - long long result = 0; + int result = 0; #if defined(HOST_X86) || defined(HOST_AMD64) @@ -255,45 +255,23 @@ long long minipal_getcpufeatures(void) { result |= XArchIntrinsicConstants_Avx2; - if (IsAvx512Enabled() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111 - { - if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F - { - result |= XArchIntrinsicConstants_Avx512f; - - bool isAVX512_VLSupported = false; - if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL - { - result |= XArchIntrinsicConstants_Avx512f_vl; - isAVX512_VLSupported = true; - } - - if ((cpuidInfo[CPUID_EBX] & (1 << 30)) != 0) // AVX512BW - { - result |= XArchIntrinsicConstants_Avx512bw; - if (isAVX512_VLSupported) // AVX512BW_VL - { - result |= XArchIntrinsicConstants_Avx512bw_vl; - } - } - - if ((cpuidInfo[CPUID_EBX] & (1 << 28)) != 0) // AVX512CD + if (IsAvx512Enabled() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111 { - result |= XArchIntrinsicConstants_Avx512cd; - if (isAVX512_VLSupported) // AVX512CD_VL + if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F { - result |= XArchIntrinsicConstants_Avx512cd_vl; - } - } - - if ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0) // AVX512DQ - { - result |= XArchIntrinsicConstants_Avx512dq; - if (isAVX512_VLSupported) // AVX512DQ_VL - { - result |= XArchIntrinsicConstants_Avx512dq_vl; - } - } + bool isAVX512_VLSupported = false; + const int subsetMask = (1 << 30) | (1 << 28) | (1 << 17); // AVX512BW + AVX512CD + AVX512DQ + if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL + { + isAVX512_VLSupported = true; + } + + if (((cpuidInfo[CPUID_EBX] & subsetMask) != 0) && isAVX512_VLSupported) + { + // AVX512F+BW+CD+DQ are required on all Avx512 enabled machine, checking them all at once. 
+ result |= XArchIntrinsicConstants_Avx512; + result |= XArchIntrinsicConstants_VectorT512; + } if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // AVX512VBMI { diff --git a/src/native/minipal/cpufeatures.h b/src/native/minipal/cpufeatures.h index 62aa1c75256a84..aeab2dc280042f 100644 --- a/src/native/minipal/cpufeatures.h +++ b/src/native/minipal/cpufeatures.h @@ -26,20 +26,14 @@ enum XArchIntrinsicConstants XArchIntrinsicConstants_Lzcnt = 0x1000, XArchIntrinsicConstants_AvxVnni = 0x2000, XArchIntrinsicConstants_Movbe = 0x4000, - XArchIntrinsicConstants_Avx512f = 0x8000, - XArchIntrinsicConstants_Avx512f_vl = 0x10000, - XArchIntrinsicConstants_Avx512bw = 0x20000, - XArchIntrinsicConstants_Avx512bw_vl = 0x40000, - XArchIntrinsicConstants_Avx512cd = 0x80000, - XArchIntrinsicConstants_Avx512cd_vl = 0x100000, - XArchIntrinsicConstants_Avx512dq = 0x200000, - XArchIntrinsicConstants_Avx512dq_vl = 0x400000, - XArchIntrinsicConstants_Avx512Vbmi = 0x800000, - XArchIntrinsicConstants_Avx512Vbmi_vl = 0x1000000, - XArchIntrinsicConstants_Serialize = 0x2000000, - XArchIntrinsicConstants_Avx10v1 = 0x4000000, - XArchIntrinsicConstants_Avx10v1_V256 = 0x8000000, - XArchIntrinsicConstants_Avx10v1_V512 = 0x10000000, + XArchIntrinsicConstants_Avx512 = 0x8000, + XArchIntrinsicConstants_Avx512Vbmi = 0x10000, + XArchIntrinsicConstants_Avx512Vbmi_vl = 0x20000, + XArchIntrinsicConstants_Serialize = 0x40000, + XArchIntrinsicConstants_Avx10v1 = 0x80000, + XArchIntrinsicConstants_Avx10v1_V256 = 0x100000, + XArchIntrinsicConstants_Avx10v1_V512 = 0x200000, + XArchIntrinsicConstants_Apx = 0x400000, }; #endif // HOST_X86 || HOST_AMD64 From ffb7a014bbd09aeaad24a2b42e63c78afc277d66 Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Fri, 31 May 2024 15:38:49 -0700 Subject: [PATCH 04/13] improve the Avx512 check logics. --- src/native/minipal/cpufeatures.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index 5b983b39926079..c6911ca129b150 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -257,32 +257,21 @@ int minipal_getcpufeatures(void) if (IsAvx512Enabled() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111 { - if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F + + // Checking Avx512F+BW+CD+DQ+VL altogether. + const int subsetMasks = (1 << 16) | (1 <<17) | (1 << 28) | (1 << 30) | (1 << 31); + if ((cpuidInfo[CPUID_EBX] & subsetMasks) == subsetMasks) { - bool isAVX512_VLSupported = false; - const int subsetMask = (1 << 30) | (1 << 28) | (1 << 17); // AVX512BW + AVX512CD + AVX512DQ - if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL - { - isAVX512_VLSupported = true; - } + result |= XArchIntrinsicConstants_Avx512; + result |= XArchIntrinsicConstants_VectorT512; - if (((cpuidInfo[CPUID_EBX] & subsetMask) != 0) && isAVX512_VLSupported) + if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // AVX512VBMI { - // AVX512F+BW+CD+DQ are required on all Avx512 enabled machine, checking them all at once. 
- result |= XArchIntrinsicConstants_Avx512; - result |= XArchIntrinsicConstants_VectorT512; + result |= XArchIntrinsicConstants_Avx512Vbmi; + result |= XArchIntrinsicConstants_Avx512Vbmi_vl; } - - if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // AVX512VBMI - { - result |= XArchIntrinsicConstants_Avx512Vbmi; - if (isAVX512_VLSupported) // AVX512VBMI_VL - { - result |= XArchIntrinsicConstants_Avx512Vbmi_vl; } } - } - } __cpuidex(cpuidInfo, 0x00000007, 0x00000001); From 1923060ee12585d701537b38cdfd34b2621f00f5 Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Mon, 3 Jun 2024 17:39:15 -0700 Subject: [PATCH 05/13] resolve merge conflict --- src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs | 2 +- src/coreclr/vm/codeman.cpp | 2 +- src/native/minipal/cpufeatures.c | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index fdeb8307285259..8fb840d9db4f63 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -223,7 +223,7 @@ public static int FromInstructionSet(InstructionSet instructionSet) // Vector Sizes InstructionSet.X64_VectorT128 => 0, InstructionSet.X64_VectorT256 => Avx2, - InstructionSet.X64_VectorT512 => Avx512f, + InstructionSet.X64_VectorT512 => Avx512, _ => throw new NotSupportedException(((InstructionSet_X64)instructionSet).ToString()) }; diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 226739ec282bd2..6277be58f05f9f 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1269,7 +1269,7 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.Set(InstructionSet_VectorT256); } - if (((cpuFeatures & XArchIntrinsicConstants_Avx512f) != 0) && (maxVectorTBitWidth >= 512)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) && (maxVectorTBitWidth >= 512)) { // We require 512-bit Vector to be opt-in CPUCompileFlags.Set(InstructionSet_VectorT512); diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index c6911ca129b150..d1b696a1d794ad 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -263,7 +263,6 @@ int minipal_getcpufeatures(void) if ((cpuidInfo[CPUID_EBX] & subsetMasks) == subsetMasks) { result |= XArchIntrinsicConstants_Avx512; - result |= XArchIntrinsicConstants_VectorT512; if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // AVX512VBMI { From a22937f3a7bd99f6911377c19f261e3fc27d5d87 Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Tue, 4 Jun 2024 12:58:23 -0700 Subject: [PATCH 06/13] revert changes in GC --- src/coreclr/gc/vxsort/do_vxsort.h | 1 - src/coreclr/gc/vxsort/isa_detection.cpp | 46 +++++++------------------ 2 files changed, 13 insertions(+), 34 deletions(-) diff --git a/src/coreclr/gc/vxsort/do_vxsort.h b/src/coreclr/gc/vxsort/do_vxsort.h index 9cb89136472286..edd803f310f492 100644 --- a/src/coreclr/gc/vxsort/do_vxsort.h +++ b/src/coreclr/gc/vxsort/do_vxsort.h @@ -6,7 +6,6 @@ enum class InstructionSet { AVX2 = 0, AVX512F = 1, - APX = 2, }; void InitSupportedInstructionSet (int32_t configSetting); diff --git a/src/coreclr/gc/vxsort/isa_detection.cpp b/src/coreclr/gc/vxsort/isa_detection.cpp index 9fde70d4813c63..869bc0b254512d 100644 --- a/src/coreclr/gc/vxsort/isa_detection.cpp +++ b/src/coreclr/gc/vxsort/isa_detection.cpp @@ -14,8 +14,7 @@ enum class SupportedISA { None = 0, AVX2 = 1 << (int)InstructionSet::AVX2, - AVX512F = 1 << 
(int)InstructionSet::AVX512F, - APX = 1 << (int)InstructionSet::APX + AVX512F = 1 << (int)InstructionSet::AVX512F }; #if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) @@ -40,7 +39,6 @@ SupportedISA DetermineSupportedISA() AVX2 = 1<< 5, AVX512F = 1<<16, AVX512DQ = 1<<17, - APX = 1<<21, }; int reg[COUNT]; @@ -60,16 +58,6 @@ SupportedISA DetermineSupportedISA() // get OS XState info DWORD64 FeatureMask = GetEnabledXStateFeatures(); - int IsaFlags = (int)SupportedISA::None; - - __cpuidex(reg, 7, 1); - if((reg[EDX] & APX) && - (xcr0 & 0x80000) && - (FeatureMask & (XSTATE_MASK_APX)) == (XSTATE_MASK_APX)) - { - IsaFlags |= (int)SupportedISA::APX; - } - // get processor extended feature flag info __cpuidex(reg, 7, 0); @@ -78,7 +66,7 @@ SupportedISA DetermineSupportedISA() (xcr0 & 0xe6) == 0xe6 && (FeatureMask & (XSTATE_MASK_AVX | XSTATE_MASK_AVX512)) == (XSTATE_MASK_AVX | XSTATE_MASK_AVX512)) { - IsaFlags |= (int)SupportedISA::AVX512F; + return (SupportedISA)((int)SupportedISA::AVX2 | (int)SupportedISA::AVX512F); } // check if AVX2 is supported by both processor and OS @@ -86,10 +74,10 @@ SupportedISA DetermineSupportedISA() (xcr0 & 0x06) == 0x06 && (FeatureMask & XSTATE_MASK_AVX) == XSTATE_MASK_AVX) { - IsaFlags |= (int)SupportedISA::AVX2; + return SupportedISA::AVX2; } - return (SupportedISA)IsaFlags; + return SupportedISA::None; } #elif defined(TARGET_UNIX) @@ -97,25 +85,17 @@ SupportedISA DetermineSupportedISA() SupportedISA DetermineSupportedISA() { __builtin_cpu_init(); - - int IsaFlags = 0; - - if (__builtin_cpu_supports("apx")) - { - IsaFlags |= (int)SupportedISA::APX; - } - - if (__builtin_cpu_supports("avx512")) + if (__builtin_cpu_supports("avx2")) { - IsaFlags |= (int)SupportedISA::APX; + if (__builtin_cpu_supports("avx512f")) + return (SupportedISA)((int)SupportedISA::AVX2 | (int)SupportedISA::AVX512F); + else + return SupportedISA::AVX2; } - - if (__builtin_cpu_supports("avx2")) + else { - IsaFlags |= (int)SupportedISA::APX; + return SupportedISA::None; } - - return (SupportedISA)IsaFlags; } #endif // defined(TARGET_UNIX) @@ -126,7 +106,7 @@ static SupportedISA s_supportedISA; bool IsSupportedInstructionSet (InstructionSet instructionSet) { assert(s_initialized); - assert(instructionSet == InstructionSet::AVX2 || instructionSet == InstructionSet::AVX512F || instructionSet == InstructionSet::APX); + assert(instructionSet == InstructionSet::AVX2 || instructionSet == InstructionSet::AVX512F); return ((int)s_supportedISA & (1 << (int)instructionSet)) != 0; } @@ -138,4 +118,4 @@ void InitSupportedInstructionSet (int32_t configSetting) if (!((int)s_supportedISA & (int)SupportedISA::AVX2)) s_supportedISA = SupportedISA::None; s_initialized = true; -} +} \ No newline at end of file From 8c7878d8e83bba7c853d16d2ffe88c0ff0f492b2 Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Tue, 4 Jun 2024 13:01:34 -0700 Subject: [PATCH 07/13] move apx doc to the proper place --- apx-doc/README.md => docs/design/features/xarch-apx.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename apx-doc/README.md => docs/design/features/xarch-apx.md (100%) diff --git a/apx-doc/README.md b/docs/design/features/xarch-apx.md similarity index 100% rename from apx-doc/README.md rename to docs/design/features/xarch-apx.md From ddfa139294254f2dc631546c9e983ed4c5e6d604 Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Tue, 4 Jun 2024 13:25:32 -0700 Subject: [PATCH 08/13] Try to hack XSTATE_MASK_APX. 
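The OS headers do not yet define an XSTATE component for APX, so this change temporarily hard-codes the raw mask value 0x80000 (bit 19, matching the XSTATE_APX component index defined earlier in this series) at every site where XSTATE_MASK_APX / XFEATURE_MASK_APX would normally be used. A minimal sketch of the intended shape once OS support lands; the names below mirror the definitions this patch removes from pal.h and are placeholders, not an existing OS API:

    /* Hypothetical named constants, for illustration only. */
    #define XSTATE_APX      (19)
    #define XSTATE_MASK_APX (UI64(1) << (XSTATE_APX))   /* == 0x80000 */

    /* Usage matching the Windows call sites touched by this patch: */
    DWORD64 FeatureMask = GetEnabledXStateFeatures();
    BOOL apxStateEnabled = ((FeatureMask & XSTATE_MASK_APX) == XSTATE_MASK_APX);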
--- .../nativeaot/Runtime/windows/PalRedhawkMinWin.cpp | 8 ++++---- src/coreclr/pal/inc/pal.h | 6 ++++-- src/coreclr/pal/src/arch/amd64/asmconstants.h | 6 ++++-- src/coreclr/pal/src/arch/amd64/context2.S | 5 ++++- src/coreclr/pal/src/thread/context.cpp | 6 +++--- src/coreclr/vm/threadsuspend.cpp | 8 ++++---- src/native/minipal/cpufeatures.c | 8 ++++---- 7 files changed, 27 insertions(+), 20 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp index 9040a44e01f3a6..e4e810b19cb5da 100644 --- a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp @@ -482,14 +482,14 @@ REDHAWK_PALEXPORT CONTEXT* PalAllocateCompleteOSContext(_Out_ uint8_t** contextB // Determine if the processor supports AVX or AVX512 so we could // retrieve extended registers DWORD64 FeatureMask = GetEnabledXStateFeatures(); - if ((FeatureMask & (XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX)) != 0) + if ((FeatureMask & (XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | /*XSATE_MASK_APX*/(0x80000))) != 0) { context = context | CONTEXT_XSTATE; } // Retrieve contextSize by passing NULL for Buffer DWORD contextSize = 0; - ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | XSTATE_MASK_AVX | XSTATE_MASK_MPX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX; + ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | XSTATE_MASK_AVX | XSTATE_MASK_MPX | XSTATE_MASK_AVX512 | /*XSATE_MASK_APX*/(0x80000); // The initialize call should fail but return contextSize BOOL success = pfnInitializeContext2 ? pfnInitializeContext2(NULL, context, NULL, &contextSize, xStateCompactionMask) : @@ -540,9 +540,9 @@ REDHAWK_PALEXPORT _Success_(return) bool REDHAWK_PALAPI PalGetCompleteThreadCont #if defined(TARGET_X86) || defined(TARGET_AMD64) // Make sure that AVX feature mask is set, if supported. This should not normally fail. // The system silently ignores any feature specified in the FeatureMask which is not enabled on the processor. - if (!SetXStateFeaturesMask(pCtx, XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX)) + if (!SetXStateFeaturesMask(pCtx, XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | /*XSATE_MASK_APX*/(0x80000))) { - _ASSERTE(!"Could not apply XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX"); + _ASSERTE(!"Could not apply XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | /*XSATE_MASK_APX*/(0x80000)"); return FALSE; } #endif //defined(TARGET_X86) || defined(TARGET_AMD64) diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index cf660dd10211ac..bc47fb4f82ec0a 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -1398,14 +1398,16 @@ typedef struct _KNONVOLATILE_CONTEXT_POINTERS { #define XSTATE_AVX512_KMASK (5) #define XSTATE_AVX512_ZMM_H (6) #define XSTATE_AVX512_ZMM (7) -#define XSTATE_APX (19) #define XSTATE_MASK_GSSE (UI64(1) << (XSTATE_GSSE)) #define XSTATE_MASK_AVX (XSTATE_MASK_GSSE) #define XSTATE_MASK_AVX512 ((UI64(1) << (XSTATE_AVX512_KMASK)) | \ (UI64(1) << (XSTATE_AVX512_ZMM_H)) | \ (UI64(1) << (XSTATE_AVX512_ZMM))) -#define XSTATE_MASK_APX (UI64(1) << (XSTATE_APX)) + +// TODO-xarch-apx: the definition of XSTATE mask value for APX is now missing on the OS level, +// we are currently using bare value to hack it through the build process, and test the implementation through CI. +// those changes will be removed when we have the OS support for APX. 
typedef struct DECLSPEC_ALIGN(16) _M128A { ULONGLONG Low; diff --git a/src/coreclr/pal/src/arch/amd64/asmconstants.h b/src/coreclr/pal/src/arch/amd64/asmconstants.h index 1e0951922c5e3c..e77e79e99e4d99 100644 --- a/src/coreclr/pal/src/arch/amd64/asmconstants.h +++ b/src/coreclr/pal/src/arch/amd64/asmconstants.h @@ -8,14 +8,16 @@ #define XSTATE_AVX512_KMASK (5) #define XSTATE_AVX512_ZMM_H (6) #define XSTATE_AVX512_ZMM (7) -#define XSTATE_APX (19) #define XSTATE_MASK_GSSE (1 << (XSTATE_GSSE)) #define XSTATE_MASK_AVX (XSTATE_MASK_GSSE) #define XSTATE_MASK_AVX512 ((1 << (XSTATE_AVX512_KMASK)) | \ (1 << (XSTATE_AVX512_ZMM_H)) | \ (1 << (XSTATE_AVX512_ZMM))) -#define XSTATE_MASK_APX (1 << (XSTATE_APX)) + +// TODO-xarch-apx: the definition of XSTATE mask value for APX is now missing on the OS level, +// we are currently using bare value to hack it through the build process, and test the implementation through CI. +// those changes will be removed when we have the OS support for APX. // The arch bit is normally set in the flag constants below. Since this is already arch-specific code and the arch bit is not // relevant, the arch bit is excluded from the flag constants below for simpler tests. diff --git a/src/coreclr/pal/src/arch/amd64/context2.S b/src/coreclr/pal/src/arch/amd64/context2.S index ac6f548f47f65b..2f4c77376d65de 100644 --- a/src/coreclr/pal/src/arch/amd64/context2.S +++ b/src/coreclr/pal/src/arch/amd64/context2.S @@ -183,7 +183,10 @@ LOCAL_LABEL(Done_Restore_CONTEXT_FLOATING_POINT): kmovq k6, qword ptr [rdi + (CONTEXT_KMask0 + 6 * 8)] kmovq k7, qword ptr [rdi + (CONTEXT_KMask0 + 7 * 8)] - test BYTE PTR [rdi + CONTEXT_XStateFeaturesMask], XSTATE_MASK_APX + // TODO-xarch-apx: the definition of XSTATE mask value for APX is now missing on the OS level, + // we are currently using bare value to hack it through the build process, and test the implementation through CI. + // those changes will be removed when we have the OS support for APX. + test BYTE PTR [rdi + CONTEXT_XStateFeaturesMask], 524288 je LOCAL_LABEL(Done_Restore_CONTEXT_XSTATE) // Restore the EGPR state, EGPR use previous MPX field, need to add an offset. diff --git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index 4c5a701adf98c4..45e03d52ca3097 100644 --- a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -406,7 +406,7 @@ bool Xstate_IsApxSupported() __cpuidex(cpuidInfo, 0x0000000D, 0x00000000); - if ((cpuidInfo[CPUID_EAX] & XSTATE_MASK_APX) == XSTATE_MASK_APX) + if ((cpuidInfo[CPUID_EAX] & /*XSATE_MASK_APX*/(0x80000)) == /*XSATE_MASK_APX*/(0x80000)) { // Knight's Landing and Knight's Mill shipped without all 5 of the "baseline" // AVX-512 ISAs that are required by x86-64-v4. 
Specifically they do not include @@ -856,7 +856,7 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) if (FPREG_HasApxRegisters(native)) { - _ASSERT((lpContext->XStateFeaturesMask & XSTATE_MASK_APX) == XSTATE_MASK_APX); + _ASSERT((lpContext->XStateFeaturesMask & /*XSATE_MASK_APX*/(0x80000)) == /*XSATE_MASK_APX*/(0x80000)); dest = FPREG_Xstate_Egpr(native, &size); _ASSERT(size == (sizeof(DWORD64) * 16)); @@ -1086,7 +1086,7 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex _ASSERT(size == (sizeof(DWORD64) * 16)); memcpy_s(&lpContext->Egpr16, sizeof(DWORD64) * 16, src, sizeof(DWORD64) * 16); - lpContext->XStateFeaturesMask |= XSTATE_MASK_APX; + lpContext->XStateFeaturesMask |= /*XSATE_MASK_APX*/(0x80000); } } else diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index c795403b7aa19c..30e92c743eb58b 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -1961,7 +1961,7 @@ CONTEXT* AllocateOSContextHelper(BYTE** contextBuffer) // Determine if the processor supports AVX so we could // retrieve extended registers DWORD64 FeatureMask = GetEnabledXStateFeatures(); - if ((FeatureMask & (XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX)) != 0) + if ((FeatureMask & (XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | /*XSATE_MASK_APX*/(0x80000))) != 0) { context = context | CONTEXT_XSTATE; } @@ -1969,7 +1969,7 @@ CONTEXT* AllocateOSContextHelper(BYTE** contextBuffer) // Retrieve contextSize by passing NULL for Buffer DWORD contextSize = 0; // Ruihan TODO: MPX and APX cannot and will not co-exist, will setting the mask in this way be an issue? - ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | XSTATE_MASK_AVX | XSTATE_MASK_MPX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX; + ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | XSTATE_MASK_AVX | XSTATE_MASK_MPX | XSTATE_MASK_AVX512 | /*XSATE_MASK_APX*/(0x80000); // The initialize call should fail but return contextSize BOOL success = g_pfnInitializeContext2 ? g_pfnInitializeContext2(NULL, context, NULL, &contextSize, xStateCompactionMask) : @@ -2903,7 +2903,7 @@ BOOL Thread::RedirectThreadAtHandledJITCase(PFN_REDIRECTTARGET pTgt) // This should not normally fail. // The system silently ignores any feature specified in the FeatureMask // which is not enabled on the processor. - SetXStateFeaturesMask(pCtx, (XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX)); + SetXStateFeaturesMask(pCtx, (XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | /*XSATE_MASK_APX*/(0x80000))); #endif //defined(TARGET_X86) || defined(TARGET_AMD64) // Make sure we specify CONTEXT_EXCEPTION_REQUEST to detect "trap frame reporting". @@ -3039,7 +3039,7 @@ BOOL Thread::RedirectCurrentThreadAtHandledJITCase(PFN_REDIRECTTARGET pTgt, CONT // Get may return 0 if no XState is set, which Set would not accept. 
if (srcFeatures != 0) { - success = SetXStateFeaturesMask(pCurrentThreadCtx, srcFeatures & (XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX)); + success = SetXStateFeaturesMask(pCurrentThreadCtx, srcFeatures & (XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | /*XSATE_MASK_APX*/(0x80000))); _ASSERTE(success); if (!success) return FALSE; diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index d1b696a1d794ad..ccfe25fe4b4555 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -72,9 +72,9 @@ static uint32_t xmmYmmStateSupport() #define XSTATE_MASK_AVX512 (0xE0) /* 0b1110_0000 */ #endif // XSTATE_MASK_AVX512 -#ifndef XSTATE_MASK_APX -#define XSTATE_MASK_APX (0x80000) -#endif // XSTATE_MASK_APX +// TODO-xarch-apx: the definition of XSTATE mask value for APX is now missing on the OS level, +// we are currently using bare value to hack it through the build process, and test the implementation through CI. +// those changes will be removed when we have the OS support for APX. static uint32_t avx512StateSupport() { @@ -164,7 +164,7 @@ static bool IsAvx512Enabled() static bool IsApxEnabled() { DWORD64 FeatureMask = GetEnabledXStateFeatures(); - return ((FeatureMask & XSTATE_MASK_APX) != 0); + return ((FeatureMask & /*XSATE_MASK_APX*/(0x80000)) != 0); } static uint32_t apxStateSupport() From 8e2f79adf3c8052b016cde5067417b1062ee7696 Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Tue, 4 Jun 2024 13:32:14 -0700 Subject: [PATCH 09/13] Update comments --- src/coreclr/pal/src/thread/context.cpp | 2 +- src/coreclr/vm/threadsuspend.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index 45e03d52ca3097..4e3c533ebe0fbf 100644 --- a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -384,7 +384,7 @@ bool Xstate_IsAvx512Supported() bool Xstate_IsApxSupported() { #if defined(HAVE_MACH_EXCEPTIONS) - // Ruihan TODO: I assume OSX will never support APX + // TODO-xarch-apx: I assume OSX will never support APX return false; #else static int Xstate_ApxSupported = -1; diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 30e92c743eb58b..6c80f6b4d84922 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -1968,7 +1968,7 @@ CONTEXT* AllocateOSContextHelper(BYTE** contextBuffer) // Retrieve contextSize by passing NULL for Buffer DWORD contextSize = 0; - // Ruihan TODO: MPX and APX cannot and will not co-exist, will setting the mask in this way be an issue? + // TODO-xarch-apx: MPX and APX cannot and will not co-exist, will setting the mask in this way be an issue? ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | XSTATE_MASK_AVX | XSTATE_MASK_MPX | XSTATE_MASK_AVX512 | /*XSATE_MASK_APX*/(0x80000); // The initialize call should fail but return contextSize BOOL success = g_pfnInitializeContext2 ? 
From 2eb664494795581768b1f3b355201ee760fdd608 Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Tue, 4 Jun 2024 14:18:59 -0700 Subject: [PATCH 10/13] comment clean up --- src/coreclr/pal/src/include/pal/context.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/coreclr/pal/src/include/pal/context.h b/src/coreclr/pal/src/include/pal/context.h index bc50951a4c8741..f868c530e59ab0 100644 --- a/src/coreclr/pal/src/include/pal/context.h +++ b/src/coreclr/pal/src/include/pal/context.h @@ -384,9 +384,9 @@ bool Xstate_IsApxSupported(); #define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK | XFEATURE_MASK_ZMM_Hi256 | XFEATURE_MASK_Hi16_ZMM) #endif // XFEATURE_MASK_AVX512 -#ifndef XFEATURE_MASK_APX -#define XFEATURE_MASK_APX (1 << XSTATE_APX) -#endif // XFEATURE_MASK_APX +// TODO-xarch-apx: the definition of XSTATE mask value for APX is now missing on the OS level, +// we are currently using bare value to hack it through the build process, and test the implementation through CI. +// those changes will be removed when we have the OS support for APX. #if HAVE__FPX_SW_BYTES_WITH_XSTATE_BV #define FPREG_FpxSwBytes_xfeatures(uc) FPREG_FpxSwBytes(uc)->xstate_bv @@ -410,7 +410,7 @@ struct Xstate_ExtendedFeature uint32_t size; }; -#define Xstate_ExtendedFeatures_Count (XSTATE_APX + 1) +#define Xstate_ExtendedFeatures_Count (/*XSTATE_APX*/19 + 1) extern Xstate_ExtendedFeature Xstate_ExtendedFeatures[Xstate_ExtendedFeatures_Count]; inline _fpx_sw_bytes *FPREG_FpxSwBytes(const ucontext_t *uc) @@ -555,7 +555,7 @@ inline bool FPREG_HasApxRegisters(const ucontext_t *uc) return false; } - if ((FPREG_FpxSwBytes_xfeatures(uc) & XFEATURE_MASK_APX) != XFEATURE_MASK_APX) + if ((FPREG_FpxSwBytes_xfeatures(uc) & /*XFEATURE_MASK_APX*/(0x80000)) != /*XFEATURE_MASK_APX*/(0x80000)) { return false; } @@ -566,7 +566,7 @@ inline bool FPREG_HasApxRegisters(const ucontext_t *uc) inline void *FPREG_Xstate_Egpr(const ucontext_t *uc, uint32_t *featureSize) { _ASSERTE(FPREG_HasApxRegisters(uc)); - return FPREG_Xstate_ExtendedFeature(uc, featureSize, XSTATE_APX); + return FPREG_Xstate_ExtendedFeature(uc, featureSize, /*XSTATE_APX*/19); } #endif // XSTATE_SUPPORTED From 9877981edb50df9aa2f4e2847193c18e894c54a9 Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Tue, 4 Jun 2024 14:26:36 -0700 Subject: [PATCH 11/13] make sure the context size is correct with APX feature. 
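The APX extended GPRs (Egpr16 through Egpr31) are 64-bit registers, so the CONTEXT fields that hold them are widened from DWORD to DWORD64, and the hand-maintained SIZEOF__CONTEXT constant is bumped to match. A quick check of the arithmetic, assuming the only addition to the extended state area is the sixteen EGPR slots:

    16 EGPRs * 8 bytes        = 128 bytes (0x80)
    previous SIZEOF__CONTEXT  = 3104 (0xC20)
    new SIZEOF__CONTEXT       = 3104 + 128 = 3232 (0xCA0)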
--- src/coreclr/pal/inc/pal.h | 32 ++++++++++++++--------------- src/coreclr/vm/amd64/asmconstants.h | 4 ++-- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index bc47fb4f82ec0a..87cc989a997d0c 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -1647,22 +1647,22 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { struct { - DWORD Egpr16; - DWORD Egpr17; - DWORD Egpr18; - DWORD Egpr19; - DWORD Egpr20; - DWORD Egpr21; - DWORD Egpr22; - DWORD Egpr23; - DWORD Egpr24; - DWORD Egpr25; - DWORD Egpr26; - DWORD Egpr27; - DWORD Egpr28; - DWORD Egpr29; - DWORD Egpr30; - DWORD Egpr31; + DWORD64 Egpr16; + DWORD64 Egpr17; + DWORD64 Egpr18; + DWORD64 Egpr19; + DWORD64 Egpr20; + DWORD64 Egpr21; + DWORD64 Egpr22; + DWORD64 Egpr23; + DWORD64 Egpr24; + DWORD64 Egpr25; + DWORD64 Egpr26; + DWORD64 Egpr27; + DWORD64 Egpr28; + DWORD64 Egpr29; + DWORD64 Egpr30; + DWORD64 Egpr31; }; } CONTEXT, *PCONTEXT, *LPCONTEXT; diff --git a/src/coreclr/vm/amd64/asmconstants.h b/src/coreclr/vm/amd64/asmconstants.h index b51088a6b47930..d773f447ec4ad9 100644 --- a/src/coreclr/vm/amd64/asmconstants.h +++ b/src/coreclr/vm/amd64/asmconstants.h @@ -276,8 +276,8 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__VASigCookie__pNDirectILStub #if defined(UNIX_AMD64_ABI) && !defined(HOST_WINDOWS) // Expression is too complicated, is currently: -// (8*6 + 4*2 + 2*6 + 4 + 8*6 + 8*16 + 8 + /*XMM_SAVE_AREA32*/(2*2 + 1*2 + 2 + 4 + 2*2 + 4 + 2*2 + 4*2 + 16*8 + 16*16 + 1*96) + 26*16 + 8 + 8*5 + /*XSTATE*/ + 8 + 8 + /*XSTATE_AVX*/ 16*16 + /*XSTATE_AVX512_KMASK*/ 8*8 + /*XSTATE_AVX512_ZMM_H*/ 32*16 + /*XSTATE_AVX512_ZMM*/ 64*16) -#define SIZEOF__CONTEXT (3104) +// (8*6 + 4*2 + 2*6 + 4 + 8*6 + 8*16 + 8 + /*XMM_SAVE_AREA32*/(2*2 + 1*2 + 2 + 4 + 2*2 + 4 + 2*2 + 4*2 + 16*8 + 16*16 + 1*96) + 26*16 + 8 + 8*5 + /*XSTATE*/ + 8 + 8 + /*XSTATE_AVX*/ 16*16 + /*XSTATE_AVX512_KMASK*/ 8*8 + /*XSTATE_AVX512_ZMM_H*/ 32*16 + /*XSTATE_AVX512_ZMM*/ 64*16 + /*XSATE_APX_EGPR*/ 8*16) +#define SIZEOF__CONTEXT (3232) #else // Expression is too complicated, is currently: // (8*6 + 4*2 + 2*6 + 4 + 8*6 + 8*16 + 8 + /*XMM_SAVE_AREA32*/(2*2 + 1*2 + 2 + 4 + 2*2 + 4 + 2*2 + 4*2 + 16*8 + 16*16 + 1*96) + 26*16 + 8 + 8*5) From 37bc8a0c6e51002c423cb4bbea0d2ec8a094f4b3 Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Tue, 4 Jun 2024 14:42:25 -0700 Subject: [PATCH 12/13] exclude OSX from APX changes. --- src/coreclr/pal/src/thread/context.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index 4e3c533ebe0fbf..96c6824e1f802a 100644 --- a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -853,7 +853,8 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) _ASSERT(size == (sizeof(M512) * 16)); memcpy_s(dest, sizeof(M512) * 16, &lpContext->Zmm16, sizeof(M512) * 16); } - +#ifndef TARGET_OSX + // TODO-xarch-apx: I suppose OSX will not support APX. 
if (FPREG_HasApxRegisters(native)) { _ASSERT((lpContext->XStateFeaturesMask & /*XSATE_MASK_APX*/(0x80000)) == /*XSATE_MASK_APX*/(0x80000)); @@ -862,6 +863,7 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) _ASSERT(size == (sizeof(DWORD64) * 16)); memcpy_s(dest, sizeof(DWORD64) * 16, &lpContext->Egpr16, sizeof(DWORD64) * 16); } +#endif // TARGET_OSX } } #endif //HOST_AMD64 && XSTATE_SUPPORTED From ff7bae624878031d2e921d545ec846476199fb96 Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Tue, 4 Jun 2024 15:07:20 -0700 Subject: [PATCH 13/13] make sure the context size is correct on unix system. --- .../src/System/Runtime/ExceptionServices/AsmOffsets.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs index 7db188808e26a4..30b17c626f8a1e 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs @@ -108,7 +108,7 @@ class AsmOffsets #if TARGET_AMD64 #if TARGET_UNIX - public const int SIZEOF__PAL_LIMITED_CONTEXT = 0xc20; + public const int SIZEOF__PAL_LIMITED_CONTEXT = 0xca0; #else // TARGET_UNIX public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x4d0; #endif // TARGET_UNIx
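For reference beyond the individual patches: the capability check this series relies on combines a CPUID feature bit with an OS XSTATE check, using the same bit positions that appear above (CPUID.(EAX=7,ECX=1):EDX[21] for APX_F, and XCR0 bit 19 / mask 0x80000 for the APX extended state). A minimal standalone sketch, assuming an MSVC-style toolchain for the __cpuidex and _xgetbv intrinsics:

    #include <stdbool.h>
    #include <intrin.h>      // __cpuidex (MSVC)
    #include <immintrin.h>   // _xgetbv

    static bool IsApxSupported(void)
    {
        int cpuidInfo[4];                                  // EAX, EBX, ECX, EDX
        __cpuidex(cpuidInfo, 7, 1);                        // leaf 7, sub-leaf 1
        bool apxInCpu = (cpuidInfo[3] & (1 << 21)) != 0;   // EDX[21]: APX_F
        bool apxInOs  = (_xgetbv(0) & (1ull << 19)) != 0;  // XCR0[19]: APX state
        return apxInCpu && apxInOs;
    }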