diff --git a/docs/design/features/xarch-apx.md b/docs/design/features/xarch-apx.md new file mode 100644 index 00000000000000..8997da56fbdb8a --- /dev/null +++ b/docs/design/features/xarch-apx.md @@ -0,0 +1,3 @@ +# APX Integration in .NET + +Let's keep documentation on APX integration and notes on things here. I will evolve this as necessary. \ No newline at end of file diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs index 7db188808e26a4..30b17c626f8a1e 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs @@ -108,7 +108,7 @@ class AsmOffsets #if TARGET_AMD64 #if TARGET_UNIX - public const int SIZEOF__PAL_LIMITED_CONTEXT = 0xc20; + public const int SIZEOF__PAL_LIMITED_CONTEXT = 0xca0; #else // TARGET_UNIX public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x4d0; #endif // TARGET_UNIx diff --git a/src/coreclr/gc/vxsort/isa_detection.cpp b/src/coreclr/gc/vxsort/isa_detection.cpp index 93c7288663c42f..869bc0b254512d 100644 --- a/src/coreclr/gc/vxsort/isa_detection.cpp +++ b/src/coreclr/gc/vxsort/isa_detection.cpp @@ -118,4 +118,4 @@ void InitSupportedInstructionSet (int32_t configSetting) if (!((int)s_supportedISA & (int)SupportedISA::AVX2)) s_supportedISA = SupportedISA::None; s_initialized = true; -} +} \ No newline at end of file diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index e3b57b6a1e043b..1e17865d9e2f3d 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -78,41 +78,43 @@ enum CORINFO_InstructionSet InstructionSet_AVX10v1=33, InstructionSet_AVX10v1_V256=34, InstructionSet_AVX10v1_V512=35, - InstructionSet_VectorT128=36, - InstructionSet_VectorT256=37, - InstructionSet_VectorT512=38, - 
InstructionSet_X86Base_X64=39, - InstructionSet_SSE_X64=40, - InstructionSet_SSE2_X64=41, - InstructionSet_SSE3_X64=42, - InstructionSet_SSSE3_X64=43, - InstructionSet_SSE41_X64=44, - InstructionSet_SSE42_X64=45, - InstructionSet_AVX_X64=46, - InstructionSet_AVX2_X64=47, - InstructionSet_AES_X64=48, - InstructionSet_BMI1_X64=49, - InstructionSet_BMI2_X64=50, - InstructionSet_FMA_X64=51, - InstructionSet_LZCNT_X64=52, - InstructionSet_PCLMULQDQ_X64=53, - InstructionSet_POPCNT_X64=54, - InstructionSet_AVXVNNI_X64=55, - InstructionSet_MOVBE_X64=56, - InstructionSet_X86Serialize_X64=57, - InstructionSet_AVX512F_X64=58, - InstructionSet_AVX512F_VL_X64=59, - InstructionSet_AVX512BW_X64=60, - InstructionSet_AVX512BW_VL_X64=61, - InstructionSet_AVX512CD_X64=62, - InstructionSet_AVX512CD_VL_X64=63, - InstructionSet_AVX512DQ_X64=64, - InstructionSet_AVX512DQ_VL_X64=65, - InstructionSet_AVX512VBMI_X64=66, - InstructionSet_AVX512VBMI_VL_X64=67, - InstructionSet_AVX10v1_X64=68, - InstructionSet_AVX10v1_V256_X64=69, - InstructionSet_AVX10v1_V512_X64=70, + InstructionSet_APX=36, + InstructionSet_VectorT128=37, + InstructionSet_VectorT256=38, + InstructionSet_VectorT512=39, + InstructionSet_X86Base_X64=40, + InstructionSet_SSE_X64=41, + InstructionSet_SSE2_X64=42, + InstructionSet_SSE3_X64=43, + InstructionSet_SSSE3_X64=44, + InstructionSet_SSE41_X64=45, + InstructionSet_SSE42_X64=46, + InstructionSet_AVX_X64=47, + InstructionSet_AVX2_X64=48, + InstructionSet_AES_X64=49, + InstructionSet_BMI1_X64=50, + InstructionSet_BMI2_X64=51, + InstructionSet_FMA_X64=52, + InstructionSet_LZCNT_X64=53, + InstructionSet_PCLMULQDQ_X64=54, + InstructionSet_POPCNT_X64=55, + InstructionSet_AVXVNNI_X64=56, + InstructionSet_MOVBE_X64=57, + InstructionSet_X86Serialize_X64=58, + InstructionSet_AVX512F_X64=59, + InstructionSet_AVX512F_VL_X64=60, + InstructionSet_AVX512BW_X64=61, + InstructionSet_AVX512BW_VL_X64=62, + InstructionSet_AVX512CD_X64=63, + InstructionSet_AVX512CD_VL_X64=64, + 
InstructionSet_AVX512DQ_X64=65, + InstructionSet_AVX512DQ_VL_X64=66, + InstructionSet_AVX512VBMI_X64=67, + InstructionSet_AVX512VBMI_VL_X64=68, + InstructionSet_AVX10v1_X64=69, + InstructionSet_AVX10v1_V256_X64=70, + InstructionSet_AVX10v1_V512_X64=71, + InstructionSet_APX_X64=72, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -150,41 +152,43 @@ enum CORINFO_InstructionSet InstructionSet_AVX10v1=33, InstructionSet_AVX10v1_V256=34, InstructionSet_AVX10v1_V512=35, - InstructionSet_VectorT128=36, - InstructionSet_VectorT256=37, - InstructionSet_VectorT512=38, - InstructionSet_X86Base_X64=39, - InstructionSet_SSE_X64=40, - InstructionSet_SSE2_X64=41, - InstructionSet_SSE3_X64=42, - InstructionSet_SSSE3_X64=43, - InstructionSet_SSE41_X64=44, - InstructionSet_SSE42_X64=45, - InstructionSet_AVX_X64=46, - InstructionSet_AVX2_X64=47, - InstructionSet_AES_X64=48, - InstructionSet_BMI1_X64=49, - InstructionSet_BMI2_X64=50, - InstructionSet_FMA_X64=51, - InstructionSet_LZCNT_X64=52, - InstructionSet_PCLMULQDQ_X64=53, - InstructionSet_POPCNT_X64=54, - InstructionSet_AVXVNNI_X64=55, - InstructionSet_MOVBE_X64=56, - InstructionSet_X86Serialize_X64=57, - InstructionSet_AVX512F_X64=58, - InstructionSet_AVX512F_VL_X64=59, - InstructionSet_AVX512BW_X64=60, - InstructionSet_AVX512BW_VL_X64=61, - InstructionSet_AVX512CD_X64=62, - InstructionSet_AVX512CD_VL_X64=63, - InstructionSet_AVX512DQ_X64=64, - InstructionSet_AVX512DQ_VL_X64=65, - InstructionSet_AVX512VBMI_X64=66, - InstructionSet_AVX512VBMI_VL_X64=67, - InstructionSet_AVX10v1_X64=68, - InstructionSet_AVX10v1_V256_X64=69, - InstructionSet_AVX10v1_V512_X64=70, + InstructionSet_APX=36, + InstructionSet_VectorT128=37, + InstructionSet_VectorT256=38, + InstructionSet_VectorT512=39, + InstructionSet_X86Base_X64=40, + InstructionSet_SSE_X64=41, + InstructionSet_SSE2_X64=42, + InstructionSet_SSE3_X64=43, + InstructionSet_SSSE3_X64=44, + InstructionSet_SSE41_X64=45, + InstructionSet_SSE42_X64=46, + 
InstructionSet_AVX_X64=47, + InstructionSet_AVX2_X64=48, + InstructionSet_AES_X64=49, + InstructionSet_BMI1_X64=50, + InstructionSet_BMI2_X64=51, + InstructionSet_FMA_X64=52, + InstructionSet_LZCNT_X64=53, + InstructionSet_PCLMULQDQ_X64=54, + InstructionSet_POPCNT_X64=55, + InstructionSet_AVXVNNI_X64=56, + InstructionSet_MOVBE_X64=57, + InstructionSet_X86Serialize_X64=58, + InstructionSet_AVX512F_X64=59, + InstructionSet_AVX512F_VL_X64=60, + InstructionSet_AVX512BW_X64=61, + InstructionSet_AVX512BW_VL_X64=62, + InstructionSet_AVX512CD_X64=63, + InstructionSet_AVX512CD_VL_X64=64, + InstructionSet_AVX512DQ_X64=65, + InstructionSet_AVX512DQ_VL_X64=66, + InstructionSet_AVX512VBMI_X64=67, + InstructionSet_AVX512VBMI_VL_X64=68, + InstructionSet_AVX10v1_X64=69, + InstructionSet_AVX10v1_V256_X64=70, + InstructionSet_AVX10v1_V512_X64=71, + InstructionSet_APX_X64=72, #endif // TARGET_X86 }; @@ -364,6 +368,8 @@ struct CORINFO_InstructionSetFlags AddInstructionSet(InstructionSet_AVX10v1_V256_X64); if (HasInstructionSet(InstructionSet_AVX10v1_V512)) AddInstructionSet(InstructionSet_AVX10v1_V512_X64); + if (HasInstructionSet(InstructionSet_APX)) + AddInstructionSet(InstructionSet_APX_X64); #endif // TARGET_AMD64 #ifdef TARGET_X86 #endif // TARGET_X86 @@ -572,6 +578,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512_X64); + if (resultflags.HasInstructionSet(InstructionSet_APX) && !resultflags.HasInstructionSet(InstructionSet_APX_X64)) + resultflags.RemoveInstructionSet(InstructionSet_APX); + if (resultflags.HasInstructionSet(InstructionSet_APX_X64) && !resultflags.HasInstructionSet(InstructionSet_APX)) + resultflags.RemoveInstructionSet(InstructionSet_APX_X64); if 
(resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_SSE); if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE)) @@ -1000,6 +1010,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "AVX10v1_V512"; case InstructionSet_AVX10v1_V512_X64 : return "AVX10v1_V512_X64"; + case InstructionSet_APX : + return "APX"; + case InstructionSet_APX_X64 : + return "APX_X64"; case InstructionSet_VectorT128 : return "VectorT128"; case InstructionSet_VectorT256 : @@ -1078,6 +1092,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "AVX10v1_V256"; case InstructionSet_AVX10v1_V512 : return "AVX10v1_V512"; + case InstructionSet_APX : + return "APX"; case InstructionSet_VectorT128 : return "VectorT128"; case InstructionSet_VectorT256 : @@ -1151,6 +1167,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Avx10v1: return InstructionSet_AVX10v1; case READYTORUN_INSTRUCTION_Avx10v1_V256: return InstructionSet_AVX10v1_V256; case READYTORUN_INSTRUCTION_Avx10v1_V512: return InstructionSet_AVX10v1_V512; + case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX; case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256; case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512; @@ -1188,6 +1205,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Avx10v1: return InstructionSet_AVX10v1; case READYTORUN_INSTRUCTION_Avx10v1_V256: return InstructionSet_AVX10v1_V256; case READYTORUN_INSTRUCTION_Avx10v1_V512: return InstructionSet_AVX10v1_V512; + case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX; case 
READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256; case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512; diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 46b50f5410ce46..2a4362049af8d4 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* 227e46fa-1be3-4770-b613-4a239e7c28aa */ - 0x227e46fa, - 0x1be3, - 0x4770, - {0xb6, 0x13, 0x4a, 0x23, 0x9e, 0x7c, 0x28, 0xaa} +constexpr GUID JITEEVersionIdentifier = { /* deed5db4-371c-4b2d-904d-9cd39cb48764 */ + 0xdeed5db4, + 0x371c, + 0x4b2d, + {0x90, 0x4d, 0x9c, 0xd3, 0x9c, 0xb4, 0x87, 0x64} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index fe388c04a60fc5..1db4e4e6e15791 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -55,6 +55,7 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Avx10v1=44, READYTORUN_INSTRUCTION_Avx10v1_V256=45, READYTORUN_INSTRUCTION_Avx10v1_V512=46, + READYTORUN_INSTRUCTION_Apx=47, }; diff --git a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp index 94424b17562be9..e4e810b19cb5da 100644 --- a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp @@ -482,14 +482,14 @@ REDHAWK_PALEXPORT CONTEXT* PalAllocateCompleteOSContext(_Out_ uint8_t** contextB // Determine if the processor supports AVX or AVX512 so we could // retrieve extended registers DWORD64 FeatureMask = GetEnabledXStateFeatures(); - if ((FeatureMask & 
(XSTATE_MASK_AVX | XSTATE_MASK_AVX512)) != 0) + if ((FeatureMask & (XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | /*XSATE_MASK_APX*/(0x80000))) != 0) { context = context | CONTEXT_XSTATE; } // Retrieve contextSize by passing NULL for Buffer DWORD contextSize = 0; - ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | XSTATE_MASK_AVX | XSTATE_MASK_MPX | XSTATE_MASK_AVX512; + ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | XSTATE_MASK_AVX | XSTATE_MASK_MPX | XSTATE_MASK_AVX512 | /*XSATE_MASK_APX*/(0x80000); // The initialize call should fail but return contextSize BOOL success = pfnInitializeContext2 ? pfnInitializeContext2(NULL, context, NULL, &contextSize, xStateCompactionMask) : @@ -540,9 +540,9 @@ REDHAWK_PALEXPORT _Success_(return) bool REDHAWK_PALAPI PalGetCompleteThreadCont #if defined(TARGET_X86) || defined(TARGET_AMD64) // Make sure that AVX feature mask is set, if supported. This should not normally fail. // The system silently ignores any feature specified in the FeatureMask which is not enabled on the processor. - if (!SetXStateFeaturesMask(pCtx, XSTATE_MASK_AVX | XSTATE_MASK_AVX512)) + if (!SetXStateFeaturesMask(pCtx, XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | /*XSATE_MASK_APX*/(0x80000))) { - _ASSERTE(!"Could not apply XSTATE_MASK_AVX | XSTATE_MASK_AVX512"); + _ASSERTE(!"Could not apply XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | /*XSATE_MASK_APX*/(0x80000)"); return FALSE; } #endif //defined(TARGET_X86) || defined(TARGET_AMD64) diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index c64cf8ee020adc..87cc989a997d0c 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -1405,6 +1405,10 @@ typedef struct _KNONVOLATILE_CONTEXT_POINTERS { (UI64(1) << (XSTATE_AVX512_ZMM_H)) | \ (UI64(1) << (XSTATE_AVX512_ZMM))) +// TODO-xarch-apx: the definition of XSTATE mask value for APX is now missing on the OS level, +// we are currently using bare value to hack it through the build process, and test the implementation through CI. 
+// those changes will be removed when we have the OS support for APX. + typedef struct DECLSPEC_ALIGN(16) _M128A { ULONGLONG Low; LONGLONG High; @@ -1640,6 +1644,27 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { M512 Zmm30; M512 Zmm31; }; + + struct + { + DWORD64 Egpr16; + DWORD64 Egpr17; + DWORD64 Egpr18; + DWORD64 Egpr19; + DWORD64 Egpr20; + DWORD64 Egpr21; + DWORD64 Egpr22; + DWORD64 Egpr23; + DWORD64 Egpr24; + DWORD64 Egpr25; + DWORD64 Egpr26; + DWORD64 Egpr27; + DWORD64 Egpr28; + DWORD64 Egpr29; + DWORD64 Egpr30; + DWORD64 Egpr31; + }; + } CONTEXT, *PCONTEXT, *LPCONTEXT; // diff --git a/src/coreclr/pal/src/arch/amd64/asmconstants.h b/src/coreclr/pal/src/arch/amd64/asmconstants.h index d5a72cf6eda23a..e77e79e99e4d99 100644 --- a/src/coreclr/pal/src/arch/amd64/asmconstants.h +++ b/src/coreclr/pal/src/arch/amd64/asmconstants.h @@ -15,6 +15,10 @@ (1 << (XSTATE_AVX512_ZMM_H)) | \ (1 << (XSTATE_AVX512_ZMM))) +// TODO-xarch-apx: the definition of XSTATE mask value for APX is now missing on the OS level, +// we are currently using bare value to hack it through the build process, and test the implementation through CI. +// those changes will be removed when we have the OS support for APX. + // The arch bit is normally set in the flag constants below. Since this is already arch-specific code and the arch bit is not // relevant, the arch bit is excluded from the flag constants below for simpler tests. 
#define CONTEXT_AMD64 0x100000 @@ -91,7 +95,8 @@ #define CONTEXT_KMask0 CONTEXT_Ymm0H+(16*16) #define CONTEXT_Zmm0H CONTEXT_KMask0+(8*8) #define CONTEXT_Zmm16 CONTEXT_Zmm0H+(32*16) -#define CONTEXT_Size CONTEXT_Zmm16+(64*16) +#define CONTEXT_Egpr CONTEXT_Zmm16+(16*8) +#define CONTEXT_Size CONTEXT_Egpr+(8*16) #else // HOST_64BIT diff --git a/src/coreclr/pal/src/arch/amd64/context2.S b/src/coreclr/pal/src/arch/amd64/context2.S index dba772f9dbbf5e..2f4c77376d65de 100644 --- a/src/coreclr/pal/src/arch/amd64/context2.S +++ b/src/coreclr/pal/src/arch/amd64/context2.S @@ -183,6 +183,29 @@ LOCAL_LABEL(Done_Restore_CONTEXT_FLOATING_POINT): kmovq k6, qword ptr [rdi + (CONTEXT_KMask0 + 6 * 8)] kmovq k7, qword ptr [rdi + (CONTEXT_KMask0 + 7 * 8)] + // TODO-xarch-apx: the definition of XSTATE mask value for APX is now missing on the OS level, + // we are currently using bare value to hack it through the build process, and test the implementation through CI. + // those changes will be removed when we have the OS support for APX. + test BYTE PTR [rdi + CONTEXT_XStateFeaturesMask], 524288 + je LOCAL_LABEL(Done_Restore_CONTEXT_XSTATE) + + // Restore the EGPR state, EGPR use previous MPX field, need to add an offset. 
+ mov r16, qword ptr [rdi + CONTEXT_Egpr + 0 * 8] + mov r17, qword ptr [rdi + CONTEXT_Egpr + 1 * 8] + mov r18, qword ptr [rdi + CONTEXT_Egpr + 2 * 8] + mov r19, qword ptr [rdi + CONTEXT_Egpr + 3 * 8] + mov r20, qword ptr [rdi + CONTEXT_Egpr + 4 * 8] + mov r21, qword ptr [rdi + CONTEXT_Egpr + 5 * 8] + mov r22, qword ptr [rdi + CONTEXT_Egpr + 6 * 8] + mov r23, qword ptr [rdi + CONTEXT_Egpr + 7 * 8] + mov r24, qword ptr [rdi + CONTEXT_Egpr + 8 * 8] + mov r25, qword ptr [rdi + CONTEXT_Egpr + 9 * 8] + mov r26, qword ptr [rdi + CONTEXT_Egpr + 10 * 8] + mov r27, qword ptr [rdi + CONTEXT_Egpr + 11 * 8] + mov r28, qword ptr [rdi + CONTEXT_Egpr + 12 * 8] + mov r29, qword ptr [rdi + CONTEXT_Egpr + 13 * 8] + mov r30, qword ptr [rdi + CONTEXT_Egpr + 14 * 8] + mov r31, qword ptr [rdi + CONTEXT_Egpr + 15 * 8] LOCAL_LABEL(Done_Restore_CONTEXT_XSTATE): test BYTE PTR [rdi + CONTEXT_ContextFlags], CONTEXT_CONTROL diff --git a/src/coreclr/pal/src/include/pal/context.h b/src/coreclr/pal/src/include/pal/context.h index 6eeeaa6fed7453..f868c530e59ab0 100644 --- a/src/coreclr/pal/src/include/pal/context.h +++ b/src/coreclr/pal/src/include/pal/context.h @@ -58,6 +58,7 @@ using asm_sigcontext::_xstate; #if defined(XSTATE_SUPPORTED) || (defined(HOST_AMD64) && defined(HAVE_MACH_EXCEPTIONS)) bool Xstate_IsAvx512Supported(); +bool Xstate_IsApxSupported(); #endif // XSTATE_SUPPORTED || (HOST_AMD64 && HAVE_MACH_EXCEPTIONS) #ifdef HOST_S390X @@ -383,6 +384,10 @@ bool Xstate_IsAvx512Supported(); #define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK | XFEATURE_MASK_ZMM_Hi256 | XFEATURE_MASK_Hi16_ZMM) #endif // XFEATURE_MASK_AVX512 +// TODO-xarch-apx: the definition of XSTATE mask value for APX is now missing on the OS level, +// we are currently using bare value to hack it through the build process, and test the implementation through CI. +// those changes will be removed when we have the OS support for APX. 
+ #if HAVE__FPX_SW_BYTES_WITH_XSTATE_BV #define FPREG_FpxSwBytes_xfeatures(uc) FPREG_FpxSwBytes(uc)->xstate_bv #else @@ -405,7 +410,7 @@ struct Xstate_ExtendedFeature uint32_t size; }; -#define Xstate_ExtendedFeatures_Count (XSTATE_AVX512_ZMM + 1) +#define Xstate_ExtendedFeatures_Count (/*XSTATE_APX*/19 + 1) extern Xstate_ExtendedFeature Xstate_ExtendedFeatures[Xstate_ExtendedFeatures_Count]; inline _fpx_sw_bytes *FPREG_FpxSwBytes(const ucontext_t *uc) @@ -542,6 +547,27 @@ inline void *FPREG_Xstate_Hi16Zmm(const ucontext_t *uc, uint32_t *featureSize) _ASSERTE(FPREG_HasAvx512Registers(uc)); return FPREG_Xstate_ExtendedFeature(uc, featureSize, XSTATE_AVX512_ZMM); } + +inline bool FPREG_HasApxRegisters(const ucontext_t *uc) +{ + if (!FPREG_HasExtendedState(uc)) + { + return false; + } + + if ((FPREG_FpxSwBytes_xfeatures(uc) & /*XFEATURE_MASK_APX*/(0x80000)) != /*XFEATURE_MASK_APX*/(0x80000)) + { + return false; + } + + return Xstate_IsApxSupported(); +} + +inline void *FPREG_Xstate_Egpr(const ucontext_t *uc, uint32_t *featureSize) +{ + _ASSERTE(FPREG_HasApxRegisters(uc)); + return FPREG_Xstate_ExtendedFeature(uc, featureSize, /*XSTATE_APX*/19); +} #endif // XSTATE_SUPPORTED ///////////////////// diff --git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index 04fabab0e7253e..96c6824e1f802a 100644 --- a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -380,6 +380,59 @@ bool Xstate_IsAvx512Supported() return Xstate_Avx512Supported == 1; #endif } + +bool Xstate_IsApxSupported() +{ +#if defined(HAVE_MACH_EXCEPTIONS) + // TODO-xarch-apx: I assume OSX will never support APX + return false; +#else + static int Xstate_ApxSupported = -1; + + if (Xstate_ApxSupported == -1) + { + int cpuidInfo[4]; + + const int CPUID_EAX = 0; + const int CPUID_EBX = 1; + const int CPUID_ECX = 2; + const int CPUID_EDX = 3; + +#ifdef _DEBUG + // We should only be calling this function if we know the extended feature exists + 
__cpuid(cpuidInfo, 0x00000000); + _ASSERTE(static_cast(cpuidInfo[CPUID_EAX]) >= 0x0D); +#endif // _DEBUG + + __cpuidex(cpuidInfo, 0x0000000D, 0x00000000); + + if ((cpuidInfo[CPUID_EAX] & /*XSATE_MASK_APX*/(0x80000)) == /*XSATE_MASK_APX*/(0x80000)) + { + // Knight's Landing and Knight's Mill shipped without all 5 of the "baseline" + // AVX-512 ISAs that are required by x86-64-v4. Specifically they do not include + // BW, DQ, or VL. RyuJIT currently requires all 5 ISAs to be present so we will + // only enable Avx512 context save/restore when all exist. This requires us to + // query which ISAs are actually supported to ensure they're all present. + + __cpuidex(cpuidInfo, 0x00000007, 0x00000001); + + const int requiredApxFlags = (1 << 21); + + if ((cpuidInfo[CPUID_EDX] & requiredApxFlags) == requiredApxFlags) + { + Xstate_ApxSupported = 1; + } + } + + if (Xstate_ApxSupported == -1) + { + Xstate_ApxSupported = 0; + } + } + + return Xstate_ApxSupported == 1; +#endif +} #endif // XSTATE_SUPPORTED || defined(HOST_AMD64) && defined(HAVE_MACH_EXCEPTIONS) #if !HAVE_MACH_EXCEPTIONS @@ -800,6 +853,17 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) _ASSERT(size == (sizeof(M512) * 16)); memcpy_s(dest, sizeof(M512) * 16, &lpContext->Zmm16, sizeof(M512) * 16); } +#ifndef TARGET_OSX + // TODO-xarch-apx: I suppose OSX will not support APX. 
+ if (FPREG_HasApxRegisters(native)) + { + _ASSERT((lpContext->XStateFeaturesMask & /*XSATE_MASK_APX*/(0x80000)) == /*XSATE_MASK_APX*/(0x80000)); + + dest = FPREG_Xstate_Egpr(native, &size); + _ASSERT(size == (sizeof(DWORD64) * 16)); + memcpy_s(dest, sizeof(DWORD64) * 16, &lpContext->Egpr16, sizeof(DWORD64) * 16); + } +#endif // TARGET_OSX } } #endif //HOST_AMD64 && XSTATE_SUPPORTED @@ -1017,6 +1081,15 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex lpContext->XStateFeaturesMask |= XSTATE_MASK_AVX512; } + + if (FPREG_HasApxRegisters(native)) + { + src = FPREG_Xstate_Egpr(native, &size); + _ASSERT(size == (sizeof(DWORD64) * 16)); + memcpy_s(&lpContext->Egpr16, sizeof(DWORD64) * 16, src, sizeof(DWORD64) * 16); + + lpContext->XStateFeaturesMask |= /*XSATE_MASK_APX*/(0x80000); + } } else #endif // XSTATE_SUPPORTED diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index bef78e07ac7f06..8fb840d9db4f63 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -70,20 +70,14 @@ private static class XArchIntrinsicConstants public const int Lzcnt = 0x1000; public const int AvxVnni = 0x2000; public const int Movbe = 0x4000; - public const int Avx512f = 0x8000; - public const int Avx512f_vl = 0x10000; - public const int Avx512bw = 0x20000; - public const int Avx512bw_vl = 0x40000; - public const int Avx512cd = 0x80000; - public const int Avx512cd_vl = 0x100000; - public const int Avx512dq = 0x200000; - public const int Avx512dq_vl = 0x400000; - public const int Avx512Vbmi = 0x800000; - public const int Avx512Vbmi_vl = 0x1000000; - public const int Serialize = 0x2000000; - public const int Avx10v1 = 0x4000000; - public const int Avx10v1_v256 = 0x8000000; - public const int Avx10v1_v512 = 0x10000000; + public const int Avx512 = 0x8000; + public const int Avx512Vbmi = 
0x10000; + public const int Avx512Vbmi_vl = 0x20000; + public const int Serialize = 0x40000; + public const int Avx10v1 = 0x80000; + public const int Avx10v1_v256 = 0x100000; + public const int Avx10v1_v512 = 0x200000; + public const int Apx = 0x400000; public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) { @@ -117,21 +111,21 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) builder.AddSupportedInstructionSet("avxvnni"); if ((flags & Movbe) != 0) builder.AddSupportedInstructionSet("movbe"); - if ((flags & Avx512f) != 0) + if ((flags & Avx512) != 0) builder.AddSupportedInstructionSet("avx512f"); - if ((flags & Avx512f_vl) != 0) + if ((flags & Avx512) != 0) builder.AddSupportedInstructionSet("avx512f_vl"); - if ((flags & Avx512bw) != 0) + if ((flags & Avx512) != 0) builder.AddSupportedInstructionSet("avx512bw"); - if ((flags & Avx512bw_vl) != 0) + if ((flags & Avx512) != 0) builder.AddSupportedInstructionSet("avx512bw_vl"); - if ((flags & Avx512cd) != 0) + if ((flags & Avx512) != 0) builder.AddSupportedInstructionSet("avx512cd"); - if ((flags & Avx512cd_vl) != 0) + if ((flags & Avx512) != 0) builder.AddSupportedInstructionSet("avx512cd_vl"); - if ((flags & Avx512dq) != 0) + if ((flags & Avx512) != 0) builder.AddSupportedInstructionSet("avx512dq"); - if ((flags & Avx512dq_vl) != 0) + if ((flags & Avx512) != 0) builder.AddSupportedInstructionSet("avx512dq_vl"); if ((flags & Avx512Vbmi) != 0) builder.AddSupportedInstructionSet("avx512vbmi"); @@ -145,6 +139,8 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) builder.AddSupportedInstructionSet("avx10v1_v256"); if ((flags & Avx10v1_v512) != 0) builder.AddSupportedInstructionSet("avx10v1_v512"); + if ((flags & Apx) != 0) + builder.AddSupportedInstructionSet("apx"); } public static int FromInstructionSet(InstructionSet instructionSet) @@ -186,22 +182,22 @@ public static int FromInstructionSet(InstructionSet instructionSet) 
InstructionSet.X64_AVXVNNI_X64 => AvxVnni, InstructionSet.X64_MOVBE => Movbe, InstructionSet.X64_MOVBE_X64 => Movbe, - InstructionSet.X64_AVX512F => Avx512f, - InstructionSet.X64_AVX512F_X64 => Avx512f, - InstructionSet.X64_AVX512F_VL => Avx512f_vl, - InstructionSet.X64_AVX512F_VL_X64 => Avx512f_vl, - InstructionSet.X64_AVX512BW => Avx512bw, - InstructionSet.X64_AVX512BW_X64 => Avx512bw, - InstructionSet.X64_AVX512BW_VL => Avx512bw_vl, - InstructionSet.X64_AVX512BW_VL_X64 => Avx512bw_vl, - InstructionSet.X64_AVX512CD => Avx512cd, - InstructionSet.X64_AVX512CD_X64 => Avx512cd, - InstructionSet.X64_AVX512CD_VL => Avx512cd_vl, - InstructionSet.X64_AVX512CD_VL_X64 => Avx512cd_vl, - InstructionSet.X64_AVX512DQ => Avx512dq, - InstructionSet.X64_AVX512DQ_X64 => Avx512dq, - InstructionSet.X64_AVX512DQ_VL => Avx512dq_vl, - InstructionSet.X64_AVX512DQ_VL_X64 => Avx512dq_vl, + InstructionSet.X64_AVX512F => Avx512, + InstructionSet.X64_AVX512F_X64 => Avx512, + InstructionSet.X64_AVX512F_VL => Avx512, + InstructionSet.X64_AVX512F_VL_X64 => Avx512, + InstructionSet.X64_AVX512BW => Avx512, + InstructionSet.X64_AVX512BW_X64 => Avx512, + InstructionSet.X64_AVX512BW_VL => Avx512, + InstructionSet.X64_AVX512BW_VL_X64 => Avx512, + InstructionSet.X64_AVX512CD => Avx512, + InstructionSet.X64_AVX512CD_X64 => Avx512, + InstructionSet.X64_AVX512CD_VL => Avx512, + InstructionSet.X64_AVX512CD_VL_X64 => Avx512, + InstructionSet.X64_AVX512DQ => Avx512, + InstructionSet.X64_AVX512DQ_X64 => Avx512, + InstructionSet.X64_AVX512DQ_VL => Avx512, + InstructionSet.X64_AVX512DQ_VL_X64 => Avx512, InstructionSet.X64_AVX512VBMI => Avx512Vbmi, InstructionSet.X64_AVX512VBMI_X64 => Avx512Vbmi, InstructionSet.X64_AVX512VBMI_VL => Avx512Vbmi_vl, @@ -227,7 +223,7 @@ public static int FromInstructionSet(InstructionSet instructionSet) // Vector Sizes InstructionSet.X64_VectorT128 => 0, InstructionSet.X64_VectorT256 => Avx2, - InstructionSet.X64_VectorT512 => Avx512f, + InstructionSet.X64_VectorT512 => Avx512, _ 
=> throw new NotSupportedException(((InstructionSet_X64)instructionSet).ToString()) }; diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index 0e2fec09e19d57..2b4efb809a18da 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -58,6 +58,7 @@ public enum ReadyToRunInstructionSet Avx10v1=44, Avx10v1_V256=45, Avx10v1_V512=46, + Apx=47, } } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 533b017e2bae85..3745a0707949e4 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -124,6 +124,8 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_AVX10v1_V256_X64: return ReadyToRunInstructionSet.Avx10v1_V256; case InstructionSet.X64_AVX10v1_V512: return ReadyToRunInstructionSet.Avx10v1_V512; case InstructionSet.X64_AVX10v1_V512_X64: return ReadyToRunInstructionSet.Avx10v1_V512; + case InstructionSet.X64_APX: return ReadyToRunInstructionSet.Apx; + case InstructionSet.X64_APX_X64: return ReadyToRunInstructionSet.Apx; case InstructionSet.X64_VectorT128: return ReadyToRunInstructionSet.VectorT128; case InstructionSet.X64_VectorT256: return ReadyToRunInstructionSet.VectorT256; case InstructionSet.X64_VectorT512: return ReadyToRunInstructionSet.VectorT512; @@ -203,6 +205,8 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_AVX10v1_V256_X64: return null; case InstructionSet.X86_AVX10v1_V512: return ReadyToRunInstructionSet.Avx10v1_V512; case InstructionSet.X86_AVX10v1_V512_X64: return null; + case InstructionSet.X86_APX: return ReadyToRunInstructionSet.Apx; + case 
InstructionSet.X86_APX_X64: return null; case InstructionSet.X86_VectorT128: return ReadyToRunInstructionSet.VectorT128; case InstructionSet.X86_VectorT256: return ReadyToRunInstructionSet.VectorT256; case InstructionSet.X86_VectorT512: return ReadyToRunInstructionSet.VectorT512; diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 270d4834a9c2b2..fd751fbbaf3534 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -76,6 +76,7 @@ public enum InstructionSet X64_AVX10v1 = InstructionSet_X64.AVX10v1, X64_AVX10v1_V256 = InstructionSet_X64.AVX10v1_V256, X64_AVX10v1_V512 = InstructionSet_X64.AVX10v1_V512, + X64_APX = InstructionSet_X64.APX, X64_VectorT128 = InstructionSet_X64.VectorT128, X64_VectorT256 = InstructionSet_X64.VectorT256, X64_VectorT512 = InstructionSet_X64.VectorT512, @@ -111,6 +112,7 @@ public enum InstructionSet X64_AVX10v1_X64 = InstructionSet_X64.AVX10v1_X64, X64_AVX10v1_V256_X64 = InstructionSet_X64.AVX10v1_V256_X64, X64_AVX10v1_V512_X64 = InstructionSet_X64.AVX10v1_V512_X64, + X64_APX_X64 = InstructionSet_X64.APX_X64, X86_X86Base = InstructionSet_X86.X86Base, X86_SSE = InstructionSet_X86.SSE, X86_SSE2 = InstructionSet_X86.SSE2, @@ -146,6 +148,7 @@ public enum InstructionSet X86_AVX10v1 = InstructionSet_X86.AVX10v1, X86_AVX10v1_V256 = InstructionSet_X86.AVX10v1_V256, X86_AVX10v1_V512 = InstructionSet_X86.AVX10v1_V512, + X86_APX = InstructionSet_X86.APX, X86_VectorT128 = InstructionSet_X86.VectorT128, X86_VectorT256 = InstructionSet_X86.VectorT256, X86_VectorT512 = InstructionSet_X86.VectorT512, @@ -181,6 +184,7 @@ public enum InstructionSet X86_AVX10v1_X64 = InstructionSet_X86.AVX10v1_X64, X86_AVX10v1_V256_X64 = InstructionSet_X86.AVX10v1_V256_X64, X86_AVX10v1_V512_X64 = InstructionSet_X86.AVX10v1_V512_X64, + X86_APX_X64 = InstructionSet_X86.APX_X64, } public enum 
InstructionSet_ARM64 { @@ -252,41 +256,43 @@ public enum InstructionSet_X64 AVX10v1 = 33, AVX10v1_V256 = 34, AVX10v1_V512 = 35, - VectorT128 = 36, - VectorT256 = 37, - VectorT512 = 38, - X86Base_X64 = 39, - SSE_X64 = 40, - SSE2_X64 = 41, - SSE3_X64 = 42, - SSSE3_X64 = 43, - SSE41_X64 = 44, - SSE42_X64 = 45, - AVX_X64 = 46, - AVX2_X64 = 47, - AES_X64 = 48, - BMI1_X64 = 49, - BMI2_X64 = 50, - FMA_X64 = 51, - LZCNT_X64 = 52, - PCLMULQDQ_X64 = 53, - POPCNT_X64 = 54, - AVXVNNI_X64 = 55, - MOVBE_X64 = 56, - X86Serialize_X64 = 57, - AVX512F_X64 = 58, - AVX512F_VL_X64 = 59, - AVX512BW_X64 = 60, - AVX512BW_VL_X64 = 61, - AVX512CD_X64 = 62, - AVX512CD_VL_X64 = 63, - AVX512DQ_X64 = 64, - AVX512DQ_VL_X64 = 65, - AVX512VBMI_X64 = 66, - AVX512VBMI_VL_X64 = 67, - AVX10v1_X64 = 68, - AVX10v1_V256_X64 = 69, - AVX10v1_V512_X64 = 70, + APX = 36, + VectorT128 = 37, + VectorT256 = 38, + VectorT512 = 39, + X86Base_X64 = 40, + SSE_X64 = 41, + SSE2_X64 = 42, + SSE3_X64 = 43, + SSSE3_X64 = 44, + SSE41_X64 = 45, + SSE42_X64 = 46, + AVX_X64 = 47, + AVX2_X64 = 48, + AES_X64 = 49, + BMI1_X64 = 50, + BMI2_X64 = 51, + FMA_X64 = 52, + LZCNT_X64 = 53, + PCLMULQDQ_X64 = 54, + POPCNT_X64 = 55, + AVXVNNI_X64 = 56, + MOVBE_X64 = 57, + X86Serialize_X64 = 58, + AVX512F_X64 = 59, + AVX512F_VL_X64 = 60, + AVX512BW_X64 = 61, + AVX512BW_VL_X64 = 62, + AVX512CD_X64 = 63, + AVX512CD_VL_X64 = 64, + AVX512DQ_X64 = 65, + AVX512DQ_VL_X64 = 66, + AVX512VBMI_X64 = 67, + AVX512VBMI_VL_X64 = 68, + AVX10v1_X64 = 69, + AVX10v1_V256_X64 = 70, + AVX10v1_V512_X64 = 71, + APX_X64 = 72, } public enum InstructionSet_X86 @@ -328,41 +334,43 @@ public enum InstructionSet_X86 AVX10v1 = 33, AVX10v1_V256 = 34, AVX10v1_V512 = 35, - VectorT128 = 36, - VectorT256 = 37, - VectorT512 = 38, - X86Base_X64 = 39, - SSE_X64 = 40, - SSE2_X64 = 41, - SSE3_X64 = 42, - SSSE3_X64 = 43, - SSE41_X64 = 44, - SSE42_X64 = 45, - AVX_X64 = 46, - AVX2_X64 = 47, - AES_X64 = 48, - BMI1_X64 = 49, - BMI2_X64 = 50, - FMA_X64 = 51, - LZCNT_X64 = 52, - 
PCLMULQDQ_X64 = 53, - POPCNT_X64 = 54, - AVXVNNI_X64 = 55, - MOVBE_X64 = 56, - X86Serialize_X64 = 57, - AVX512F_X64 = 58, - AVX512F_VL_X64 = 59, - AVX512BW_X64 = 60, - AVX512BW_VL_X64 = 61, - AVX512CD_X64 = 62, - AVX512CD_VL_X64 = 63, - AVX512DQ_X64 = 64, - AVX512DQ_VL_X64 = 65, - AVX512VBMI_X64 = 66, - AVX512VBMI_VL_X64 = 67, - AVX10v1_X64 = 68, - AVX10v1_V256_X64 = 69, - AVX10v1_V512_X64 = 70, + APX = 36, + VectorT128 = 37, + VectorT256 = 38, + VectorT512 = 39, + X86Base_X64 = 40, + SSE_X64 = 41, + SSE2_X64 = 42, + SSE3_X64 = 43, + SSSE3_X64 = 44, + SSE41_X64 = 45, + SSE42_X64 = 46, + AVX_X64 = 47, + AVX2_X64 = 48, + AES_X64 = 49, + BMI1_X64 = 50, + BMI2_X64 = 51, + FMA_X64 = 52, + LZCNT_X64 = 53, + PCLMULQDQ_X64 = 54, + POPCNT_X64 = 55, + AVXVNNI_X64 = 56, + MOVBE_X64 = 57, + X86Serialize_X64 = 58, + AVX512F_X64 = 59, + AVX512F_VL_X64 = 60, + AVX512BW_X64 = 61, + AVX512BW_VL_X64 = 62, + AVX512CD_X64 = 63, + AVX512CD_VL_X64 = 64, + AVX512DQ_X64 = 65, + AVX512DQ_VL_X64 = 66, + AVX512VBMI_X64 = 67, + AVX512VBMI_VL_X64 = 68, + AVX10v1_X64 = 69, + AVX10v1_V256_X64 = 70, + AVX10v1_V512_X64 = 71, + APX_X64 = 72, } public unsafe struct InstructionSetFlags : IEnumerable @@ -710,6 +718,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512_X64)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_APX)) + resultflags.AddInstructionSet(InstructionSet.X64_APX_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_APX_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_APX); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) @@ -1067,6 +1079,8 @@ private static InstructionSetFlags 
ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V256); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512_X64)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_APX_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_APX); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_SSE); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) @@ -1405,6 +1419,7 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("avx10v1", "Avx10v1", InstructionSet.X64_AVX10v1, true); yield return new InstructionSetInfo("avx10v1_v256", "Avx10v1_V256", InstructionSet.X64_AVX10v1_V256, true); yield return new InstructionSetInfo("avx10v1_v512", "Avx10v1_V512", InstructionSet.X64_AVX10v1_V512, true); + yield return new InstructionSetInfo("apx", "Apx", InstructionSet.X64_APX, true); yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.X64_VectorT128, true); yield return new InstructionSetInfo("vectort256", "VectorT256", InstructionSet.X64_VectorT256, true); yield return new InstructionSetInfo("vectort512", "VectorT512", InstructionSet.X64_VectorT512, true); @@ -1446,6 +1461,7 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("avx10v1", "Avx10v1", InstructionSet.X86_AVX10v1, true); yield return new InstructionSetInfo("avx10v1_v256", "Avx10v1_V256", InstructionSet.X86_AVX10v1_V256, true); yield return new InstructionSetInfo("avx10v1_v512", "Avx10v1_V512", InstructionSet.X86_AVX10v1_V512, true); + yield return new InstructionSetInfo("apx", "Apx", InstructionSet.X86_APX, true); yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.X86_VectorT128, true); yield return new InstructionSetInfo("vectort256", "VectorT256", InstructionSet.X86_VectorT256, 
true); yield return new InstructionSetInfo("vectort512", "VectorT512", InstructionSet.X86_VectorT512, true); @@ -1544,6 +1560,8 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture) AddInstructionSet(InstructionSet.X64_AVX10v1_V256_X64); if (HasInstructionSet(InstructionSet.X64_AVX10v1_V512)) AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64); + if (HasInstructionSet(InstructionSet.X64_APX)) + AddInstructionSet(InstructionSet.X64_APX_X64); break; case TargetArchitecture.X86: @@ -1601,6 +1619,7 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X64_AVX10v1_X64); AddInstructionSet(InstructionSet.X64_AVX10v1_V256_X64); AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64); + AddInstructionSet(InstructionSet.X64_APX_X64); break; case TargetArchitecture.X86: @@ -1636,6 +1655,7 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X86_AVX10v1_X64); AddInstructionSet(InstructionSet.X86_AVX10v1_V256_X64); AddInstructionSet(InstructionSet.X86_AVX10v1_V512_X64); + AddInstructionSet(InstructionSet.X86_APX_X64); break; } } @@ -1931,6 +1951,12 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite else { return InstructionSet.X64_AVX10v1_V512; } + case "Apx": + if (nestedTypeName == "X64") + { return InstructionSet.X64_APX_X64; } + else + { return InstructionSet.X64_APX; } + case "VectorT128": { return InstructionSet.X64_VectorT128; } @@ -2043,6 +2069,9 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite case "Avx10v1_V512": { return InstructionSet.X86_AVX10v1_V512; } + case "Apx": + { return InstructionSet.X86_APX; } + case "VectorT128": { return InstructionSet.X86_VectorT128; } diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 
5f1953f71e9190..e4952d10ed18cb 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -60,6 +60,7 @@ instructionset ,X86 ,Avx512Vbmi_VL , ,38 ,AVX512VBMI_VL instructionset ,X86 ,Avx10v1 , ,44 ,AVX10v1 ,avx10v1 instructionset ,X86 ,Avx10v1_V256 , ,45 ,AVX10v1_V256 ,avx10v1_v256 instructionset ,X86 ,Avx10v1_V512 , ,46 ,AVX10v1_V512 ,avx10v1_v512 +instructionset ,X86 ,Apx , ,47 ,APX ,apx instructionset ,X86 ,VectorT128 , ,39 ,VectorT128 ,vectort128 instructionset ,X86 ,VectorT256 , ,40 ,VectorT256 ,vectort256 instructionset ,X86 ,VectorT512 , ,41 ,VectorT512 ,vectort512 @@ -96,6 +97,7 @@ instructionset64bit,X86 ,AVX512VBMI_VL instructionset64bit,X86 ,AVX10v1 instructionset64bit,X86 ,AVX10v1_V256 instructionset64bit,X86 ,AVX10v1_V512 +instructionset64bit,X86 ,APX vectorinstructionset,X86 ,Vector128 vectorinstructionset,X86 ,Vector256 diff --git a/src/coreclr/vm/amd64/asmconstants.h b/src/coreclr/vm/amd64/asmconstants.h index b51088a6b47930..d773f447ec4ad9 100644 --- a/src/coreclr/vm/amd64/asmconstants.h +++ b/src/coreclr/vm/amd64/asmconstants.h @@ -276,8 +276,8 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__VASigCookie__pNDirectILStub #if defined(UNIX_AMD64_ABI) && !defined(HOST_WINDOWS) // Expression is too complicated, is currently: -// (8*6 + 4*2 + 2*6 + 4 + 8*6 + 8*16 + 8 + /*XMM_SAVE_AREA32*/(2*2 + 1*2 + 2 + 4 + 2*2 + 4 + 2*2 + 4*2 + 16*8 + 16*16 + 1*96) + 26*16 + 8 + 8*5 + /*XSTATE*/ + 8 + 8 + /*XSTATE_AVX*/ 16*16 + /*XSTATE_AVX512_KMASK*/ 8*8 + /*XSTATE_AVX512_ZMM_H*/ 32*16 + /*XSTATE_AVX512_ZMM*/ 64*16) -#define SIZEOF__CONTEXT (3104) +// (8*6 + 4*2 + 2*6 + 4 + 8*6 + 8*16 + 8 + /*XMM_SAVE_AREA32*/(2*2 + 1*2 + 2 + 4 + 2*2 + 4 + 2*2 + 4*2 + 16*8 + 16*16 + 1*96) + 26*16 + 8 + 8*5 + /*XSTATE*/ + 8 + 8 + /*XSTATE_AVX*/ 16*16 + /*XSTATE_AVX512_KMASK*/ 8*8 + /*XSTATE_AVX512_ZMM_H*/ 32*16 + /*XSTATE_AVX512_ZMM*/ 64*16 + /*XSTATE_APX_EGPR*/ 8*16) +#define 
SIZEOF__CONTEXT (3232) #else // Expression is too complicated, is currently: // (8*6 + 4*2 + 2*6 + 4 + 8*6 + 8*16 + 8 + /*XMM_SAVE_AREA32*/(2*2 + 1*2 + 2 + 4 + 2*2 + 4 + 2*2 + 4*2 + 16*8 + 16*16 + 1*96) + 26*16 + 8 + 8*5) diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index d115a22850a742..6277be58f05f9f 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1269,7 +1269,7 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.Set(InstructionSet_VectorT256); } - if (((cpuFeatures & XArchIntrinsicConstants_Avx512f) != 0) && (maxVectorTBitWidth >= 512)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) && (maxVectorTBitWidth >= 512)) { // We require 512-bit Vector to be opt-in CPUCompileFlags.Set(InstructionSet_VectorT512); @@ -1305,42 +1305,42 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.Set(InstructionSet_AVX2); } - if (((cpuFeatures & XArchIntrinsicConstants_Avx512f) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F)) { CPUCompileFlags.Set(InstructionSet_AVX512F); } - if (((cpuFeatures & XArchIntrinsicConstants_Avx512f_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F_VL)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F_VL)) { CPUCompileFlags.Set(InstructionSet_AVX512F_VL); } - if (((cpuFeatures & XArchIntrinsicConstants_Avx512bw) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW)) { CPUCompileFlags.Set(InstructionSet_AVX512BW); } - if (((cpuFeatures & XArchIntrinsicConstants_Avx512bw_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW_VL)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) && 
CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW_VL)) { CPUCompileFlags.Set(InstructionSet_AVX512BW_VL); } - if (((cpuFeatures & XArchIntrinsicConstants_Avx512cd) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD)) { CPUCompileFlags.Set(InstructionSet_AVX512CD); } - if (((cpuFeatures & XArchIntrinsicConstants_Avx512cd_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD_VL)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD_VL)) { CPUCompileFlags.Set(InstructionSet_AVX512CD_VL); } - if (((cpuFeatures & XArchIntrinsicConstants_Avx512dq) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ)) { CPUCompileFlags.Set(InstructionSet_AVX512DQ); } - if (((cpuFeatures & XArchIntrinsicConstants_Avx512dq_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ_VL)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ_VL)) { CPUCompileFlags.Set(InstructionSet_AVX512DQ_VL); } @@ -1443,6 +1443,11 @@ void EEJitManager::SetCpuInfo() { CPUCompileFlags.Set(InstructionSet_AVX10v1_V512); } + + if (((cpuFeatures & XArchIntrinsicConstants_Apx) != 0)) + { + CPUCompileFlags.Set(InstructionSet_APX); + } #elif defined(TARGET_ARM64) #if !defined(TARGET_WINDOWS) diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 40ae02264804fd..6c80f6b4d84922 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -1961,14 +1961,15 @@ CONTEXT* AllocateOSContextHelper(BYTE** contextBuffer) // Determine if the processor supports AVX so we could // retrieve extended 
registers DWORD64 FeatureMask = GetEnabledXStateFeatures(); - if ((FeatureMask & (XSTATE_MASK_AVX | XSTATE_MASK_AVX512)) != 0) + if ((FeatureMask & (XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | /*XSTATE_MASK_APX*/(0x80000))) != 0) { context = context | CONTEXT_XSTATE; } // Retrieve contextSize by passing NULL for Buffer DWORD contextSize = 0; - ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | XSTATE_MASK_AVX | XSTATE_MASK_MPX | XSTATE_MASK_AVX512; + // TODO-xarch-apx: MPX and APX cannot and will not co-exist; will setting the mask in this way be an issue? + ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | XSTATE_MASK_AVX | XSTATE_MASK_MPX | XSTATE_MASK_AVX512 | /*XSTATE_MASK_APX*/(0x80000); // The initialize call should fail but return contextSize BOOL success = g_pfnInitializeContext2 ? g_pfnInitializeContext2(NULL, context, NULL, &contextSize, xStateCompactionMask) : @@ -2902,7 +2903,7 @@ BOOL Thread::RedirectThreadAtHandledJITCase(PFN_REDIRECTTARGET pTgt) // This should not normally fail. // The system silently ignores any feature specified in the FeatureMask // which is not enabled on the processor. - SetXStateFeaturesMask(pCtx, (XSTATE_MASK_AVX | XSTATE_MASK_AVX512)); + SetXStateFeaturesMask(pCtx, (XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | /*XSTATE_MASK_APX*/(0x80000))); #endif //defined(TARGET_X86) || defined(TARGET_AMD64) // Make sure we specify CONTEXT_EXCEPTION_REQUEST to detect "trap frame reporting". @@ -3038,7 +3039,7 @@ BOOL Thread::RedirectCurrentThreadAtHandledJITCase(PFN_REDIRECTTARGET pTgt, CONT // Get may return 0 if no XState is set, which Set would not accept. 
if (srcFeatures != 0) { - success = SetXStateFeaturesMask(pCurrentThreadCtx, srcFeatures & (XSTATE_MASK_AVX | XSTATE_MASK_AVX512)); + success = SetXStateFeaturesMask(pCurrentThreadCtx, srcFeatures & (XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | /*XSTATE_MASK_APX*/(0x80000))); _ASSERTE(success); if (!success) return FALSE; diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index 1af0f86f18243a..ccfe25fe4b4555 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -72,6 +72,10 @@ static uint32_t xmmYmmStateSupport() #define XSTATE_MASK_AVX512 (0xE0) /* 0b1110_0000 */ #endif // XSTATE_MASK_AVX512 +// TODO-xarch-apx: the XSTATE mask value for APX is not yet defined at the OS level, +// so we are currently using a bare value to get it through the build process and test the implementation through CI. +// These changes will be removed once the OS provides support for APX. + static uint32_t avx512StateSupport() { #if defined(HOST_APPLE) return false; #else uint32_t eax; __asm(" xgetbv\n" \ : "=a"(eax) /*output in eax*/\ : "c"(0) /*inputs - 0 in ecx*/\ : "edx" /* registers that are clobbered*/ ); return ((eax & 0x80000) == 0x80000) ? 1 : 0; #endif } +static uint32_t apxStateSupport() +{ +#if defined(HOST_APPLE) + return false; +#else + uint32_t eax; + __asm(" xgetbv\n" \ + : "=a"(eax) /*output in eax*/\ + : "c"(0) /*inputs - 0 in ecx*/\ + : "edx" /* registers that are clobbered*/ + ); + return ((eax & 0x80000) == 0x80000) ? 1 : 0; +#endif +} + static bool IsAvxEnabled() { return true; @@ -108,6 +127,11 @@ static bool IsAvx512Enabled() { return true; } + +static bool IsApxEnabled() +{ + return true; +} #endif // defined(HOST_X86) || defined(HOST_AMD64) #endif // HOST_UNIX @@ -137,6 +161,17 @@ static bool IsAvx512Enabled() return ((FeatureMask & XSTATE_MASK_AVX512) != 0); } +static bool IsApxEnabled() +{ + DWORD64 FeatureMask = GetEnabledXStateFeatures(); + return ((FeatureMask & /*XSTATE_MASK_APX*/(0x80000)) != 0); +} + +static uint32_t apxStateSupport() +{ + return ((_xgetbv(0) & 0x80000) == 0x80000) ? 
1 : 0; +} + #endif // defined(HOST_X86) || defined(HOST_AMD64) #endif // HOST_WINDOWS @@ -220,63 +255,37 @@ int minipal_getcpufeatures(void) { result |= XArchIntrinsicConstants_Avx2; - if (IsAvx512Enabled() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111 - { - if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F - { - result |= XArchIntrinsicConstants_Avx512f; - - bool isAVX512_VLSupported = false; - if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL + if (IsAvx512Enabled() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111 { - result |= XArchIntrinsicConstants_Avx512f_vl; - isAVX512_VLSupported = true; - } - if ((cpuidInfo[CPUID_EBX] & (1 << 30)) != 0) // AVX512BW - { - result |= XArchIntrinsicConstants_Avx512bw; - if (isAVX512_VLSupported) // AVX512BW_VL + // Checking Avx512F+BW+CD+DQ+VL altogether. + const int subsetMasks = (1 << 16) | (1 <<17) | (1 << 28) | (1 << 30) | (1 << 31); + if ((cpuidInfo[CPUID_EBX] & subsetMasks) == subsetMasks) { - result |= XArchIntrinsicConstants_Avx512bw_vl; - } - } + result |= XArchIntrinsicConstants_Avx512; - if ((cpuidInfo[CPUID_EBX] & (1 << 28)) != 0) // AVX512CD - { - result |= XArchIntrinsicConstants_Avx512cd; - if (isAVX512_VLSupported) // AVX512CD_VL - { - result |= XArchIntrinsicConstants_Avx512cd_vl; + if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // AVX512VBMI + { + result |= XArchIntrinsicConstants_Avx512Vbmi; + result |= XArchIntrinsicConstants_Avx512Vbmi_vl; + } } } - if ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0) // AVX512DQ + __cpuidex(cpuidInfo, 0x00000007, 0x00000001); + + if ((cpuidInfo[CPUID_EAX] & (1 << 4)) != 0) // AVX-VNNI { - result |= XArchIntrinsicConstants_Avx512dq; - if (isAVX512_VLSupported) // AVX512DQ_VL - { - result |= XArchIntrinsicConstants_Avx512dq_vl; - } + result |= XArchIntrinsicConstants_AvxVnni; } - if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // AVX512VBMI + if (IsApxEnabled() && apxStateSupport()) { - result |= XArchIntrinsicConstants_Avx512Vbmi; - if 
(isAVX512_VLSupported) // AVX512VBMI_VL + if ((cpuidInfo[CPUID_EDX] & (1 << 21)) != 0) // APX_F { - result |= XArchIntrinsicConstants_Avx512Vbmi_vl; + result |= XArchIntrinsicConstants_Apx; } } - } - } - - __cpuidex(cpuidInfo, 0x00000007, 0x00000001); - - if ((cpuidInfo[CPUID_EAX] & (1 << 4)) != 0) // AVX-VNNI - { - result |= XArchIntrinsicConstants_AvxVnni; - } if ((cpuidInfo[CPUID_EDX] & (1 << 19)) != 0) // Avx10 { diff --git a/src/native/minipal/cpufeatures.h b/src/native/minipal/cpufeatures.h index 62aa1c75256a84..aeab2dc280042f 100644 --- a/src/native/minipal/cpufeatures.h +++ b/src/native/minipal/cpufeatures.h @@ -26,20 +26,14 @@ enum XArchIntrinsicConstants XArchIntrinsicConstants_Lzcnt = 0x1000, XArchIntrinsicConstants_AvxVnni = 0x2000, XArchIntrinsicConstants_Movbe = 0x4000, - XArchIntrinsicConstants_Avx512f = 0x8000, - XArchIntrinsicConstants_Avx512f_vl = 0x10000, - XArchIntrinsicConstants_Avx512bw = 0x20000, - XArchIntrinsicConstants_Avx512bw_vl = 0x40000, - XArchIntrinsicConstants_Avx512cd = 0x80000, - XArchIntrinsicConstants_Avx512cd_vl = 0x100000, - XArchIntrinsicConstants_Avx512dq = 0x200000, - XArchIntrinsicConstants_Avx512dq_vl = 0x400000, - XArchIntrinsicConstants_Avx512Vbmi = 0x800000, - XArchIntrinsicConstants_Avx512Vbmi_vl = 0x1000000, - XArchIntrinsicConstants_Serialize = 0x2000000, - XArchIntrinsicConstants_Avx10v1 = 0x4000000, - XArchIntrinsicConstants_Avx10v1_V256 = 0x8000000, - XArchIntrinsicConstants_Avx10v1_V512 = 0x10000000, + XArchIntrinsicConstants_Avx512 = 0x8000, + XArchIntrinsicConstants_Avx512Vbmi = 0x10000, + XArchIntrinsicConstants_Avx512Vbmi_vl = 0x20000, + XArchIntrinsicConstants_Serialize = 0x40000, + XArchIntrinsicConstants_Avx10v1 = 0x80000, + XArchIntrinsicConstants_Avx10v1_V256 = 0x100000, + XArchIntrinsicConstants_Avx10v1_V512 = 0x200000, + XArchIntrinsicConstants_Apx = 0x400000, }; #endif // HOST_X86 || HOST_AMD64