@@ -92,7 +92,7 @@ enum class CPU : uint32_t {
9292 amd_znver1,
9393};
9494
// Size parameter of the FeatureList<feature_sz> that _get_host_cpu returns.
// This change raises it from 9 to 11: the same patch starts storing CPUID
// results into features[9] and features[10], which a 9-entry list could not hold.
95- static constexpr size_t feature_sz = 9 ;
95+ static constexpr size_t feature_sz = 11 ;
9696static constexpr FeatureName feature_names[] = {
9797#define JL_FEATURE_DEF (name, bit, llvmver ) {#name, bit, llvmver},
9898#define JL_FEATURE_DEF_NAME (name, bit, llvmver, str ) {str, bit, llvmver},
@@ -130,16 +130,27 @@ static constexpr FeatureDep deps[] = {
130130 {avx, sse42},
131131 {f16c, avx},
132132 {avx2, avx},
133+ {vaes, avx},
134+ {vaes, aes},
135+ {vpclmulqdq, avx},
136+ {vpclmulqdq, pclmul},
133137 {avx512f, avx2},
134138 {avx512dq, avx512f},
135139 {avx512ifma, avx512f},
136140 {avx512pf, avx512f},
137141 {avx512er, avx512f},
138142 {avx512cd, avx512f},
139143 {avx512bw, avx512f},
144+ {avx512bf16, avx512bw},
145+ {avx512bitalg, avx512bw},
140146 {avx512vl, avx512f},
141147 {avx512vbmi, avx512bw},
148+ {avx512vbmi2, avx512bw},
149+ {avx512vnni, avx512f},
150+ {avx512vp2intersect, avx512f},
142151 {avx512vpopcntdq, avx512f},
152+ {amx_int8, amx_tile},
153+ {amx_bf16, amx_tile},
143154 {sse4a, sse3},
144155 {xop, fma4},
145156 {fma4, avx},
@@ -470,15 +481,23 @@ static inline void features_disable_avx512(T &features)
470481{
471482 using namespace Feature ;
472483 unset_bits (features, avx512f, avx512dq, avx512ifma, avx512pf, avx512er, avx512cd,
473- avx512bw, avx512vl, avx512vbmi);
484+ avx512bw, avx512vl, avx512vbmi, avx512vpopcntdq, avx512vbmi2, avx512vnni,
485+ avx512bitalg, avx512vp2intersect, avx512bf16);
474486}
475487
// Clears the AVX-level feature bits in `features` via unset_bits: avx, fma,
// f16c, xsave, avx2, xop, fma4, xsaveopt, xsavec and xsaves.
// This change also clears vaes and vpclmulqdq, in line with the new
// {vaes, avx} and {vpclmulqdq, avx} rows added to deps[].
// _get_host_cpu calls this when `hasavx` is false.
476488template <typename T>
477489static inline void features_disable_avx (T &features)
478490{
479491 using namespace Feature ;
480492 unset_bits (features, avx, Feature::fma, f16c, xsave, avx2, xop, fma4,
481- xsaveopt, xsavec, xsaves);
493+ xsaveopt, xsavec, xsaves, vaes, vpclmulqdq);
494+ }
495+
// New helper: clears the three AMX feature bits (amx_bf16, amx_tile, amx_int8)
// in `features` via unset_bits.
// _get_host_cpu calls this when `hasamxsave` is false, i.e. when `hasxsave` is
// false or XCR0 bits 17 and 18 are not both set.
// Reading note: the closing brace below is an unchanged context line in the
// diff (it used to close features_disable_avx); in the patched file it closes
// this function.
496+ template <typename T>
497+ static inline void features_disable_amx (T &features)
498+ {
499+ using namespace Feature ;
500+ unset_bits (features, amx_bf16, amx_tile, amx_int8);
482501}
483502
484503static NOINLINE std::pair<uint32_t ,FeatureList<feature_sz>> _get_host_cpu (void )
@@ -535,15 +554,25 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu(void)
535554 jl_cpuidex (infoex8, 0x80000008 , 0 );
536555 features[8 ] = infoex8[1 ];
537556 }
557+ if (maxleaf >= 7 ) {
558+ int32_t info7[4 ];
559+ jl_cpuidex (info7, 7 , 1 );
560+ features[9 ] = info7[0 ];
561+ }
562+ if (maxleaf >= 0x14 ) {
563+ int32_t info14[4 ];
564+ jl_cpuidex (info14, 0x14 , 0 );
565+ features[10 ] = info14[1 ];
566+ }
538567
539568 // Fix up AVX bits to account for OS support and match LLVM model
540569 uint64_t xcr0 = 0 ;
541- const uint32_t avx_mask = (1 << 27 ) | (1 << 28 );
542- bool hasavx = test_all_bits (features[0 ], avx_mask);
543- if (hasavx) {
570+ bool hasxsave = test_all_bits (features[0 ], 1 << 27 );
571+ if (hasxsave) {
544572 xcr0 = get_xcr0 ();
545- hasavx = test_all_bits (xcr0, 0x6 );
573+ hasxsave = test_all_bits (xcr0, 0x6 );
546574 }
575+ bool hasavx = hasxsave && test_all_bits (features[0 ], 1 << 28 );
547576 unset_bits (features, 32 + 27 );
548577 if (!hasavx)
549578 features_disable_avx (features);
@@ -557,6 +586,10 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu(void)
557586#endif
558587 if (!hasavx512save)
559588 features_disable_avx512 (features);
589+ // AMX requires additional context to be saved by the OS.
590+ bool hasamxsave = hasxsave && test_all_bits (xcr0, (1 << 17 ) | (1 << 18 ));
591+ if (!hasamxsave)
592+ features_disable_amx (features);
560593 // Ignore feature bits that we are not interested in.
561594 mask_features (feature_masks, &features[0 ]);
562595
@@ -788,12 +821,16 @@ static void ensure_jit_target(bool imaging)
788821 static constexpr uint32_t clone_simd[] = {Feature::sse3, Feature::ssse3,
789822 Feature::sse41, Feature::sse42,
790823 Feature::avx, Feature::avx2,
824+ Feature::vaes, Feature::vpclmulqdq,
791825 Feature::sse4a, Feature::avx512f,
792826 Feature::avx512dq, Feature::avx512ifma,
793827 Feature::avx512pf, Feature::avx512er,
794828 Feature::avx512cd, Feature::avx512bw,
795829 Feature::avx512vl, Feature::avx512vbmi,
796- Feature::avx512vpopcntdq};
830+ Feature::avx512vpopcntdq,
831+ Feature::avx512vbmi2, Feature::avx512vnni,
832+ Feature::avx512bitalg, Feature::avx512bf16,
833+ Feature::avx512vp2intersect};
797834 for (auto fe: clone_math) {
798835 if (!test_nbit (features0, fe) && test_nbit (t.en .features , fe)) {
799836 t.en .flags |= JL_TARGET_CLONE_MATH;
@@ -847,6 +884,9 @@ get_llvm_target_noext(const TargetData<feature_sz> &data)
847884 // returns a value that may not have 64bit support.
848885 // This can happen with virtualization.
849886 features.push_back (" +64bit" );
887+ #endif
888+ #if JL_LLVM_VERSION >= 90000
889+ features.push_back (" +cx8" );
850890#endif
851891 return std::make_pair (std::move (name), std::move (features));
852892}
0 commit comments