Skip to content

Commit 9ad9daa

Browse files
committed
New X86 features and detections
1 parent 9f83eaf commit 9ad9daa

File tree

2 files changed

+75
-8
lines changed

2 files changed

+75
-8
lines changed

src/features_x86.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,31 @@ JL_FEATURE_DEF(avx512vl, 32 * 2 + 31, 0)
5656
JL_FEATURE_DEF(prefetchwt1, 32 * 3 + 0, 0)
5757
JL_FEATURE_DEF(avx512vbmi, 32 * 3 + 1, 0)
5858
JL_FEATURE_DEF(pku, 32 * 3 + 4, 0) // ospke
59+
JL_FEATURE_DEF(waitpkg, 32 * 3 + 5, 0)
60+
JL_FEATURE_DEF(avx512vbmi2, 32 * 3 + 6, 0)
61+
JL_FEATURE_DEF(shstk, 32 * 3 + 7, 0)
62+
JL_FEATURE_DEF(gfni, 32 * 3 + 8, 0)
63+
JL_FEATURE_DEF(vaes, 32 * 3 + 9, 0)
64+
JL_FEATURE_DEF(vpclmulqdq, 32 * 3 + 10, 0)
65+
JL_FEATURE_DEF(avx512vnni, 32 * 3 + 11, 0)
66+
JL_FEATURE_DEF(avx512bitalg, 32 * 3 + 12, 0)
5967
JL_FEATURE_DEF(avx512vpopcntdq, 32 * 3 + 14, 0)
68+
JL_FEATURE_DEF(rdpid, 32 * 3 + 22, 0)
69+
JL_FEATURE_DEF(cldemote, 32 * 3 + 25, 0)
70+
JL_FEATURE_DEF(movdiri, 32 * 3 + 27, 0)
71+
JL_FEATURE_DEF(movdir64b, 32 * 3 + 28, 0)
72+
JL_FEATURE_DEF(enqcmd, 32 * 3 + 29, 90000)
6073

6174
// EAX=7,ECX=0: EDX
6275
// JL_FEATURE_DEF(avx5124vnniw, 32 * 4 + 2, ?????)
6376
// JL_FEATURE_DEF(avx5124fmaps, 32 * 4 + 3, ?????)
77+
JL_FEATURE_DEF(avx512vp2intersect, 32 * 4 + 8, 90000)
78+
JL_FEATURE_DEF(serialize, 32 * 4 + 14, 110000)
79+
JL_FEATURE_DEF(tsxldtrk, 32 * 4 + 16, 110000)
80+
JL_FEATURE_DEF(pconfig, 32 * 4 + 18, 0)
81+
JL_FEATURE_DEF_NAME(amx_bf16, 32 * 4 + 22, 110000, "amx-bf16")
82+
JL_FEATURE_DEF_NAME(amx_tile, 32 * 4 + 24, 110000, "amx-tile")
83+
JL_FEATURE_DEF_NAME(amx_int8, 32 * 4 + 25, 110000, "amx-int8")
6484

6585
// EAX=0x80000001: ECX
6686
// ignore sahf on 32bit x86 since it is required
@@ -85,5 +105,12 @@ JL_FEATURE_DEF(xsaves, 32 * 7 + 3, 0)
85105

86106
// EAX=0x80000008: EBX
87107
JL_FEATURE_DEF(clzero, 32 * 8 + 0, 0)
108+
JL_FEATURE_DEF(wbnoinvd, 32 * 8 + 9, 0)
109+
110+
// EAX=7,ECX=1: EAX
111+
JL_FEATURE_DEF(avx512bf16, 32 * 9 + 5, 90000)
112+
113+
// EAX=0x14,ECX=0: EBX
114+
JL_FEATURE_DEF(ptwrite, 32 * 10 + 4, 0)
88115

89116
#undef JL_X86_64ONLY_VER

src/processor_x86.cpp

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ enum class CPU : uint32_t {
9292
amd_znver1,
9393
};
9494

95-
static constexpr size_t feature_sz = 9;
95+
static constexpr size_t feature_sz = 11;
9696
static constexpr FeatureName feature_names[] = {
9797
#define JL_FEATURE_DEF(name, bit, llvmver) {#name, bit, llvmver},
9898
#define JL_FEATURE_DEF_NAME(name, bit, llvmver, str) {str, bit, llvmver},
@@ -130,16 +130,27 @@ static constexpr FeatureDep deps[] = {
130130
{avx, sse42},
131131
{f16c, avx},
132132
{avx2, avx},
133+
{vaes, avx},
134+
{vaes, aes},
135+
{vpclmulqdq, avx},
136+
{vpclmulqdq, pclmul},
133137
{avx512f, avx2},
134138
{avx512dq, avx512f},
135139
{avx512ifma, avx512f},
136140
{avx512pf, avx512f},
137141
{avx512er, avx512f},
138142
{avx512cd, avx512f},
139143
{avx512bw, avx512f},
144+
{avx512bf16, avx512bw},
145+
{avx512bitalg, avx512bw},
140146
{avx512vl, avx512f},
141147
{avx512vbmi, avx512bw},
148+
{avx512vbmi2, avx512bw},
149+
{avx512vnni, avx512f},
150+
{avx512vp2intersect, avx512f},
142151
{avx512vpopcntdq, avx512f},
152+
{amx_int8, amx_tile},
153+
{amx_bf16, amx_tile},
143154
{sse4a, sse3},
144155
{xop, fma4},
145156
{fma4, avx},
@@ -470,15 +481,23 @@ static inline void features_disable_avx512(T &features)
470481
{
471482
using namespace Feature;
472483
unset_bits(features, avx512f, avx512dq, avx512ifma, avx512pf, avx512er, avx512cd,
473-
avx512bw, avx512vl, avx512vbmi);
484+
avx512bw, avx512vl, avx512vbmi, avx512vpopcntdq, avx512vbmi2, avx512vnni,
485+
avx512bitalg, avx512vp2intersect, avx512bf16);
474486
}
475487

476488
template<typename T>
477489
static inline void features_disable_avx(T &features)
478490
{
479491
using namespace Feature;
480492
unset_bits(features, avx, Feature::fma, f16c, xsave, avx2, xop, fma4,
481-
xsaveopt, xsavec, xsaves);
493+
xsaveopt, xsavec, xsaves, vaes, vpclmulqdq);
494+
}
495+
496+
template<typename T>
497+
static inline void features_disable_amx(T &features)
498+
{
499+
using namespace Feature;
500+
unset_bits(features, amx_bf16, amx_tile, amx_int8);
482501
}
483502

484503
static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu(void)
@@ -535,15 +554,25 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu(void)
535554
jl_cpuidex(infoex8, 0x80000008, 0);
536555
features[8] = infoex8[1];
537556
}
557+
if (maxleaf >= 7) {
558+
int32_t info7[4];
559+
jl_cpuidex(info7, 7, 1);
560+
features[9] = info7[0];
561+
}
562+
if (maxleaf >= 0x14) {
563+
int32_t info14[4];
564+
jl_cpuidex(info14, 0x14, 0);
565+
features[10] = info14[1];
566+
}
538567

539568
// Fix up AVX bits to account for OS support and match LLVM model
540569
uint64_t xcr0 = 0;
541-
const uint32_t avx_mask = (1 << 27) | (1 << 28);
542-
bool hasavx = test_all_bits(features[0], avx_mask);
543-
if (hasavx) {
570+
bool hasxsave = test_all_bits(features[0], 1 << 27);
571+
if (hasxsave) {
544572
xcr0 = get_xcr0();
545-
hasavx = test_all_bits(xcr0, 0x6);
573+
hasxsave = test_all_bits(xcr0, 0x6);
546574
}
575+
bool hasavx = hasxsave && test_all_bits(features[0], 1 << 28);
547576
unset_bits(features, 32 + 27);
548577
if (!hasavx)
549578
features_disable_avx(features);
@@ -557,6 +586,10 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu(void)
557586
#endif
558587
if (!hasavx512save)
559588
features_disable_avx512(features);
589+
// AMX requires additional context to be saved by the OS.
590+
bool hasamxsave = hasxsave && test_all_bits(xcr0, (1 << 17) | (1 << 18));
591+
if (!hasamxsave)
592+
features_disable_amx(features);
560593
// Ignore feature bits that we are not interested in.
561594
mask_features(feature_masks, &features[0]);
562595

@@ -788,12 +821,16 @@ static void ensure_jit_target(bool imaging)
788821
static constexpr uint32_t clone_simd[] = {Feature::sse3, Feature::ssse3,
789822
Feature::sse41, Feature::sse42,
790823
Feature::avx, Feature::avx2,
824+
Feature::vaes, Feature::vpclmulqdq,
791825
Feature::sse4a, Feature::avx512f,
792826
Feature::avx512dq, Feature::avx512ifma,
793827
Feature::avx512pf, Feature::avx512er,
794828
Feature::avx512cd, Feature::avx512bw,
795829
Feature::avx512vl, Feature::avx512vbmi,
796-
Feature::avx512vpopcntdq};
830+
Feature::avx512vpopcntdq,
831+
Feature::avx512vbmi2, Feature::avx512vnni,
832+
Feature::avx512bitalg, Feature::avx512bf16,
833+
Feature::avx512vp2intersect};
797834
for (auto fe: clone_math) {
798835
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
799836
t.en.flags |= JL_TARGET_CLONE_MATH;
@@ -847,6 +884,9 @@ get_llvm_target_noext(const TargetData<feature_sz> &data)
847884
// returns a value that may not have 64bit support.
848885
// This can happen with virtualization.
849886
features.push_back("+64bit");
887+
#endif
888+
#if JL_LLVM_VERSION >= 90000
889+
features.push_back("+cx8");
850890
#endif
851891
return std::make_pair(std::move(name), std::move(features));
852892
}

0 commit comments

Comments
 (0)