Skip to content

Commit f705012

Browse files
committed
New X86 CPU types and detections
1 parent cedd6f1 commit f705012

File tree

1 file changed

+92
-20
lines changed

1 file changed

+92
-20
lines changed

src/processor_x86.cpp

Lines changed: 92 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ enum class CPU : uint32_t {
6060
intel_atom_bonnell,
6161
intel_atom_silvermont,
6262
intel_atom_goldmont,
63+
intel_atom_goldmont_plus,
64+
intel_atom_tremont,
6365
intel_core2,
6466
intel_core2_penryn,
6567
intel_yonah,
@@ -71,8 +73,14 @@ enum class CPU : uint32_t {
7173
intel_corei7_broadwell,
7274
intel_corei7_skylake,
7375
intel_corei7_skylake_avx512,
76+
intel_corei7_cascadelake,
77+
intel_corei7_cooperlake,
7478
intel_corei7_cannonlake,
79+
intel_corei7_icelake_client,
80+
intel_corei7_icelake_server,
81+
intel_corei7_tigerlake,
7582
intel_knights_landing,
83+
intel_knights_mill,
7684

7785
amd_fam10h,
7886
amd_athlon_fx,
@@ -90,6 +98,7 @@ enum class CPU : uint32_t {
9098
amd_opteron_sse3,
9199
amd_barcelona,
92100
amd_znver1,
101+
amd_znver2,
93102
};
94103

95104
static constexpr size_t feature_sz = 11;
@@ -160,27 +169,41 @@ static constexpr FeatureDep deps[] = {
160169
constexpr auto generic = get_feature_masks(cx16);
161170
constexpr auto bonnell = get_feature_masks(sse3, ssse3, cx16, movbe, sahf);
162171
constexpr auto silvermont = bonnell | get_feature_masks(sse41, sse42, popcnt,
163-
pclmul, aes, prfchw);
164-
constexpr auto goldmont = silvermont | get_feature_masks(sha, rdrnd, rdseed, xsave,
165-
xsaveopt, xsavec, xsaves, clflushopt);
172+
pclmul, prfchw, rdrnd);
173+
constexpr auto goldmont = silvermont | get_feature_masks(aes, sha, rdseed, xsave, xsaveopt,
174+
xsavec, xsaves, clflushopt, fsgsbase);
175+
constexpr auto goldmont_plus = goldmont | get_feature_masks(ptwrite, rdpid); // sgx
176+
constexpr auto tremont = goldmont_plus | get_feature_masks(clwb, gfni);
177+
constexpr auto knl = get_feature_masks(sse3, ssse3, sse41, sse42, cx16, sahf, popcnt,
178+
aes, pclmul, avx, xsave, xsaveopt, rdrnd, f16c, fsgsbase,
179+
avx2, bmi, bmi2, fma, lzcnt, movbe, adx, rdseed, prfchw,
180+
avx512f, avx512er, avx512cd, avx512pf, prefetchwt1);
181+
constexpr auto knm = knl | get_feature_masks(avx512vpopcntdq);
166182
constexpr auto yonah = get_feature_masks(sse3);
167183
constexpr auto prescott = yonah;
168184
constexpr auto core2 = get_feature_masks(sse3, ssse3, cx16, sahf);
169185
constexpr auto nocona = get_feature_masks(sse3, cx16);
170186
constexpr auto penryn = nocona | get_feature_masks(ssse3, sse41, sahf);
171187
constexpr auto nehalem = penryn | get_feature_masks(sse42, popcnt);
172-
constexpr auto westmere = nehalem | get_feature_masks(aes, pclmul);
188+
constexpr auto westmere = nehalem | get_feature_masks(pclmul);
173189
constexpr auto sandybridge = westmere | get_feature_masks(avx, xsave, xsaveopt);
174190
constexpr auto ivybridge = sandybridge | get_feature_masks(rdrnd, f16c, fsgsbase);
175191
constexpr auto haswell = ivybridge | get_feature_masks(avx2, bmi, bmi2, fma, lzcnt, movbe);
176192
constexpr auto broadwell = haswell | get_feature_masks(adx, rdseed, prfchw);
177-
constexpr auto skylake = broadwell | get_feature_masks(rtm, xsavec, xsaves,
178-
clflushopt); // ignore sgx; hle
179-
constexpr auto knl = broadwell | get_feature_masks(avx512f, avx512er, avx512cd, avx512pf,
180-
prefetchwt1);
193+
constexpr auto skylake = broadwell | get_feature_masks(aes, xsavec, xsaves, clflushopt); // sgx
181194
constexpr auto skx = skylake | get_feature_masks(avx512f, avx512cd, avx512dq, avx512bw, avx512vl,
182195
pku, clwb);
183-
constexpr auto cannonlake = skx | get_feature_masks(avx512vbmi, avx512ifma, sha);
196+
constexpr auto cascadelake = skx | get_feature_masks(avx512vnni);
197+
constexpr auto cooperlake = cascadelake | get_feature_masks(avx512bf16);
198+
constexpr auto cannonlake = skylake | get_feature_masks(avx512f, avx512cd, avx512dq, avx512bw,
199+
avx512vl, pku, avx512vbmi, avx512ifma,
200+
sha); // sgx
201+
constexpr auto icelake = cannonlake | get_feature_masks(avx512bitalg, vaes, avx512vbmi2,
202+
vpclmulqdq, avx512vpopcntdq,
203+
gfni, clwb, rdpid);
204+
constexpr auto icelake_server = icelake | get_feature_masks(pconfig, wbnoinvd);
205+
constexpr auto tigerlake = icelake | get_feature_masks(avx512vp2intersect, movdiri,
206+
movdir64b, shstk);
184207

185208
constexpr auto k8_sse3 = get_feature_masks(sse3, cx16);
186209
constexpr auto amdfam10 = k8_sse3 | get_feature_masks(sse4a, lzcnt, popcnt, sahf);
@@ -195,8 +218,9 @@ constexpr auto bdver2 = bdver1 | get_feature_masks(f16c, bmi, tbm, fma);
195218
constexpr auto bdver3 = bdver2 | get_feature_masks(xsaveopt, fsgsbase);
196219
constexpr auto bdver4 = bdver3 | get_feature_masks(avx2, bmi2, mwaitx);
197220

198-
constexpr auto znver1 = haswell | get_feature_masks(adx, clflushopt, clzero, mwaitx, prfchw,
221+
constexpr auto znver1 = haswell | get_feature_masks(adx, aes, clflushopt, clzero, mwaitx, prfchw,
199222
rdseed, sha, sse4a, xsavec, xsaves);
223+
constexpr auto znver2 = znver1 | get_feature_masks(clwb, rdpid, wbnoinvd);
200224

201225
}
202226

@@ -205,6 +229,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
205229
{"bonnell", CPU::intel_atom_bonnell, CPU::generic, 0, Feature::bonnell},
206230
{"silvermont", CPU::intel_atom_silvermont, CPU::generic, 0, Feature::silvermont},
207231
{"goldmont", CPU::intel_atom_goldmont, CPU::generic, 0, Feature::goldmont},
232+
{"goldmont-plus", CPU::intel_atom_goldmont_plus, CPU::generic, 0, Feature::goldmont_plus},
233+
{"tremont", CPU::intel_atom_tremont, CPU::generic, 0, Feature::tremont},
208234
{"core2", CPU::intel_core2, CPU::generic, 0, Feature::core2},
209235
{"yonah", CPU::intel_yonah, CPU::generic, 0, Feature::yonah},
210236
{"prescott", CPU::intel_prescott, CPU::generic, 0, Feature::prescott},
@@ -218,8 +244,17 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
218244
{"broadwell", CPU::intel_corei7_broadwell, CPU::generic, 0, Feature::broadwell},
219245
{"skylake", CPU::intel_corei7_skylake, CPU::generic, 0, Feature::skylake},
220246
{"knl", CPU::intel_knights_landing, CPU::generic, 0, Feature::knl},
247+
{"knm", CPU::intel_knights_mill, CPU::generic, 0, Feature::knm},
221248
{"skylake-avx512", CPU::intel_corei7_skylake_avx512, CPU::generic, 0, Feature::skx},
249+
{"cascadelake", CPU::intel_corei7_cascadelake, CPU::generic, 0, Feature::cascadelake},
250+
{"cooperlake", CPU::intel_corei7_cooperlake, CPU::intel_corei7_cascadelake,
251+
90000, Feature::cooperlake},
222252
{"cannonlake", CPU::intel_corei7_cannonlake, CPU::generic, 0, Feature::cannonlake},
253+
{"icelake-client", CPU::intel_corei7_icelake_client, CPU::generic, 0, Feature::icelake},
254+
{"icelake-server", CPU::intel_corei7_icelake_server, CPU::generic, 0,
255+
Feature::icelake_server},
256+
{"tigerlake", CPU::intel_corei7_tigerlake, CPU::intel_corei7_icelake_client, 100000,
257+
Feature::tigerlake},
223258

224259
{"athlon64", CPU::amd_athlon_64, CPU::generic, 0, Feature::generic},
225260
{"athlon-fx", CPU::amd_athlon_fx, CPU::generic, 0, Feature::generic},
@@ -242,6 +277,7 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
242277
{"bdver4", CPU::amd_bdver4, CPU::generic, 0, Feature::bdver4},
243278

244279
{"znver1", CPU::amd_znver1, CPU::generic, 0, Feature::znver1},
280+
{"znver2", CPU::amd_znver2, CPU::amd_znver1, 90000, Feature::znver2},
245281
};
246282
static constexpr size_t ncpu_names = sizeof(cpus) / sizeof(cpus[0]);
247283

@@ -346,11 +382,37 @@ static CPU get_intel_processor_name(uint32_t family, uint32_t model, uint32_t br
346382
case 0x5e: // Skylake desktop
347383
case 0x8e: // Kaby Lake mobile
348384
case 0x9e: // Kaby Lake desktop
385+
case 0xa5: // Comet Lake-H/S
386+
case 0xa6: // Comet Lake-U
349387
return CPU::intel_corei7_skylake;
350388

351389
// Skylake Xeon:
352390
case 0x55:
353-
return CPU::intel_corei7_skylake;
391+
if (test_nbit(features, Feature::avx512bf16))
392+
return CPU::intel_corei7_cooperlake;
393+
if (test_nbit(features, Feature::avx512vnni))
394+
return CPU::intel_corei7_cascadelake;
395+
return CPU::intel_corei7_skylake_avx512;
396+
397+
// Cannonlake:
398+
case 0x66:
399+
return CPU::intel_corei7_cannonlake;
400+
401+
// Icelake:
402+
case 0x7d:
403+
case 0x7e:
404+
case 0x9d:
405+
return CPU::intel_corei7_icelake_client;
406+
407+
// Icelake Xeon:
408+
case 0x6a:
409+
case 0x6c:
410+
return CPU::intel_corei7_icelake_server;
411+
412+
// Tiger Lake:
413+
case 0x8c:
414+
case 0x8d:
415+
return CPU::intel_corei7_tigerlake;
354416

355417
case 0x1c: // Most 45 nm Intel Atom processors
356418
case 0x26: // 45 nm Atom Lincroft
@@ -363,19 +425,30 @@ static CPU get_intel_processor_name(uint32_t family, uint32_t model, uint32_t br
363425
case 0x37:
364426
case 0x4a:
365427
case 0x4d:
366-
case 0x5a:
367428
case 0x5d:
368-
case 0x4c: // really airmont
429+
// Airmont
430+
case 0x4c:
431+
case 0x5a:
432+
case 0x75:
369433
return CPU::intel_atom_silvermont;
370434

371435
// Goldmont:
372436
case 0x5c:
373437
case 0x5f:
374438
return CPU::intel_atom_goldmont;
439+
case 0x7a:
440+
return CPU::intel_atom_goldmont_plus;
441+
case 0x86:
442+
case 0x96:
443+
case 0x9c:
444+
return CPU::intel_atom_tremont;
375445

376446
case 0x57:
377447
return CPU::intel_knights_landing;
378448

449+
case 0x85:
450+
return CPU::intel_knights_mill;
451+
379452
default:
380453
return CPU::generic;
381454
}
@@ -449,8 +522,6 @@ static CPU get_amd_processor_name(uint32_t family, uint32_t model, const uint32_
449522
case 20:
450523
return CPU::amd_btver1;
451524
case 21:
452-
if (!test_nbit(features, Feature::avx))
453-
return CPU::amd_btver1;
454525
if (model >= 0x50 && model <= 0x6f)
455526
return CPU::amd_bdver4;
456527
if (model >= 0x30 && model <= 0x3f)
@@ -461,11 +532,11 @@ static CPU get_amd_processor_name(uint32_t family, uint32_t model, const uint32_
461532
return CPU::amd_bdver1;
462533
return CPU::amd_btver1; // fallback
463534
case 22:
464-
if (!test_nbit(features, Feature::avx))
465-
return CPU::amd_btver1;
466535
return CPU::amd_btver2;
467536
case 23:
468-
if (test_nbit(features, Feature::adx))
537+
if ((model >= 0x30 && model <= 0x3f) || model == 0x71)
538+
return CPU::amd_znver2;
539+
if (model <= 0x0f)
469540
return CPU::amd_znver1;
470541
return CPU::amd_btver1;
471542
}
@@ -794,9 +865,10 @@ static void ensure_jit_target(bool imaging)
794865
// The most useful one in general...
795866
t.en.flags |= JL_TARGET_CLONE_LOOP;
796867
auto &features0 = jit_targets[t.base].en.features;
797-
// Special case for KNL since it's so different
868+
// Special case for KNL/KNM since they're so different
798869
if (!(t.dis.flags & JL_TARGET_CLONE_ALL)) {
799-
if (t.name == "knl" && jit_targets[t.base].name != "knl") {
870+
if ((t.name == "knl" || t.name == "knm") &&
871+
jit_targets[t.base].name != "knl" && jit_targets[t.base].name != "knm") {
800872
t.en.flags |= JL_TARGET_CLONE_ALL;
801873
break;
802874
}

0 commit comments

Comments
 (0)