@@ -60,6 +60,8 @@ enum class CPU : uint32_t {
6060 intel_atom_bonnell,
6161 intel_atom_silvermont,
6262 intel_atom_goldmont,
63+ intel_atom_goldmont_plus,
64+ intel_atom_tremont,
6365 intel_core2,
6466 intel_core2_penryn,
6567 intel_yonah,
@@ -71,8 +73,14 @@ enum class CPU : uint32_t {
7173 intel_corei7_broadwell,
7274 intel_corei7_skylake,
7375 intel_corei7_skylake_avx512,
76+ intel_corei7_cascadelake,
77+ intel_corei7_cooperlake,
7478 intel_corei7_cannonlake,
79+ intel_corei7_icelake_client,
80+ intel_corei7_icelake_server,
81+ intel_corei7_tigerlake,
7582 intel_knights_landing,
83+ intel_knights_mill,
7684
7785 amd_fam10h,
7886 amd_athlon_fx,
@@ -90,6 +98,7 @@ enum class CPU : uint32_t {
9098 amd_opteron_sse3,
9199 amd_barcelona,
92100 amd_znver1,
101+ amd_znver2,
93102};
94103
95104static constexpr size_t feature_sz = 11 ;
@@ -160,27 +169,41 @@ static constexpr FeatureDep deps[] = {
160169constexpr auto generic = get_feature_masks(cx16);
161170constexpr auto bonnell = get_feature_masks(sse3, ssse3, cx16, movbe, sahf);
162171constexpr auto silvermont = bonnell | get_feature_masks(sse41, sse42, popcnt,
163- pclmul, aes, prfchw);
164- constexpr auto goldmont = silvermont | get_feature_masks(sha, rdrnd, rdseed, xsave,
165- xsaveopt, xsavec, xsaves, clflushopt);
172+ pclmul, prfchw, rdrnd);
173+ constexpr auto goldmont = silvermont | get_feature_masks(aes, sha, rdseed, xsave, xsaveopt,
174+ xsavec, xsaves, clflushopt, fsgsbase);
175+ constexpr auto goldmont_plus = goldmont | get_feature_masks(ptwrite, rdpid); // sgx
176+ constexpr auto tremont = goldmont_plus | get_feature_masks(clwb, gfni);
177+ constexpr auto knl = get_feature_masks(sse3, ssse3, sse41, sse42, cx16, sahf, popcnt,
178+ aes, pclmul, avx, xsave, xsaveopt, rdrnd, f16c, fsgsbase,
179+ avx2, bmi, bmi2, fma, lzcnt, movbe, adx, rdseed, prfchw,
180+ avx512f, avx512er, avx512cd, avx512pf, prefetchwt1);
181+ constexpr auto knm = knl | get_feature_masks(avx512vpopcntdq);
166182constexpr auto yonah = get_feature_masks(sse3);
167183constexpr auto prescott = yonah;
168184constexpr auto core2 = get_feature_masks(sse3, ssse3, cx16, sahf);
169185constexpr auto nocona = get_feature_masks(sse3, cx16);
170186constexpr auto penryn = nocona | get_feature_masks(ssse3, sse41, sahf);
171187constexpr auto nehalem = penryn | get_feature_masks(sse42, popcnt);
172- constexpr auto westmere = nehalem | get_feature_masks(aes, pclmul);
188+ constexpr auto westmere = nehalem | get_feature_masks(pclmul);
173189constexpr auto sandybridge = westmere | get_feature_masks(avx, xsave, xsaveopt);
174190constexpr auto ivybridge = sandybridge | get_feature_masks(rdrnd, f16c, fsgsbase);
175191constexpr auto haswell = ivybridge | get_feature_masks(avx2, bmi, bmi2, fma, lzcnt, movbe);
176192constexpr auto broadwell = haswell | get_feature_masks(adx, rdseed, prfchw);
177- constexpr auto skylake = broadwell | get_feature_masks(rtm, xsavec, xsaves,
178- clflushopt); // ignore sgx; hle
179- constexpr auto knl = broadwell | get_feature_masks(avx512f, avx512er, avx512cd, avx512pf,
180- prefetchwt1);
193+ constexpr auto skylake = broadwell | get_feature_masks(aes, xsavec, xsaves, clflushopt); // sgx
181194constexpr auto skx = skylake | get_feature_masks(avx512f, avx512cd, avx512dq, avx512bw, avx512vl,
182195 pku, clwb);
183- constexpr auto cannonlake = skx | get_feature_masks(avx512vbmi, avx512ifma, sha);
196+ constexpr auto cascadelake = skx | get_feature_masks(avx512vnni);
197+ constexpr auto cooperlake = cascadelake | get_feature_masks(avx512bf16);
198+ constexpr auto cannonlake = skylake | get_feature_masks(avx512f, avx512cd, avx512dq, avx512bw,
199+ avx512vl, pku, avx512vbmi, avx512ifma,
200+ sha); // sgx
201+ constexpr auto icelake = cannonlake | get_feature_masks(avx512bitalg, vaes, avx512vbmi2,
202+ vpclmulqdq, avx512vpopcntdq,
203+ gfni, clwb, rdpid);
204+ constexpr auto icelake_server = icelake | get_feature_masks(pconfig, wbnoinvd);
205+ constexpr auto tigerlake = icelake | get_feature_masks(avx512vp2intersect, movdiri,
206+ movdir64b, shstk);
184207
185208constexpr auto k8_sse3 = get_feature_masks(sse3, cx16);
186209constexpr auto amdfam10 = k8_sse3 | get_feature_masks(sse4a, lzcnt, popcnt, sahf);
@@ -195,8 +218,9 @@ constexpr auto bdver2 = bdver1 | get_feature_masks(f16c, bmi, tbm, fma);
195218constexpr auto bdver3 = bdver2 | get_feature_masks(xsaveopt, fsgsbase);
196219constexpr auto bdver4 = bdver3 | get_feature_masks(avx2, bmi2, mwaitx);
197220
198- constexpr auto znver1 = haswell | get_feature_masks(adx, clflushopt, clzero, mwaitx, prfchw,
221+ constexpr auto znver1 = haswell | get_feature_masks(adx, aes, clflushopt, clzero, mwaitx, prfchw,
199222 rdseed, sha, sse4a, xsavec, xsaves);
223+ constexpr auto znver2 = znver1 | get_feature_masks(clwb, rdpid, wbnoinvd);
200224
201225}
202226
@@ -205,6 +229,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
205229 {" bonnell" , CPU::intel_atom_bonnell, CPU::generic, 0 , Feature::bonnell},
206230 {" silvermont" , CPU::intel_atom_silvermont, CPU::generic, 0 , Feature::silvermont},
207231 {" goldmont" , CPU::intel_atom_goldmont, CPU::generic, 0 , Feature::goldmont},
232+ {" goldmont-plus" , CPU::intel_atom_goldmont_plus, CPU::generic, 0 , Feature::goldmont_plus},
233+ {" tremont" , CPU::intel_atom_tremont, CPU::generic, 0 , Feature::tremont},
208234 {" core2" , CPU::intel_core2, CPU::generic, 0 , Feature::core2},
209235 {" yonah" , CPU::intel_yonah, CPU::generic, 0 , Feature::yonah},
210236 {" prescott" , CPU::intel_prescott, CPU::generic, 0 , Feature::prescott},
@@ -218,8 +244,17 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
218244 {" broadwell" , CPU::intel_corei7_broadwell, CPU::generic, 0 , Feature::broadwell},
219245 {" skylake" , CPU::intel_corei7_skylake, CPU::generic, 0 , Feature::skylake},
220246 {" knl" , CPU::intel_knights_landing, CPU::generic, 0 , Feature::knl},
247+ {" knm" , CPU::intel_knights_mill, CPU::generic, 0 , Feature::knm},
221248 {" skylake-avx512" , CPU::intel_corei7_skylake_avx512, CPU::generic, 0 , Feature::skx},
249+ {" cascadelake" , CPU::intel_corei7_cascadelake, CPU::generic, 0 , Feature::cascadelake},
250+ {" cooperlake" , CPU::intel_corei7_cooperlake, CPU::intel_corei7_cascadelake,
251+ 90000 , Feature::cooperlake},
222252 {" cannonlake" , CPU::intel_corei7_cannonlake, CPU::generic, 0 , Feature::cannonlake},
253+ {" icelake-client" , CPU::intel_corei7_icelake_client, CPU::generic, 0 , Feature::icelake},
254+ {" icelake-server" , CPU::intel_corei7_icelake_server, CPU::generic, 0 ,
255+ Feature::icelake_server},
256+ {" tigerlake" , CPU::intel_corei7_tigerlake, CPU::intel_corei7_icelake_client, 100000 ,
257+ Feature::tigerlake},
223258
224259 {" athlon64" , CPU::amd_athlon_64, CPU::generic, 0 , Feature::generic},
225260 {" athlon-fx" , CPU::amd_athlon_fx, CPU::generic, 0 , Feature::generic},
@@ -242,6 +277,7 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
242277 {" bdver4" , CPU::amd_bdver4, CPU::generic, 0 , Feature::bdver4},
243278
244279 {" znver1" , CPU::amd_znver1, CPU::generic, 0 , Feature::znver1},
280+ {" znver2" , CPU::amd_znver2, CPU::amd_znver1, 90000 , Feature::znver2},
245281};
246282static constexpr size_t ncpu_names = sizeof (cpus) / sizeof (cpus[0 ]);
247283
@@ -346,11 +382,37 @@ static CPU get_intel_processor_name(uint32_t family, uint32_t model, uint32_t br
346382 case 0x5e : // Skylake desktop
347383 case 0x8e : // Kaby Lake mobile
348384 case 0x9e : // Kaby Lake desktop
385+ case 0xa5 : // Comet Lake-H/S
386+ case 0xa6 : // Comet Lake-U
349387 return CPU::intel_corei7_skylake;
350388
351389 // Skylake Xeon (model 0x55 is shared with Cascade Lake and Cooper Lake; told apart by feature bits):
352390 case 0x55 :
353- return CPU::intel_corei7_skylake;
391+ if (test_nbit (features, Feature::avx512bf16))
392+ return CPU::intel_corei7_cooperlake;
393+ if (test_nbit (features, Feature::avx512vnni))
394+ return CPU::intel_corei7_cascadelake;
395+ return CPU::intel_corei7_skylake_avx512;
396+
397+ // Cannonlake:
398+ case 0x66 :
399+ return CPU::intel_corei7_cannonlake;
400+
401+ // Icelake:
402+ case 0x7d :
403+ case 0x7e :
404+ case 0x9d :
405+ return CPU::intel_corei7_icelake_client;
406+
407+ // Icelake Xeon:
408+ case 0x6a :
409+ case 0x6c :
410+ return CPU::intel_corei7_icelake_server;
411+
412+ // Tiger Lake:
413+ case 0x8c :
414+ case 0x8d :
415+ return CPU::intel_corei7_tigerlake;
354416
355417 case 0x1c : // Most 45 nm Intel Atom processors
356418 case 0x26 : // 45 nm Atom Lincroft
@@ -363,19 +425,30 @@ static CPU get_intel_processor_name(uint32_t family, uint32_t model, uint32_t br
363425 case 0x37 :
364426 case 0x4a :
365427 case 0x4d :
366- case 0x5a :
367428 case 0x5d :
368- case 0x4c : // really airmont
429+ // Airmont (reported as Silvermont):
430+ case 0x4c :
431+ case 0x5a :
432+ case 0x75 :
369433 return CPU::intel_atom_silvermont;
370434
371435 // Goldmont:
372436 case 0x5c :
373437 case 0x5f :
374438 return CPU::intel_atom_goldmont;
439+ case 0x7a :
440+ return CPU::intel_atom_goldmont_plus;
441+ case 0x86 :
442+ case 0x96 :
443+ case 0x9c :
444+ return CPU::intel_atom_tremont;
375445
376446 case 0x57 :
377447 return CPU::intel_knights_landing;
378448
449+ case 0x85 :
450+ return CPU::intel_knights_mill;
451+
379452 default :
380453 return CPU::generic;
381454 }
@@ -449,8 +522,6 @@ static CPU get_amd_processor_name(uint32_t family, uint32_t model, const uint32_
449522 case 20 :
450523 return CPU::amd_btver1;
451524 case 21 :
452- if (!test_nbit (features, Feature::avx))
453- return CPU::amd_btver1;
454525 if (model >= 0x50 && model <= 0x6f )
455526 return CPU::amd_bdver4;
456527 if (model >= 0x30 && model <= 0x3f )
@@ -461,11 +532,11 @@ static CPU get_amd_processor_name(uint32_t family, uint32_t model, const uint32_
461532 return CPU::amd_bdver1;
462533 return CPU::amd_btver1; // fallback
463534 case 22 :
464- if (!test_nbit (features, Feature::avx))
465- return CPU::amd_btver1;
466535 return CPU::amd_btver2;
467536 case 23 :
468- if (test_nbit (features, Feature::adx))
537+ if ((model >= 0x30 && model <= 0x3f ) || model == 0x71 )
538+ return CPU::amd_znver2;
539+ if (model <= 0x0f )
469540 return CPU::amd_znver1;
470541 return CPU::amd_btver1;
471542 }
@@ -794,9 +865,10 @@ static void ensure_jit_target(bool imaging)
794865 // The most useful one in general...
795866 t.en .flags |= JL_TARGET_CLONE_LOOP;
796867 auto &features0 = jit_targets[t.base ].en .features ;
797- // Special case for KNL since it's so different
868+ // Special case for KNL/KNM since they're so different
798869 if (!(t.dis .flags & JL_TARGET_CLONE_ALL)) {
799- if (t.name == " knl" && jit_targets[t.base ].name != " knl" ) {
870+ if ((t.name == " knl" || t.name == " knm" ) &&
871+ jit_targets[t.base ].name != " knl" && jit_targets[t.base ].name != " knm" ) {
800872 t.en .flags |= JL_TARGET_CLONE_ALL;
801873 break ;
802874 }
0 commit comments