Skip to content

Commit 3670a7d

Browse files
yuyichao authored and KristofferC committed
A few processor detection/features tweaks (#36831)
* Missing feature from Apple A13
* Enable Cortex-A78 and Cortex-X1 on LLVM 11 (llvm/llvm-project@954db63, https://reviews.llvm.org/D83206)
* More relaxed Zen detection: treat all family 23 as Zen* and treat all model >= 0x30 as Zen2. GCC uses a similar fallback structure, albeit based on features. This should still generate **correct** code, since that is always controlled by the available features. It should be as good a scheduling model estimate as anything else.

Fix #36826

(cherry picked from commit cd3fb4d)
1 parent a19446e commit 3670a7d

File tree

2 files changed

+13
-11
lines changed

2 files changed

+13
-11
lines changed

src/processor_arm.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -296,8 +296,8 @@ constexpr auto arm_cortex_a73 = armv8a_crc;
296296
constexpr auto arm_cortex_a75 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16);
297297
constexpr auto arm_cortex_a76 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs);
298298
constexpr auto arm_cortex_a77 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs);
299-
constexpr auto arm_cortex_a78 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs);
300-
constexpr auto arm_cortex_x1 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs);
299+
constexpr auto arm_cortex_a78 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs); // spe
300+
constexpr auto arm_cortex_x1 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs); // spe
301301
constexpr auto arm_neoverse_e1 = armv8_2a | get_feature_masks(rcpc, fullfp16, ssbs);
302302
constexpr auto arm_neoverse_n1 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs);
303303
constexpr auto arm_zeus = armv8_4a | get_feature_masks(sve, i8mm, bf16, fullfp16, ssbs, rand);
@@ -331,7 +331,7 @@ constexpr auto apple_a7 = armv8a_crc_crypto;
331331
constexpr auto apple_a10 = armv8a_crc_crypto | get_feature_masks(rdm);
332332
constexpr auto apple_a11 = armv8_2a_crypto | get_feature_masks(fullfp16);
333333
constexpr auto apple_a12 = armv8_3a_crypto | get_feature_masks(fullfp16);
334-
constexpr auto apple_a13 = armv8_4a_crypto | get_feature_masks(fullfp16, sha3);
334+
constexpr auto apple_a13 = armv8_4a_crypto | get_feature_masks(fp16fml, fullfp16, sha3);
335335
constexpr auto apple_s4 = apple_a12;
336336
constexpr auto apple_s5 = apple_a12;
337337

@@ -358,8 +358,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
358358
{"cortex-a76", CPU::arm_cortex_a76, CPU::arm_cortex_a75, 90000, Feature::arm_cortex_a76},
359359
{"cortex-a76ae", CPU::arm_cortex_a76ae, CPU::arm_cortex_a75, 90000, Feature::arm_cortex_a76},
360360
{"cortex-a77", CPU::arm_cortex_a77, CPU::arm_cortex_a76, 110000, Feature::arm_cortex_a77},
361-
{"cortex-a78", CPU::arm_cortex_a78, CPU::arm_cortex_a77, UINT32_MAX, Feature::arm_cortex_a78},
362-
{"cortex-x1", CPU::arm_cortex_x1, CPU::arm_cortex_a78, UINT32_MAX, Feature::arm_cortex_x1},
361+
{"cortex-a78", CPU::arm_cortex_a78, CPU::arm_cortex_a77, 110000, Feature::arm_cortex_a78},
362+
{"cortex-x1", CPU::arm_cortex_x1, CPU::arm_cortex_a78, 110000, Feature::arm_cortex_x1},
363363
{"neoverse-e1", CPU::arm_neoverse_e1, CPU::arm_cortex_a76, 100000, Feature::arm_neoverse_e1},
364364
{"neoverse-n1", CPU::arm_neoverse_n1, CPU::arm_cortex_a76, 100000, Feature::arm_neoverse_n1},
365365
{"zeus", CPU::arm_zeus, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_zeus},
@@ -632,8 +632,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
632632
{"cortex-a76", CPU::arm_cortex_a76, CPU::arm_cortex_a75, 90000, Feature::arm_cortex_a76},
633633
{"cortex-a76ae", CPU::arm_cortex_a76ae, CPU::arm_cortex_a75, 90000, Feature::arm_cortex_a76},
634634
{"cortex-a77", CPU::arm_cortex_a77, CPU::arm_cortex_a76, 110000, Feature::arm_cortex_a77},
635-
{"cortex-a78", CPU::arm_cortex_a78, CPU::arm_cortex_a77, UINT32_MAX, Feature::arm_cortex_a78},
636-
{"cortex-x1", CPU::arm_cortex_x1, CPU::arm_cortex_a78, UINT32_MAX, Feature::arm_cortex_x1},
635+
{"cortex-a78", CPU::arm_cortex_a78, CPU::arm_cortex_a77, 110000, Feature::arm_cortex_a78},
636+
{"cortex-x1", CPU::arm_cortex_x1, CPU::arm_cortex_a78, 110000, Feature::arm_cortex_x1},
637637
{"neoverse-n1", CPU::arm_neoverse_n1, CPU::arm_cortex_a76, 100000, Feature::arm_neoverse_n1},
638638
{"denver1", CPU::nvidia_denver1, CPU::arm_cortex_a53, UINT32_MAX, Feature::nvidia_denver1},
639639
{"denver2", CPU::nvidia_denver2, CPU::arm_cortex_a57, UINT32_MAX, Feature::nvidia_denver2},

src/processor_x86.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -539,11 +539,13 @@ static CPU get_amd_processor_name(uint32_t family, uint32_t model, const uint32_
539539
case 22:
540540
return CPU::amd_btver2;
541541
case 23:
542-
if ((model >= 0x30 && model <= 0x3f) || model == 0x71)
542+
// Known models:
543+
// Zen: 1, 17
544+
// Zen+: 8, 24
545+
// Zen2: 96, 113
546+
if (model >= 0x30)
543547
return CPU::amd_znver2;
544-
if (model <= 0x0f)
545-
return CPU::amd_znver1;
546-
return CPU::amd_btver1;
548+
return CPU::amd_znver1;
547549
}
548550
}
549551

0 commit comments

Comments
 (0)