Skip to content

Commit 25e5684

Browse files
sandip4n and Peter Zijlstra
authored and committed
perf/x86/amd/uncore: Add memory controller support
Unified Memory Controller (UMC) events were introduced with Zen 4 as a part of the Performance Monitoring Version 2 (PerfMonV2) enhancements. An event is specified using the EventSelect bits and the RdWrMask bits can be used for additional filtering of read and write requests. As of now, a maximum of 12 channels of DDR5 are available on each socket and each channel is controlled by a dedicated UMC. Each UMC, in turn, has its own set of performance monitoring counters. Since the MSR address space for the UMC PERF_CTL and PERF_CTR registers are reused across sockets, uncore groups are created on the basis of socket IDs. Hence, group exclusivity is mandatory while opening events so that events for an UMC can only be opened on CPUs which are on the same socket as the corresponding memory channel. For each socket, the total number of available UMC counters and active memory channels are determined from CPUID leaf 0x80000022 EBX and ECX respectively. Usually, on Zen 4, each UMC has four counters. MSR assignments are determined on the basis of active UMCs. E.g. if UMCs 1, 4 and 9 are active for a given socket, then * UMC 1 gets MSRs 0xc0010800 to 0xc0010807 as PERF_CTLs and PERF_CTRs * UMC 4 gets MSRs 0xc0010808 to 0xc001080f as PERF_CTLs and PERF_CTRs * UMC 9 gets MSRs 0xc0010810 to 0xc0010817 as PERF_CTLs and PERF_CTRs If there are sockets without any online CPUs when the amd_uncore driver is loaded, UMCs for such sockets will not be discoverable since the mechanism relies on executing the CPUID instruction on an online CPU from the socket. Signed-off-by: Sandipan Das <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Link: https://lore.kernel.org/r/b25f391205c22733493abec1ed850b71784edc5f.1696425185.git.sandipan.das@amd.com
1 parent 83a43c6 commit 25e5684

File tree

3 files changed

+168
-1
lines changed

3 files changed

+168
-1
lines changed

arch/x86/events/amd/uncore.c

Lines changed: 155 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ struct amd_uncore_pmu {
5555
enum {
5656
UNCORE_TYPE_DF,
5757
UNCORE_TYPE_L3,
58+
UNCORE_TYPE_UMC,
5859

5960
UNCORE_TYPE_MAX
6061
};
@@ -286,7 +287,7 @@ static struct device_attribute format_attr_##_var = \
286287
DEFINE_UNCORE_FORMAT_ATTR(event12, event, "config:0-7,32-35");
287288
DEFINE_UNCORE_FORMAT_ATTR(event14, event, "config:0-7,32-35,59-60"); /* F17h+ DF */
288289
DEFINE_UNCORE_FORMAT_ATTR(event14v2, event, "config:0-7,32-37"); /* PerfMonV2 DF */
289-
DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7"); /* F17h+ L3 */
290+
DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7"); /* F17h+ L3, PerfMonV2 UMC */
290291
DEFINE_UNCORE_FORMAT_ATTR(umask8, umask, "config:8-15");
291292
DEFINE_UNCORE_FORMAT_ATTR(umask12, umask, "config:8-15,24-27"); /* PerfMonV2 DF */
292293
DEFINE_UNCORE_FORMAT_ATTR(coreid, coreid, "config:42-44"); /* F19h L3 */
@@ -296,6 +297,7 @@ DEFINE_UNCORE_FORMAT_ATTR(threadmask2, threadmask, "config:56-57"); /* F19h L
296297
DEFINE_UNCORE_FORMAT_ATTR(enallslices, enallslices, "config:46"); /* F19h L3 */
297298
DEFINE_UNCORE_FORMAT_ATTR(enallcores, enallcores, "config:47"); /* F19h L3 */
298299
DEFINE_UNCORE_FORMAT_ATTR(sliceid, sliceid, "config:48-50"); /* F19h L3 */
300+
DEFINE_UNCORE_FORMAT_ATTR(rdwrmask, rdwrmask, "config:8-9"); /* PerfMonV2 UMC */
299301

300302
/* Common DF and NB attributes */
301303
static struct attribute *amd_uncore_df_format_attr[] = {
@@ -312,6 +314,13 @@ static struct attribute *amd_uncore_l3_format_attr[] = {
312314
NULL,
313315
};
314316

317+
/* Common UMC attributes */
318+
static struct attribute *amd_uncore_umc_format_attr[] = {
319+
&format_attr_event8.attr, /* event */
320+
&format_attr_rdwrmask.attr, /* rdwrmask */
321+
NULL,
322+
};
323+
315324
/* F17h unique L3 attributes */
316325
static struct attribute *amd_f17h_uncore_l3_format_attr[] = {
317326
&format_attr_slicemask.attr, /* slicemask */
@@ -349,6 +358,11 @@ static struct attribute_group amd_f19h_uncore_l3_format_group = {
349358
.is_visible = amd_f19h_uncore_is_visible,
350359
};
351360

361+
static struct attribute_group amd_uncore_umc_format_group = {
362+
.name = "format",
363+
.attrs = amd_uncore_umc_format_attr,
364+
};
365+
352366
static const struct attribute_group *amd_uncore_df_attr_groups[] = {
353367
&amd_uncore_attr_group,
354368
&amd_uncore_df_format_group,
@@ -367,6 +381,12 @@ static const struct attribute_group *amd_uncore_l3_attr_update[] = {
367381
NULL,
368382
};
369383

384+
static const struct attribute_group *amd_uncore_umc_attr_groups[] = {
385+
&amd_uncore_attr_group,
386+
&amd_uncore_umc_format_group,
387+
NULL,
388+
};
389+
370390
static __always_inline
371391
int amd_uncore_ctx_cid(struct amd_uncore *uncore, unsigned int cpu)
372392
{
@@ -835,6 +855,133 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
835855
return amd_uncore_ctx_init(uncore, cpu);
836856
}
837857

858+
/*
 * Validate and set up a UMC perf event.
 *
 * Runs the common uncore event checks via amd_uncore_event_init() and then
 * masks the user-supplied raw config down to the fields a UMC counter
 * understands: EventSelect and RdWrMask (see
 * AMD64_PERFMON_V2_RAW_EVENT_MASK_UMC).
 *
 * Returns 0 on success or the error code from amd_uncore_event_init().
 */
static int amd_uncore_umc_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int ret = amd_uncore_event_init(event);

	if (ret)
		return ret;

	/* Keep only the UMC-valid config bits (event select + rd/wr mask) */
	hwc->config = event->attr.config & AMD64_PERFMON_V2_RAW_EVENT_MASK_UMC;

	return 0;
}
870+
871+
/*
 * Start counting for a UMC event.
 *
 * If PERF_EF_RELOAD is requested, the hardware counter (PERF_CTR at
 * hwc->event_base) is first re-seeded with the previously saved count so
 * the delta accounting in the common read path stays consistent. The event
 * is then armed by writing its config with the UMC enable bit set into
 * PERF_CTL at hwc->config_base.
 */
static void amd_uncore_umc_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	/* Restore the last saved count into the PERF_CTR MSR */
	if (flags & PERF_EF_RELOAD)
		wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));

	hwc->state = 0;
	/* Write the config with the enable bit set to begin counting */
	wrmsrl(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC));
	perf_event_update_userpage(event);
}
882+
883+
/*
 * Stash per-CPU UMC discovery data for later context initialization.
 *
 * Queries CPUID leaf 0x80000022 (EXT_PERFMON_DEBUG_FEATURES): per the
 * commit description, EBX reports the number of available UMC counters
 * and ECX the mask of active memory channels for this CPU's socket. The
 * result is recorded in the per-CPU uncore info slot, which
 * amd_uncore_umc_ctx_init() later consumes to size and group the PMUs.
 *
 * Only meaningful from PerfMonV2 onwards; earlier versions bail out.
 */
static
void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
{
	union cpuid_0x80000022_ebx ebx;
	union amd_uncore_info info;
	unsigned int eax, ecx, edx;

	/* UMC counters are a PerfMonV2 feature */
	if (pmu_version < 2)
		return;

	cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx);
	info.split.aux_data = ecx;	/* stash active mask */
	info.split.num_pmcs = ebx.split.num_umc_pmc;
	/*
	 * Group and context IDs are both the die ID.
	 * NOTE(review): the commit message says grouping is per *socket*;
	 * topology_die_id() matches that only when die == socket — confirm
	 * on multi-die parts.
	 */
	info.split.gid = topology_die_id(cpu);
	info.split.cid = topology_die_id(cpu);
	*per_cpu_ptr(uncore->info, cpu) = info;
}
900+
901+
static
902+
int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
903+
{
904+
DECLARE_BITMAP(gmask, UNCORE_GROUP_MAX) = { 0 };
905+
u8 group_num_pmus[UNCORE_GROUP_MAX] = { 0 };
906+
u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 };
907+
union amd_uncore_info info;
908+
struct amd_uncore_pmu *pmu;
909+
int index = 0, gid, i;
910+
911+
if (pmu_version < 2)
912+
return 0;
913+
914+
/* Run just once */
915+
if (uncore->init_done)
916+
return amd_uncore_ctx_init(uncore, cpu);
917+
918+
/* Find unique groups */
919+
for_each_online_cpu(i) {
920+
info = *per_cpu_ptr(uncore->info, i);
921+
gid = info.split.gid;
922+
if (test_bit(gid, gmask))
923+
continue;
924+
925+
__set_bit(gid, gmask);
926+
group_num_pmus[gid] = hweight32(info.split.aux_data);
927+
group_num_pmcs[gid] = info.split.num_pmcs;
928+
uncore->num_pmus += group_num_pmus[gid];
929+
}
930+
931+
uncore->pmus = kzalloc(sizeof(*uncore->pmus) * uncore->num_pmus,
932+
GFP_KERNEL);
933+
if (!uncore->pmus) {
934+
uncore->num_pmus = 0;
935+
goto done;
936+
}
937+
938+
for_each_set_bit(gid, gmask, UNCORE_GROUP_MAX) {
939+
for (i = 0; i < group_num_pmus[gid]; i++) {
940+
pmu = &uncore->pmus[index];
941+
snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%d", index);
942+
pmu->num_counters = group_num_pmcs[gid] / group_num_pmus[gid];
943+
pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2;
944+
pmu->rdpmc_base = -1;
945+
pmu->group = gid;
946+
947+
pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
948+
if (!pmu->ctx)
949+
goto done;
950+
951+
pmu->pmu = (struct pmu) {
952+
.task_ctx_nr = perf_invalid_context,
953+
.attr_groups = amd_uncore_umc_attr_groups,
954+
.name = pmu->name,
955+
.event_init = amd_uncore_umc_event_init,
956+
.add = amd_uncore_add,
957+
.del = amd_uncore_del,
958+
.start = amd_uncore_umc_start,
959+
.stop = amd_uncore_stop,
960+
.read = amd_uncore_read,
961+
.capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
962+
.module = THIS_MODULE,
963+
};
964+
965+
if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
966+
free_percpu(pmu->ctx);
967+
pmu->ctx = NULL;
968+
goto done;
969+
}
970+
971+
pr_info("%d %s counters detected\n", pmu->num_counters,
972+
pmu->pmu.name);
973+
974+
index++;
975+
}
976+
}
977+
978+
done:
979+
uncore->num_pmus = index;
980+
uncore->init_done = true;
981+
982+
return amd_uncore_ctx_init(uncore, cpu);
983+
}
984+
838985
static struct amd_uncore uncores[UNCORE_TYPE_MAX] = {
839986
/* UNCORE_TYPE_DF */
840987
{
@@ -850,6 +997,13 @@ static struct amd_uncore uncores[UNCORE_TYPE_MAX] = {
850997
.move = amd_uncore_ctx_move,
851998
.free = amd_uncore_ctx_free,
852999
},
1000+
/* UNCORE_TYPE_UMC */
1001+
{
1002+
.scan = amd_uncore_umc_ctx_scan,
1003+
.init = amd_uncore_umc_ctx_init,
1004+
.move = amd_uncore_ctx_move,
1005+
.free = amd_uncore_ctx_free,
1006+
},
8531007
};
8541008

8551009
static int __init amd_uncore_init(void)

arch/x86/include/asm/msr-index.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,10 @@
637637
/* AMD Last Branch Record MSRs */
638638
#define MSR_AMD64_LBR_SELECT 0xc000010e
639639

640+
/* Fam 19h MSRs */
641+
#define MSR_F19H_UMC_PERF_CTL 0xc0010800
642+
#define MSR_F19H_UMC_PERF_CTR 0xc0010801
643+
640644
/* Fam 17h MSRs */
641645
#define MSR_F17H_IRPERF 0xc00000e9
642646

arch/x86/include/asm/perf_event.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,13 @@
112112
(AMD64_PERFMON_V2_EVENTSEL_EVENT_NB | \
113113
AMD64_PERFMON_V2_EVENTSEL_UMASK_NB)
114114

115+
#define AMD64_PERFMON_V2_ENABLE_UMC BIT_ULL(31)
116+
#define AMD64_PERFMON_V2_EVENTSEL_EVENT_UMC GENMASK_ULL(7, 0)
117+
#define AMD64_PERFMON_V2_EVENTSEL_RDWRMASK_UMC GENMASK_ULL(9, 8)
118+
#define AMD64_PERFMON_V2_RAW_EVENT_MASK_UMC \
119+
(AMD64_PERFMON_V2_EVENTSEL_EVENT_UMC | \
120+
AMD64_PERFMON_V2_EVENTSEL_RDWRMASK_UMC)
121+
115122
#define AMD64_NUM_COUNTERS 4
116123
#define AMD64_NUM_COUNTERS_CORE 6
117124
#define AMD64_NUM_COUNTERS_NB 4
@@ -232,6 +239,8 @@ union cpuid_0x80000022_ebx {
232239
unsigned int lbr_v2_stack_sz:6;
233240
/* Number of Data Fabric Counters */
234241
unsigned int num_df_pmc:6;
242+
/* Number of Unified Memory Controller Counters */
243+
unsigned int num_umc_pmc:6;
235244
} split;
236245
unsigned int full;
237246
};

0 commit comments

Comments
 (0)