Skip to content

Commit 71396cf

Browse files
ilkka-koskinen authored and willdeacon committed
perf/dwc_pcie: Support counting multiple lane events in parallel
While the Designware PCIe PMU allows counting only one time-based event at a time, it allows counting all of the lane events simultaneously. After this patch, one is able to count a group of lane events: $ perf stat -e '{dwc_rootport/tx_memory_write,lane=1/,dwc_rootport/rx_memory_read,lane=0/}' dd if=/dev/nvme0n1 of=/dev/null bs=1M count=1 Earlier, the events would not have been counted successfully. Signed-off-by: Ilkka Koskinen <[email protected]> Signed-off-by: Will Deacon <[email protected]>
1 parent 1e558fb commit 71396cf

File tree

2 files changed

+132
-33
lines changed

2 files changed

+132
-33
lines changed

Documentation/admin-guide/perf/dwc_pcie_pmu.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ provides the following two features:
1616

1717
- one 64-bit counter for Time Based Analysis (RX/TX data throughput and
1818
time spent in each low-power LTSSM state) and
19-
- one 32-bit counter for Event Counting (error and non-error events for
20-
a specified lane)
19+
- one 32-bit counter per event for Event Counting (error and non-error
20+
events for a specified lane)
2121

2222
Note: There is no interrupt for counter overflow.
2323

drivers/perf/dwc_pcie_pmu.c

Lines changed: 130 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@
3939
#define DWC_PCIE_EVENT_CLEAR GENMASK(1, 0)
4040
#define DWC_PCIE_EVENT_PER_CLEAR 0x1
4141

42+
/* Event Selection Field has two subfields */
43+
#define DWC_PCIE_CNT_EVENT_SEL_GROUP GENMASK(11, 8)
44+
#define DWC_PCIE_CNT_EVENT_SEL_EVID GENMASK(7, 0)
45+
4246
#define DWC_PCIE_EVENT_CNT_DATA 0xC
4347

4448
#define DWC_PCIE_TIME_BASED_ANAL_CTL 0x10
@@ -73,6 +77,10 @@ enum dwc_pcie_event_type {
7377
DWC_PCIE_EVENT_TYPE_MAX,
7478
};
7579

80+
#define DWC_PCIE_LANE_GROUP_6 6
81+
#define DWC_PCIE_LANE_GROUP_7 7
82+
#define DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP 256
83+
7684
#define DWC_PCIE_LANE_EVENT_MAX_PERIOD GENMASK_ULL(31, 0)
7785
#define DWC_PCIE_MAX_PERIOD GENMASK_ULL(63, 0)
7886

@@ -82,8 +90,11 @@ struct dwc_pcie_pmu {
8290
u16 ras_des_offset;
8391
u32 nr_lanes;
8492

93+
/* Groups #6 and #7 */
94+
DECLARE_BITMAP(lane_events, 2 * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP);
95+
struct perf_event *time_based_event;
96+
8597
struct hlist_node cpuhp_node;
86-
struct perf_event *event[DWC_PCIE_EVENT_TYPE_MAX];
8798
int on_cpu;
8899
};
89100

@@ -246,19 +257,26 @@ static const struct attribute_group *dwc_pcie_attr_groups[] = {
246257
};
247258

248259
static void dwc_pcie_pmu_lane_event_enable(struct dwc_pcie_pmu *pcie_pmu,
260+
struct perf_event *event,
249261
bool enable)
250262
{
251263
struct pci_dev *pdev = pcie_pmu->pdev;
252264
u16 ras_des_offset = pcie_pmu->ras_des_offset;
265+
int event_id = DWC_PCIE_EVENT_ID(event);
266+
int lane = DWC_PCIE_EVENT_LANE(event);
267+
u32 ctrl;
268+
269+
ctrl = FIELD_PREP(DWC_PCIE_CNT_EVENT_SEL, event_id) |
270+
FIELD_PREP(DWC_PCIE_CNT_LANE_SEL, lane) |
271+
FIELD_PREP(DWC_PCIE_EVENT_CLEAR, DWC_PCIE_EVENT_PER_CLEAR);
253272

254273
if (enable)
255-
pci_clear_and_set_config_dword(pdev,
256-
ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
257-
DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_ON);
274+
ctrl |= FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_ON);
258275
else
259-
pci_clear_and_set_config_dword(pdev,
260-
ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
261-
DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_OFF);
276+
ctrl |= FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_OFF);
277+
278+
pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
279+
ctrl);
262280
}
263281

264282
static void dwc_pcie_pmu_time_based_event_enable(struct dwc_pcie_pmu *pcie_pmu,
@@ -276,11 +294,22 @@ static u64 dwc_pcie_pmu_read_lane_event_counter(struct perf_event *event)
276294
{
277295
struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
278296
struct pci_dev *pdev = pcie_pmu->pdev;
297+
int event_id = DWC_PCIE_EVENT_ID(event);
298+
int lane = DWC_PCIE_EVENT_LANE(event);
279299
u16 ras_des_offset = pcie_pmu->ras_des_offset;
280-
u32 val;
300+
u32 val, ctrl;
281301

302+
ctrl = FIELD_PREP(DWC_PCIE_CNT_EVENT_SEL, event_id) |
303+
FIELD_PREP(DWC_PCIE_CNT_LANE_SEL, lane) |
304+
FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_ON);
305+
pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
306+
ctrl);
282307
pci_read_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_DATA, &val);
283308

309+
ctrl |= FIELD_PREP(DWC_PCIE_EVENT_CLEAR, DWC_PCIE_EVENT_PER_CLEAR);
310+
pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
311+
ctrl);
312+
284313
return val;
285314
}
286315

@@ -329,26 +358,77 @@ static void dwc_pcie_pmu_event_update(struct perf_event *event)
329358
{
330359
struct hw_perf_event *hwc = &event->hw;
331360
enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
332-
u64 delta, prev, now = 0;
361+
u64 delta, prev, now;
362+
363+
if (type == DWC_PCIE_LANE_EVENT) {
364+
now = dwc_pcie_pmu_read_lane_event_counter(event) &
365+
DWC_PCIE_LANE_EVENT_MAX_PERIOD;
366+
local64_add(now, &event->count);
367+
return;
368+
}
333369

334370
do {
335371
prev = local64_read(&hwc->prev_count);
336-
337-
if (type == DWC_PCIE_LANE_EVENT)
338-
now = dwc_pcie_pmu_read_lane_event_counter(event);
339-
else if (type == DWC_PCIE_TIME_BASE_EVENT)
340-
now = dwc_pcie_pmu_read_time_based_counter(event);
372+
now = dwc_pcie_pmu_read_time_based_counter(event);
341373

342374
} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
343375

344376
delta = (now - prev) & DWC_PCIE_MAX_PERIOD;
345-
/* 32-bit counter for Lane Event Counting */
346-
if (type == DWC_PCIE_LANE_EVENT)
347-
delta &= DWC_PCIE_LANE_EVENT_MAX_PERIOD;
348-
349377
local64_add(delta, &event->count);
350378
}
351379

380+
static int dwc_pcie_pmu_validate_add_lane_event(struct perf_event *event,
381+
unsigned long val_lane_events[])
382+
{
383+
int event_id, event_nr, group;
384+
385+
event_id = DWC_PCIE_EVENT_ID(event);
386+
event_nr = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_EVID, event_id);
387+
group = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_GROUP, event_id);
388+
389+
if (group != DWC_PCIE_LANE_GROUP_6 && group != DWC_PCIE_LANE_GROUP_7)
390+
return -EINVAL;
391+
392+
group -= DWC_PCIE_LANE_GROUP_6;
393+
394+
if (test_and_set_bit(group * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP + event_nr,
395+
val_lane_events))
396+
return -EINVAL;
397+
398+
return 0;
399+
}
400+
401+
static int dwc_pcie_pmu_validate_group(struct perf_event *event)
402+
{
403+
struct perf_event *sibling, *leader = event->group_leader;
404+
DECLARE_BITMAP(val_lane_events, 2 * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP);
405+
bool time_event;
406+
int type;
407+
408+
type = DWC_PCIE_EVENT_TYPE(leader);
409+
if (type == DWC_PCIE_TIME_BASE_EVENT)
410+
time_event = true;
411+
else
412+
if (dwc_pcie_pmu_validate_add_lane_event(leader, val_lane_events))
413+
return -ENOSPC;
414+
415+
for_each_sibling_event(sibling, leader) {
416+
type = DWC_PCIE_EVENT_TYPE(sibling);
417+
if (type == DWC_PCIE_TIME_BASE_EVENT) {
418+
if (time_event)
419+
return -ENOSPC;
420+
421+
time_event = true;
422+
continue;
423+
}
424+
425+
if (dwc_pcie_pmu_validate_add_lane_event(sibling, val_lane_events))
426+
return -ENOSPC;
427+
}
428+
429+
return 0;
430+
}
431+
352432
static int dwc_pcie_pmu_event_init(struct perf_event *event)
353433
{
354434
struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
@@ -367,10 +447,6 @@ static int dwc_pcie_pmu_event_init(struct perf_event *event)
367447
if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK)
368448
return -EINVAL;
369449

370-
if (event->group_leader != event &&
371-
!is_software_event(event->group_leader))
372-
return -EINVAL;
373-
374450
for_each_sibling_event(sibling, event->group_leader) {
375451
if (sibling->pmu != event->pmu && !is_software_event(sibling))
376452
return -EINVAL;
@@ -385,6 +461,9 @@ static int dwc_pcie_pmu_event_init(struct perf_event *event)
385461
return -EINVAL;
386462
}
387463

464+
if (dwc_pcie_pmu_validate_group(event))
465+
return -ENOSPC;
466+
388467
event->cpu = pcie_pmu->on_cpu;
389468

390469
return 0;
@@ -400,7 +479,7 @@ static void dwc_pcie_pmu_event_start(struct perf_event *event, int flags)
400479
local64_set(&hwc->prev_count, 0);
401480

402481
if (type == DWC_PCIE_LANE_EVENT)
403-
dwc_pcie_pmu_lane_event_enable(pcie_pmu, true);
482+
dwc_pcie_pmu_lane_event_enable(pcie_pmu, event, true);
404483
else if (type == DWC_PCIE_TIME_BASE_EVENT)
405484
dwc_pcie_pmu_time_based_event_enable(pcie_pmu, true);
406485
}
@@ -414,12 +493,13 @@ static void dwc_pcie_pmu_event_stop(struct perf_event *event, int flags)
414493
if (event->hw.state & PERF_HES_STOPPED)
415494
return;
416495

496+
dwc_pcie_pmu_event_update(event);
497+
417498
if (type == DWC_PCIE_LANE_EVENT)
418-
dwc_pcie_pmu_lane_event_enable(pcie_pmu, false);
499+
dwc_pcie_pmu_lane_event_enable(pcie_pmu, event, false);
419500
else if (type == DWC_PCIE_TIME_BASE_EVENT)
420501
dwc_pcie_pmu_time_based_event_enable(pcie_pmu, false);
421502

422-
dwc_pcie_pmu_event_update(event);
423503
hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
424504
}
425505

@@ -434,14 +514,17 @@ static int dwc_pcie_pmu_event_add(struct perf_event *event, int flags)
434514
u16 ras_des_offset = pcie_pmu->ras_des_offset;
435515
u32 ctrl;
436516

437-
/* one counter for each type and it is in use */
438-
if (pcie_pmu->event[type])
439-
return -ENOSPC;
440-
441-
pcie_pmu->event[type] = event;
442517
hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
443518

444519
if (type == DWC_PCIE_LANE_EVENT) {
520+
int event_nr = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_EVID, event_id);
521+
int group = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_GROUP, event_id) -
522+
DWC_PCIE_LANE_GROUP_6;
523+
524+
if (test_and_set_bit(group * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP + event_nr,
525+
pcie_pmu->lane_events))
526+
return -ENOSPC;
527+
445528
/* EVENT_COUNTER_DATA_REG needs clear manually */
446529
ctrl = FIELD_PREP(DWC_PCIE_CNT_EVENT_SEL, event_id) |
447530
FIELD_PREP(DWC_PCIE_CNT_LANE_SEL, lane) |
@@ -450,6 +533,11 @@ static int dwc_pcie_pmu_event_add(struct perf_event *event, int flags)
450533
pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
451534
ctrl);
452535
} else if (type == DWC_PCIE_TIME_BASE_EVENT) {
536+
if (pcie_pmu->time_based_event)
537+
return -ENOSPC;
538+
539+
pcie_pmu->time_based_event = event;
540+
453541
/*
454542
* TIME_BASED_ANAL_DATA_REG is a 64 bit register, we can safely
455543
* use it with any manually controlled duration. And it is
@@ -478,7 +566,18 @@ static void dwc_pcie_pmu_event_del(struct perf_event *event, int flags)
478566

479567
dwc_pcie_pmu_event_stop(event, flags | PERF_EF_UPDATE);
480568
perf_event_update_userpage(event);
481-
pcie_pmu->event[type] = NULL;
569+
570+
if (type == DWC_PCIE_TIME_BASE_EVENT) {
571+
pcie_pmu->time_based_event = NULL;
572+
} else {
573+
int event_id = DWC_PCIE_EVENT_ID(event);
574+
int event_nr = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_EVID, event_id);
575+
int group = FIELD_GET(DWC_PCIE_CNT_EVENT_SEL_GROUP, event_id) -
576+
DWC_PCIE_LANE_GROUP_6;
577+
578+
clear_bit(group * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP + event_nr,
579+
pcie_pmu->lane_events);
580+
}
482581
}
483582

484583
static void dwc_pcie_pmu_remove_cpuhp_instance(void *hotplug_node)

0 commit comments

Comments
 (0)