Skip to content

Commit f2fb6be

Browse files
kliang2 authored and Ingo Molnar committed
perf/core: Optimize side-band event delivery
The perf_event_aux() function iterates all PMUs and all events in their respective per-CPU contexts to find the events to deliver side-band records to. For example, the brk test case in lkp triggers many mmap() operations, which, if we're also running perf, results in many perf_event_aux() invocations. If we enable uncore PMU support (even when uncore events are not used), dozens of uncore PMUs will be iterated, which can significantly decrease brk_test's throughput. For example, the brk throughput: without uncore PMUs: 2647573 ops_per_sec; with uncore PMUs: 1768444 ops_per_sec ... a 33% reduction. To get at the per-CPU events that need side-band records, this patch puts these events on a per-CPU list; this avoids iterating the PMUs and any events that do not need side-band records. Per-task events are unchanged to avoid extra overhead on the context switch paths. Suggested-by: Peter Zijlstra (Intel) <[email protected]> Reported-by: Huang, Ying <[email protected]> Signed-off-by: Kan Liang <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Cc: Alexander Shishkin <[email protected]> Cc: Arnaldo Carvalho de Melo <[email protected]> Cc: Jiri Olsa <[email protected]> Cc: Linus Torvalds <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Stephane Eranian <[email protected]> Cc: Thomas Gleixner <[email protected]> Cc: Vince Weaver <[email protected]> Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Ingo Molnar <[email protected]>
1 parent 42c4fb7 commit f2fb6be

File tree

2 files changed

+79
-12
lines changed

2 files changed

+79
-12
lines changed

include/linux/perf_event.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,11 @@ struct swevent_hlist {
517517
struct perf_cgroup;
518518
struct ring_buffer;
519519

520+
/*
 * A lock-protected list head. core.c defines one instance per CPU
 * (pmu_sb_events) that holds the events needing side-band records,
 * linked through perf_event::sb_list.
 *
 * @lock: held around list_add_rcu()/list_del_rcu() on @list.
 * @list: head of the event list; walked with list_for_each_entry_rcu().
 */
struct pmu_event_list {
521+
raw_spinlock_t lock;
522+
struct list_head list;
523+
};
524+
520525
/**
521526
* struct perf_event - performance event kernel representation:
522527
*/
@@ -675,6 +680,7 @@ struct perf_event {
675680
int cgrp_defer_enabled;
676681
#endif
677682

683+
struct list_head sb_list;
678684
#endif /* CONFIG_PERF_EVENTS */
679685
};
680686

kernel/events/core.c

Lines changed: 73 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,7 @@ static atomic_t perf_sched_count;
335335

336336
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
337337
static DEFINE_PER_CPU(int, perf_sched_cb_usages);
338+
static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
338339

339340
static atomic_t nr_mmap_events __read_mostly;
340341
static atomic_t nr_comm_events __read_mostly;
@@ -3665,6 +3666,26 @@ static void free_event_rcu(struct rcu_head *head)
36653666
static void ring_buffer_attach(struct perf_event *event,
36663667
struct ring_buffer *rb);
36673668

3669+
static void detach_sb_event(struct perf_event *event)
3670+
{
3671+
struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
3672+
3673+
raw_spin_lock(&pel->lock);
3674+
list_del_rcu(&event->sb_list);
3675+
raw_spin_unlock(&pel->lock);
3676+
}
3677+
3678+
static void unaccount_pmu_sb_event(struct perf_event *event)
3679+
{
3680+
if (event->parent)
3681+
return;
3682+
3683+
if (event->attach_state & PERF_ATTACH_TASK)
3684+
return;
3685+
3686+
detach_sb_event(event);
3687+
}
3688+
36683689
static void unaccount_event_cpu(struct perf_event *event, int cpu)
36693690
{
36703691
if (event->parent)
@@ -3728,6 +3749,8 @@ static void unaccount_event(struct perf_event *event)
37283749
}
37293750

37303751
unaccount_event_cpu(event, event->cpu);
3752+
3753+
unaccount_pmu_sb_event(event);
37313754
}
37323755

37333756
static void perf_sched_delayed(struct work_struct *work)
@@ -5888,13 +5911,25 @@ perf_event_aux_task_ctx(perf_event_aux_output_cb output, void *data,
58885911
rcu_read_unlock();
58895912
}
58905913

5914+
/*
 * Walk the current CPU's side-band event list and invoke
 * @output(event, @data) for every event whose state is not below
 * PERF_EVENT_STATE_INACTIVE and which passes event_filter_match().
 *
 * The list is traversed with the RCU list iterator and located via
 * this_cpu_ptr(); perf_event_aux() calls this after rcu_read_lock()
 * and preempt_disable().
 */
static void perf_event_sb_iterate(perf_event_aux_output_cb output, void *data)
{
	struct list_head *head = &this_cpu_ptr(&pmu_sb_events)->list;
	struct perf_event *ev;

	list_for_each_entry_rcu(ev, head, sb_list) {
		if (ev->state >= PERF_EVENT_STATE_INACTIVE &&
		    event_filter_match(ev))
			output(ev, data);
	}
}
5927+
58915928
static void
58925929
perf_event_aux(perf_event_aux_output_cb output, void *data,
58935930
struct perf_event_context *task_ctx)
58945931
{
5895-
struct perf_cpu_context *cpuctx;
58965932
struct perf_event_context *ctx;
5897-
struct pmu *pmu;
58985933
int ctxn;
58995934

59005935
/*
@@ -5909,20 +5944,15 @@ perf_event_aux(perf_event_aux_output_cb output, void *data,
59095944
}
59105945

59115946
rcu_read_lock();
5912-
list_for_each_entry_rcu(pmu, &pmus, entry) {
5913-
cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
5914-
if (cpuctx->unique_pmu != pmu)
5915-
goto next;
5916-
perf_event_aux_ctx(&cpuctx->ctx, output, data, false);
5917-
ctxn = pmu->task_ctx_nr;
5918-
if (ctxn < 0)
5919-
goto next;
5947+
preempt_disable();
5948+
perf_event_sb_iterate(output, data);
5949+
5950+
for_each_task_context_nr(ctxn) {
59205951
ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
59215952
if (ctx)
59225953
perf_event_aux_ctx(ctx, output, data, false);
5923-
next:
5924-
put_cpu_ptr(pmu->pmu_cpu_context);
59255954
}
5955+
preempt_enable();
59265956
rcu_read_unlock();
59275957
}
59285958

@@ -8615,6 +8645,32 @@ static struct pmu *perf_init_event(struct perf_event *event)
86158645
return pmu;
86168646
}
86178647

8648+
static void attach_sb_event(struct perf_event *event)
8649+
{
8650+
struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
8651+
8652+
raw_spin_lock(&pel->lock);
8653+
list_add_rcu(&event->sb_list, &pel->list);
8654+
raw_spin_unlock(&pel->lock);
8655+
}
8656+
8657+
static void account_pmu_sb_event(struct perf_event *event)
8658+
{
8659+
struct perf_event_attr *attr = &event->attr;
8660+
8661+
if (event->parent)
8662+
return;
8663+
8664+
if (event->attach_state & PERF_ATTACH_TASK)
8665+
return;
8666+
8667+
if (attr->mmap || attr->mmap_data || attr->mmap2 ||
8668+
attr->comm || attr->comm_exec ||
8669+
attr->task ||
8670+
attr->context_switch)
8671+
attach_sb_event(event);
8672+
}
8673+
86188674
static void account_event_cpu(struct perf_event *event, int cpu)
86198675
{
86208676
if (event->parent)
@@ -8695,6 +8751,8 @@ static void account_event(struct perf_event *event)
86958751
enabled:
86968752

86978753
account_event_cpu(event, event->cpu);
8754+
8755+
account_pmu_sb_event(event);
86988756
}
86998757

87008758
/*
@@ -10203,6 +10261,9 @@ static void __init perf_event_init_all_cpus(void)
1020310261
swhash = &per_cpu(swevent_htable, cpu);
1020410262
mutex_init(&swhash->hlist_mutex);
1020510263
INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu));
10264+
10265+
INIT_LIST_HEAD(&per_cpu(pmu_sb_events.list, cpu));
10266+
raw_spin_lock_init(&per_cpu(pmu_sb_events.lock, cpu));
1020610267
}
1020710268
}
1020810269

0 commit comments

Comments
 (0)