Skip to content

Commit ab43762

Browse files
virtuosoPeter Zijlstra
authored and committed
perf: Allow normal events to output AUX data
In some cases, ordinary (non-AUX) events can generate data for AUX events. For example, PEBS events can come out as records in the Intel PT stream instead of their usual DS records, if configured to do so. One requirement for such events is that they are consistently scheduled together with their AUX event, to ensure that the data from the "AUX output" events isn't lost while their corresponding AUX event is not scheduled. We use grouping to provide this guarantee: an "AUX output" event can be added to a group where an AUX event is the group leader, provided that the former supports writing to the latter. Signed-off-by: Alexander Shishkin <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Cc: Ingo Molnar <[email protected]> Cc: Arnaldo Carvalho de Melo <[email protected]> Cc: [email protected] Link: https://lkml.kernel.org/r/[email protected]
1 parent 794b8be commit ab43762

File tree

3 files changed

+109
-1
lines changed

3 files changed

+109
-1
lines changed

include/linux/perf_event.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,7 @@ struct perf_event;
246246
#define PERF_PMU_CAP_ITRACE 0x20
247247
#define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40
248248
#define PERF_PMU_CAP_NO_EXCLUDE 0x80
249+
#define PERF_PMU_CAP_AUX_OUTPUT 0x100
249250

250251
/**
251252
* struct pmu - generic performance monitoring unit
@@ -446,6 +447,16 @@ struct pmu {
446447
void (*addr_filters_sync) (struct perf_event *event);
447448
/* optional */
448449

450+
/*
451+
* Check if event can be used for aux_output purposes for
452+
* events of this PMU.
453+
*
454+
* Runs from perf_event_open(). Should return 0 for "no match"
455+
* or non-zero for "match".
456+
*/
457+
int (*aux_output_match) (struct perf_event *event);
458+
/* optional */
459+
449460
/*
450461
* Filter events for PMU-specific reasons.
451462
*/
@@ -681,6 +692,9 @@ struct perf_event {
681692
struct perf_addr_filter_range *addr_filter_ranges;
682693
unsigned long addr_filters_gen;
683694

695+
/* for aux_output events */
696+
struct perf_event *aux_event;
697+
684698
void (*destroy)(struct perf_event *);
685699
struct rcu_head rcu_head;
686700

include/uapi/linux/perf_event.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -374,7 +374,8 @@ struct perf_event_attr {
374374
namespaces : 1, /* include namespaces data */
375375
ksymbol : 1, /* include ksymbol events */
376376
bpf_event : 1, /* include bpf events */
377-
__reserved_1 : 33;
377+
aux_output : 1, /* generate AUX records instead of events */
378+
__reserved_1 : 32;
378379

379380
union {
380381
__u32 wakeup_events; /* wakeup every n events */

kernel/events/core.c

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1887,6 +1887,89 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
18871887
ctx->generation++;
18881888
}
18891889

1890+
static int
1891+
perf_aux_output_match(struct perf_event *event, struct perf_event *aux_event)
1892+
{
1893+
if (!has_aux(aux_event))
1894+
return 0;
1895+
1896+
if (!event->pmu->aux_output_match)
1897+
return 0;
1898+
1899+
return event->pmu->aux_output_match(aux_event);
1900+
}
1901+
1902+
static void put_event(struct perf_event *event);
1903+
static void event_sched_out(struct perf_event *event,
1904+
struct perf_cpu_context *cpuctx,
1905+
struct perf_event_context *ctx);
1906+
1907+
static void perf_put_aux_event(struct perf_event *event)
1908+
{
1909+
struct perf_event_context *ctx = event->ctx;
1910+
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
1911+
struct perf_event *iter;
1912+
1913+
/*
1914+
* If event uses aux_event tear down the link
1915+
*/
1916+
if (event->aux_event) {
1917+
iter = event->aux_event;
1918+
event->aux_event = NULL;
1919+
put_event(iter);
1920+
return;
1921+
}
1922+
1923+
/*
1924+
* If the event is an aux_event, tear down all links to
1925+
* it from other events.
1926+
*/
1927+
for_each_sibling_event(iter, event->group_leader) {
1928+
if (iter->aux_event != event)
1929+
continue;
1930+
1931+
iter->aux_event = NULL;
1932+
put_event(event);
1933+
1934+
/*
1935+
* If it's ACTIVE, schedule it out and put it into ERROR
1936+
* state so that we don't try to schedule it again. Note
1937+
* that perf_event_enable() will clear the ERROR status.
1938+
*/
1939+
event_sched_out(iter, cpuctx, ctx);
1940+
perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
1941+
}
1942+
}
1943+
1944+
static int perf_get_aux_event(struct perf_event *event,
1945+
struct perf_event *group_leader)
1946+
{
1947+
/*
1948+
* Our group leader must be an aux event if we want to be
1949+
* an aux_output. This way, the aux event will precede its
1950+
* aux_output events in the group, and therefore will always
1951+
* schedule first.
1952+
*/
1953+
if (!group_leader)
1954+
return 0;
1955+
1956+
if (!perf_aux_output_match(event, group_leader))
1957+
return 0;
1958+
1959+
if (!atomic_long_inc_not_zero(&group_leader->refcount))
1960+
return 0;
1961+
1962+
/*
1963+
* Link aux_outputs to their aux event; this is undone in
1964+
* perf_group_detach() by perf_put_aux_event(). When the
1965+
* group in torn down, the aux_output events loose their
1966+
* link to the aux_event and can't schedule any more.
1967+
*/
1968+
event->aux_event = group_leader;
1969+
1970+
return 1;
1971+
}
1972+
18901973
static void perf_group_detach(struct perf_event *event)
18911974
{
18921975
struct perf_event *sibling, *tmp;
@@ -1902,6 +1985,8 @@ static void perf_group_detach(struct perf_event *event)
19021985

19031986
event->attach_state &= ~PERF_ATTACH_GROUP;
19041987

1988+
perf_put_aux_event(event);
1989+
19051990
/*
19061991
* If this is a sibling, remove it from its group.
19071992
*/
@@ -10426,6 +10511,12 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
1042610511
goto err_ns;
1042710512
}
1042810513

10514+
if (event->attr.aux_output &&
10515+
!(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT)) {
10516+
err = -EOPNOTSUPP;
10517+
goto err_pmu;
10518+
}
10519+
1042910520
err = exclusive_event_init(event);
1043010521
if (err)
1043110522
goto err_pmu;
@@ -11082,6 +11173,8 @@ SYSCALL_DEFINE5(perf_event_open,
1108211173
}
1108311174
}
1108411175

11176+
if (event->attr.aux_output && !perf_get_aux_event(event, group_leader))
11177+
goto err_locked;
1108511178

1108611179
/*
1108711180
* Must be under the same ctx::mutex as perf_install_in_context(),

0 commit comments

Comments
 (0)