Skip to content

Commit 42c4fb7

Browse files
author
Ingo Molnar
committed
Merge tag 'perf-core-for-mingo-20160530' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible/kernel ABI changes:

- Per event callchain limit: Recently we introduced a sysctl to tune the
  max-stack for all events for which callchains were requested:

    $ sysctl kernel.perf_event_max_stack
    kernel.perf_event_max_stack = 127

  Now this patch introduces a way to configure this per event, i.e. this
  becomes possible:

    $ perf record -e sched:*/max-stack=2/ -e block:*/max-stack=10/ -a

  allowing finer tuning of how much buffer space callchains use.

  This uses a u16 from the reserved space at the end, leaving another
  u16 for future use.

  There has been interest in even finer tuning, namely to control the
  max stack for kernel and userspace callchains separately. Further
  discussion is needed; we may for instance use the remaining u16 for that
  and when it is present, assume that the sample_max_stack introduced in
  this patch applies for the kernel, and the u16 left is used for limiting
  the userspace callchain. (Arnaldo Carvalho de Melo)

Infrastructure changes:

- Adopt get_main_thread from db-export.c (Andi Kleen)
- More prep work for backward ring buffer support (Wang Nan)
- Prep work for supporting SDT (Statically Defined Tracing) tracepoints (Masami Hiramatsu)
- Add arch/*/include/generated/ to .gitignore (Taeung Song)

Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
2 parents 7114605 + 0141226 commit 42c4fb7

File tree

27 files changed

+252
-100
lines changed

27 files changed

+252
-100
lines changed

include/linux/perf_event.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1076,7 +1076,7 @@ extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct
10761076
extern struct perf_callchain_entry *
10771077
get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
10781078
u32 max_stack, bool crosstask, bool add_mark);
1079-
extern int get_callchain_buffers(void);
1079+
extern int get_callchain_buffers(int max_stack);
10801080
extern void put_callchain_buffers(void);
10811081

10821082
extern int sysctl_perf_event_max_stack;

include/uapi/linux/perf_event.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,9 @@ enum perf_event_read_format {
276276

277277
/*
278278
* Hardware event_id to monitor via a performance monitoring event:
279+
*
280+
* @sample_max_stack: Max number of frame pointers in a callchain,
281+
* should be <= /proc/sys/kernel/perf_event_max_stack
279282
*/
280283
struct perf_event_attr {
281284

@@ -385,7 +388,8 @@ struct perf_event_attr {
385388
* Wakeup watermark for AUX area
386389
*/
387390
__u32 aux_watermark;
388-
__u32 __reserved_2; /* align to __u64 */
391+
__u16 sample_max_stack;
392+
__u16 __reserved_2; /* align to __u64 */
389393
};
390394

391395
#define perf_flags(attr) (*(&(attr)->read_format + 1))

kernel/bpf/stackmap.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
9999
if (err)
100100
goto free_smap;
101101

102-
err = get_callchain_buffers();
102+
err = get_callchain_buffers(sysctl_perf_event_max_stack);
103103
if (err)
104104
goto free_smap;
105105

kernel/events/callchain.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ static int alloc_callchain_buffers(void)
104104
return -ENOMEM;
105105
}
106106

107-
int get_callchain_buffers(void)
107+
int get_callchain_buffers(int event_max_stack)
108108
{
109109
int err = 0;
110110
int count;
@@ -121,6 +121,15 @@ int get_callchain_buffers(void)
121121
/* If the allocation failed, give up */
122122
if (!callchain_cpus_entries)
123123
err = -ENOMEM;
124+
/*
125+
* If requesting per event more than the global cap,
126+
* return a different error to help userspace figure
127+
* this out.
128+
*
129+
* And also do it here so that we have &callchain_mutex held.
130+
*/
131+
if (event_max_stack > sysctl_perf_event_max_stack)
132+
err = -EOVERFLOW;
124133
goto exit;
125134
}
126135

@@ -174,11 +183,12 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
174183
bool user = !event->attr.exclude_callchain_user;
175184
/* Disallow cross-task user callchains. */
176185
bool crosstask = event->ctx->task && event->ctx->task != current;
186+
const u32 max_stack = event->attr.sample_max_stack;
177187

178188
if (!kernel && !user)
179189
return NULL;
180190

181-
return get_perf_callchain(regs, 0, kernel, user, sysctl_perf_event_max_stack, crosstask, true);
191+
return get_perf_callchain(regs, 0, kernel, user, max_stack, crosstask, true);
182192
}
183193

184194
struct perf_callchain_entry *

kernel/events/core.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8843,7 +8843,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
88438843

88448844
if (!event->parent) {
88458845
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
8846-
err = get_callchain_buffers();
8846+
err = get_callchain_buffers(attr->sample_max_stack);
88478847
if (err)
88488848
goto err_addr_filters;
88498849
}
@@ -9165,6 +9165,9 @@ SYSCALL_DEFINE5(perf_event_open,
91659165
return -EINVAL;
91669166
}
91679167

9168+
if (!attr.sample_max_stack)
9169+
attr.sample_max_stack = sysctl_perf_event_max_stack;
9170+
91689171
/*
91699172
* In cgroup mode, the pid argument is used to pass the fd
91709173
* opened to the cgroup directory in cgroupfs. The cpu argument

tools/lib/api/fd/array.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,8 @@ int fdarray__add(struct fdarray *fda, int fd, short revents)
8585
}
8686

8787
int fdarray__filter(struct fdarray *fda, short revents,
88-
void (*entry_destructor)(struct fdarray *fda, int fd))
88+
void (*entry_destructor)(struct fdarray *fda, int fd, void *arg),
89+
void *arg)
8990
{
9091
int fd, nr = 0;
9192

@@ -95,7 +96,7 @@ int fdarray__filter(struct fdarray *fda, short revents,
9596
for (fd = 0; fd < fda->nr; ++fd) {
9697
if (fda->entries[fd].revents & revents) {
9798
if (entry_destructor)
98-
entry_destructor(fda, fd);
99+
entry_destructor(fda, fd, arg);
99100

100101
continue;
101102
}

tools/lib/api/fd/array.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ void fdarray__delete(struct fdarray *fda);
3434
int fdarray__add(struct fdarray *fda, int fd, short revents);
3535
int fdarray__poll(struct fdarray *fda, int timeout);
3636
int fdarray__filter(struct fdarray *fda, short revents,
37-
void (*entry_destructor)(struct fdarray *fda, int fd));
37+
void (*entry_destructor)(struct fdarray *fda, int fd, void *arg),
38+
void *arg);
3839
int fdarray__grow(struct fdarray *fda, int extra);
3940
int fdarray__fprintf(struct fdarray *fda, FILE *fp);
4041

tools/perf/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,4 @@ config.mak.autogen
3030
*.pyo
3131
.config-detected
3232
util/intel-pt-decoder/inat-tables.c
33+
arch/*/include/generated/

tools/perf/arch/x86/util/tsc.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
6262
struct perf_tsc_conversion tc;
6363
int err;
6464

65+
if (!pc)
66+
return 0;
6567
err = perf_read_tsc_conversion(pc, &tc);
6668
if (err == -EOPNOTSUPP)
6769
return 0;

tools/perf/builtin-record.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -655,6 +655,13 @@ perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused
655655
return 0;
656656
}
657657

658+
static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
659+
{
660+
if (rec->evlist && rec->evlist->mmap && rec->evlist->mmap[0].base)
661+
return rec->evlist->mmap[0].base;
662+
return NULL;
663+
}
664+
658665
static int record__synthesize(struct record *rec)
659666
{
660667
struct perf_session *session = rec->session;
@@ -692,7 +699,7 @@ static int record__synthesize(struct record *rec)
692699
}
693700
}
694701

695-
err = perf_event__synth_time_conv(rec->evlist->mmap[0].base, tool,
702+
err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
696703
process_synthesized_event, machine);
697704
if (err)
698705
goto out;

0 commit comments

Comments
 (0)