Skip to content

Commit 102c932

Browse files
committed
tracing: Add __tracepoint_string() to export string pointers
There are several tracepoints (mostly in RCU) that reference a string pointer and use the print format of "%s" to display a string that exists in the kernel, instead of copying the actual string to the ring buffer (this saves time and ring buffer space).

But this is a problem for userspace tools that read the binary buffers: they have the address of the string but no access to the string itself. The end result is just output that looks like:

rcu_dyntick: ffffffff818adeaa 1 0
rcu_dyntick: ffffffff818adeb5 0 140000000000000
rcu_dyntick: ffffffff818adeb5 0 140000000000000
rcu_utilization: ffffffff8184333b
rcu_utilization: ffffffff8184333b

The above is pretty useless when read by the userspace tools. Ideally we would want something that looks like this:

rcu_dyntick: Start 1 0
rcu_dyntick: End 0 140000000000000
rcu_dyntick: Start 140000000000000 0
rcu_callback: rcu_preempt rhp=0xffff880037aff710 func=put_cred_rcu 0/4
rcu_callback: rcu_preempt rhp=0xffff880078961980 func=file_free_rcu 0/5
rcu_dyntick: End 0 1

trace_printk(), which also stores only the address of the format string instead of recording the string into the buffer itself, exports the mapping of kernel addresses to format strings via the printk_formats file in the debugfs tracing directory.

The tracepoint strings can use this same method and output their text to the same file, and the userspace tools will be able to decipher the addresses without any modification.

The tracepoint strings need their own section to save the strings, because the trace_printk section causes the trace_printk() buffers to be allocated if anything exists within that section. Since trace_printk() is only used for debugging and should never exist in a production kernel, we cannot use the trace_printk sections.

Add a new tracepoint_str section that will also be examined when producing the output of the printk_formats file.

Cc: Paul E. McKenney <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>
1 parent 3b2f64d commit 102c932

File tree

4 files changed

+62
-1
lines changed

4 files changed

+62
-1
lines changed

include/asm-generic/vmlinux.lds.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,12 @@
122122
#define TRACE_PRINTKS() VMLINUX_SYMBOL(__start___trace_bprintk_fmt) = .; \
123123
*(__trace_printk_fmt) /* Trace_printk fmt' pointer */ \
124124
VMLINUX_SYMBOL(__stop___trace_bprintk_fmt) = .;
125+
/* Gather every __tracepoint_str input section between a start and a stop symbol. */
#define TRACEPOINT_STR() VMLINUX_SYMBOL(__start___tracepoint_str) = .; \
126+
*(__tracepoint_str) /* tracepoint_string() string pointers */ \
127+
VMLINUX_SYMBOL(__stop___tracepoint_str) = .;
125128
#else
126129
#define TRACE_PRINTKS()
130+
#define TRACEPOINT_STR()
127131
#endif
128132

129133
#ifdef CONFIG_FTRACE_SYSCALLS
@@ -190,7 +194,8 @@
190194
VMLINUX_SYMBOL(__stop___verbose) = .; \
191195
LIKELY_PROFILE() \
192196
BRANCH_PROFILE() \
193-
TRACE_PRINTKS()
197+
TRACE_PRINTKS() \
198+
TRACEPOINT_STR()
194199

195200
/*
196201
* Data section helpers

include/linux/ftrace_event.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,40 @@ do { \
357357
__trace_printk(ip, fmt, ##args); \
358358
} while (0)
359359

360+
/**
361+
* tracepoint_string - register constant persistent string to trace system
362+
* @str: a constant persistent string that will be referenced in tracepoints
363+
*
364+
* If constant strings are being used in tracepoints, it is faster and
365+
* more efficient to just save the pointer to the string and reference
366+
* that with a printf "%s" instead of saving the string in the ring buffer
367+
* and wasting space and time.
368+
*
369+
* The problem with the above approach is that userspace tools that read
370+
* the binary output of the trace buffers do not have access to the string.
371+
* Instead they just show the address of the string which is not very
372+
* useful to users.
373+
*
374+
* With tracepoint_string(), the string will be registered to the tracing
375+
* system and exported to userspace via the debugfs/tracing/printk_formats
376+
* file that maps the string address to the string text. This way userspace
377+
* tools that read the binary buffers have a way to map the pointers to
378+
* the ASCII strings they represent.
379+
*
380+
* The @str used must be a constant string and persistent as it would not
381+
* make sense to show a string that no longer exists. But it is still fine
382+
* to be used with modules, because when modules are unloaded, if they
383+
* had tracepoints, the ring buffers are cleared too. As long as the string
384+
* does not change during the life of the module, it is fine to use
385+
* tracepoint_string() within a module.
*
* The macro is a statement expression: it defines a static pointer to @str
* that is placed in the "__tracepoint_str" section, and evaluates to the
* value of that pointer.
386+
*/
387+
#define tracepoint_string(str) \
388+
({ \
389+
static const char *___tp_str __tracepoint_string = str; \
390+
___tp_str; \
391+
})
392+
/*
 * Section attribute applied to the pointer declared by tracepoint_string().
 * It is defined after that macro, which is fine because the name is only
 * expanded where tracepoint_string() is used.
 */
#define __tracepoint_string __attribute__((section("__tracepoint_str")))
393+
360394
#ifdef CONFIG_PERF_EVENTS
361395
struct perf_event;
362396

kernel/trace/trace.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,6 +1022,9 @@ extern struct list_head ftrace_events;
10221022
extern const char *__start___trace_bprintk_fmt[];
10231023
extern const char *__stop___trace_bprintk_fmt[];
10241024

1025+
/*
 * Start and end of the __tracepoint_str section, as laid out by
 * TRACEPOINT_STR() in the linker script. The entries in between are the
 * string pointers registered with tracepoint_string().
 */
extern const char *__start___tracepoint_str[];
1026+
extern const char *__stop___tracepoint_str[];
1027+
10251028
void trace_printk_init_buffers(void);
10261029
void trace_printk_start_comm(void);
10271030
int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set);

kernel/trace/trace_printk.c

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,12 +244,31 @@ static const char **find_next(void *v, loff_t *pos)
244244
{
245245
const char **fmt = v;
246246
int start_index;
247+
int last_index;
247248

248249
start_index = __stop___trace_bprintk_fmt - __start___trace_bprintk_fmt;
249250

250251
if (*pos < start_index)
251252
return __start___trace_bprintk_fmt + *pos;
252253

254+
/*
255+
* The __tracepoint_str section is treated the same as the
256+
* __trace_printk_fmt section. The difference is that the
257+
* __trace_printk_fmt section should only be used by trace_printk()
258+
* in a debugging environment, as if anything exists in that section
259+
* the trace_prink() helper buffers are allocated, which would just
260+
* waste space in a production environment.
261+
*
262+
* The __tracepoint_str sections on the other hand are used by
263+
* tracepoints which need to map pointers to their strings to
264+
* the ASCII text for userspace.
265+
*/
266+
last_index = start_index;
267+
start_index = __stop___tracepoint_str - __start___tracepoint_str;
268+
269+
if (*pos < last_index + start_index)
270+
return __start___tracepoint_str + (*pos - last_index);
271+
253272
return find_next_mod_format(start_index, v, fmt, pos);
254273
}
255274

0 commit comments

Comments
 (0)