Commit 2a820bf

tracing: Use percpu stack trace buffer more intelligently
The per cpu stack trace buffer usage pattern is odd at best. The buffer
has room for 512 stack trace entries on 64-bit and 1024 on 32-bit. When
interrupts or exceptions nest after the per cpu buffer was acquired, the
stack trace length is hardcoded to 8 entries. 512/1024 stack trace
entries in kernel stacks are unrealistic, so most of the buffer is
wasted.

Split the buffer into 4 nest levels, which are 128/256 entries per
level. This allows nesting contexts (interrupts, exceptions) to utilize
the cpu buffer for stack retrieval and avoids the fixed-length
allocation along with the conditional execution paths.

Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Josh Poimboeuf <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Alexander Potapenko <[email protected]>
Cc: Alexey Dobriyan <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Christoph Lameter <[email protected]>
Cc: Pekka Enberg <[email protected]>
Cc: [email protected]
Cc: David Rientjes <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: Dmitry Vyukov <[email protected]>
Cc: Andrey Ryabinin <[email protected]>
Cc: [email protected]
Cc: Mike Rapoport <[email protected]>
Cc: Akinobu Mita <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: [email protected]
Cc: Robin Murphy <[email protected]>
Cc: Marek Szyprowski <[email protected]>
Cc: Johannes Thumshirn <[email protected]>
Cc: David Sterba <[email protected]>
Cc: Chris Mason <[email protected]>
Cc: Josef Bacik <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: Mike Snitzer <[email protected]>
Cc: Alasdair Kergon <[email protected]>
Cc: Daniel Vetter <[email protected]>
Cc: [email protected]
Cc: Joonas Lahtinen <[email protected]>
Cc: Maarten Lankhorst <[email protected]>
Cc: [email protected]
Cc: David Airlie <[email protected]>
Cc: Jani Nikula <[email protected]>
Cc: Rodrigo Vivi <[email protected]>
Cc: Tom Zanussi <[email protected]>
Cc: Miroslav Benes <[email protected]>
Cc: [email protected]
Link: https://lkml.kernel.org/r/[email protected]
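The reservation scheme is easiest to see in isolation. Below is a
minimal userspace sketch of the pattern this patch introduces: one
buffer split into four slots, with an increment/decrement counter
handing out the slot for the current nesting level. The per-CPU
accessors, preemption control, and actual stack saving are elided, and
reserve_slot()/release_slot() are illustrative names, not kernel API.

/*
 * Illustrative sketch only, not kernel code. Plain globals stand in
 * for the per-CPU counter and per-CPU buffer; on a real CPU the
 * increment is local and preemption is disabled around the use.
 */
#include <stdio.h>

#define KSTACK_NESTING	4	/* normal, softirq, irq, NMI */
#define KSTACK_ENTRIES	128	/* entries per level, 64-bit example */

struct stack_slot {
	unsigned long calls[KSTACK_ENTRIES];
};

static struct stack_slot stacks[KSTACK_NESTING];
static int stack_reserve;	/* per-CPU int in the real code */

/* Mirrors stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1 */
static struct stack_slot *reserve_slot(void)
{
	int stackidx = stack_reserve++;

	if (stackidx >= KSTACK_NESTING) {	/* should never happen */
		stack_reserve--;
		return NULL;
	}
	return &stacks[stackidx];
}

static void release_slot(void)
{
	stack_reserve--;	/* __this_cpu_dec() in the real code */
}

int main(void)
{
	/* Normal context takes slot 0; an interrupt nesting on top of it
	 * takes slot 1 before slot 0 is released, and so on. */
	struct stack_slot *normal = reserve_slot();
	struct stack_slot *irq    = reserve_slot();

	printf("normal -> slot %td, irq -> slot %td\n",
	       normal - stacks, irq - stacks);

	release_slot();		/* interrupt returns */
	release_slot();		/* normal context done */
	return 0;
}

Because a nested context always releases its slot before the outer
context resumes, the counter doubles as a stack pointer into the slot
array and no atomics are needed, only a compiler barrier.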
1 parent e7d9166 · commit 2a820bf

1 file changed: +37 -36 lines


kernel/trace/trace.c

Lines changed: 37 additions & 36 deletions

@@ -2749,12 +2749,21 @@ trace_function(struct trace_array *tr,
 
 #ifdef CONFIG_STACKTRACE
 
-#define FTRACE_STACK_MAX_ENTRIES	(PAGE_SIZE / sizeof(unsigned long))
+/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
+#define FTRACE_KSTACK_NESTING	4
+
+#define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
+
 struct ftrace_stack {
-	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
+	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
+};
+
+
+struct ftrace_stacks {
+	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
 };
 
-static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
+static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
 
 static void __ftrace_trace_stack(struct ring_buffer *buffer,
@@ -2763,10 +2772,11 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
 {
 	struct trace_event_call *call = &event_kernel_stack;
 	struct ring_buffer_event *event;
+	struct ftrace_stack *fstack;
 	struct stack_entry *entry;
 	struct stack_trace trace;
-	int use_stack;
-	int size = FTRACE_STACK_ENTRIES;
+	int size = FTRACE_KSTACK_ENTRIES;
+	int stackidx;
 
 	trace.nr_entries	= 0;
 	trace.skip		= skip;
@@ -2788,29 +2798,32 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
 	 */
 	preempt_disable_notrace();
 
-	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
+	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
+
+	/* This should never happen. If it does, yell once and skip */
+	if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING))
+		goto out;
+
 	/*
-	 * We don't need any atomic variables, just a barrier.
-	 * If an interrupt comes in, we don't care, because it would
-	 * have exited and put the counter back to what we want.
-	 * We just need a barrier to keep gcc from moving things
-	 * around.
+	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
+	 * interrupt will either see the value pre increment or post
	 * increment. If the interrupt happens pre increment it will have
+	 * restored the counter when it returns. We just need a barrier to
+	 * keep gcc from moving things around.
 	 */
 	barrier();
-	if (use_stack == 1) {
-		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
-		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
 
-		if (regs)
-			save_stack_trace_regs(regs, &trace);
-		else
-			save_stack_trace(&trace);
+	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
+	trace.entries		= fstack->calls;
+	trace.max_entries	= FTRACE_KSTACK_ENTRIES;
 
-		if (trace.nr_entries > size)
-			size = trace.nr_entries;
-	} else
-		/* From now on, use_stack is a boolean */
-		use_stack = 0;
+	if (regs)
+		save_stack_trace_regs(regs, &trace);
+	else
+		save_stack_trace(&trace);
+
+	if (trace.nr_entries > size)
+		size = trace.nr_entries;
 
 	size *= sizeof(unsigned long);
 
@@ -2820,19 +2833,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
 		goto out;
 	entry = ring_buffer_event_data(event);
 
-	memset(&entry->caller, 0, size);
-
-	if (use_stack)
-		memcpy(&entry->caller, trace.entries,
-		       trace.nr_entries * sizeof(unsigned long));
-	else {
-		trace.max_entries	= FTRACE_STACK_ENTRIES;
-		trace.entries		= entry->caller;
-		if (regs)
-			save_stack_trace_regs(regs, &trace);
-		else
-			save_stack_trace(&trace);
-	}
+	memcpy(&entry->caller, trace.entries, size);
 
 	entry->size = trace.nr_entries;
 
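As a sanity check on the "128/256 entries per level" figure from the
commit message (a sketch, assuming 4 KiB pages; PAGE_SIZE and NESTING
below are local stand-ins, not the kernel macros): splitting one page
across four nesting levels leaves 1024 bytes per level, which holds 128
unsigned long entries on 64-bit and 256 on 32-bit.

/* Worked sizing behind the 128/256 claim, assuming 4 KiB pages. */
#include <assert.h>

#define PAGE_SIZE	4096
#define NESTING		4

int main(void)
{
	unsigned long bytes_per_level = PAGE_SIZE / NESTING;	/* 1024 */

	/* 128 entries when sizeof(unsigned long) == 8 (64-bit),
	 * 256 entries when it is 4 (32-bit). */
	assert(bytes_per_level / sizeof(unsigned long) == 128 ||
	       bytes_per_level / sizeof(unsigned long) == 256);
	return 0;
}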
