Skip to content

Commit 08b875b

Browse files
committed
perf ftrace latency: Introduce --min-latency to narrow down into a latency range
Latencies below and above the selected range will land in the first and last (outlier) buckets. Without it: # perf ftrace latency --use-nsec --use-bpf \ --bucket-range=200 \ -T switch_mm_irqs_off -a sleep 2 # DURATION | COUNT | GRAPH | 0 - 200 ns | 0 | | 200 - 400 ns | 44 | | 400 - 600 ns | 291 | # | 600 - 800 ns | 506 | ## | 800 - 1000 ns | 148 | | 1.00 - 1.20 us | 581 | ## | 1.20 - 1.40 us | 2199 | ########## | 1.40 - 1.60 us | 1048 | #### | 1.60 - 1.80 us | 1448 | ###### | 1.80 - 2.00 us | 1091 | ##### | 2.00 - 2.20 us | 517 | ## | 2.20 - 2.40 us | 318 | # | 2.40 - 2.60 us | 370 | # | 2.60 - 2.80 us | 271 | # | 2.80 - 3.00 us | 150 | | 3.00 - 3.20 us | 85 | | 3.20 - 3.40 us | 48 | | 3.40 - 3.60 us | 40 | | 3.60 - 3.80 us | 22 | | 3.80 - 4.00 us | 13 | | 4.00 - 4.20 us | 14 | | 4.20 - ... us | 626 | ## | # # perf ftrace latency --use-nsec --use-bpf \ --bucket-range=20 --min-latency=1200 \ -T switch_mm_irqs_off -a sleep 2 # DURATION | COUNT | GRAPH | 0 - 1200 ns | 1243 | ##### | 1.20 - 1.22 us | 141 | | 1.22 - 1.24 us | 202 | | 1.24 - 1.26 us | 209 | | 1.26 - 1.28 us | 219 | | 1.28 - 1.30 us | 208 | | 1.30 - 1.32 us | 245 | # | 1.32 - 1.34 us | 246 | # | 1.34 - 1.36 us | 224 | # | 1.36 - 1.38 us | 219 | | 1.38 - 1.40 us | 206 | | 1.40 - 1.42 us | 190 | | 1.42 - 1.44 us | 190 | | 1.44 - 1.46 us | 146 | | 1.46 - 1.48 us | 140 | | 1.48 - 1.50 us | 125 | | 1.50 - 1.52 us | 115 | | 1.52 - 1.54 us | 102 | | 1.54 - 1.56 us | 87 | | 1.56 - 1.58 us | 90 | | 1.58 - 1.60 us | 85 | | 1.60 - ... us | 5487 | ######################## | # Now we want to focus on the latencies starting at 1.2us, with a finer grained range of 20ns: This is all on a live system, so the results are statistically interesting, but the two runs are not narrowing down on the same numbers; a 'perf ftrace latency record' mode therefore seems worthwhile, so that all these views can be produced from the same snapshot of latencies. 
A --max-latency counterpart should come next, at first limiting the max-latency to 20 * bucket-size, as we have a fixed buckets array with 20 + 2 entries (+ 2 for the outliers) and thus would need to make it larger for higher latencies. We may also need a way to ask for the out-of-range values (first and last buckets) not to be considered when drawing the bucket bars. Co-developed-by: Gabriele Monaco <[email protected]> Cc: Adrian Hunter <[email protected]> Cc: Clark Williams <[email protected]> Cc: Ian Rogers <[email protected]> Cc: Ingo Molnar <[email protected]> Cc: Jiri Olsa <[email protected]> Cc: Kan Liang <[email protected]> Cc: Namhyung Kim <[email protected]> Cc: Thomas Gleixner <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Gabriele Monaco <[email protected]> Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent e8536dd commit 08b875b

File tree

5 files changed

+43
-7
lines changed

5 files changed

+43
-7
lines changed

tools/perf/Documentation/perf-ftrace.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,10 @@ OPTIONS for 'perf ftrace latency'
151151
--bucket-range=::
152152
Bucket range in us or ns (according to -n/--use-nsec), default is log2() mode.
153153

154+
--min-latency=::
155+
Minimum latency for the start of the first bucket, in us or ns (according to
156+
-n/--use-nsec).
157+
154158

155159
OPTIONS for 'perf ftrace profile'
156160
---------------------------------

tools/perf/builtin-ftrace.c

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -729,6 +729,7 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace)
729729
static void make_histogram(struct perf_ftrace *ftrace, int buckets[],
730730
char *buf, size_t len, char *linebuf)
731731
{
732+
int min_latency = ftrace->min_latency;
732733
char *p, *q;
733734
char *unit;
734735
double num;
@@ -777,20 +778,26 @@ static void make_histogram(struct perf_ftrace *ftrace, int buckets[],
777778
if (ftrace->use_nsec)
778779
num *= 1000;
779780

781+
i = 0;
782+
if (num < min_latency)
783+
goto do_inc;
784+
785+
num -= min_latency;
786+
780787
if (!ftrace->bucket_range) {
781788
i = log2(num);
782789
if (i < 0)
783790
i = 0;
784791
} else {
785792
// Less than 1 unit (ms or ns), or, in the future,
786793
// than the min latency desired.
787-
i = 0;
788794
if (num > 0) // 1st entry: [ 1 unit .. bucket_range units ]
789795
i = num / ftrace->bucket_range + 1;
790796
}
791797
if (i >= NUM_BUCKET)
792798
i = NUM_BUCKET - 1;
793799

800+
do_inc:
794801
buckets[i]++;
795802

796803
next:
@@ -804,6 +811,7 @@ static void make_histogram(struct perf_ftrace *ftrace, int buckets[],
804811

805812
static void display_histogram(struct perf_ftrace *ftrace, int buckets[])
806813
{
814+
int min_latency = ftrace->min_latency;
807815
bool use_nsec = ftrace->use_nsec;
808816
int i;
809817
int total = 0;
@@ -825,7 +833,8 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[])
825833
bar_len = buckets[0] * bar_total / total;
826834

827835
printf(" %4d - %4d %s | %10d | %.*s%*s |\n",
828-
0, 1, use_nsec ? "ns" : "us", buckets[0], bar_len, bar, bar_total - bar_len, "");
836+
0, min_latency, use_nsec ? "ns" : "us",
837+
buckets[0], bar_len, bar, bar_total - bar_len, "");
829838

830839
for (i = 1; i < NUM_BUCKET - 1; i++) {
831840
int start, stop;
@@ -841,8 +850,8 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[])
841850
unit = use_nsec ? "us" : "ms";
842851
}
843852
} else {
844-
start = (i - 1) * ftrace->bucket_range + 1;
845-
stop = i * ftrace->bucket_range + 1;
853+
start = (i - 1) * ftrace->bucket_range + min_latency;
854+
stop = i * ftrace->bucket_range + min_latency;
846855

847856
if (start >= 1000) {
848857
double dstart = start / 1000.0,
@@ -864,7 +873,7 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[])
864873
if (!ftrace->bucket_range) {
865874
printf(" %4d - %-4s %s", 1, "...", use_nsec ? "ms" : "s ");
866875
} else {
867-
int upper_outlier = (NUM_BUCKET - 2) * ftrace->bucket_range;
876+
int upper_outlier = (NUM_BUCKET - 2) * ftrace->bucket_range + min_latency;
868877

869878
if (upper_outlier >= 1000) {
870879
double dstart = upper_outlier / 1000.0;
@@ -1598,6 +1607,8 @@ int cmd_ftrace(int argc, const char **argv)
15981607
"Use nano-second histogram"),
15991608
OPT_UINTEGER(0, "bucket-range", &ftrace.bucket_range,
16001609
"Bucket range in ms or ns (-n/--use-nsec), default is log2() mode"),
1610+
OPT_UINTEGER(0, "min-latency", &ftrace.min_latency,
1611+
"Minimum latency (1st bucket). Works only with --bucket-range."),
16011612
OPT_PARENT(common_options),
16021613
};
16031614
const struct option profile_options[] = {
@@ -1693,6 +1704,17 @@ int cmd_ftrace(int argc, const char **argv)
16931704
ret = -EINVAL;
16941705
goto out_delete_filters;
16951706
}
1707+
if (!ftrace.bucket_range && ftrace.min_latency) {
1708+
pr_err("--min-latency works only with --bucket-range\n");
1709+
parse_options_usage(ftrace_usage, options,
1710+
"min-latency", /*short_opt=*/false);
1711+
ret = -EINVAL;
1712+
goto out_delete_filters;
1713+
}
1714+
if (!ftrace.min_latency) {
1715+
/* default min latency should be the bucket range */
1716+
ftrace.min_latency = ftrace.bucket_range;
1717+
}
16961718
cmd_func = __cmd_latency;
16971719
break;
16981720
case PERF_FTRACE_PROFILE:

tools/perf/util/bpf_ftrace.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
3737
}
3838

3939
skel->rodata->bucket_range = ftrace->bucket_range;
40+
skel->rodata->min_latency = ftrace->min_latency;
4041

4142
/* don't need to set cpu filter for system-wide mode */
4243
if (ftrace->target.cpu_list) {

tools/perf/util/bpf_skel/func_latency.bpf.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ const volatile int has_cpu = 0;
4242
const volatile int has_task = 0;
4343
const volatile int use_nsec = 0;
4444
const volatile unsigned int bucket_range;
45+
const volatile unsigned int min_latency;
4546

4647
SEC("kprobe/func")
4748
int BPF_PROG(func_begin)
@@ -93,7 +94,7 @@ int BPF_PROG(func_end)
9394
start = bpf_map_lookup_elem(&functime, &tid);
9495
if (start) {
9596
__s64 delta = bpf_ktime_get_ns() - *start;
96-
__u32 key;
97+
__u32 key = 0;
9798
__u64 *hist;
9899

99100
bpf_map_delete_elem(&functime, &tid);
@@ -103,9 +104,16 @@ int BPF_PROG(func_end)
103104

104105
if (bucket_range != 0) {
105106
delta /= cmp_base;
107+
108+
if (min_latency > 0) {
109+
if (delta > min_latency)
110+
delta -= min_latency;
111+
else
112+
goto do_lookup;
113+
}
114+
106115
// Less than 1 unit (ms or ns), or, in the future,
107116
// than the min latency desired.
108-
key = 0;
109117
if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units )
110118
key = delta / bucket_range + 1;
111119
if (key >= NUM_BUCKET)

tools/perf/util/ftrace.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ struct perf_ftrace {
2121
bool inherit;
2222
bool use_nsec;
2323
unsigned int bucket_range;
24+
unsigned int min_latency;
2425
int graph_depth;
2526
int func_stack_trace;
2627
int func_irq_info;

0 commit comments

Comments
 (0)