Skip to content

Commit 087fefa

Browse files
Xu Kuohai and Kernel Patches Daemon
authored and committed
selftests/bpf/benchs: Add producer and overwrite bench for ring buffer
Add rb-prod test for bpf ring buffer to bench producer performance without consumer thread. And add --rb-overwrite option to bench ring buffer in overwrite mode. For reference, below are bench numbers collected from x86_64 and arm64 CPUs. - AMD EPYC 9654 (x86_64) Ringbuf, overwrite mode with multi-producer contention, no consumer =================================================================== rb-prod nr_prod 1 32.295 ± 0.004M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 2 9.591 ± 0.003M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 3 8.895 ± 0.002M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 4 9.206 ± 0.003M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 8 9.220 ± 0.002M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 12 4.595 ± 0.022M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 16 4.348 ± 0.016M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 20 3.957 ± 0.017M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 24 3.787 ± 0.014M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 28 3.603 ± 0.011M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 32 3.707 ± 0.011M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 36 3.562 ± 0.012M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 40 3.616 ± 0.012M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 44 3.598 ± 0.016M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 48 3.555 ± 0.014M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 52 3.463 ± 0.020M/s (drops 0.000 ± 0.000M/s) - HiSilicon Kunpeng 920 (arm64) Ringbuf, overwrite mode with multi-producer contention, no consumer =================================================================== rb-prod nr_prod 1 14.687 ± 0.058M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 2 22.263 ± 0.007M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 3 5.736 ± 0.003M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 4 4.934 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 8 4.661 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 12 3.753 ± 0.013M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 16 3.706 ± 0.018M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 20 3.660 ± 
0.015M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 24 3.610 ± 0.016M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 28 3.238 ± 0.010M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 32 3.270 ± 0.018M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 36 2.892 ± 0.021M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 40 2.995 ± 0.018M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 44 2.830 ± 0.019M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 48 2.877 ± 0.015M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 52 2.814 ± 0.015M/s (drops 0.000 ± 0.000M/s) Signed-off-by: Xu Kuohai <[email protected]>
1 parent dae3436 commit 087fefa

File tree

4 files changed

+103
-8
lines changed

4 files changed

+103
-8
lines changed

tools/testing/selftests/bpf/bench.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,7 @@ extern const struct bench bench_trig_uretprobe_multi_nop5;
541541

542542
extern const struct bench bench_rb_libbpf;
543543
extern const struct bench bench_rb_custom;
544+
extern const struct bench bench_rb_prod;
544545
extern const struct bench bench_pb_libbpf;
545546
extern const struct bench bench_pb_custom;
546547
extern const struct bench bench_bloom_lookup;
@@ -617,6 +618,7 @@ static const struct bench *benchs[] = {
617618
/* ringbuf/perfbuf benchmarks */
618619
&bench_rb_libbpf,
619620
&bench_rb_custom,
621+
&bench_rb_prod,
620622
&bench_pb_libbpf,
621623
&bench_pb_custom,
622624
&bench_bloom_lookup,

tools/testing/selftests/bpf/benchs/bench_ringbufs.c

Lines changed: 87 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ static struct {
1919
int ringbuf_sz; /* per-ringbuf, in bytes */
2020
bool ringbuf_use_output; /* use slower output API */
2121
int perfbuf_sz; /* per-CPU size, in pages */
22+
bool overwrite;
2223
} args = {
2324
.back2back = false,
2425
.batch_cnt = 500,
@@ -27,6 +28,7 @@ static struct {
2728
.ringbuf_sz = 512 * 1024,
2829
.ringbuf_use_output = false,
2930
.perfbuf_sz = 128,
31+
.overwrite = false,
3032
};
3133

3234
enum {
@@ -35,6 +37,7 @@ enum {
3537
ARG_RB_BATCH_CNT = 2002,
3638
ARG_RB_SAMPLED = 2003,
3739
ARG_RB_SAMPLE_RATE = 2004,
40+
ARG_RB_OVERWRITE = 2005,
3841
};
3942

4043
static const struct argp_option opts[] = {
@@ -43,6 +46,7 @@ static const struct argp_option opts[] = {
4346
{ "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
4447
{ "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
4548
{ "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
49+
{ "rb-overwrite", ARG_RB_OVERWRITE, NULL, 0, "Overwrite mode"},
4650
{},
4751
};
4852

@@ -72,6 +76,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
7276
argp_usage(state);
7377
}
7478
break;
79+
case ARG_RB_OVERWRITE:
80+
args.overwrite = true;
81+
break;
7582
default:
7683
return ARGP_ERR_UNKNOWN;
7784
}
@@ -95,8 +102,30 @@ static inline void bufs_trigger_batch(void)
95102

96103
static void bufs_validate(void)
97104
{
98-
if (env.consumer_cnt != 1) {
99-
fprintf(stderr, "rb-libbpf benchmark needs one consumer!\n");
105+
bool bench_prod = !strcmp(env.bench_name, "rb-prod");
106+
107+
if (args.overwrite && !bench_prod) {
108+
fprintf(stderr, "overwite mode only works with benchmakr rb-prod!\n");
109+
exit(1);
110+
}
111+
112+
if (bench_prod && env.consumer_cnt != 0) {
113+
fprintf(stderr, "rb-prod benchmark does not need consumer!\n");
114+
exit(1);
115+
}
116+
117+
if (bench_prod && args.back2back) {
118+
fprintf(stderr, "back-to-back mode makes no sense for rb-prod!\n");
119+
exit(1);
120+
}
121+
122+
if (bench_prod && args.sampled) {
123+
fprintf(stderr, "sampling mode makes no sense for rb-prod!\n");
124+
exit(1);
125+
}
126+
127+
if (!bench_prod && env.consumer_cnt != 1) {
128+
fprintf(stderr, "benchmarks excluding rb-prod need one consumer!\n");
100129
exit(1);
101130
}
102131

@@ -132,8 +161,10 @@ static void ringbuf_libbpf_measure(struct bench_res *res)
132161
res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
133162
}
134163

135-
static struct ringbuf_bench *ringbuf_setup_skeleton(void)
164+
static struct ringbuf_bench *ringbuf_setup_skeleton(int bench_prod)
136165
{
166+
__u32 flags;
167+
struct bpf_map *ringbuf;
137168
struct ringbuf_bench *skel;
138169

139170
setup_libbpf();
@@ -146,12 +177,19 @@ static struct ringbuf_bench *ringbuf_setup_skeleton(void)
146177

147178
skel->rodata->batch_cnt = args.batch_cnt;
148179
skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;
180+
skel->rodata->bench_prod = bench_prod;
149181

150182
if (args.sampled)
151183
/* record data + header take 16 bytes */
152184
skel->rodata->wakeup_data_size = args.sample_rate * 16;
153185

154-
bpf_map__set_max_entries(skel->maps.ringbuf, args.ringbuf_sz);
186+
ringbuf = skel->maps.ringbuf;
187+
if (args.overwrite) {
188+
flags = bpf_map__map_flags(ringbuf) | BPF_F_OVERWRITE;
189+
bpf_map__set_map_flags(ringbuf, flags);
190+
}
191+
192+
bpf_map__set_max_entries(ringbuf, args.ringbuf_sz);
155193

156194
if (ringbuf_bench__load(skel)) {
157195
fprintf(stderr, "failed to load skeleton\n");
@@ -171,10 +209,13 @@ static void ringbuf_libbpf_setup(void)
171209
{
172210
struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
173211
struct bpf_link *link;
212+
int map_fd;
174213

175-
ctx->skel = ringbuf_setup_skeleton();
176-
ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf),
177-
buf_process_sample, NULL, NULL);
214+
ctx->skel = ringbuf_setup_skeleton(0);
215+
216+
map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
217+
ctx->ringbuf = ring_buffer__new(map_fd, buf_process_sample,
218+
NULL, NULL);
178219
if (!ctx->ringbuf) {
179220
fprintf(stderr, "failed to create ringbuf\n");
180221
exit(1);
@@ -232,7 +273,7 @@ static void ringbuf_custom_setup(void)
232273
void *tmp;
233274
int err;
234275

235-
ctx->skel = ringbuf_setup_skeleton();
276+
ctx->skel = ringbuf_setup_skeleton(0);
236277

237278
ctx->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
238279
if (ctx->epoll_fd < 0) {
@@ -277,6 +318,33 @@ static void ringbuf_custom_setup(void)
277318
}
278319
}
279320

321+
/* RINGBUF-PRODUCER benchmark */
322+
/* State for the rb-prod benchmark: only the loaded BPF skeleton is kept. */
static struct ringbuf_prod_ctx {
323+
struct ringbuf_bench *skel;
324+
} ringbuf_prod_ctx;
325+
326+
/*
 * Fill @res for one measurement interval of the rb-prod benchmark.
 *
 * Both counters live in the skeleton's .bss and are read from there:
 * `hits` goes to res->hits and `dropped` goes to res->drops.
 * Each read passes 0 to atomic_swap(), so the counter is presumably reset
 * for the next interval (atomic_swap is defined outside this hunk -- verify).
 */
static void ringbuf_prod_measure(struct bench_res *res)
327+
{
328+
struct ringbuf_prod_ctx *ctx = &ringbuf_prod_ctx;
329+
330+
res->hits = atomic_swap(&ctx->skel->bss->hits, 0);
331+
res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
332+
}
333+
334+
/*
 * Setup callback for the rb-prod benchmark.
 *
 * Loads the skeleton in producer-only mode and attaches the bench_ringbuf
 * program. No ring_buffer consumer object or epoll fd is created here.
 * Prints an error and exits with status 1 if the attach fails.
 */
static void ringbuf_prod_setup(void)
335+
{
336+
struct ringbuf_prod_ctx *ctx = &ringbuf_prod_ctx;
337+
struct bpf_link *link;
338+
339+
/* the argument 1 is bench_prod, which is copied into skel->rodata->bench_prod */
ctx->skel = ringbuf_setup_skeleton(1);
340+
341+
link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
342+
if (!link) {
343+
fprintf(stderr, "failed to attach program!\n");
344+
exit(1);
345+
}
346+
}
347+
280348
#define RINGBUF_BUSY_BIT (1 << 31)
281349
#define RINGBUF_DISCARD_BIT (1 << 30)
282350
#define RINGBUF_META_LEN 8
@@ -540,6 +608,17 @@ const struct bench bench_rb_custom = {
540608
.report_final = hits_drops_report_final,
541609
};
542610

611+
/*
 * Descriptor for the producer-only ring buffer benchmark.
 *
 * No consumer callback is set here, and bufs_validate() rejects a non-zero
 * consumer count for this benchmark.
 */
const struct bench bench_rb_prod = {
612+
/* bufs_validate() compares env.bench_name against this exact string */
.name = "rb-prod",
613+
.argp = &bench_ringbufs_argp,
614+
.validate = bufs_validate,
615+
.setup = ringbuf_prod_setup,
616+
.producer_thread = bufs_sample_producer,
617+
.measure = ringbuf_prod_measure,
618+
.report_progress = hits_drops_report_progress,
619+
.report_final = hits_drops_report_final,
620+
};
621+
543622
const struct bench bench_pb_libbpf = {
544623
.name = "pb-libbpf",
545624
.argp = &bench_ringbufs_argp,

tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,7 @@ for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
4949
summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
5050
done
5151

52+
# Run rb-prod in overwrite mode for each producer count, with a batch size of 50.
# NOTE(review): this uses $RUN_BENCH where the loop above uses $RUN_RB_BENCH --
# presumably because rb-prod must run with no consumer; confirm against the
# definitions of both variables earlier in the script.
header "Ringbuf, overwrite mode with multi-producer contention, no consumer"
53+
for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
54+
summarize "rb-prod nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 --rb-overwrite rb-prod)"
55+
done

tools/testing/selftests/bpf/progs/ringbuf_bench.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,21 @@ struct {
1414

1515
const volatile int batch_cnt = 0;
1616
const volatile long use_output = 0;
17+
const volatile long bench_prod = 0;
1718

1819
long sample_val = 42;
1920
long dropped __attribute__((aligned(128))) = 0;
21+
long hits __attribute__((aligned(128))) = 0;
2022

2123
const volatile long wakeup_data_size = 0;
2224

2325
static __always_inline long get_flags()
2426
{
2527
long sz;
2628

29+
if (bench_prod)
30+
return BPF_RB_NO_WAKEUP;
31+
2732
if (!wakeup_data_size)
2833
return 0;
2934

@@ -47,6 +52,8 @@ int bench_ringbuf(void *ctx)
4752
*sample = sample_val;
4853
flags = get_flags();
4954
bpf_ringbuf_submit(sample, flags);
55+
if (bench_prod)
56+
__sync_add_and_fetch(&hits, 1);
5057
}
5158
}
5259
} else {
@@ -55,6 +62,9 @@ int bench_ringbuf(void *ctx)
5562
if (bpf_ringbuf_output(&ringbuf, &sample_val,
5663
sizeof(sample_val), flags))
5764
__sync_add_and_fetch(&dropped, 1);
65+
else if (bench_prod)
66+
__sync_add_and_fetch(&hits, 1);
67+
5868
}
5969
}
6070
return 0;

0 commit comments

Comments
 (0)