Skip to content

Commit 20f2505

Browse files
fomichev authored and Alexei Starovoitov committed
bpf: Try to avoid kzalloc in cgroup/{s,g}etsockopt
When we attach a bpf program to cgroup/getsockopt any other getsockopt() syscall starts incurring kzalloc/kfree cost. Let add a small buffer on the stack and use it for small (majority) {s,g}etsockopt values. The buffer is small enough to fit into the cache line and cover the majority of simple options (most of them are 4 byte ints). It seems natural to do the same for setsockopt, but it's a bit more involved when the BPF program modifies the data (where we have to kmalloc). The assumption is that for the majority of setsockopt calls (which are doing pure BPF options or apply policy) this will bring some benefit as well. Without this patch (we remove about 1% __kmalloc): 3.38% 0.07% tcp_mmap [kernel.kallsyms] [k] __cgroup_bpf_run_filter_getsockopt | --3.30%--__cgroup_bpf_run_filter_getsockopt | --0.81%--__kmalloc Signed-off-by: Stanislav Fomichev <[email protected]> Signed-off-by: Alexei Starovoitov <[email protected]> Acked-by: Martin KaFai Lau <[email protected]> Link: https://lore.kernel.org/bpf/[email protected]
1 parent 9cacf81 commit 20f2505

File tree

2 files changed

+50
-7
lines changed

2 files changed

+50
-7
lines changed

include/linux/filter.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1298,6 +1298,11 @@ struct bpf_sysctl_kern {
12981298
u64 tmp_reg;
12991299
};
13001300

1301+
#define BPF_SOCKOPT_KERN_BUF_SIZE 32
1302+
struct bpf_sockopt_buf {
1303+
u8 data[BPF_SOCKOPT_KERN_BUF_SIZE];
1304+
};
1305+
13011306
struct bpf_sockopt_kern {
13021307
struct sock *sk;
13031308
u8 *optval;

kernel/bpf/cgroup.c

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1298,7 +1298,8 @@ static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
12981298
return empty;
12991299
}
13001300

1301-
static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen)
1301+
static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen,
1302+
struct bpf_sockopt_buf *buf)
13021303
{
13031304
if (unlikely(max_optlen < 0))
13041305
return -EINVAL;
@@ -1310,6 +1311,15 @@ static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen)
13101311
max_optlen = PAGE_SIZE;
13111312
}
13121313

1314+
if (max_optlen <= sizeof(buf->data)) {
1315+
/* When the optval fits into BPF_SOCKOPT_KERN_BUF_SIZE
1316+
* bytes avoid the cost of kzalloc.
1317+
*/
1318+
ctx->optval = buf->data;
1319+
ctx->optval_end = ctx->optval + max_optlen;
1320+
return max_optlen;
1321+
}
1322+
13131323
ctx->optval = kzalloc(max_optlen, GFP_USER);
13141324
if (!ctx->optval)
13151325
return -ENOMEM;
@@ -1319,16 +1329,26 @@ static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen)
13191329
return max_optlen;
13201330
}
13211331

1322-
static void sockopt_free_buf(struct bpf_sockopt_kern *ctx)
1332+
static void sockopt_free_buf(struct bpf_sockopt_kern *ctx,
1333+
struct bpf_sockopt_buf *buf)
13231334
{
1335+
if (ctx->optval == buf->data)
1336+
return;
13241337
kfree(ctx->optval);
13251338
}
13261339

1340+
static bool sockopt_buf_allocated(struct bpf_sockopt_kern *ctx,
1341+
struct bpf_sockopt_buf *buf)
1342+
{
1343+
return ctx->optval != buf->data;
1344+
}
1345+
13271346
int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
13281347
int *optname, char __user *optval,
13291348
int *optlen, char **kernel_optval)
13301349
{
13311350
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
1351+
struct bpf_sockopt_buf buf = {};
13321352
struct bpf_sockopt_kern ctx = {
13331353
.sk = sk,
13341354
.level = *level,
@@ -1350,7 +1370,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
13501370
*/
13511371
max_optlen = max_t(int, 16, *optlen);
13521372

1353-
max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
1373+
max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf);
13541374
if (max_optlen < 0)
13551375
return max_optlen;
13561376

@@ -1390,14 +1410,31 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
13901410
*/
13911411
if (ctx.optlen != 0) {
13921412
*optlen = ctx.optlen;
1393-
*kernel_optval = ctx.optval;
1413+
/* We've used bpf_sockopt_kern->buf as an intermediary
1414+
* storage, but the BPF program indicates that we need
1415+
* to pass this data to the kernel setsockopt handler.
1416+
* No way to export on-stack buf, have to allocate a
1417+
* new buffer.
1418+
*/
1419+
if (!sockopt_buf_allocated(&ctx, &buf)) {
1420+
void *p = kmalloc(ctx.optlen, GFP_USER);
1421+
1422+
if (!p) {
1423+
ret = -ENOMEM;
1424+
goto out;
1425+
}
1426+
memcpy(p, ctx.optval, ctx.optlen);
1427+
*kernel_optval = p;
1428+
} else {
1429+
*kernel_optval = ctx.optval;
1430+
}
13941431
/* export and don't free sockopt buf */
13951432
return 0;
13961433
}
13971434
}
13981435

13991436
out:
1400-
sockopt_free_buf(&ctx);
1437+
sockopt_free_buf(&ctx, &buf);
14011438
return ret;
14021439
}
14031440

@@ -1407,6 +1444,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
14071444
int retval)
14081445
{
14091446
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
1447+
struct bpf_sockopt_buf buf = {};
14101448
struct bpf_sockopt_kern ctx = {
14111449
.sk = sk,
14121450
.level = level,
@@ -1425,7 +1463,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
14251463

14261464
ctx.optlen = max_optlen;
14271465

1428-
max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
1466+
max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf);
14291467
if (max_optlen < 0)
14301468
return max_optlen;
14311469

@@ -1483,7 +1521,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
14831521
ret = ctx.retval;
14841522

14851523
out:
1486-
sockopt_free_buf(&ctx);
1524+
sockopt_free_buf(&ctx, &buf);
14871525
return ret;
14881526
}
14891527

0 commit comments

Comments
 (0)