
Commit e6ea60b
Alexei Starovoitov says:

====================
1) libbpf should not attempt to load unused subprogs, from Andrii.
2) Make strncpy_from_user() mask out bytes after NUL terminator, from Daniel.
3) Relax return code check for subprograms in the BPF verifier, from Dmitrii.
4) Fix several sockmap issues, from John.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  fail_function: Remove a redundant mutex unlock
  selftest/bpf: Test bpf_probe_read_user_str() strips trailing bytes after NUL
  lib/strncpy_from_user.c: Mask out bytes after NUL terminator.
  libbpf: Fix VERSIONED_SYM_COUNT number parsing
  bpf, sockmap: Avoid failures from skb_to_sgvec when skb has frag_list
  bpf, sockmap: Handle memory acct if skb_verdict prog redirects to self
  bpf, sockmap: Avoid returning unneeded EAGAIN when redirecting to self
  bpf, sockmap: Use truesize with sk_rmem_schedule()
  bpf, sockmap: Ensure SO_RCVBUF memory is observed on ingress redirect
  bpf, sockmap: Fix partial copy_page_to_iter so progress can still be made
  selftests/bpf: Fix error return code in run_getsockopt_test()
  bpf: Relax return code check for subprograms
  tools, bpftool: Add missing close before bpftool net attach exit
  MAINTAINERS/bpf: Update Andrii's entry.
  selftests/bpf: Fix unused attribute usage in subprogs_unused test
  bpf: Fix unsigned 'datasec_id' compared with zero in check_pseudo_btf_id
  bpf: Fix passing zero to PTR_ERR() in bpf_btf_printf_prepare
  libbpf: Don't attempt to load unused subprog as an entry-point BPF program
====================

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
2 parents 90b4978 + 2801a5d commit e6ea60b

17 files changed: +301 −49 lines

MAINTAINERS

Lines changed: 1 addition & 1 deletion

@@ -3243,10 +3243,10 @@ F: drivers/iio/accel/bma400*
 BPF (Safe dynamic programs and tools)
 M:	Alexei Starovoitov <[email protected]>
 M:	Daniel Borkmann <[email protected]>
+M:	Andrii Nakryiko <[email protected]>
 R:	Martin KaFai Lau <[email protected]>
 R:	Song Liu <[email protected]>
 R:	Yonghong Song <[email protected]>
-R:	Andrii Nakryiko <[email protected]>
 R:	John Fastabend <[email protected]>
 R:	KP Singh <[email protected]>

kernel/bpf/verifier.c

Lines changed: 15 additions & 3 deletions

@@ -7786,9 +7786,11 @@ static int check_return_code(struct bpf_verifier_env *env)
 	struct tnum range = tnum_range(0, 1);
 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
 	int err;
+	const bool is_subprog = env->cur_state->frame[0]->subprogno;
 
 	/* LSM and struct_ops func-ptr's return type could be "void" */
-	if ((prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
+	if (!is_subprog &&
+	    (prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
 	     prog_type == BPF_PROG_TYPE_LSM) &&
 	    !prog->aux->attach_func_proto->type)
 		return 0;
@@ -7808,6 +7810,16 @@ static int check_return_code(struct bpf_verifier_env *env)
 		return -EACCES;
 	}
 
+	reg = cur_regs(env) + BPF_REG_0;
+	if (is_subprog) {
+		if (reg->type != SCALAR_VALUE) {
+			verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
+				reg_type_str[reg->type]);
+			return -EINVAL;
+		}
+		return 0;
+	}
+
 	switch (prog_type) {
 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
 		if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
@@ -7861,7 +7873,6 @@ static int check_return_code(struct bpf_verifier_env *env)
 		return 0;
 	}
 
-	reg = cur_regs(env) + BPF_REG_0;
 	if (reg->type != SCALAR_VALUE) {
 		verbose(env, "At program exit the register R0 is not a known value (%s)\n",
 			reg_type_str[reg->type]);
@@ -9572,12 +9583,13 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
 			       struct bpf_insn *insn,
 			       struct bpf_insn_aux_data *aux)
 {
-	u32 datasec_id, type, id = insn->imm;
 	const struct btf_var_secinfo *vsi;
 	const struct btf_type *datasec;
 	const struct btf_type *t;
 	const char *sym_name;
 	bool percpu = false;
+	u32 type, id = insn->imm;
+	s32 datasec_id;
 	u64 addr;
 	int i;
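The check_return_code() change above only requires R0 to hold a scalar at a subprogram's exit; the per-program-type range checks still apply at the main program's exit (frame 0). A minimal sketch of the kind of code this permits, assuming a clang/libbpf BPF toolchain (this program is illustrative, not from this commit):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* __noinline keeps sum_bytes() a separate BPF subprogram rather than
 * letting clang inline it into the caller.
 */
static __noinline int sum_bytes(const char *buf, int len)
{
	int i, sum = 0;

	for (i = 0; i < len; i++)
		sum += buf[i];
	return sum;	/* any SCALAR_VALUE is acceptable at subprog exit */
}

SEC("tracepoint/syscalls/sys_enter_getpgid")
int prog(void *ctx)
{
	char buf[4] = { 1, 2, 3, 4 };

	bpf_printk("sum=%d", sum_bytes(buf, sizeof(buf)));
	return 0;	/* main program: still checked against the expected range */
}

char LICENSE[] SEC("license") = "GPL";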

kernel/fail_function.c

Lines changed: 3 additions & 2 deletions

@@ -253,7 +253,7 @@ static ssize_t fei_write(struct file *file, const char __user *buffer,
 
 	if (copy_from_user(buf, buffer, count)) {
 		ret = -EFAULT;
-		goto out;
+		goto out_free;
 	}
 	buf[count] = '\0';
 	sym = strstrip(buf);
@@ -307,8 +307,9 @@ static ssize_t fei_write(struct file *file, const char __user *buffer,
 		ret = count;
 	}
 out:
-	kfree(buf);
 	mutex_unlock(&fei_lock);
+out_free:
+	kfree(buf);
 	return ret;
 }
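The fix restores the usual kernel error-label ordering: each failure site jumps to a label that releases only what is actually held, so a copy_from_user() failure before fei_lock is taken no longer runs mutex_unlock(). A userspace sketch of the same pattern, with a pthread mutex standing in for fei_lock (names are illustrative):

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static int process(const char *input)
{
	int ret = 0;
	char *buf = malloc(64);

	if (!buf)
		return -1;

	if (strlen(input) >= 64) {	/* fails before the lock is taken */
		ret = -1;
		goto out_free;		/* must NOT unlock: lock not held */
	}
	strcpy(buf, input);

	pthread_mutex_lock(&lock);
	/* ... critical section ... */
	pthread_mutex_unlock(&lock);
out_free:
	free(buf);			/* released on every path that owns it */
	return ret;
}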

kernel/trace/bpf_trace.c

Lines changed: 11 additions & 1 deletion

@@ -181,6 +181,16 @@ bpf_probe_read_user_str_common(void *dst, u32 size,
 {
 	int ret;
 
+	/*
+	 * NB: We rely on strncpy_from_user() not copying junk past the NUL
+	 * terminator into `dst`.
+	 *
+	 * strncpy_from_user() does long-sized strides in the fast path. If the
+	 * strncpy does not mask out the bytes after the NUL in `unsafe_ptr`,
+	 * then there could be junk after the NUL in `dst`. If user takes `dst`
+	 * and keys a hash map with it, then semantically identical strings can
+	 * occupy multiple entries in the map.
+	 */
 	ret = strncpy_from_user_nofault(dst, unsafe_ptr, size);
 	if (unlikely(ret < 0))
 		memset(dst, 0, size);
@@ -1198,7 +1208,7 @@ static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
 	*btf = bpf_get_btf_vmlinux();
 
 	if (IS_ERR_OR_NULL(*btf))
-		return PTR_ERR(*btf);
+		return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL;
 
 	if (ptr->type_id > 0)
 		*btf_id = ptr->type_id;
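The bpf_btf_printf_prepare() change fixes a classic pitfall: PTR_ERR(NULL) evaluates to 0, so returning it for a NULL pointer reports success. A standalone userspace sketch of the bug and the fix (the ERR_PTR helpers are re-declared here for illustration; the kernel's live in include/linux/err.h):

#include <stdio.h>

#define MAX_ERRNO	4095
#define IS_ERR_VALUE(x)	((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)

static long PTR_ERR(const void *ptr) { return (long)ptr; }
static int IS_ERR(const void *ptr) { return IS_ERR_VALUE((unsigned long)ptr); }
static int IS_ERR_OR_NULL(const void *ptr) { return !ptr || IS_ERR(ptr); }

int main(void)
{
	void *btf = NULL;	/* bpf_get_btf_vmlinux() may return NULL */

	if (IS_ERR_OR_NULL(btf)) {
		/* buggy: PTR_ERR(NULL) == 0, i.e. "success" with no BTF */
		printf("buggy:  %ld\n", PTR_ERR(btf));
		/* fixed: map the NULL case to a real error code */
		printf("fixed:  %ld\n",
		       IS_ERR(btf) ? PTR_ERR(btf) : -22L /* -EINVAL */);
	}
	return 0;
}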

lib/strncpy_from_user.c

Lines changed: 17 additions & 2 deletions

@@ -35,17 +35,32 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src,
 		goto byte_at_a_time;
 
 	while (max >= sizeof(unsigned long)) {
-		unsigned long c, data;
+		unsigned long c, data, mask;
 
 		/* Fall back to byte-at-a-time if we get a page fault */
 		unsafe_get_user(c, (unsigned long __user *)(src+res), byte_at_a_time);
 
-		*(unsigned long *)(dst+res) = c;
+		/*
+		 * Note that we mask out the bytes following the NUL. This is
+		 * important to do because string oblivious code may read past
+		 * the NUL. For those routines, we don't want to give them
+		 * potentially random bytes after the NUL in `src`.
+		 *
+		 * One example of such code is BPF map keys. BPF treats map keys
+		 * as an opaque set of bytes. Without the post-NUL mask, any BPF
+		 * maps keyed by strings returned from strncpy_from_user() may
+		 * have multiple entries for semantically identical strings.
+		 */
 		if (has_zero(c, &data, &constants)) {
 			data = prep_zero_mask(c, data, &constants);
 			data = create_zero_mask(data);
+			mask = zero_bytemask(data);
+			*(unsigned long *)(dst+res) = c & mask;
 			return res + find_zero(data);
 		}
+
+		*(unsigned long *)(dst+res) = c;
+
 		res += sizeof(unsigned long);
 		max -= sizeof(unsigned long);
 	}
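A standalone sketch of what the new masking does to one 8-byte word, using a little-endian SWAR approximation of the has_zero()/zero_bytemask() helpers (the real per-architecture versions live in <asm/word-at-a-time.h>; this is an illustration, not the kernel implementation):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Word read from userspace: "ab\0" followed by 5 junk bytes. */
	const char src[8] = { 'a', 'b', '\0', 'J', 'U', 'N', 'K', '!' };
	uint64_t c;
	memcpy(&c, src, 8);

	/* Classic SWAR zero-byte detection (little endian): the high bit
	 * of each byte of zbits is set at the first NUL byte. */
	uint64_t ones  = 0x0101010101010101ULL;
	uint64_t highs = 0x8080808080808080ULL;
	uint64_t zbits = (c - ones) & ~c & highs;

	/* Keep bytes up to and including the first NUL, zero the rest. */
	uint64_t mask = zbits ? ((zbits & -zbits) << 1) - 1 : ~0ULL;

	/* Prints 0000000000006261: 'a', 'b', then all zeroes, so two reads
	 * of semantically identical strings are bit-identical buffers. */
	printf("stored word: %016llx\n", (unsigned long long)(c & mask));
	return 0;
}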

net/core/skmsg.c

Lines changed: 74 additions & 13 deletions

@@ -170,10 +170,12 @@ static int sk_msg_free_elem(struct sock *sk, struct sk_msg *msg, u32 i,
 	struct scatterlist *sge = sk_msg_elem(msg, i);
 	u32 len = sge->length;
 
-	if (charge)
-		sk_mem_uncharge(sk, len);
-	if (!msg->skb)
+	/* When the skb owns the memory we free it from consume_skb path. */
+	if (!msg->skb) {
+		if (charge)
+			sk_mem_uncharge(sk, len);
 		put_page(sg_page(sge));
+	}
 	memset(sge, 0, sizeof(*sge));
 	return len;
 }
@@ -397,28 +399,45 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
 }
 EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
 
-static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
+						  struct sk_buff *skb)
 {
-	struct sock *sk = psock->sk;
-	int copied = 0, num_sge;
 	struct sk_msg *msg;
 
+	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
+		return NULL;
+
+	if (!sk_rmem_schedule(sk, skb, skb->truesize))
+		return NULL;
+
 	msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
 	if (unlikely(!msg))
-		return -EAGAIN;
-	if (!sk_rmem_schedule(sk, skb, skb->len)) {
-		kfree(msg);
-		return -EAGAIN;
-	}
+		return NULL;
 
 	sk_msg_init(msg);
+	return msg;
+}
+
+static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
+					struct sk_psock *psock,
+					struct sock *sk,
+					struct sk_msg *msg)
+{
+	int num_sge, copied;
+
+	/* skb linearize may fail with ENOMEM, but lets simply try again
+	 * later if this happens. Under memory pressure we don't want to
+	 * drop the skb. We need to linearize the skb so that the mapping
+	 * in skb_to_sgvec can not error.
+	 */
+	if (skb_linearize(skb))
+		return -EAGAIN;
 	num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
 	if (unlikely(num_sge < 0)) {
 		kfree(msg);
 		return num_sge;
 	}
 
-	sk_mem_charge(sk, skb->len);
 	copied = skb->len;
 	msg->sg.start = 0;
 	msg->sg.size = copied;
@@ -430,6 +449,48 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
 	return copied;
 }
 
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb);
+
+static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+{
+	struct sock *sk = psock->sk;
+	struct sk_msg *msg;
+
+	/* If we are receiving on the same sock skb->sk is already assigned,
+	 * skip memory accounting and owner transition seeing it already set
+	 * correctly.
+	 */
+	if (unlikely(skb->sk == sk))
+		return sk_psock_skb_ingress_self(psock, skb);
+	msg = sk_psock_create_ingress_msg(sk, skb);
+	if (!msg)
+		return -EAGAIN;
+
+	/* This will transition ownership of the data from the socket where
+	 * the BPF program was run initiating the redirect to the socket
+	 * we will eventually receive this data on. The data will be released
+	 * from skb_consume found in __tcp_bpf_recvmsg() after its been copied
+	 * into user buffers.
+	 */
+	skb_set_owner_r(skb, sk);
+	return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+}
+
+/* Puts an skb on the ingress queue of the socket already assigned to the
+ * skb. In this case we do not need to check memory limits or skb_set_owner_r
+ * because the skb is already accounted for here.
+ */
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb)
+{
+	struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
+	struct sock *sk = psock->sk;
+
+	if (unlikely(!msg))
+		return -EAGAIN;
+	sk_msg_init(msg);
+	return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+}
+
 static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
 			       u32 off, u32 len, bool ingress)
 {
@@ -789,7 +850,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
 			 * retrying later from workqueue.
 			 */
 			if (skb_queue_empty(&psock->ingress_skb)) {
-				err = sk_psock_skb_ingress(psock, skb);
+				err = sk_psock_skb_ingress_self(psock, skb);
 			}
 			if (err < 0) {
 				skb_queue_tail(&psock->ingress_skb, skb);
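A rough userspace analogue of the two accounting changes in sk_psock_create_ingress_msg() — observing the receive-buffer limit before queueing, and charging the buffer's true footprint (skb->truesize) rather than just the payload length — with illustrative names only, not kernel API:

#include <stdbool.h>
#include <stddef.h>

struct rcv_queue {
	size_t rmem_alloc;	/* bytes currently charged */
	size_t rcvbuf;		/* SO_RCVBUF-style budget */
};

#define BUF_OVERHEAD 256	/* stand-in for an skb's non-payload footprint */

static bool rcv_queue_charge(struct rcv_queue *q, size_t len)
{
	size_t truesize = len + BUF_OVERHEAD;

	/* Charging only `len` would let many small buffers overshoot the
	 * budget; charging truesize keeps the accounting honest. On
	 * failure the caller retries later (the -EAGAIN path above). */
	if (q->rmem_alloc + truesize > q->rcvbuf)
		return false;
	q->rmem_alloc += truesize;
	return true;
}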

net/ipv4/tcp_bpf.c

Lines changed: 11 additions & 7 deletions

@@ -15,8 +15,8 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 {
 	struct iov_iter *iter = &msg->msg_iter;
 	int peek = flags & MSG_PEEK;
-	int i, ret, copied = 0;
 	struct sk_msg *msg_rx;
+	int i, copied = 0;
 
 	msg_rx = list_first_entry_or_null(&psock->ingress_msg,
 					  struct sk_msg, list);
@@ -37,17 +37,16 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 			page = sg_page(sge);
 			if (copied + copy > len)
 				copy = len - copied;
-			ret = copy_page_to_iter(page, sge->offset, copy, iter);
-			if (ret != copy) {
-				msg_rx->sg.start = i;
-				return -EFAULT;
-			}
+			copy = copy_page_to_iter(page, sge->offset, copy, iter);
+			if (!copy)
+				return copied ? copied : -EFAULT;
 
 			copied += copy;
 			if (likely(!peek)) {
 				sge->offset += copy;
 				sge->length -= copy;
-				sk_mem_uncharge(sk, copy);
+				if (!msg_rx->skb)
+					sk_mem_uncharge(sk, copy);
 				msg_rx->sg.size -= copy;
 
 				if (!sge->length) {
@@ -56,6 +55,11 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 					put_page(page);
 				}
 			} else {
+				/* Lets not optimize peek case if copy_page_to_iter
+				 * didn't copy the entire length lets just break.
+				 */
+				if (copy != sge->length)
+					return copied;
 				sk_msg_iter_var_next(i);
 			}
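The recvmsg change treats a short copy_page_to_iter() as progress rather than an error, failing only when nothing at all was copied. The same pattern in a plain userspace write loop (illustrative sketch, not kernel API):

#include <errno.h>
#include <unistd.h>

static ssize_t drain(int fd, const char *buf, size_t len)
{
	size_t copied = 0;

	while (copied < len) {
		ssize_t n = write(fd, buf + copied, len - copied);

		if (n > 0) {			/* partial progress counts */
			copied += n;
			continue;
		}
		if (n < 0 && errno == EINTR)	/* transient: retry */
			continue;
		/* nothing transferred: report progress made, else fail */
		return copied ? (ssize_t)copied : -1;
	}
	return copied;
}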

tools/bpf/bpftool/net.c

Lines changed: 9 additions & 9 deletions

@@ -578,35 +578,35 @@ static int do_attach(int argc, char **argv)
 
 	ifindex = net_parse_dev(&argc, &argv);
 	if (ifindex < 1) {
-		close(progfd);
-		return -EINVAL;
+		err = -EINVAL;
+		goto cleanup;
 	}
 
 	if (argc) {
 		if (is_prefix(*argv, "overwrite")) {
 			overwrite = true;
 		} else {
 			p_err("expected 'overwrite', got: '%s'?", *argv);
-			close(progfd);
-			return -EINVAL;
+			err = -EINVAL;
+			goto cleanup;
 		}
 	}
 
 	/* attach xdp prog */
 	if (is_prefix("xdp", attach_type_strings[attach_type]))
 		err = do_attach_detach_xdp(progfd, attach_type, ifindex,
 					   overwrite);
-
-	if (err < 0) {
+	if (err) {
 		p_err("interface %s attach failed: %s",
 		      attach_type_strings[attach_type], strerror(-err));
-		return err;
+		goto cleanup;
 	}
 
 	if (json_output)
 		jsonw_null(json_wtr);
-
-	return 0;
+cleanup:
+	close(progfd);
+	return err;
 }
 
 static int do_detach(int argc, char **argv)
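The bpftool fix funnels every exit path through a single cleanup label so progfd is closed exactly once and never leaks on early errors. Unlike the fail_function fix above, which is about not releasing what isn't held, this one is about always releasing what is. A compact userspace sketch of the single-exit pattern (names illustrative):

#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

static int attach_prog(const char *path, int ifindex)
{
	int err = 0;
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return -errno;		/* nothing acquired yet: plain return */

	if (ifindex < 1) {		/* failure after acquiring the fd */
		err = -EINVAL;
		goto cleanup;		/* must not skip close() below */
	}

	/* ... attach work that may set err ... */

cleanup:
	close(fd);			/* every path closes fd exactly once */
	return err;
}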

tools/lib/bpf/Makefile

Lines changed: 2 additions & 0 deletions

@@ -146,6 +146,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
 			   awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
 			   sort -u | wc -l)
 VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
+			      sed 's/\[.*\]//' | \
 			      awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
 			      grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
 
@@ -214,6 +215,7 @@ check_abi: $(OUTPUT)libbpf.so
 		    awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \
 		    sort -u > $(OUTPUT)libbpf_global_syms.tmp; \
 		readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
+		sed 's/\[.*\]//' | \
 		    awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \
 		    grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \
 		    sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \
