
Commit c85e559

Daniel Borkmann says:

====================
pull-request: bpf 2023-12-06

We've added 4 non-merge commits during the last 6 day(s) which contain
a total of 7 files changed, 185 insertions(+), 55 deletions(-).

The main changes are:

1) Fix race found by syzkaller on prog_array_map_poke_run when a BPF
   program's kallsym symbols were still missing, from Jiri Olsa.

2) Fix BPF verifier's branch offset comparison for BPF_JMP32 | BPF_JA,
   from Yonghong Song.

3) Fix xsk's poll handling to only set mask on bound xsk sockets,
   from Yewon Choi.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  selftests/bpf: Add test for early update in prog_array_map_poke_run
  bpf: Fix prog_array_map_poke_run map poke update
  xsk: Skip polling event check for unbound socket
  bpf: Fix a verifier bug due to incorrect branch offset comparison with cpu=v4
====================

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
2 parents 0ad722b + ffed24e commit c85e559

7 files changed: 185 insertions(+), 55 deletions(-)

arch/x86/net/bpf_jit_comp.c

Lines changed: 46 additions & 0 deletions
@@ -3025,3 +3025,49 @@ void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp
 #endif
 	WARN(1, "verification of programs using bpf_throw should have failed\n");
 }
+
+void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
+			       struct bpf_prog *new, struct bpf_prog *old)
+{
+	u8 *old_addr, *new_addr, *old_bypass_addr;
+	int ret;
+
+	old_bypass_addr = old ? NULL : poke->bypass_addr;
+	old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
+	new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;
+
+	/*
+	 * On program loading or teardown, the program's kallsym entry
+	 * might not be in place, so we use __bpf_arch_text_poke to skip
+	 * the kallsyms check.
+	 */
+	if (new) {
+		ret = __bpf_arch_text_poke(poke->tailcall_target,
+					   BPF_MOD_JUMP,
+					   old_addr, new_addr);
+		BUG_ON(ret < 0);
+		if (!old) {
+			ret = __bpf_arch_text_poke(poke->tailcall_bypass,
+						   BPF_MOD_JUMP,
+						   poke->bypass_addr,
+						   NULL);
+			BUG_ON(ret < 0);
+		}
+	} else {
+		ret = __bpf_arch_text_poke(poke->tailcall_bypass,
+					   BPF_MOD_JUMP,
+					   old_bypass_addr,
+					   poke->bypass_addr);
+		BUG_ON(ret < 0);
+		/* let other CPUs finish the execution of program
+		 * so that it will not possible to expose them
+		 * to invalid nop, stack unwind, nop state
+		 */
+		if (!ret)
+			synchronize_rcu();
+		ret = __bpf_arch_text_poke(poke->tailcall_target,
+					   BPF_MOD_JUMP,
+					   old_addr, NULL);
+		BUG_ON(ret < 0);
+	}
+}
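
Context for the __bpf_arch_text_poke() calls above: the exported bpf_arch_text_poke() wrapper in the same file refuses to patch addresses it cannot attribute to kernel or BPF text, and that attribution relies on the program's kallsym entry, which is exactly what may still be missing during load or teardown. A rough sketch of that wrapper, reconstructed from memory and simplified (IBT/ENDBR handling and other details omitted, so it may differ from the exact source):

int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
		       void *old_addr, void *new_addr)
{
	/*
	 * Reject addresses outside kernel/BPF text. While a program's
	 * kallsym entry is not yet (or no longer) registered, this check
	 * can return -EINVAL even for a valid JIT image -- which is why
	 * bpf_arch_poke_desc_update() calls __bpf_arch_text_poke()
	 * directly and skips it.
	 */
	if (!is_kernel_text((long)ip) &&
	    !is_bpf_text_address((long)ip))
		return -EINVAL;

	return __bpf_arch_text_poke(ip, t, old_addr, new_addr);
}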

include/linux/bpf.h

Lines changed: 3 additions & 0 deletions
@@ -3175,6 +3175,9 @@ enum bpf_text_poke_type {
 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
		       void *addr1, void *addr2);
 
+void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
+			       struct bpf_prog *new, struct bpf_prog *old);
+
 void *bpf_arch_text_copy(void *dst, void *src, size_t len);
 int bpf_arch_text_invalidate(void *dst, size_t len);
 

kernel/bpf/arraymap.c

Lines changed: 10 additions & 48 deletions
@@ -1012,11 +1012,16 @@ static void prog_array_map_poke_untrack(struct bpf_map *map,
 	mutex_unlock(&aux->poke_mutex);
 }
 
+void __weak bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
+				      struct bpf_prog *new, struct bpf_prog *old)
+{
+	WARN_ON_ONCE(1);
+}
+
 static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
 				    struct bpf_prog *old,
 				    struct bpf_prog *new)
 {
-	u8 *old_addr, *new_addr, *old_bypass_addr;
 	struct prog_poke_elem *elem;
 	struct bpf_array_aux *aux;
 
@@ -1025,7 +1030,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
 
 	list_for_each_entry(elem, &aux->poke_progs, list) {
 		struct bpf_jit_poke_descriptor *poke;
-		int i, ret;
+		int i;
 
 		for (i = 0; i < elem->aux->size_poke_tab; i++) {
 			poke = &elem->aux->poke_tab[i];
@@ -1044,21 +1049,10 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
 			 * activated, so tail call updates can arrive from here
 			 * while JIT is still finishing its final fixup for
 			 * non-activated poke entries.
-			 * 3) On program teardown, the program's kallsym entry gets
-			 *    removed out of RCU callback, but we can only untrack
-			 *    from sleepable context, therefore bpf_arch_text_poke()
-			 *    might not see that this is in BPF text section and
-			 *    bails out with -EINVAL. As these are unreachable since
-			 *    RCU grace period already passed, we simply skip them.
-			 * 4) Also programs reaching refcount of zero while patching
+			 * 3) Also programs reaching refcount of zero while patching
 			 *    is in progress is okay since we're protected under
 			 *    poke_mutex and untrack the programs before the JIT
-			 *    buffer is freed. When we're still in the middle of
-			 *    patching and suddenly kallsyms entry of the program
-			 *    gets evicted, we just skip the rest which is fine due
-			 *    to point 3).
-			 * 5) Any other error happening below from bpf_arch_text_poke()
-			 *    is a unexpected bug.
+			 *    buffer is freed.
 			 */
 			if (!READ_ONCE(poke->tailcall_target_stable))
 				continue;
@@ -1068,39 +1062,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
 			    poke->tail_call.key != key)
 				continue;
 
-			old_bypass_addr = old ? NULL : poke->bypass_addr;
-			old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
-			new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;
-
-			if (new) {
-				ret = bpf_arch_text_poke(poke->tailcall_target,
-							 BPF_MOD_JUMP,
-							 old_addr, new_addr);
-				BUG_ON(ret < 0 && ret != -EINVAL);
-				if (!old) {
-					ret = bpf_arch_text_poke(poke->tailcall_bypass,
-								 BPF_MOD_JUMP,
-								 poke->bypass_addr,
-								 NULL);
-					BUG_ON(ret < 0 && ret != -EINVAL);
-				}
-			} else {
-				ret = bpf_arch_text_poke(poke->tailcall_bypass,
-							 BPF_MOD_JUMP,
-							 old_bypass_addr,
-							 poke->bypass_addr);
-				BUG_ON(ret < 0 && ret != -EINVAL);
-				/* let other CPUs finish the execution of program
-				 * so that it will not possible to expose them
-				 * to invalid nop, stack unwind, nop state
-				 */
-				if (!ret)
-					synchronize_rcu();
-				ret = bpf_arch_text_poke(poke->tailcall_target,
-							 BPF_MOD_JUMP,
-							 old_addr, NULL);
-				BUG_ON(ret < 0 && ret != -EINVAL);
-			}
+			bpf_arch_poke_desc_update(poke, new, old);
 		}
 	}
 }
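
The generic/arch split above relies on the kernel's weak-symbol convention: the __weak stub in arraymap.c is only used when no architecture supplies its own bpf_arch_poke_desc_update(), as the x86 JIT now does. A minimal standalone illustration of that pattern, with hypothetical names, buildable with any GCC/Clang toolchain (not kernel code):

#include <stdio.h>

/* Weak default: picked by the linker only if no strong definition exists. */
__attribute__((weak)) void arch_hook(void)
{
	printf("generic fallback\n");
}

/*
 * An arch-specific translation unit would define the same symbol without
 * the weak attribute; the linker then prefers that strong definition.
 */

int main(void)
{
	arch_hook();
	return 0;
}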

kernel/bpf/core.c

Lines changed: 8 additions & 4 deletions
@@ -371,14 +371,18 @@ static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old,
 static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old,
 				s32 end_new, s32 curr, const bool probe_pass)
 {
-	const s32 off_min = S16_MIN, off_max = S16_MAX;
+	s64 off_min, off_max, off;
 	s32 delta = end_new - end_old;
-	s32 off;
 
-	if (insn->code == (BPF_JMP32 | BPF_JA))
+	if (insn->code == (BPF_JMP32 | BPF_JA)) {
 		off = insn->imm;
-	else
+		off_min = S32_MIN;
+		off_max = S32_MAX;
+	} else {
 		off = insn->off;
+		off_min = S16_MIN;
+		off_max = S16_MAX;
+	}
 
 	if (curr < pos && curr + off + 1 >= end_old)
 		off += delta;
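
The reason for the wider bounds: with cpu=v4, a BPF_JMP32 | BPF_JA ("gotol") instruction carries its branch target in the 32-bit imm field rather than the 16-bit off field, so clamping the adjusted offset to S16_MIN/S16_MAX would wrongly flag valid long jumps when instructions are patched in or out. A hedged sketch of how such an instruction might be constructed from userspace headers (the offset value is hypothetical, not taken from this patch):

#include <stdio.h>
#include <linux/bpf.h>

int main(void)
{
	/* A long unconditional jump ("gotol"): the target offset lives in imm,
	 * not in the 16-bit off field used by classic BPF_JMP | BPF_JA.
	 */
	struct bpf_insn gotol = {
		.code = BPF_JMP32 | BPF_JA,
		.imm  = 70000,	/* hypothetical offset, larger than S16_MAX (32767) */
	};

	printf("code=0x%02x imm=%d\n", gotol.code, gotol.imm);
	return 0;
}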

net/xdp/xsk.c

Lines changed: 2 additions & 3 deletions
@@ -947,7 +947,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
 
 	rcu_read_lock();
 	if (xsk_check_common(xs))
-		goto skip_tx;
+		goto out;
 
 	pool = xs->pool;
 
@@ -959,12 +959,11 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
 			xsk_generic_xmit(sk);
 	}
 
-skip_tx:
 	if (xs->rx && !xskq_prod_is_empty(xs->rx))
 		mask |= EPOLLIN | EPOLLRDNORM;
 	if (xs->tx && xsk_tx_writeable(xs))
 		mask |= EPOLLOUT | EPOLLWRNORM;
-
+out:
 	rcu_read_unlock();
 	return mask;
 }
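
Seen from userspace: before this change, poll() on an AF_XDP socket that was never bound still fell through to the rx/tx ring checks via skip_tx; with the jump to out, an unbound socket reports no EPOLLIN/EPOLLOUT readiness at all. A hedged, standalone sketch of exercising that path (requires a kernel with AF_XDP support and typically CAP_NET_RAW; the AF_XDP fallback define is only for older libc headers; not part of the patch):

#include <poll.h>
#include <stdio.h>
#include <sys/socket.h>

#ifndef AF_XDP
#define AF_XDP 44	/* fallback for older libc headers */
#endif

int main(void)
{
	struct pollfd pfd = { .events = POLLIN | POLLOUT };
	int n;

	pfd.fd = socket(AF_XDP, SOCK_RAW, 0);
	if (pfd.fd < 0) {
		perror("socket(AF_XDP)");
		return 1;
	}

	/* Deliberately no bind() to an ifindex/queue: the socket stays unbound. */
	n = poll(&pfd, 1, 0);
	printf("poll() = %d, revents = 0x%x\n", n, pfd.revents);
	return 0;
}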

tools/testing/selftests/bpf/prog_tests/tailcalls.c

Lines changed: 84 additions & 0 deletions
@@ -1,6 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <unistd.h>
 #include <test_progs.h>
 #include <network_helpers.h>
+#include "tailcall_poke.skel.h"
+
 
 /* test_tailcall_1 checks basic functionality by patching multiple locations
  * in a single program for a single tail call slot with nop->jmp, jmp->nop
@@ -1105,6 +1108,85 @@ static void test_tailcall_bpf2bpf_fentry_entry(void)
 	bpf_object__close(tgt_obj);
 }
 
+#define JMP_TABLE "/sys/fs/bpf/jmp_table"
+
+static int poke_thread_exit;
+
+static void *poke_update(void *arg)
+{
+	__u32 zero = 0, prog1_fd, prog2_fd, map_fd;
+	struct tailcall_poke *call = arg;
+
+	map_fd = bpf_map__fd(call->maps.jmp_table);
+	prog1_fd = bpf_program__fd(call->progs.call1);
+	prog2_fd = bpf_program__fd(call->progs.call2);
+
+	while (!poke_thread_exit) {
+		bpf_map_update_elem(map_fd, &zero, &prog1_fd, BPF_ANY);
+		bpf_map_update_elem(map_fd, &zero, &prog2_fd, BPF_ANY);
+	}
+
+	return NULL;
+}
+
+/*
+ * We are trying to hit prog array update during another program load
+ * that shares the same prog array map.
+ *
+ * For that we share the jmp_table map between two skeleton instances
+ * by pinning the jmp_table to same path. Then first skeleton instance
+ * periodically updates jmp_table in 'poke update' thread while we load
+ * the second skeleton instance in the main thread.
+ */
+static void test_tailcall_poke(void)
+{
+	struct tailcall_poke *call, *test;
+	int err, cnt = 10;
+	pthread_t thread;
+
+	unlink(JMP_TABLE);
+
+	call = tailcall_poke__open_and_load();
+	if (!ASSERT_OK_PTR(call, "tailcall_poke__open"))
+		return;
+
+	err = bpf_map__pin(call->maps.jmp_table, JMP_TABLE);
+	if (!ASSERT_OK(err, "bpf_map__pin"))
+		goto out;
+
+	err = pthread_create(&thread, NULL, poke_update, call);
+	if (!ASSERT_OK(err, "new toggler"))
+		goto out;
+
+	while (cnt--) {
+		test = tailcall_poke__open();
+		if (!ASSERT_OK_PTR(test, "tailcall_poke__open"))
+			break;
+
+		err = bpf_map__set_pin_path(test->maps.jmp_table, JMP_TABLE);
+		if (!ASSERT_OK(err, "bpf_map__pin")) {
+			tailcall_poke__destroy(test);
+			break;
+		}
+
+		bpf_program__set_autoload(test->progs.test, true);
+		bpf_program__set_autoload(test->progs.call1, false);
+		bpf_program__set_autoload(test->progs.call2, false);
+
+		err = tailcall_poke__load(test);
+		tailcall_poke__destroy(test);
+		if (!ASSERT_OK(err, "tailcall_poke__load"))
+			break;
+	}
+
+	poke_thread_exit = 1;
+	ASSERT_OK(pthread_join(thread, NULL), "pthread_join");
+
+out:
+	bpf_map__unpin(call->maps.jmp_table, JMP_TABLE);
+	tailcall_poke__destroy(call);
+}
+
 void test_tailcalls(void)
 {
 	if (test__start_subtest("tailcall_1"))
@@ -1139,4 +1221,6 @@ void test_tailcalls(void)
 		test_tailcall_bpf2bpf_fentry_fexit();
 	if (test__start_subtest("tailcall_bpf2bpf_fentry_entry"))
 		test_tailcall_bpf2bpf_fentry_entry();
+	if (test__start_subtest("tailcall_poke"))
+		test_tailcall_poke();
 }
tools/testing/selftests/bpf/progs/tailcall_poke.c

Lines changed: 32 additions & 0 deletions

@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+SEC("?fentry/bpf_fentry_test1")
+int BPF_PROG(test, int a)
+{
+	bpf_tail_call_static(ctx, &jmp_table, 0);
+	return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(call1, int a)
+{
+	return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(call2, int a)
+{
+	return 0;
+}
