From ccbacf0d3d2a2f470cbe5734298809008a047554 Mon Sep 17 00:00:00 2001
From: Mykyta Yatsenko
Date: Tue, 23 Sep 2025 14:49:13 +0100
Subject: [PATCH 01/10] selftests/bpf: remove unnecessary kfunc prototypes

Remove unnecessary kfunc prototypes from test programs; these are
provided by vmlinux.h.

Signed-off-by: Mykyta Yatsenko
---
 tools/testing/selftests/bpf/progs/ip_check_defrag.c        | 5 -----
 tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c | 5 -----
 2 files changed, 10 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/ip_check_defrag.c b/tools/testing/selftests/bpf/progs/ip_check_defrag.c
index 645b2c9f7867c..0e87ad1ebcfa5 100644
--- a/tools/testing/selftests/bpf/progs/ip_check_defrag.c
+++ b/tools/testing/selftests/bpf/progs/ip_check_defrag.c
@@ -12,11 +12,6 @@
 #define IP_OFFSET 0x1FFF
 #define NEXTHDR_FRAGMENT 44
 
-extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
-			       struct bpf_dynptr *ptr__uninit) __ksym;
-extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset,
-			      void *buffer, uint32_t buffer__sz) __ksym;
-
 volatile int shootdowns = 0;
 
 static bool is_frag_v4(struct iphdr *iph)
diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
index ab9f9f2620edc..e2cbc5bda65ea 100644
--- a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
@@ -79,11 +79,6 @@ int with_invalid_ctx_access_test5(struct bpf_nf_ctx *ctx)
 	return NF_ACCEPT;
 }
 
-extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
-			       struct bpf_dynptr *ptr__uninit) __ksym;
-extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset,
-			      void *buffer, uint32_t buffer__sz) __ksym;
-
 SEC("netfilter")
 __description("netfilter test prog with skb and state read access")
 __success __failure_unpriv

From a3cafa6aa3b0a9092eade8a0ca4fe3548d49c803 Mon Sep 17 00:00:00 2001
From: Mykyta Yatsenko
Date: Tue, 23 Sep 2025 15:24:38 +0100
Subject: [PATCH 02/10] bpf: widen dynptr size/offset to 64 bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Dynptr currently caps size and offset at 24 bits, which isn't
sufficient for file-backed use cases; even 32 bits can be limiting.
Refactor dynptr helpers/kfuncs to use 64-bit size and offset, ensuring
consistency across the APIs. This change does not affect the internals
of xdp, skb or other dynptrs, which continue to behave as before. The
widening enables large-file access support via dynptr, implemented in
the next patches.
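For illustration, a minimal sketch of what the widened interface lets a
BPF program express (hypothetical test program, assuming the updated
declarations in bpf_kfuncs.h from this patch; with the existing dynptr
types the DYNPTR_MAX_SIZE runtime check still applies, and only the
file dynptr added later in this series actually accepts large offsets):

	#include <vmlinux.h>
	#include <bpf/bpf_helpers.h>
	#include "bpf_kfuncs.h"

	char _license[] SEC("license") = "GPL";

	char payload[512];

	SEC("raw_tp/sys_enter")
	int probe_64bit_dynptr(void *ctx)
	{
		struct bpf_dynptr dptr;
		__u64 size;

		if (bpf_dynptr_from_mem(payload, sizeof(payload), 0, &dptr))
			return 0;

		/* start/end are now __u64; they are small here, but the
		 * same call accepts values beyond 4 GiB for dynptr types
		 * backed by sufficiently large objects
		 */
		if (bpf_dynptr_adjust(&dptr, 16, 272))
			return 0;

		size = bpf_dynptr_size(&dptr); /* returns __u64 now: 256 */
		bpf_printk("dynptr size: %llu", size);
		return 0;
	}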
Signed-off-by: Mykyta Yatsenko --- include/linux/bpf.h | 20 +++---- kernel/bpf/helpers.c | 58 +++++++++---------- kernel/trace/bpf_trace.c | 46 +++++++-------- tools/testing/selftests/bpf/bpf_kfuncs.h | 12 ++-- .../selftests/bpf/progs/dynptr_success.c | 12 ++-- 5 files changed, 74 insertions(+), 74 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ea2ed6771cc60..9ab38eaa6af93 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1380,19 +1380,19 @@ enum bpf_dynptr_type { BPF_DYNPTR_TYPE_SKB_META, }; -int bpf_dynptr_check_size(u32 size); -u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); -const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len); -void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len); +int bpf_dynptr_check_size(u64 size); +u64 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); +const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u64 len); +void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u64 len); bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr); -int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, - void *src, u32 len, u64 flags); -void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset, - void *buffer__opt, u32 buffer__szk); +int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u64 offset, + void *src, u64 len, u64 flags); +void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset, + void *buffer__opt, u64 buffer__szk); -static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len) +static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u64 offset, u64 len) { - u32 size = __bpf_dynptr_size(ptr); + u64 size = __bpf_dynptr_size(ptr); if (len > size || offset > size - len) return -E2BIG; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index c9fab9a356dfc..44f4a561f8453 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1685,19 +1685,19 @@ static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *pt return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT; } -u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr) +u64 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr) { return ptr->size & DYNPTR_SIZE_MASK; } -static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u32 new_size) +static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u64 new_size) { u32 metadata = ptr->size & ~DYNPTR_SIZE_MASK; - ptr->size = new_size | metadata; + ptr->size = (u32)new_size | metadata; } -int bpf_dynptr_check_size(u32 size) +int bpf_dynptr_check_size(u64 size) { return size > DYNPTR_MAX_SIZE ? 
-E2BIG : 0; } @@ -1751,8 +1751,8 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = { .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT | MEM_WRITE, }; -static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *src, - u32 offset, u64 flags) +static int __bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr_kern *src, + u64 offset, u64 flags) { enum bpf_dynptr_type type; int err; @@ -1788,8 +1788,8 @@ static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *s } } -BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src, - u32, offset, u64, flags) +BPF_CALL_5(bpf_dynptr_read, void *, dst, u64, len, const struct bpf_dynptr_kern *, src, + u64, offset, u64, flags) { return __bpf_dynptr_read(dst, len, src, offset, flags); } @@ -1805,8 +1805,8 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = { .arg5_type = ARG_ANYTHING, }; -int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src, - u32 len, u64 flags) +int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u64 offset, void *src, + u64 len, u64 flags) { enum bpf_dynptr_type type; int err; @@ -2681,12 +2681,12 @@ __bpf_kfunc struct task_struct *bpf_task_from_vpid(s32 vpid) * provided buffer, with its contents containing the data, if unable to obtain * direct pointer) */ -__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset, - void *buffer__opt, u32 buffer__szk) +__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u64 offset, + void *buffer__opt, u64 buffer__szk) { const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; enum bpf_dynptr_type type; - u32 len = buffer__szk; + u64 len = buffer__szk; int err; if (!ptr->data) @@ -2768,8 +2768,8 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset, * provided buffer, with its contents containing the data, if unable to obtain * direct pointer) */ -__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset, - void *buffer__opt, u32 buffer__szk) +__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset, + void *buffer__opt, u64 buffer__szk) { const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; @@ -2801,10 +2801,10 @@ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset, return bpf_dynptr_slice(p, offset, buffer__opt, buffer__szk); } -__bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u32 start, u32 end) +__bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u64 start, u64 end) { struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; - u32 size; + u64 size; if (!ptr->data || start > end) return -EINVAL; @@ -2837,7 +2837,7 @@ __bpf_kfunc bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *p) return __bpf_dynptr_is_rdonly(ptr); } -__bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr *p) +__bpf_kfunc u64 bpf_dynptr_size(const struct bpf_dynptr *p) { struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; @@ -2874,14 +2874,14 @@ __bpf_kfunc int bpf_dynptr_clone(const struct bpf_dynptr *p, * Copies data from source dynptr to destination dynptr. * Returns 0 on success; negative error, otherwise. 
*/ -__bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off, - struct bpf_dynptr *src_ptr, u32 src_off, u32 size) +__bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u64 dst_off, + struct bpf_dynptr *src_ptr, u64 src_off, u64 size) { struct bpf_dynptr_kern *dst = (struct bpf_dynptr_kern *)dst_ptr; struct bpf_dynptr_kern *src = (struct bpf_dynptr_kern *)src_ptr; void *src_slice, *dst_slice; char buf[256]; - u32 off; + u64 off; src_slice = bpf_dynptr_slice(src_ptr, src_off, NULL, size); dst_slice = bpf_dynptr_slice_rdwr(dst_ptr, dst_off, NULL, size); @@ -2903,7 +2903,7 @@ __bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off, off = 0; while (off < size) { - u32 chunk_sz = min_t(u32, sizeof(buf), size - off); + u64 chunk_sz = min_t(u64, sizeof(buf), size - off); int err; err = __bpf_dynptr_read(buf, chunk_sz, src, src_off + off, 0); @@ -2929,10 +2929,10 @@ __bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off, * at @offset with the constant byte @val. * Returns 0 on success; negative error, otherwise. */ - __bpf_kfunc int bpf_dynptr_memset(struct bpf_dynptr *p, u32 offset, u32 size, u8 val) - { +__bpf_kfunc int bpf_dynptr_memset(struct bpf_dynptr *p, u64 offset, u64 size, u8 val) +{ struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; - u32 chunk_sz, write_off; + u64 chunk_sz, write_off; char buf[256]; void* slice; int err; @@ -2951,11 +2951,11 @@ __bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off, return err; /* Non-linear data under the dynptr, write from a local buffer */ - chunk_sz = min_t(u32, sizeof(buf), size); + chunk_sz = min_t(u64, sizeof(buf), size); memset(buf, val, chunk_sz); for (write_off = 0; write_off < size; write_off += chunk_sz) { - chunk_sz = min_t(u32, sizeof(buf), size - write_off); + chunk_sz = min_t(u64, sizeof(buf), size - write_off); err = __bpf_dynptr_write(ptr, offset + write_off, buf, chunk_sz, 0); if (err) return err; @@ -4414,7 +4414,7 @@ late_initcall(kfunc_init); /* Get a pointer to dynptr data up to len bytes for read only access. If * the dynptr doesn't have continuous data up to len bytes, return NULL. */ -const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len) +const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u64 len) { const struct bpf_dynptr *p = (struct bpf_dynptr *)ptr; @@ -4425,7 +4425,7 @@ const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len) * the dynptr doesn't have continuous data up to len bytes, or the dynptr * is read only, return NULL. 
*/ -void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len) +void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u64 len) { if (__bpf_dynptr_is_rdonly(ptr)) return NULL; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 8f23f5273babf..7cc4f2e05ed2c 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -3372,13 +3372,13 @@ typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struc * direct calls into all the specific callback implementations * (copy_user_data_sleepable, copy_user_data_nofault, and so on) */ -static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 doff, u32 size, +static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u64 doff, u64 size, const void *unsafe_src, copy_fn_t str_copy_fn, struct task_struct *tsk) { struct bpf_dynptr_kern *dst; - u32 chunk_sz, off; + u64 chunk_sz, off; void *dst_slice; int cnt, err; char buf[256]; @@ -3392,7 +3392,7 @@ static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 do return -E2BIG; for (off = 0; off < size; off += chunk_sz - 1) { - chunk_sz = min_t(u32, sizeof(buf), size - off); + chunk_sz = min_t(u64, sizeof(buf), size - off); /* Expect str_copy_fn to return count of copied bytes, including * zero terminator. Next iteration increment off by chunk_sz - 1 to * overwrite NUL. @@ -3409,14 +3409,14 @@ static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 do return off; } -static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u32 doff, - u32 size, const void *unsafe_src, +static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u64 doff, + u64 size, const void *unsafe_src, copy_fn_t copy_fn, struct task_struct *tsk) { struct bpf_dynptr_kern *dst; void *dst_slice; char buf[256]; - u32 off, chunk_sz; + u64 off, chunk_sz; int err; dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size); @@ -3428,7 +3428,7 @@ static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u32 return -E2BIG; for (off = 0; off < size; off += chunk_sz) { - chunk_sz = min_t(u32, sizeof(buf), size - off); + chunk_sz = min_t(u64, sizeof(buf), size - off); err = copy_fn(buf, unsafe_src + off, chunk_sz, tsk); if (err) return err; @@ -3514,58 +3514,58 @@ __bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid return bpf_send_signal_common(sig, type, task, value); } -__bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u32 off, - u32 size, const void __user *unsafe_ptr__ign) +__bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void __user *unsafe_ptr__ign) { return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign, copy_user_data_nofault, NULL); } -__bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u32 off, - u32 size, const void *unsafe_ptr__ign) +__bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void *unsafe_ptr__ign) { return __bpf_dynptr_copy(dptr, off, size, unsafe_ptr__ign, copy_kernel_data_nofault, NULL); } -__bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u32 off, - u32 size, const void __user *unsafe_ptr__ign) +__bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void __user *unsafe_ptr__ign) { return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign, copy_user_str_nofault, NULL); } 
-__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u32 off, - u32 size, const void *unsafe_ptr__ign) +__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void *unsafe_ptr__ign) { return __bpf_dynptr_copy_str(dptr, off, size, unsafe_ptr__ign, copy_kernel_str_nofault, NULL); } -__bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u32 off, - u32 size, const void __user *unsafe_ptr__ign) +__bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void __user *unsafe_ptr__ign) { return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign, copy_user_data_sleepable, NULL); } -__bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u32 off, - u32 size, const void __user *unsafe_ptr__ign) +__bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void __user *unsafe_ptr__ign) { return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign, copy_user_str_sleepable, NULL); } -__bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u32 off, - u32 size, const void __user *unsafe_ptr__ign, +__bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void __user *unsafe_ptr__ign, struct task_struct *tsk) { return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign, copy_user_data_sleepable, tsk); } -__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u32 off, - u32 size, const void __user *unsafe_ptr__ign, +__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void __user *unsafe_ptr__ign, struct task_struct *tsk) { return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign, diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 794d44d19c886..60405477deb61 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -28,8 +28,8 @@ extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags, * Either a direct pointer to the dynptr data or a pointer to the user-provided * buffer if unable to obtain a direct pointer */ -extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset, - void *buffer, __u32 buffer__szk) __ksym __weak; +extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u64 offset, + void *buffer, __u64 buffer__szk) __ksym __weak; /* Description * Obtain a read-write pointer to the dynptr's data @@ -37,13 +37,13 @@ extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset, * Either a direct pointer to the dynptr data or a pointer to the user-provided * buffer if unable to obtain a direct pointer */ -extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset, - void *buffer, __u32 buffer__szk) __ksym __weak; +extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u64 offset, + void *buffer, __u64 buffer__szk) __ksym __weak; -extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym __weak; +extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u64 start, __u64 end) __ksym __weak; extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym __weak; extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym __weak; -extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak; +extern __u64 
bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak;
 extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym __weak;
 
 /* Description
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index 127dea342e5a6..e0d672d93adf8 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -914,8 +914,8 @@ void *user_ptr;
 char expected_str[384];
 __u32 test_len[7] = {0/* placeholder */, 0, 1, 2, 255, 256, 257};
 
-typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u32 off,
-				    u32 size, const void *unsafe_ptr);
+typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u64 off,
+				    u64 size, const void *unsafe_ptr);
 
 /* Returns the offset just before the end of the maximum sized xdp fragment.
  * Any write larger than 32 bytes will be split between 2 fragments.
@@ -1106,16 +1106,16 @@ int test_copy_from_user_str_dynptr(void *ctx)
 	return 0;
 }
 
-static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u32 off,
-					u32 size, const void *unsafe_ptr)
+static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u64 off,
+					u64 size, const void *unsafe_ptr)
 {
 	struct task_struct *task = bpf_get_current_task_btf();
 
 	return bpf_copy_from_user_task_dynptr(dptr, off, size, unsafe_ptr, task);
 }
 
-static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u32 off,
-					    u32 size, const void *unsafe_ptr)
+static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u64 off,
+					    u64 size, const void *unsafe_ptr)
 {
 	struct task_struct *task = bpf_get_current_task_btf();

From b2b3535f7554fdd65ac7141b7eeba3786dbaca2d Mon Sep 17 00:00:00 2001
From: Mykyta Yatsenko
Date: Tue, 23 Sep 2025 15:27:41 +0100
Subject: [PATCH 03/10] lib: extract freader into a separate file

Move the freader implementation from buildid.c into a dedicated
compilation unit, freader.{c,h}. This allows reuse of freader outside
buildid, e.g. for the file dynptr support added later. Includes are
updated and symbols are exported as needed.

No functional change intended.
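For reference, a minimal sketch of how a caller might use the
now-exported interface (hypothetical helper, not part of this patch;
errors are reported through the r.err field that freader_fetch() sets):

	#include <linux/freader.h>
	#include <linux/string.h>

	/* Read @len bytes at @off of @file's page cache into @out.
	 * @may_fault selects whether freader may sleep to page data in.
	 */
	static int read_file_bytes(struct file *file, loff_t off,
				   void *out, u32 len, bool may_fault)
	{
		struct freader r;
		const void *p;
		int err = 0;

		/* @out doubles as the bounce buffer for reads that cross
		 * a folio boundary
		 */
		freader_init_from_file(&r, out, len, file, may_fault);
		p = freader_fetch(&r, off, len);
		if (!p)
			err = r.err;
		else if (p != out)
			/* data fit in one folio, a direct pointer was
			 * returned; copy before the folio ref is dropped
			 */
			memcpy(out, p, len);
		freader_cleanup(&r);
		return err;
	}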
Signed-off-by: Mykyta Yatsenko --- include/linux/freader.h | 32 +++++++++ lib/Makefile | 2 +- lib/buildid.c | 145 +--------------------------------------- lib/freader.c | 133 ++++++++++++++++++++++++++++++++++++ 4 files changed, 167 insertions(+), 145 deletions(-) create mode 100644 include/linux/freader.h create mode 100644 lib/freader.c diff --git a/include/linux/freader.h b/include/linux/freader.h new file mode 100644 index 0000000000000..a08ea9f59945a --- /dev/null +++ b/include/linux/freader.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_FREADER_H +#define _LINUX_FREADER_H + +#include + +struct freader { + void *buf; + u32 buf_sz; + int err; + union { + struct { + struct file *file; + struct folio *folio; + void *addr; + loff_t folio_off; + bool may_fault; + }; + struct { + const char *data; + u64 data_sz; + }; + }; +}; + +void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz, + struct file *file, bool may_fault); +void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz); +const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz); +void freader_cleanup(struct freader *r); +int freader_err(struct freader *r); +#endif diff --git a/lib/Makefile b/lib/Makefile index 392ff808c9b90..4761885228fae 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -40,7 +40,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ nmi_backtrace.o win_minmax.o memcat_p.o \ - buildid.o objpool.o iomem_copy.o sys_info.o + buildid.o objpool.o iomem_copy.o sys_info.o freader.o lib-$(CONFIG_UNION_FIND) += union_find.o lib-$(CONFIG_PRINTK) += dump_stack.o diff --git a/lib/buildid.c b/lib/buildid.c index c4b0f376fb341..e770dc4b73d33 100644 --- a/lib/buildid.c +++ b/lib/buildid.c @@ -6,155 +6,12 @@ #include #include #include +#include #define BUILD_ID 3 #define MAX_PHDR_CNT 256 -struct freader { - void *buf; - u32 buf_sz; - int err; - union { - struct { - struct file *file; - struct folio *folio; - void *addr; - loff_t folio_off; - bool may_fault; - }; - struct { - const char *data; - u64 data_sz; - }; - }; -}; - -static void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz, - struct file *file, bool may_fault) -{ - memset(r, 0, sizeof(*r)); - r->buf = buf; - r->buf_sz = buf_sz; - r->file = file; - r->may_fault = may_fault; -} - -static void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz) -{ - memset(r, 0, sizeof(*r)); - r->data = data; - r->data_sz = data_sz; -} - -static void freader_put_folio(struct freader *r) -{ - if (!r->folio) - return; - kunmap_local(r->addr); - folio_put(r->folio); - r->folio = NULL; -} - -static int freader_get_folio(struct freader *r, loff_t file_off) -{ - /* check if we can just reuse current folio */ - if (r->folio && file_off >= r->folio_off && - file_off < r->folio_off + folio_size(r->folio)) - return 0; - - freader_put_folio(r); - - /* reject secretmem folios created with memfd_secret() */ - if (secretmem_mapping(r->file->f_mapping)) - return -EFAULT; - - r->folio = filemap_get_folio(r->file->f_mapping, file_off >> PAGE_SHIFT); - - /* if sleeping is allowed, wait for the page, if necessary */ - if (r->may_fault && (IS_ERR(r->folio) || !folio_test_uptodate(r->folio))) { - filemap_invalidate_lock_shared(r->file->f_mapping); - r->folio = read_cache_folio(r->file->f_mapping, file_off >> PAGE_SHIFT, - NULL, r->file); - filemap_invalidate_unlock_shared(r->file->f_mapping); - 
} - - if (IS_ERR(r->folio) || !folio_test_uptodate(r->folio)) { - if (!IS_ERR(r->folio)) - folio_put(r->folio); - r->folio = NULL; - return -EFAULT; - } - - r->folio_off = folio_pos(r->folio); - r->addr = kmap_local_folio(r->folio, 0); - - return 0; -} - -static const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz) -{ - size_t folio_sz; - - /* provided internal temporary buffer should be sized correctly */ - if (WARN_ON(r->buf && sz > r->buf_sz)) { - r->err = -E2BIG; - return NULL; - } - - if (unlikely(file_off + sz < file_off)) { - r->err = -EOVERFLOW; - return NULL; - } - - /* working with memory buffer is much more straightforward */ - if (!r->buf) { - if (file_off + sz > r->data_sz) { - r->err = -ERANGE; - return NULL; - } - return r->data + file_off; - } - - /* fetch or reuse folio for given file offset */ - r->err = freader_get_folio(r, file_off); - if (r->err) - return NULL; - - /* if requested data is crossing folio boundaries, we have to copy - * everything into our local buffer to keep a simple linear memory - * access interface - */ - folio_sz = folio_size(r->folio); - if (file_off + sz > r->folio_off + folio_sz) { - int part_sz = r->folio_off + folio_sz - file_off; - - /* copy the part that resides in the current folio */ - memcpy(r->buf, r->addr + (file_off - r->folio_off), part_sz); - - /* fetch next folio */ - r->err = freader_get_folio(r, r->folio_off + folio_sz); - if (r->err) - return NULL; - - /* copy the rest of requested data */ - memcpy(r->buf + part_sz, r->addr, sz - part_sz); - - return r->buf; - } - - /* if data fits in a single folio, just return direct pointer */ - return r->addr + (file_off - r->folio_off); -} - -static void freader_cleanup(struct freader *r) -{ - if (!r->buf) - return; /* non-file-backed mode */ - - freader_put_folio(r); -} - /* * Parse build id from the note segment. 
This logic can be shared between * 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are diff --git a/lib/freader.c b/lib/freader.c new file mode 100644 index 0000000000000..32a17d137b32a --- /dev/null +++ b/lib/freader.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include + +void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz, + struct file *file, bool may_fault) +{ + memset(r, 0, sizeof(*r)); + r->buf = buf; + r->buf_sz = buf_sz; + r->file = file; + r->may_fault = may_fault; +} + +void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz) +{ + memset(r, 0, sizeof(*r)); + r->data = data; + r->data_sz = data_sz; +} + +static void freader_put_folio(struct freader *r) +{ + if (!r->folio) + return; + kunmap_local(r->addr); + folio_put(r->folio); + r->folio = NULL; +} + +static int freader_get_folio(struct freader *r, loff_t file_off) +{ + /* check if we can just reuse current folio */ + if (r->folio && file_off >= r->folio_off && + file_off < r->folio_off + folio_size(r->folio)) + return 0; + + freader_put_folio(r); + + /* reject secretmem folios created with memfd_secret() */ + if (secretmem_mapping(r->file->f_mapping)) + return -EFAULT; + + r->folio = filemap_get_folio(r->file->f_mapping, file_off >> PAGE_SHIFT); + + /* if sleeping is allowed, wait for the page, if necessary */ + if (r->may_fault && (IS_ERR(r->folio) || !folio_test_uptodate(r->folio))) { + filemap_invalidate_lock_shared(r->file->f_mapping); + r->folio = read_cache_folio(r->file->f_mapping, file_off >> PAGE_SHIFT, + NULL, r->file); + filemap_invalidate_unlock_shared(r->file->f_mapping); + } + + if (IS_ERR(r->folio) || !folio_test_uptodate(r->folio)) { + if (!IS_ERR(r->folio)) + folio_put(r->folio); + r->folio = NULL; + return -EFAULT; + } + + r->folio_off = folio_pos(r->folio); + r->addr = kmap_local_folio(r->folio, 0); + + return 0; +} + +const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz) +{ + size_t folio_sz; + + /* provided internal temporary buffer should be sized correctly */ + if (WARN_ON(r->buf && sz > r->buf_sz)) { + r->err = -E2BIG; + return NULL; + } + + if (unlikely(file_off + sz < file_off)) { + r->err = -EOVERFLOW; + return NULL; + } + + /* working with memory buffer is much more straightforward */ + if (!r->buf) { + if (file_off + sz > r->data_sz) { + r->err = -ERANGE; + return NULL; + } + return r->data + file_off; + } + + /* fetch or reuse folio for given file offset */ + r->err = freader_get_folio(r, file_off); + if (r->err) + return NULL; + + /* if requested data is crossing folio boundaries, we have to copy + * everything into our local buffer to keep a simple linear memory + * access interface + */ + folio_sz = folio_size(r->folio); + if (file_off + sz > r->folio_off + folio_sz) { + int part_sz = r->folio_off + folio_sz - file_off; + + /* copy the part that resides in the current folio */ + memcpy(r->buf, r->addr + (file_off - r->folio_off), part_sz); + + /* fetch next folio */ + r->err = freader_get_folio(r, r->folio_off + folio_sz); + if (r->err) + return NULL; + + /* copy the rest of requested data */ + memcpy(r->buf + part_sz, r->addr, sz - part_sz); + + return r->buf; + } + + /* if data fits in a single folio, just return direct pointer */ + return r->addr + (file_off - r->folio_off); +} + +void freader_cleanup(struct freader *r) +{ + if (!r->buf) + return; /* non-file-backed mode */ + + freader_put_folio(r); +} From d3e5abdb7501078bf0b88fe49be4319b3f294933 Mon Sep 17 
00:00:00 2001
From: Mykyta Yatsenko
Date: Wed, 24 Sep 2025 18:47:31 +0100
Subject: [PATCH 04/10] lib/freader: support reading more than 2 folios
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

freader_fetch currently reads from at most two folios. When a read
spans into a third folio, the overflow bytes are copied adjacent to the
second folio's data instead of being fetched from the next folio. This
patch modifies the fetch algorithm to support reading from any number
of folios.

Signed-off-by: Mykyta Yatsenko
---
 lib/freader.c | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/lib/freader.c b/lib/freader.c
index 32a17d137b32a..f73b594a137d9 100644
--- a/lib/freader.c
+++ b/lib/freader.c
@@ -105,17 +105,22 @@ const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz)
 	folio_sz = folio_size(r->folio);
 	if (file_off + sz > r->folio_off + folio_sz) {
 		int part_sz = r->folio_off + folio_sz - file_off;
-
-		/* copy the part that resides in the current folio */
-		memcpy(r->buf, r->addr + (file_off - r->folio_off), part_sz);
-
-		/* fetch next folio */
-		r->err = freader_get_folio(r, r->folio_off + folio_sz);
-		if (r->err)
-			return NULL;
-
-		/* copy the rest of requested data */
-		memcpy(r->buf + part_sz, r->addr, sz - part_sz);
+		size_t dst_off = 0, src_off = file_off - r->folio_off;
+
+		do {
+			memcpy(r->buf + dst_off, r->addr + src_off, part_sz);
+			sz -= part_sz;
+			if (sz == 0)
+				break;
+			/* fetch next folio */
+			r->err = freader_get_folio(r, r->folio_off + folio_sz);
+			if (r->err)
+				return NULL;
+			folio_sz = folio_size(r->folio);
+			src_off = 0; /* subsequent folios are read from their start */
+			dst_off += part_sz;
+			part_sz = min_t(u64, sz, folio_sz);
+		} while (sz);
 
 		return r->buf;
 	}

From d823b9a74fd318b2955ca3a9a5f2d52c6a3d79f4 Mon Sep 17 00:00:00 2001
From: Mykyta Yatsenko
Date: Thu, 2 Oct 2025 23:06:07 +0100
Subject: [PATCH 05/10] bpf: verifier: centralize const dynptr check in
 unmark_stack_slots_dynptr()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the const dynptr check into unmark_stack_slots_dynptr() so callers
don't have to duplicate it. This puts the validation next to the code
that manipulates dynptr stack slots and allows upcoming changes to
reuse it directly.

Signed-off-by: Mykyta Yatsenko
---
 kernel/bpf/verifier.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e892df386eed7..0b4ea18584bba 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -812,6 +812,10 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re
 	struct bpf_func_state *state = func(env, reg);
 	int spi, ref_obj_id, i;
 
+	if (reg->type == CONST_PTR_TO_DYNPTR) {
+		verifier_bug(env, "CONST_PTR_TO_DYNPTR cannot be released");
+		return -EFAULT;
+	}
 	spi = dynptr_get_spi(env, reg);
 	if (spi < 0)
 		return spi;
@@ -11487,10 +11491,6 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 	 * is safe to do directly.
	 */
 	if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) {
-		if (regs[meta.release_regno].type == CONST_PTR_TO_DYNPTR) {
-			verifier_bug(env, "CONST_PTR_TO_DYNPTR cannot be released");
-			return -EFAULT;
-		}
 		err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
 	} else if (func_id == BPF_FUNC_kptr_xchg && meta.ref_obj_id) {
 		u32 ref_obj_id = meta.ref_obj_id;

From 8aa898eec0145c7a90f65f03132c6329780455da Mon Sep 17 00:00:00 2001
From: Mykyta Yatsenko
Date: Wed, 10 Sep 2025 23:24:40 +0100
Subject: [PATCH 06/10] bpf: add plumbing for file-backed dynptr

Add the necessary verifier plumbing for the new file-backed dynptr
type. Introduce two kfuncs for its lifecycle management:
* bpf_dynptr_from_file() for initialization
* bpf_dynptr_file_discard() for destruction

Currently there is no mechanism for a kfunc to release a dynptr; this
patch adds one:
* Introduce is_dynptr_release_arg() to tell if a given dynptr argument
  should be released
* Set meta->release_regno and regs[regno].ref_obj_id to make the
  release happen

Signed-off-by: Mykyta Yatsenko
---
 include/linux/bpf.h   |  7 ++++++-
 kernel/bpf/helpers.c  | 12 ++++++++++++
 kernel/bpf/log.c      |  2 ++
 kernel/bpf/verifier.c | 22 ++++++++++++++++++++--
 4 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9ab38eaa6af93..bd70117b8e84c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -785,12 +785,15 @@ enum bpf_type_flag {
 	/* DYNPTR points to skb_metadata_end()-skb_metadata_len() */
 	DYNPTR_TYPE_SKB_META = BIT(19 + BPF_BASE_TYPE_BITS),
 
+	/* DYNPTR points to file */
+	DYNPTR_TYPE_FILE = BIT(20 + BPF_BASE_TYPE_BITS),
+
 	__BPF_TYPE_FLAG_MAX,
 	__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
 };
 
 #define DYNPTR_TYPE_FLAG_MASK	(DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \
-				 | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META)
+				 | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META | DYNPTR_TYPE_FILE)
 
 /* Max number of base types.
*/ #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) @@ -1378,6 +1381,8 @@ enum bpf_dynptr_type { BPF_DYNPTR_TYPE_XDP, /* Points to skb_metadata_end()-skb_metadata_len() */ BPF_DYNPTR_TYPE_SKB_META, + /* Underlying data is a file */ + BPF_DYNPTR_TYPE_FILE, }; int bpf_dynptr_check_size(u64 size); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 44f4a561f8453..6f6aba03dda85 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -4199,6 +4199,16 @@ __bpf_kfunc int bpf_task_work_schedule_resume(struct task_struct *task, struct b return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_RESUME); } +__bpf_kfunc int bpf_dynptr_from_file(struct file *file, u32 flags, struct bpf_dynptr *ptr__uninit) +{ + return 0; +} + +__bpf_kfunc int bpf_dynptr_file_discard(struct bpf_dynptr *dynptr) +{ + return 0; +} + __bpf_kfunc_end_defs(); static void bpf_task_work_cancel_scheduled(struct irq_work *irq_work) @@ -4374,6 +4384,8 @@ BTF_ID_FLAGS(func, bpf_cgroup_read_xattr, KF_RCU) BTF_ID_FLAGS(func, bpf_stream_vprintk, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_task_work_schedule_signal, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_task_work_schedule_resume, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_dynptr_from_file) +BTF_ID_FLAGS(func, bpf_dynptr_file_discard) BTF_KFUNCS_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index f50533169cc34..70221aafc35c0 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -500,6 +500,8 @@ const char *dynptr_type_str(enum bpf_dynptr_type type) return "xdp"; case BPF_DYNPTR_TYPE_SKB_META: return "skb_meta"; + case BPF_DYNPTR_TYPE_FILE: + return "file"; case BPF_DYNPTR_TYPE_INVALID: return ""; default: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0b4ea18584bba..e4441155a4bf9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -676,6 +676,8 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type) return BPF_DYNPTR_TYPE_XDP; case DYNPTR_TYPE_SKB_META: return BPF_DYNPTR_TYPE_SKB_META; + case DYNPTR_TYPE_FILE: + return BPF_DYNPTR_TYPE_FILE; default: return BPF_DYNPTR_TYPE_INVALID; } @@ -694,6 +696,8 @@ static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type) return DYNPTR_TYPE_XDP; case BPF_DYNPTR_TYPE_SKB_META: return DYNPTR_TYPE_SKB_META; + case BPF_DYNPTR_TYPE_FILE: + return DYNPTR_TYPE_FILE; default: return 0; } @@ -701,7 +705,7 @@ static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type) static bool dynptr_type_refcounted(enum bpf_dynptr_type type) { - return type == BPF_DYNPTR_TYPE_RINGBUF; + return type == BPF_DYNPTR_TYPE_RINGBUF || type == BPF_DYNPTR_TYPE_FILE; } static void __mark_dynptr_reg(struct bpf_reg_state *reg, @@ -12258,6 +12262,8 @@ enum special_kfunc_type { KF_bpf_res_spin_unlock, KF_bpf_res_spin_lock_irqsave, KF_bpf_res_spin_unlock_irqrestore, + KF_bpf_dynptr_from_file, + KF_bpf_dynptr_file_discard, KF___bpf_trap, KF_bpf_task_work_schedule_signal, KF_bpf_task_work_schedule_resume, @@ -12330,6 +12336,8 @@ BTF_ID(func, bpf_res_spin_lock) BTF_ID(func, bpf_res_spin_unlock) BTF_ID(func, bpf_res_spin_lock_irqsave) BTF_ID(func, bpf_res_spin_unlock_irqrestore) +BTF_ID(func, bpf_dynptr_from_file) +BTF_ID(func, bpf_dynptr_file_discard) BTF_ID(func, __bpf_trap) BTF_ID(func, bpf_task_work_schedule_signal) BTF_ID(func, bpf_task_work_schedule_resume) @@ -13293,6 +13301,11 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ dynptr_arg_type |= 
DYNPTR_TYPE_XDP; } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb_meta]) { dynptr_arg_type |= DYNPTR_TYPE_SKB_META; + } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) { + dynptr_arg_type |= DYNPTR_TYPE_FILE; + } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_file_discard]) { + dynptr_arg_type |= DYNPTR_TYPE_FILE; + meta->release_regno = regno; } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] && (dynptr_arg_type & MEM_UNINIT)) { enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type; @@ -13969,7 +13982,12 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now. */ if (meta.release_regno) { - err = release_reference(env, regs[meta.release_regno].ref_obj_id); + struct bpf_reg_state *reg = ®s[meta.release_regno]; + + if (meta.initialized_dynptr.ref_obj_id) + err = unmark_stack_slots_dynptr(env, reg); + else + err = release_reference(env, reg->ref_obj_id); if (err) { verbose(env, "kfunc %s#%d reference has not been acquired before\n", func_name, meta.func_id); From 2aae9040a7d17a5f0d623f4f66417275faf070dc Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Wed, 10 Sep 2025 23:46:04 +0100 Subject: [PATCH 07/10] bpf: add kfuncs and helpers support for file dynptrs Add support for file dynptr. Introduce struct bpf_dynptr_file_impl to hold internal state for file dynptrs, with 64-bit size and offset support. Introduce lifecycle management kfuncs: - bpf_dynptr_from_file() for initialization - bpf_dynptr_file_discard() for destruction Extend existing helpers to support file dynptrs in: - bpf_dynptr_read() - bpf_dynptr_slice() Signed-off-by: Mykyta Yatsenko --- kernel/bpf/helpers.c | 97 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 6f6aba03dda85..4bba516599c79 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "../../lib/kstrtox.h" @@ -1657,6 +1658,13 @@ static const struct bpf_func_proto bpf_kptr_xchg_proto = { .arg2_btf_id = BPF_PTR_POISON, }; +struct bpf_dynptr_file_impl { + struct freader freader; + /* 64 bit offset and size overriding 32 bit ones in bpf_dynptr_kern */ + u64 offset; + u64 size; +}; + /* Since the upper 8 bits of dynptr->size is reserved, the * maximum supported size is 2^24 - 1. */ @@ -1687,13 +1695,36 @@ static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *pt u64 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr) { + if (bpf_dynptr_get_type(ptr) == BPF_DYNPTR_TYPE_FILE) { + struct bpf_dynptr_file_impl *df = ptr->data; + + return df->size; + } + return ptr->size & DYNPTR_SIZE_MASK; } +static void bpf_dynptr_advance_offset(struct bpf_dynptr_kern *ptr, u64 off) +{ + if (bpf_dynptr_get_type(ptr) == BPF_DYNPTR_TYPE_FILE) { + struct bpf_dynptr_file_impl *df = ptr->data; + + df->offset += off; + return; + } + ptr->offset += off; +} + static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u64 new_size) { u32 metadata = ptr->size & ~DYNPTR_SIZE_MASK; + if (bpf_dynptr_get_type(ptr) == BPF_DYNPTR_TYPE_FILE) { + struct bpf_dynptr_file_impl *df = ptr->data; + + df->size = new_size; + return; + } ptr->size = (u32)new_size | metadata; } @@ -1702,6 +1733,25 @@ int bpf_dynptr_check_size(u64 size) return size > DYNPTR_MAX_SIZE ? 
-E2BIG : 0; } +static int bpf_file_fetch_bytes(struct bpf_dynptr_file_impl *df, u64 offset, void *buf, u64 len) +{ + const void *ptr; + + if (!buf || len == 0) + return -EINVAL; + + df->freader.buf = buf; + df->freader.buf_sz = len; + ptr = freader_fetch(&df->freader, offset + df->offset, len); + if (!ptr) + return df->freader.err; + + if (ptr != buf) /* Force copying into the buffer */ + memcpy(buf, ptr, len); + + return 0; +} + void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, enum bpf_dynptr_type type, u32 offset, u32 size) { @@ -1782,6 +1832,8 @@ static int __bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr_kern *s case BPF_DYNPTR_TYPE_SKB_META: memmove(dst, bpf_skb_meta_pointer(src->data, src->offset + offset), len); return 0; + case BPF_DYNPTR_TYPE_FILE: + return bpf_file_fetch_bytes(src->data, offset, dst, len); default: WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type); return -EFAULT; @@ -2177,6 +2229,35 @@ void bpf_rb_root_free(const struct btf_field *field, void *rb_root, } } +enum bpf_is_sleepable { + MAY_SLEEP, + MAY_NOT_SLEEP, +}; + +static int make_file_dynptr(struct file *file, u32 flags, enum bpf_is_sleepable sleepable, + struct bpf_dynptr_kern *ptr) +{ + struct bpf_dynptr_file_impl *state; + + /* flags is currently unsupported */ + if (flags) { + bpf_dynptr_set_null(ptr); + return -EINVAL; + } + + state = bpf_mem_alloc(&bpf_global_ma, sizeof(struct bpf_dynptr_file_impl)); + if (!state) { + bpf_dynptr_set_null(ptr); + return -ENOMEM; + } + state->offset = 0; + state->size = U64_MAX; /* Don't restrict size, as file may change anyways */ + freader_init_from_file(&state->freader, NULL, 0, file, sleepable == MAY_SLEEP); + bpf_dynptr_init(ptr, state, BPF_DYNPTR_TYPE_FILE, 0, 0); + bpf_dynptr_set_rdonly(ptr); + return 0; +} + __bpf_kfunc_start_defs(); __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) @@ -2720,6 +2801,9 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u64 offset, } case BPF_DYNPTR_TYPE_SKB_META: return bpf_skb_meta_pointer(ptr->data, ptr->offset + offset); + case BPF_DYNPTR_TYPE_FILE: + err = bpf_file_fetch_bytes(ptr->data, offset, buffer__opt, buffer__szk); + return err ? 
NULL : buffer__opt;
 	default:
 		WARN_ONCE(true, "unknown dynptr type %d\n", type);
 		return NULL;
 	}
@@ -2814,7 +2898,7 @@ __bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u64 start, u64 end
 	if (start > size || end > size)
 		return -ERANGE;
 
-	ptr->offset += start;
+	bpf_dynptr_advance_offset(ptr, start);
 	bpf_dynptr_set_size(ptr, end - start);
 
 	return 0;
@@ -4201,11 +4285,20 @@ __bpf_kfunc int bpf_task_work_schedule_resume(struct task_struct *task, struct b
 
 __bpf_kfunc int bpf_dynptr_from_file(struct file *file, u32 flags, struct bpf_dynptr *ptr__uninit)
 {
-	return 0;
+	return make_file_dynptr(file, flags, MAY_NOT_SLEEP, (struct bpf_dynptr_kern *)ptr__uninit);
 }
 
 __bpf_kfunc int bpf_dynptr_file_discard(struct bpf_dynptr *dynptr)
 {
+	struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)dynptr;
+	struct bpf_dynptr_file_impl *df;
+
+	if (bpf_dynptr_get_type(ptr) == BPF_DYNPTR_TYPE_INVALID)
+		return 0;
+
+	df = ptr->data;
+	freader_cleanup(&df->freader);
+	bpf_mem_free(&bpf_global_ma, df);
 	return 0;
 }

From 2a42e96c3a1fa80e280b2ba7490b8d489422ccac Mon Sep 17 00:00:00 2001
From: Mykyta Yatsenko
Date: Thu, 2 Oct 2025 01:17:02 +0100
Subject: [PATCH 08/10] bpf: verifier: refactor kfunc specialization

Move kfunc specialization (function address substitution) to a later
stage of verification to support a new use case, where we need to take
into consideration whether a kfunc is called in a sleepable context.

Minor refactoring in add_kfunc_call(), making sure that if the function
fails, the kfunc desc is not added to tab->descs (previously it could
be added or not, depending on what failed).

Signed-off-by: Mykyta Yatsenko
---
 kernel/bpf/verifier.c | 97 ++++++++++++++++++++++++++-----------------
 1 file changed, 59 insertions(+), 38 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e4441155a4bf9..aacefa3d05443 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -209,8 +209,6 @@ static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
 static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg);
-static void specialize_kfunc(struct bpf_verifier_env *env,
-			     u32 func_id, u16 offset, unsigned long *addr);
 static bool is_trusted_reg(const struct bpf_reg_state *reg);
 
 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
@@ -3105,6 +3103,10 @@ struct bpf_kfunc_btf_tab {
 	u32 nr_descs;
 };
 
+static unsigned long kfunc_call_imm(unsigned long func_addr, u32 func_id);
+
+static void specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc);
+
 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
 {
 	const struct bpf_kfunc_desc *d0 = a;
@@ -3122,7 +3124,7 @@ static int kfunc_btf_cmp_by_off(const void *a, const void *b)
 	return d0->offset - d1->offset;
 }
 
-static const struct bpf_kfunc_desc *
+static struct bpf_kfunc_desc *
 find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
 {
 	struct bpf_kfunc_desc desc = {
@@ -3245,6 +3247,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
 {
 	const struct btf_type *func, *func_proto;
 	struct bpf_kfunc_btf_tab *btf_tab;
+	struct btf_func_model func_model;
 	struct bpf_kfunc_desc_tab *tab;
 	struct bpf_prog_aux *prog_aux;
 	struct bpf_kfunc_desc *desc;
@@ -3334,19 +3337,6 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
 			func_name);
 		return -EINVAL;
 	}
-	specialize_kfunc(env, func_id, offset, &addr);
-
-	if
(bpf_jit_supports_far_kfunc_call()) { - call_imm = func_id; - } else { - call_imm = BPF_CALL_IMM(addr); - /* Check whether the relative offset overflows desc->imm */ - if ((unsigned long)(s32)call_imm != call_imm) { - verbose(env, "address of kernel function %s is out of range\n", - func_name); - return -EINVAL; - } - } if (bpf_dev_bound_kfunc_id(func_id)) { err = bpf_dev_bound_kfunc_check(&env->log, prog_aux); @@ -3354,18 +3344,29 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) return err; } + err = btf_distill_func_proto(&env->log, desc_btf, + func_proto, func_name, + &func_model); + if (err) + return err; + + call_imm = kfunc_call_imm(addr, func_id); + /* Check whether the relative offset overflows desc->imm */ + if ((unsigned long)(s32)call_imm != call_imm) { + verbose(env, "address of kernel function %s is out of range\n", + func_name); + return -EINVAL; + } + desc = &tab->descs[tab->nr_descs++]; desc->func_id = func_id; - desc->imm = call_imm; desc->offset = offset; desc->addr = addr; - err = btf_distill_func_proto(&env->log, desc_btf, - func_proto, func_name, - &desc->func_model); - if (!err) - sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), - kfunc_desc_cmp_by_id_off, NULL); - return err; + desc->imm = call_imm; + desc->func_model = func_model; + sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), + kfunc_desc_cmp_by_id_off, NULL); + return 0; } static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b) @@ -21822,21 +21823,32 @@ static int fixup_call_args(struct bpf_verifier_env *env) return err; } +static unsigned long kfunc_call_imm(unsigned long func_addr, u32 func_id) +{ + if (bpf_jit_supports_far_kfunc_call()) + return func_id; + + return BPF_CALL_IMM(func_addr); +} + /* replace a generic kfunc with a specialized version if necessary */ -static void specialize_kfunc(struct bpf_verifier_env *env, - u32 func_id, u16 offset, unsigned long *addr) +static void specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc) { + struct bpf_prog_aux *prog_aux = env->prog->aux; + struct bpf_kfunc_desc_tab *tab = prog_aux->kfunc_tab; struct bpf_prog *prog = env->prog; bool seen_direct_write; void *xdp_kfunc; bool is_rdonly; + u32 func_id = desc->func_id; + u16 offset = desc->offset; + unsigned long call_imm; + unsigned long addr = 0; if (bpf_dev_bound_kfunc_id(func_id)) { xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id); - if (xdp_kfunc) { - *addr = (unsigned long)xdp_kfunc; - return; - } + if (xdp_kfunc) + addr = (unsigned long)xdp_kfunc; /* fallback to default kfunc when not supported by netdev */ } @@ -21848,21 +21860,28 @@ static void specialize_kfunc(struct bpf_verifier_env *env, is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE); if (is_rdonly) - *addr = (unsigned long)bpf_dynptr_from_skb_rdonly; + addr = (unsigned long)bpf_dynptr_from_skb_rdonly; /* restore env->seen_direct_write to its original value, since * may_access_direct_pkt_data mutates it */ env->seen_direct_write = seen_direct_write; + } else if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr]) { + if (bpf_lsm_has_d_inode_locked(prog)) + addr = (unsigned long)bpf_set_dentry_xattr_locked; + } else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr]) { + if (bpf_lsm_has_d_inode_locked(prog)) + addr = (unsigned long)bpf_remove_dentry_xattr_locked; } - if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr] && - bpf_lsm_has_d_inode_locked(prog)) - *addr = (unsigned long)bpf_set_dentry_xattr_locked; + if (!addr) /* Nothing 
to patch with */
+		return;
 
-	if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr] &&
-	    bpf_lsm_has_d_inode_locked(prog))
-		*addr = (unsigned long)bpf_remove_dentry_xattr_locked;
+	call_imm = kfunc_call_imm(addr, func_id);
+	desc->imm = call_imm;
+	desc->addr = addr;
+	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
+	     kfunc_desc_cmp_by_id_off, NULL);
 }
 
 static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
@@ -21885,7 +21904,7 @@ static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
 static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			    struct bpf_insn *insn_buf, int insn_idx, int *cnt)
 {
-	const struct bpf_kfunc_desc *desc;
+	struct bpf_kfunc_desc *desc;
 
 	if (!insn->imm) {
 		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
@@ -21905,6 +21924,8 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		return -EFAULT;
 	}
 
+	specialize_kfunc(env, desc);
+
 	if (!bpf_jit_supports_far_kfunc_call())
 		insn->imm = BPF_CALL_IMM(desc->addr);
 	if (insn->off)

From 77f9db77e9085a97ac0cfcff5eebed32d322b15d Mon Sep 17 00:00:00 2001
From: Mykyta Yatsenko
Date: Thu, 25 Sep 2025 15:41:58 +0100
Subject: [PATCH 09/10] bpf: dispatch to sleepable file dynptr

File dynptr reads may sleep when the requested folios are not in the
page cache. Since dynptrs are non-sleepable by default, enable sleeping
only when bpf_dynptr_from_file() is invoked from a sleepable context;
this avoids sleeping in non-sleepable contexts while still supporting
valid sleepable use.

This change:
* Introduces a sleepable constructor: bpf_dynptr_from_file_sleepable()
* Detects whether the kfunc is called in a sleepable context and stores
  the result in bpf_insn_aux_data (kfunc_in_sleepable_ctx)
* Rewrites bpf_dynptr_from_file() calls to the sleepable variant when
  kfunc_in_sleepable_ctx is set

Signed-off-by: Mykyta Yatsenko
---
 include/linux/bpf.h          |  3 +++
 include/linux/bpf_verifier.h |  2 ++
 kernel/bpf/helpers.c         |  5 +++++
 kernel/bpf/verifier.c        | 12 +++++++++---
 4 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index bd70117b8e84c..9da7460e078cd 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -663,6 +663,9 @@ int map_check_no_btf(const struct bpf_map *map,
 bool bpf_map_meta_equal(const struct bpf_map *meta0,
 			const struct bpf_map *meta1);
 
+int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags,
+				   struct bpf_dynptr *ptr__uninit);
+
 extern const struct bpf_map_ops bpf_map_offload_ops;
 
 /* bpf_type_flag contains a set of flags that are applicable to the values of
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 4c497e839526a..6078d5e9b5356 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -581,6 +581,8 @@ struct bpf_insn_aux_data {
 	u32 scc;
 	/* registers alive before this instruction. */
 	u16 live_regs_before;
+	/* kfunc is called in sleepable context */
+	bool kfunc_in_sleepable_ctx;
 };
 
 #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 4bba516599c79..f452e22333fef 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -4288,6 +4288,11 @@ __bpf_kfunc int bpf_dynptr_from_file(struct file *file, u32 flags, struct bpf_dy
 	return make_file_dynptr(file, flags, MAY_NOT_SLEEP, (struct bpf_dynptr_kern *)ptr__uninit);
 }
 
+int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags, struct bpf_dynptr *ptr__uninit)
+{
+	return make_file_dynptr(file, flags, MAY_SLEEP, (struct bpf_dynptr_kern *)ptr__uninit);
+}
+
 __bpf_kfunc int bpf_dynptr_file_discard(struct bpf_dynptr *dynptr)
 {
 	struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)dynptr;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index aacefa3d05443..82762eab3f170 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3105,7 +3105,8 @@ struct bpf_kfunc_btf_tab {
 
 static unsigned long kfunc_call_imm(unsigned long func_addr, u32 func_id);
 
-static void specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc);
+static void specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc,
+			     int insn_idx);
 
 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
 {
@@ -13833,6 +13834,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 
 	insn_aux = &env->insn_aux_data[insn_idx];
 	insn_aux->is_iter_next = is_iter_next_kfunc(&meta);
+	insn_aux->kfunc_in_sleepable_ctx = in_sleepable(env);
 
 	if (!insn->off &&
 	    (insn->imm == special_kfunc_list[KF_bpf_res_spin_lock] ||
@@ -21832,7 +21834,8 @@ static unsigned long kfunc_call_imm(unsigned long func_addr, u32 func_id)
 }
 
 /* replace a generic kfunc with a specialized version if necessary */
-static void specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc)
+static void specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc,
+			     int insn_idx)
 {
 	struct bpf_prog_aux *prog_aux = env->prog->aux;
 	struct bpf_kfunc_desc_tab *tab = prog_aux->kfunc_tab;
@@ -21872,6 +21875,9 @@ static void specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc
 	} else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr]) {
 		if (bpf_lsm_has_d_inode_locked(prog))
 			addr = (unsigned long)bpf_remove_dentry_xattr_locked;
+	} else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
+		if (env->insn_aux_data[insn_idx].kfunc_in_sleepable_ctx)
+			addr = (unsigned long)bpf_dynptr_from_file_sleepable;
 	}
 
 	if (!addr) /* Nothing to patch with */
@@ -21924,7 +21930,7 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		return -EFAULT;
 	}
 
-	specialize_kfunc(env, desc);
+	specialize_kfunc(env, desc, insn_idx);
 
 	if (!bpf_jit_supports_far_kfunc_call())
 		insn->imm = BPF_CALL_IMM(desc->addr);

From 3a86196fa5320364fa82bb622d2737b5fc479445 Mon Sep 17 00:00:00 2001
From: Mykyta Yatsenko
Date: Wed, 10 Sep 2025 23:53:52 +0100
Subject: [PATCH 10/10] selftests/bpf: add file dynptr tests

Introduce selftests validating that file-backed dynptrs work as
expected:
 .../selftests/bpf/prog_tests/file_reader.c    |  81 ++++++
 .../testing/selftests/bpf/progs/file_reader.c | 241 ++++++++++++++++++
 .../selftests/bpf/progs/file_reader_fail.c    |  57 +++++
 3 files changed, 379 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/file_reader.c
 create mode 100644 tools/testing/selftests/bpf/progs/file_reader.c
 create mode 100644 tools/testing/selftests/bpf/progs/file_reader_fail.c

diff --git a/tools/testing/selftests/bpf/prog_tests/file_reader.c b/tools/testing/selftests/bpf/prog_tests/file_reader.c
new file mode 100644
index 0000000000000..b314ad4326f18
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/file_reader.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include <fcntl.h>
+#include "file_reader.skel.h"
+#include "file_reader_fail.skel.h"
+
+/* referenced by file_reader.c's ELF symbol search */
+__thread int tls_counter;
+const char *user_ptr = "hello world";
+char file_contents[256000];
+
+enum file_reader_test {
+	VALIDATE_LARGE_FILE = 1,
+	SEARCH_ELF = 2,
+};
+
+static int initialize_file_contents(void)
+{
+	int fd;
+	ssize_t n;
+	int err = 0;
+
+	fd = open("/proc/self/exe", O_RDONLY);
+	if (!ASSERT_GT(fd, 0, "Open /proc/self/exe"))
+		return 1;
+
+	n = read(fd, file_contents, sizeof(file_contents));
+	if (!ASSERT_EQ(n, sizeof(file_contents), "Read /proc/self/exe"))
+		err = 1;
+
+	/* drop cached pages so the BPF-side read has to bring them back in */
+	posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
+	close(fd);
+	return err;
+}
+
+static void run_test(enum file_reader_test test_type)
+{
+	struct file_reader *skel;
+	int err;
+	char data[256];
+	LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, .repeat = 1,
+		    .data_size_in = sizeof(data));
+
+	skel = file_reader__open();
+	if (!ASSERT_OK_PTR(skel, "file_reader__open"))
+		return;
+
+	skel->bss->user_ptr = (void *)user_ptr;
+	skel->bss->user_buf = file_contents;
+	skel->rodata->user_buf_sz = sizeof(file_contents);
+	skel->rodata->test_type = test_type;
+
+	err = file_reader__load(skel);
+	if (!ASSERT_OK(err, "file_reader__load"))
+		goto cleanup;
+
+	err = initialize_file_contents();
+	if (!ASSERT_OK(err, "initialize file contents"))
+		goto cleanup;
+
+	err = file_reader__attach(skel);
+	if (!ASSERT_OK(err, "file_reader__attach"))
+		goto cleanup;
+
+	/* any syscall triggers the raw_tp/sys_enter program */
+	getpid();
+
+	ASSERT_EQ(skel->bss->err, 0, "err");
+cleanup:
+	file_reader__destroy(skel);
+}
+
+void test_file_reader(void)
+{
+	if (test__start_subtest("test_large_file"))
+		run_test(VALIDATE_LARGE_FILE);
+	if (test__start_subtest("test_search_elf"))
+		run_test(SEARCH_ELF);
+
+	RUN_TESTS(file_reader_fail);
+}
diff --git a/tools/testing/selftests/bpf/progs/file_reader.c b/tools/testing/selftests/bpf/progs/file_reader.c
new file mode 100644
index 0000000000000..9dd9a68f3563a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/file_reader.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+#define ELFMAG "\177ELF"
+#define SELFMAG 4
+#define ET_NONE 0
+#define ET_REL 1
+#define ET_EXEC 2
+#define ET_DYN 3
+#define ET_CORE 4
+#define ET_LOPROC 0xff00
+#define ET_HIPROC 0xffff
+#define EI_CLASS 4
+#define ELFCLASS32 1
+#define ELFCLASS64 2
+#define STT_TLS 6
+
+#define ELF_ST_BIND(x) ((x) >> 4)
+#define ELF_ST_TYPE(x) ((x) & 0xf)
+#define ELF32_ST_BIND(x) ELF_ST_BIND(x)
+#define ELF32_ST_TYPE(x) ELF_ST_TYPE(x)
+#define ELF64_ST_BIND(x) ELF_ST_BIND(x)
+#define ELF64_ST_TYPE(x) ELF_ST_TYPE(x)
+
+char _license[] SEC("license") = "GPL";
+
+struct elem {
+	struct file *file;
+	struct bpf_task_work tw;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, struct elem);
+} arrmap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+	/* libbpf rounds this up to a power-of-2 number of pages */
+	__uint(max_entries, 10000000);
+} ringbuf SEC(".maps");
+
+enum file_reader_test {
+	VALIDATE_LARGE_FILE = 1,
+	SEARCH_ELF = 2,
+};
+
+int err;
+void *user_ptr;
+char buf[1024];
+char *user_buf;
+volatile const __u32 user_buf_sz;
+volatile const __s32 test_type = -1;
+
+static int process_vma(struct task_struct *task, struct vm_area_struct *vma, void *data);
+static int search_elf(struct file *file);
+static int validate_large_file_read(struct file *file);
+static int task_work_callback(struct bpf_map *map, void *key, void *value);
+
+SEC("raw_tp/sys_enter")
+int on_getpid(void *ctx)
+{
+	struct task_struct *task = bpf_get_current_task_btf();
+	struct elem *work;
+	int key = 0;
+
+	work = bpf_map_lookup_elem(&arrmap, &key);
+	if (!work) {
+		err = 1;
+		return 0;
+	}
+	/* defer to task-work context, where bpf_find_vma() can run safely */
+	bpf_task_work_schedule_signal(task, &work->tw, &arrmap, task_work_callback, NULL);
+	return 0;
+}
+
+static int task_work_callback(struct bpf_map *map, void *key, void *value)
+{
+	struct task_struct *task = bpf_get_current_task_btf();
+
+	bpf_find_vma(task, (unsigned long)user_ptr, process_vma, NULL, 0);
+	return 0;
+}
+
+static int process_vma(struct task_struct *task, struct vm_area_struct *vma, void *data)
+{
+	switch (test_type) {
+	case VALIDATE_LARGE_FILE:
+		err = validate_large_file_read(vma->vm_file);
+		break;
+	case SEARCH_ELF:
+		err = search_elf(vma->vm_file);
+		break;
+	default:
+		err = 1;
+	}
+	return err;
+}
+
+static int validate_large_file_read(struct file *file)
+{
+	struct bpf_dynptr dynptr;
+	int err, i;
+	char *rbuf1 = NULL, *rbuf2 = NULL;
+
+	if (!file) {
+		err = 1;
+		return 1;
+	}
+
+	err = bpf_dynptr_from_file(file, 0, &dynptr);
+	if (err)
+		goto cleanup_file;
+
+	rbuf1 = bpf_ringbuf_reserve(&ringbuf, user_buf_sz, 0);
+	if (!rbuf1) {
+		err = 1;
+		goto cleanup_file;
+	}
+
+	rbuf2 = bpf_ringbuf_reserve(&ringbuf, user_buf_sz, 0);
+	if (!rbuf2) {
+		err = 1;
+		goto cleanup_all;
+	}
+
+	err = bpf_dynptr_read(rbuf1, user_buf_sz, &dynptr, 0, 0);
+	if (err)
+		goto cleanup_all;
+	err = bpf_copy_from_user(rbuf2, user_buf_sz, user_buf);
+	if (err)
+		goto cleanup_all;
+	/* Verify file contents read from BPF are the same as those read from
+	 * userspace; the constant bound keeps the verifier happy and matches
+	 * sizeof(file_contents) on the userspace side. */
+	bpf_for(i, 0, user_buf_sz) {
+		if (i >= 256000 || rbuf1[i] != rbuf2[i]) {
+			err = 1;
+			break;
+		}
+	}
+
+cleanup_all:
+	if (rbuf1)
+		bpf_ringbuf_discard(rbuf1, 0);
+	if (rbuf2)
+		bpf_ringbuf_discard(rbuf2, 0);
+cleanup_file:
+	bpf_dynptr_file_discard(&dynptr);
+	return err ? 1 : 0;
+}
+
+/* Finds thread local variable `tls_counter` in this executable's ELF */
+static int search_elf(struct file *file)
+{
+	Elf64_Ehdr ehdr;
+	Elf64_Shdr shdrs;
+	Elf64_Shdr symtab, strtab, tmp;
+	const Elf64_Sym *symbol;
+	int count, off, i, e_shnum, e_shoff, e_shentsize, sections = 0;
+	const char *string;
+	struct bpf_dynptr dynptr;
+	const __u32 slen = 11; /* strlen("tls_counter") */
+	static const char *needle = "tls_counter";
+
+	if (!file) {
+		err = 1;
+		return 1;
+	}
+
+	err = bpf_dynptr_from_file(file, 0, &dynptr);
+	if (err)
+		goto fail;
+
+	err = bpf_dynptr_read(&ehdr, sizeof(ehdr), &dynptr, 0, 0);
+	if (err)
+		goto fail;
+
+	if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0)
+		goto fail;
+
+	if (ehdr.e_type != ET_EXEC && ehdr.e_type != ET_DYN)
+		goto fail;
+
+	if (ehdr.e_ident[EI_CLASS] != ELFCLASS64)
+		goto fail;
+
+	e_shnum = ehdr.e_shnum;
+	e_shoff = ehdr.e_shoff;
+	e_shentsize = ehdr.e_shentsize;
+
+	/* section header of the section-name string table */
+	err = bpf_dynptr_read(&shdrs, sizeof(shdrs), &dynptr,
+			      e_shoff + e_shentsize * ehdr.e_shstrndx, 0);
+	if (err)
+		goto fail;
+
+	off = shdrs.sh_offset;
+
+	__builtin_memset(&symtab, 0, sizeof(symtab));
+	__builtin_memset(&strtab, 0, sizeof(strtab));
+	bpf_for(i, 0, e_shnum) {
+		err = bpf_dynptr_read(&tmp, sizeof(Elf64_Shdr), &dynptr,
+				      e_shoff + e_shentsize * i, 0);
+		if (err)
+			goto fail;
+
+		string = bpf_dynptr_slice(&dynptr, off + tmp.sh_name, buf, slen);
+		if (!string)
+			goto fail;
+
+		if (bpf_strncmp(string, slen, ".symtab") == 0) {
+			symtab = tmp;
+			++sections;
+		} else if (bpf_strncmp(string, slen, ".strtab") == 0) {
+			strtab = tmp;
+			++sections;
+		}
+		if (sections == 2)
+			break;
+	}
+	if (sections != 2)
+		goto fail;
+
+	count = symtab.sh_size / sizeof(Elf64_Sym);
+	bpf_for(i, 0, count) {
+		symbol = bpf_dynptr_slice(&dynptr, symtab.sh_offset + sizeof(Elf64_Sym) * i,
+					  buf, sizeof(Elf64_Sym));
+		if (!symbol)
+			goto fail;
+		if (symbol->st_name == 0 || ELF64_ST_TYPE(symbol->st_info) != STT_TLS)
+			continue;
+		string = bpf_dynptr_slice(&dynptr, strtab.sh_offset + symbol->st_name, buf, slen);
+		if (!string)
+			goto fail;
+		if (bpf_strncmp(string, slen, needle) == 0)
+			goto success;
+	}
+fail:
+	err = 1;
+success:
+	bpf_dynptr_file_discard(&dynptr);
+	return err ? 1 : 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/file_reader_fail.c b/tools/testing/selftests/bpf/progs/file_reader_fail.c
new file mode 100644
index 0000000000000..449c4f9a1c745
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/file_reader_fail.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+int err;
+void *user_ptr;
+
+char buf[256];
+
+static long process_vma_unreleased_ref(struct task_struct *task, struct vm_area_struct *vma,
+				       void *data)
+{
+	struct bpf_dynptr dynptr;
+
+	if (!vma->vm_file)
+		return 1;
+
+	/* intentionally never calls bpf_dynptr_file_discard() */
+	err = bpf_dynptr_from_file(vma->vm_file, 0, &dynptr);
+	return err ? 1 : 0;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+__failure __msg("Unreleased reference id=")
+int on_nanosleep_unreleased_ref(void *ctx)
+{
+	struct task_struct *task = bpf_get_current_task_btf();
+
+	bpf_find_vma(task, (unsigned long)user_ptr, process_vma_unreleased_ref, NULL, 0);
+	return 0;
+}
+
+SEC("xdp")
+__failure __msg("Expected a dynptr of type file as arg #0")
+int xdp_wrong_dynptr_type(struct xdp_md *xdp)
+{
+	struct bpf_dynptr dynptr;
+
+	bpf_dynptr_from_xdp(xdp, 0, &dynptr);
+	/* the file-dynptr destructor on an xdp dynptr must be rejected */
+	bpf_dynptr_file_discard(&dynptr);
+	return 0;
+}
+
+SEC("xdp")
+__failure __msg("Expected an initialized dynptr as arg #0")
+int xdp_no_dynptr_type(struct xdp_md *xdp)
+{
+	struct bpf_dynptr dynptr;
+
+	/* a destructor without any initializer must be rejected */
+	bpf_dynptr_file_discard(&dynptr);
+	return 0;
+}
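For contrast with process_vma_unreleased_ref() above, a sketch of the
release pattern the verifier accepts, mirroring the cleanup paths in
file_reader.c (illustrative only, not part of the patch):

static long process_vma_released(struct task_struct *task,
				 struct vm_area_struct *vma, void *data)
{
	struct bpf_dynptr dynptr;
	char tmp[16];
	long ret;

	if (!vma->vm_file)
		return 1;

	ret = bpf_dynptr_from_file(vma->vm_file, 0, &dynptr);
	if (!ret)
		ret = bpf_dynptr_read(tmp, sizeof(tmp), &dynptr, 0, 0);
	/* releasing the reference on every path after the constructor was
	 * called is exactly what the fail test above omits */
	bpf_dynptr_file_discard(&dynptr);
	return ret ? 1 : 0;
}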