diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ea2ed6771cc60..9da7460e078cd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -663,6 +663,9 @@ int map_check_no_btf(const struct bpf_map *map, bool bpf_map_meta_equal(const struct bpf_map *meta0, const struct bpf_map *meta1); +int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags, + struct bpf_dynptr *ptr__uninit); + extern const struct bpf_map_ops bpf_map_offload_ops; /* bpf_type_flag contains a set of flags that are applicable to the values of @@ -785,12 +788,15 @@ enum bpf_type_flag { /* DYNPTR points to skb_metadata_end()-skb_metadata_len() */ DYNPTR_TYPE_SKB_META = BIT(19 + BPF_BASE_TYPE_BITS), + /* DYNPTR points to file */ + DYNPTR_TYPE_FILE = BIT(20 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; #define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \ - | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META) + | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META | DYNPTR_TYPE_FILE) /* Max number of base types. */ #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) @@ -1378,21 +1384,23 @@ enum bpf_dynptr_type { BPF_DYNPTR_TYPE_XDP, /* Points to skb_metadata_end()-skb_metadata_len() */ BPF_DYNPTR_TYPE_SKB_META, + /* Underlying data is a file */ + BPF_DYNPTR_TYPE_FILE, }; -int bpf_dynptr_check_size(u32 size); -u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); -const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len); -void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len); +int bpf_dynptr_check_size(u64 size); +u64 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); +const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u64 len); +void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u64 len); bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr); -int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, - void *src, u32 len, u64 flags); -void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset, - void *buffer__opt, u32 buffer__szk); +int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u64 offset, + void *src, u64 len, u64 flags); +void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset, + void *buffer__opt, u64 buffer__szk); -static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len) +static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u64 offset, u64 len) { - u32 size = __bpf_dynptr_size(ptr); + u64 size = __bpf_dynptr_size(ptr); if (len > size || offset > size - len) return -E2BIG; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 4c497e839526a..6078d5e9b5356 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -581,6 +581,8 @@ struct bpf_insn_aux_data { u32 scc; /* registers alive before this instruction. */ u16 live_regs_before; + /* kfunc is called in sleepable context */ + bool kfunc_in_sleepable_ctx; }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ diff --git a/include/linux/freader.h b/include/linux/freader.h new file mode 100644 index 0000000000000..a08ea9f59945a --- /dev/null +++ b/include/linux/freader.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_FREADER_H +#define _LINUX_FREADER_H + +#include + +struct freader { + void *buf; + u32 buf_sz; + int err; + union { + struct { + struct file *file; + struct folio *folio; + void *addr; + loff_t folio_off; + bool may_fault; + }; + struct { + const char *data; + u64 data_sz; + }; + }; +}; + +void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz, + struct file *file, bool may_fault); +void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz); +const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz); +void freader_cleanup(struct freader *r); +int freader_err(struct freader *r); +#endif diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index c9fab9a356dfc..f452e22333fef 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "../../lib/kstrtox.h" @@ -1657,6 +1658,13 @@ static const struct bpf_func_proto bpf_kptr_xchg_proto = { .arg2_btf_id = BPF_PTR_POISON, }; +struct bpf_dynptr_file_impl { + struct freader freader; + /* 64 bit offset and size overriding 32 bit ones in bpf_dynptr_kern */ + u64 offset; + u64 size; +}; + /* Since the upper 8 bits of dynptr->size is reserved, the * maximum supported size is 2^24 - 1. */ @@ -1685,23 +1693,65 @@ static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *pt return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT; } -u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr) +u64 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr) { + if (bpf_dynptr_get_type(ptr) == BPF_DYNPTR_TYPE_FILE) { + struct bpf_dynptr_file_impl *df = ptr->data; + + return df->size; + } + return ptr->size & DYNPTR_SIZE_MASK; } -static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u32 new_size) +static void bpf_dynptr_advance_offset(struct bpf_dynptr_kern *ptr, u64 off) +{ + if (bpf_dynptr_get_type(ptr) == BPF_DYNPTR_TYPE_FILE) { + struct bpf_dynptr_file_impl *df = ptr->data; + + df->offset += off; + return; + } + ptr->offset += off; +} + +static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u64 new_size) { u32 metadata = ptr->size & ~DYNPTR_SIZE_MASK; - ptr->size = new_size | metadata; + if (bpf_dynptr_get_type(ptr) == BPF_DYNPTR_TYPE_FILE) { + struct bpf_dynptr_file_impl *df = ptr->data; + + df->size = new_size; + return; + } + ptr->size = (u32)new_size | metadata; } -int bpf_dynptr_check_size(u32 size) +int bpf_dynptr_check_size(u64 size) { return size > DYNPTR_MAX_SIZE ? -E2BIG : 0; } +static int bpf_file_fetch_bytes(struct bpf_dynptr_file_impl *df, u64 offset, void *buf, u64 len) +{ + const void *ptr; + + if (!buf || len == 0) + return -EINVAL; + + df->freader.buf = buf; + df->freader.buf_sz = len; + ptr = freader_fetch(&df->freader, offset + df->offset, len); + if (!ptr) + return df->freader.err; + + if (ptr != buf) /* Force copying into the buffer */ + memcpy(buf, ptr, len); + + return 0; +} + void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, enum bpf_dynptr_type type, u32 offset, u32 size) { @@ -1751,8 +1801,8 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = { .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT | MEM_WRITE, }; -static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *src, - u32 offset, u64 flags) +static int __bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr_kern *src, + u64 offset, u64 flags) { enum bpf_dynptr_type type; int err; @@ -1782,14 +1832,16 @@ static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *s case BPF_DYNPTR_TYPE_SKB_META: memmove(dst, bpf_skb_meta_pointer(src->data, src->offset + offset), len); return 0; + case BPF_DYNPTR_TYPE_FILE: + return bpf_file_fetch_bytes(src->data, offset, dst, len); default: WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type); return -EFAULT; } } -BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src, - u32, offset, u64, flags) +BPF_CALL_5(bpf_dynptr_read, void *, dst, u64, len, const struct bpf_dynptr_kern *, src, + u64, offset, u64, flags) { return __bpf_dynptr_read(dst, len, src, offset, flags); } @@ -1805,8 +1857,8 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = { .arg5_type = ARG_ANYTHING, }; -int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src, - u32 len, u64 flags) +int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u64 offset, void *src, + u64 len, u64 flags) { enum bpf_dynptr_type type; int err; @@ -2177,6 +2229,35 @@ void bpf_rb_root_free(const struct btf_field *field, void *rb_root, } } +enum bpf_is_sleepable { + MAY_SLEEP, + MAY_NOT_SLEEP, +}; + +static int make_file_dynptr(struct file *file, u32 flags, enum bpf_is_sleepable sleepable, + struct bpf_dynptr_kern *ptr) +{ + struct bpf_dynptr_file_impl *state; + + /* flags is currently unsupported */ + if (flags) { + bpf_dynptr_set_null(ptr); + return -EINVAL; + } + + state = bpf_mem_alloc(&bpf_global_ma, sizeof(struct bpf_dynptr_file_impl)); + if (!state) { + bpf_dynptr_set_null(ptr); + return -ENOMEM; + } + state->offset = 0; + state->size = U64_MAX; /* Don't restrict size, as file may change anyways */ + freader_init_from_file(&state->freader, NULL, 0, file, sleepable == MAY_SLEEP); + bpf_dynptr_init(ptr, state, BPF_DYNPTR_TYPE_FILE, 0, 0); + bpf_dynptr_set_rdonly(ptr); + return 0; +} + __bpf_kfunc_start_defs(); __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) @@ -2681,12 +2762,12 @@ __bpf_kfunc struct task_struct *bpf_task_from_vpid(s32 vpid) * provided buffer, with its contents containing the data, if unable to obtain * direct pointer) */ -__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset, - void *buffer__opt, u32 buffer__szk) +__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u64 offset, + void *buffer__opt, u64 buffer__szk) { const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; enum bpf_dynptr_type type; - u32 len = buffer__szk; + u64 len = buffer__szk; int err; if (!ptr->data) @@ -2720,6 +2801,9 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset, } case BPF_DYNPTR_TYPE_SKB_META: return bpf_skb_meta_pointer(ptr->data, ptr->offset + offset); + case BPF_DYNPTR_TYPE_FILE: + err = bpf_file_fetch_bytes(ptr->data, offset, buffer__opt, buffer__szk); + return err ? NULL : buffer__opt; default: WARN_ONCE(true, "unknown dynptr type %d\n", type); return NULL; @@ -2768,8 +2852,8 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset, * provided buffer, with its contents containing the data, if unable to obtain * direct pointer) */ -__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset, - void *buffer__opt, u32 buffer__szk) +__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset, + void *buffer__opt, u64 buffer__szk) { const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; @@ -2801,10 +2885,10 @@ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset, return bpf_dynptr_slice(p, offset, buffer__opt, buffer__szk); } -__bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u32 start, u32 end) +__bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u64 start, u64 end) { struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; - u32 size; + u64 size; if (!ptr->data || start > end) return -EINVAL; @@ -2814,7 +2898,7 @@ __bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u32 start, u32 end if (start > size || end > size) return -ERANGE; - ptr->offset += start; + bpf_dynptr_advance_offset(ptr, start); bpf_dynptr_set_size(ptr, end - start); return 0; @@ -2837,7 +2921,7 @@ __bpf_kfunc bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *p) return __bpf_dynptr_is_rdonly(ptr); } -__bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr *p) +__bpf_kfunc u64 bpf_dynptr_size(const struct bpf_dynptr *p) { struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; @@ -2874,14 +2958,14 @@ __bpf_kfunc int bpf_dynptr_clone(const struct bpf_dynptr *p, * Copies data from source dynptr to destination dynptr. * Returns 0 on success; negative error, otherwise. */ -__bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off, - struct bpf_dynptr *src_ptr, u32 src_off, u32 size) +__bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u64 dst_off, + struct bpf_dynptr *src_ptr, u64 src_off, u64 size) { struct bpf_dynptr_kern *dst = (struct bpf_dynptr_kern *)dst_ptr; struct bpf_dynptr_kern *src = (struct bpf_dynptr_kern *)src_ptr; void *src_slice, *dst_slice; char buf[256]; - u32 off; + u64 off; src_slice = bpf_dynptr_slice(src_ptr, src_off, NULL, size); dst_slice = bpf_dynptr_slice_rdwr(dst_ptr, dst_off, NULL, size); @@ -2903,7 +2987,7 @@ __bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off, off = 0; while (off < size) { - u32 chunk_sz = min_t(u32, sizeof(buf), size - off); + u64 chunk_sz = min_t(u64, sizeof(buf), size - off); int err; err = __bpf_dynptr_read(buf, chunk_sz, src, src_off + off, 0); @@ -2929,10 +3013,10 @@ __bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off, * at @offset with the constant byte @val. * Returns 0 on success; negative error, otherwise. */ - __bpf_kfunc int bpf_dynptr_memset(struct bpf_dynptr *p, u32 offset, u32 size, u8 val) - { +__bpf_kfunc int bpf_dynptr_memset(struct bpf_dynptr *p, u64 offset, u64 size, u8 val) +{ struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; - u32 chunk_sz, write_off; + u64 chunk_sz, write_off; char buf[256]; void* slice; int err; @@ -2951,11 +3035,11 @@ __bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off, return err; /* Non-linear data under the dynptr, write from a local buffer */ - chunk_sz = min_t(u32, sizeof(buf), size); + chunk_sz = min_t(u64, sizeof(buf), size); memset(buf, val, chunk_sz); for (write_off = 0; write_off < size; write_off += chunk_sz) { - chunk_sz = min_t(u32, sizeof(buf), size - write_off); + chunk_sz = min_t(u64, sizeof(buf), size - write_off); err = __bpf_dynptr_write(ptr, offset + write_off, buf, chunk_sz, 0); if (err) return err; @@ -4199,6 +4283,30 @@ __bpf_kfunc int bpf_task_work_schedule_resume(struct task_struct *task, struct b return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_RESUME); } +__bpf_kfunc int bpf_dynptr_from_file(struct file *file, u32 flags, struct bpf_dynptr *ptr__uninit) +{ + return make_file_dynptr(file, flags, MAY_NOT_SLEEP, (struct bpf_dynptr_kern *)ptr__uninit); +} + +int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags, struct bpf_dynptr *ptr__uninit) +{ + return make_file_dynptr(file, flags, MAY_SLEEP, (struct bpf_dynptr_kern *)ptr__uninit); +} + +__bpf_kfunc int bpf_dynptr_file_discard(struct bpf_dynptr *dynptr) +{ + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)dynptr; + struct bpf_dynptr_file_impl *df; + + if (bpf_dynptr_get_type(ptr) == BPF_DYNPTR_TYPE_INVALID) + return 0; + + df = ptr->data; + freader_cleanup(&df->freader); + bpf_mem_free(&bpf_global_ma, df); + return 0; +} + __bpf_kfunc_end_defs(); static void bpf_task_work_cancel_scheduled(struct irq_work *irq_work) @@ -4374,6 +4482,8 @@ BTF_ID_FLAGS(func, bpf_cgroup_read_xattr, KF_RCU) BTF_ID_FLAGS(func, bpf_stream_vprintk, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_task_work_schedule_signal, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_task_work_schedule_resume, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_dynptr_from_file) +BTF_ID_FLAGS(func, bpf_dynptr_file_discard) BTF_KFUNCS_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { @@ -4414,7 +4524,7 @@ late_initcall(kfunc_init); /* Get a pointer to dynptr data up to len bytes for read only access. If * the dynptr doesn't have continuous data up to len bytes, return NULL. */ -const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len) +const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u64 len) { const struct bpf_dynptr *p = (struct bpf_dynptr *)ptr; @@ -4425,7 +4535,7 @@ const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len) * the dynptr doesn't have continuous data up to len bytes, or the dynptr * is read only, return NULL. */ -void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len) +void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u64 len) { if (__bpf_dynptr_is_rdonly(ptr)) return NULL; diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index f50533169cc34..70221aafc35c0 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -500,6 +500,8 @@ const char *dynptr_type_str(enum bpf_dynptr_type type) return "xdp"; case BPF_DYNPTR_TYPE_SKB_META: return "skb_meta"; + case BPF_DYNPTR_TYPE_FILE: + return "file"; case BPF_DYNPTR_TYPE_INVALID: return ""; default: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e892df386eed7..82762eab3f170 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -209,8 +209,6 @@ static void invalidate_non_owning_refs(struct bpf_verifier_env *env); static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env); static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg); -static void specialize_kfunc(struct bpf_verifier_env *env, - u32 func_id, u16 offset, unsigned long *addr); static bool is_trusted_reg(const struct bpf_reg_state *reg); static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) @@ -676,6 +674,8 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type) return BPF_DYNPTR_TYPE_XDP; case DYNPTR_TYPE_SKB_META: return BPF_DYNPTR_TYPE_SKB_META; + case DYNPTR_TYPE_FILE: + return BPF_DYNPTR_TYPE_FILE; default: return BPF_DYNPTR_TYPE_INVALID; } @@ -694,6 +694,8 @@ static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type) return DYNPTR_TYPE_XDP; case BPF_DYNPTR_TYPE_SKB_META: return DYNPTR_TYPE_SKB_META; + case BPF_DYNPTR_TYPE_FILE: + return DYNPTR_TYPE_FILE; default: return 0; } @@ -701,7 +703,7 @@ static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type) static bool dynptr_type_refcounted(enum bpf_dynptr_type type) { - return type == BPF_DYNPTR_TYPE_RINGBUF; + return type == BPF_DYNPTR_TYPE_RINGBUF || type == BPF_DYNPTR_TYPE_FILE; } static void __mark_dynptr_reg(struct bpf_reg_state *reg, @@ -812,6 +814,10 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re struct bpf_func_state *state = func(env, reg); int spi, ref_obj_id, i; + if (reg->type == CONST_PTR_TO_DYNPTR) { + verifier_bug(env, "CONST_PTR_TO_DYNPTR cannot be released"); + return -EFAULT; + } spi = dynptr_get_spi(env, reg); if (spi < 0) return spi; @@ -3097,6 +3103,11 @@ struct bpf_kfunc_btf_tab { u32 nr_descs; }; +static unsigned long kfunc_call_imm(unsigned long func_addr, u32 func_id); + +static void specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc, + int insn_idx); + static int kfunc_desc_cmp_by_id_off(const void *a, const void *b) { const struct bpf_kfunc_desc *d0 = a; @@ -3114,7 +3125,7 @@ static int kfunc_btf_cmp_by_off(const void *a, const void *b) return d0->offset - d1->offset; } -static const struct bpf_kfunc_desc * +static struct bpf_kfunc_desc * find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset) { struct bpf_kfunc_desc desc = { @@ -3237,6 +3248,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) { const struct btf_type *func, *func_proto; struct bpf_kfunc_btf_tab *btf_tab; + struct btf_func_model func_model; struct bpf_kfunc_desc_tab *tab; struct bpf_prog_aux *prog_aux; struct bpf_kfunc_desc *desc; @@ -3326,19 +3338,6 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) func_name); return -EINVAL; } - specialize_kfunc(env, func_id, offset, &addr); - - if (bpf_jit_supports_far_kfunc_call()) { - call_imm = func_id; - } else { - call_imm = BPF_CALL_IMM(addr); - /* Check whether the relative offset overflows desc->imm */ - if ((unsigned long)(s32)call_imm != call_imm) { - verbose(env, "address of kernel function %s is out of range\n", - func_name); - return -EINVAL; - } - } if (bpf_dev_bound_kfunc_id(func_id)) { err = bpf_dev_bound_kfunc_check(&env->log, prog_aux); @@ -3346,18 +3345,29 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) return err; } + err = btf_distill_func_proto(&env->log, desc_btf, + func_proto, func_name, + &func_model); + if (err) + return err; + + call_imm = kfunc_call_imm(addr, func_id); + /* Check whether the relative offset overflows desc->imm */ + if ((unsigned long)(s32)call_imm != call_imm) { + verbose(env, "address of kernel function %s is out of range\n", + func_name); + return -EINVAL; + } + desc = &tab->descs[tab->nr_descs++]; desc->func_id = func_id; - desc->imm = call_imm; desc->offset = offset; desc->addr = addr; - err = btf_distill_func_proto(&env->log, desc_btf, - func_proto, func_name, - &desc->func_model); - if (!err) - sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), - kfunc_desc_cmp_by_id_off, NULL); - return err; + desc->imm = call_imm; + desc->func_model = func_model; + sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), + kfunc_desc_cmp_by_id_off, NULL); + return 0; } static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b) @@ -11487,10 +11497,6 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn * is safe to do directly. */ if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) { - if (regs[meta.release_regno].type == CONST_PTR_TO_DYNPTR) { - verifier_bug(env, "CONST_PTR_TO_DYNPTR cannot be released"); - return -EFAULT; - } err = unmark_stack_slots_dynptr(env, ®s[meta.release_regno]); } else if (func_id == BPF_FUNC_kptr_xchg && meta.ref_obj_id) { u32 ref_obj_id = meta.ref_obj_id; @@ -12258,6 +12264,8 @@ enum special_kfunc_type { KF_bpf_res_spin_unlock, KF_bpf_res_spin_lock_irqsave, KF_bpf_res_spin_unlock_irqrestore, + KF_bpf_dynptr_from_file, + KF_bpf_dynptr_file_discard, KF___bpf_trap, KF_bpf_task_work_schedule_signal, KF_bpf_task_work_schedule_resume, @@ -12330,6 +12338,8 @@ BTF_ID(func, bpf_res_spin_lock) BTF_ID(func, bpf_res_spin_unlock) BTF_ID(func, bpf_res_spin_lock_irqsave) BTF_ID(func, bpf_res_spin_unlock_irqrestore) +BTF_ID(func, bpf_dynptr_from_file) +BTF_ID(func, bpf_dynptr_file_discard) BTF_ID(func, __bpf_trap) BTF_ID(func, bpf_task_work_schedule_signal) BTF_ID(func, bpf_task_work_schedule_resume) @@ -13293,6 +13303,11 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ dynptr_arg_type |= DYNPTR_TYPE_XDP; } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb_meta]) { dynptr_arg_type |= DYNPTR_TYPE_SKB_META; + } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) { + dynptr_arg_type |= DYNPTR_TYPE_FILE; + } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_file_discard]) { + dynptr_arg_type |= DYNPTR_TYPE_FILE; + meta->release_regno = regno; } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] && (dynptr_arg_type & MEM_UNINIT)) { enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type; @@ -13819,6 +13834,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, insn_aux = &env->insn_aux_data[insn_idx]; insn_aux->is_iter_next = is_iter_next_kfunc(&meta); + insn_aux->kfunc_in_sleepable_ctx = in_sleepable(env); if (!insn->off && (insn->imm == special_kfunc_list[KF_bpf_res_spin_lock] || @@ -13969,7 +13985,12 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now. */ if (meta.release_regno) { - err = release_reference(env, regs[meta.release_regno].ref_obj_id); + struct bpf_reg_state *reg = ®s[meta.release_regno]; + + if (meta.initialized_dynptr.ref_obj_id) + err = unmark_stack_slots_dynptr(env, reg); + else + err = release_reference(env, reg->ref_obj_id); if (err) { verbose(env, "kfunc %s#%d reference has not been acquired before\n", func_name, meta.func_id); @@ -21804,21 +21825,33 @@ static int fixup_call_args(struct bpf_verifier_env *env) return err; } +static unsigned long kfunc_call_imm(unsigned long func_addr, u32 func_id) +{ + if (bpf_jit_supports_far_kfunc_call()) + return func_id; + + return BPF_CALL_IMM(func_addr); +} + /* replace a generic kfunc with a specialized version if necessary */ -static void specialize_kfunc(struct bpf_verifier_env *env, - u32 func_id, u16 offset, unsigned long *addr) +static void specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc, + int insn_idx) { + struct bpf_prog_aux *prog_aux = env->prog->aux; + struct bpf_kfunc_desc_tab *tab = prog_aux->kfunc_tab; struct bpf_prog *prog = env->prog; bool seen_direct_write; void *xdp_kfunc; bool is_rdonly; + u32 func_id = desc->func_id; + u16 offset = desc->offset; + unsigned long call_imm; + unsigned long addr = 0; if (bpf_dev_bound_kfunc_id(func_id)) { xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id); - if (xdp_kfunc) { - *addr = (unsigned long)xdp_kfunc; - return; - } + if (xdp_kfunc) + addr = (unsigned long)xdp_kfunc; /* fallback to default kfunc when not supported by netdev */ } @@ -21830,21 +21863,31 @@ static void specialize_kfunc(struct bpf_verifier_env *env, is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE); if (is_rdonly) - *addr = (unsigned long)bpf_dynptr_from_skb_rdonly; + addr = (unsigned long)bpf_dynptr_from_skb_rdonly; /* restore env->seen_direct_write to its original value, since * may_access_direct_pkt_data mutates it */ env->seen_direct_write = seen_direct_write; - } - - if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr] && - bpf_lsm_has_d_inode_locked(prog)) - *addr = (unsigned long)bpf_set_dentry_xattr_locked; + } else if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr]) { + if (bpf_lsm_has_d_inode_locked(prog)) + addr = (unsigned long)bpf_set_dentry_xattr_locked; + } else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr]) { + if (bpf_lsm_has_d_inode_locked(prog)) + addr = (unsigned long)bpf_remove_dentry_xattr_locked; + } else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) { + if (env->insn_aux_data[insn_idx].kfunc_in_sleepable_ctx) + addr = (unsigned long)bpf_dynptr_from_file_sleepable; + } + + if (!addr) /* Nothing to patch with */ + return; - if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr] && - bpf_lsm_has_d_inode_locked(prog)) - *addr = (unsigned long)bpf_remove_dentry_xattr_locked; + call_imm = kfunc_call_imm(addr, func_id); + desc->imm = call_imm; + desc->addr = addr; + sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), + kfunc_desc_cmp_by_id_off, NULL); } static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux, @@ -21867,7 +21910,7 @@ static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux, static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, struct bpf_insn *insn_buf, int insn_idx, int *cnt) { - const struct bpf_kfunc_desc *desc; + struct bpf_kfunc_desc *desc; if (!insn->imm) { verbose(env, "invalid kernel function call not eliminated in verifier pass\n"); @@ -21887,6 +21930,8 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return -EFAULT; } + specialize_kfunc(env, desc, insn_idx); + if (!bpf_jit_supports_far_kfunc_call()) insn->imm = BPF_CALL_IMM(desc->addr); if (insn->off) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 8f23f5273babf..7cc4f2e05ed2c 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -3372,13 +3372,13 @@ typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struc * direct calls into all the specific callback implementations * (copy_user_data_sleepable, copy_user_data_nofault, and so on) */ -static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 doff, u32 size, +static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u64 doff, u64 size, const void *unsafe_src, copy_fn_t str_copy_fn, struct task_struct *tsk) { struct bpf_dynptr_kern *dst; - u32 chunk_sz, off; + u64 chunk_sz, off; void *dst_slice; int cnt, err; char buf[256]; @@ -3392,7 +3392,7 @@ static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 do return -E2BIG; for (off = 0; off < size; off += chunk_sz - 1) { - chunk_sz = min_t(u32, sizeof(buf), size - off); + chunk_sz = min_t(u64, sizeof(buf), size - off); /* Expect str_copy_fn to return count of copied bytes, including * zero terminator. Next iteration increment off by chunk_sz - 1 to * overwrite NUL. @@ -3409,14 +3409,14 @@ static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 do return off; } -static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u32 doff, - u32 size, const void *unsafe_src, +static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u64 doff, + u64 size, const void *unsafe_src, copy_fn_t copy_fn, struct task_struct *tsk) { struct bpf_dynptr_kern *dst; void *dst_slice; char buf[256]; - u32 off, chunk_sz; + u64 off, chunk_sz; int err; dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size); @@ -3428,7 +3428,7 @@ static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u32 return -E2BIG; for (off = 0; off < size; off += chunk_sz) { - chunk_sz = min_t(u32, sizeof(buf), size - off); + chunk_sz = min_t(u64, sizeof(buf), size - off); err = copy_fn(buf, unsafe_src + off, chunk_sz, tsk); if (err) return err; @@ -3514,58 +3514,58 @@ __bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid return bpf_send_signal_common(sig, type, task, value); } -__bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u32 off, - u32 size, const void __user *unsafe_ptr__ign) +__bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void __user *unsafe_ptr__ign) { return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign, copy_user_data_nofault, NULL); } -__bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u32 off, - u32 size, const void *unsafe_ptr__ign) +__bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void *unsafe_ptr__ign) { return __bpf_dynptr_copy(dptr, off, size, unsafe_ptr__ign, copy_kernel_data_nofault, NULL); } -__bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u32 off, - u32 size, const void __user *unsafe_ptr__ign) +__bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void __user *unsafe_ptr__ign) { return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign, copy_user_str_nofault, NULL); } -__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u32 off, - u32 size, const void *unsafe_ptr__ign) +__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void *unsafe_ptr__ign) { return __bpf_dynptr_copy_str(dptr, off, size, unsafe_ptr__ign, copy_kernel_str_nofault, NULL); } -__bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u32 off, - u32 size, const void __user *unsafe_ptr__ign) +__bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void __user *unsafe_ptr__ign) { return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign, copy_user_data_sleepable, NULL); } -__bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u32 off, - u32 size, const void __user *unsafe_ptr__ign) +__bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void __user *unsafe_ptr__ign) { return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign, copy_user_str_sleepable, NULL); } -__bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u32 off, - u32 size, const void __user *unsafe_ptr__ign, +__bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void __user *unsafe_ptr__ign, struct task_struct *tsk) { return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign, copy_user_data_sleepable, tsk); } -__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u32 off, - u32 size, const void __user *unsafe_ptr__ign, +__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u64 off, + u64 size, const void __user *unsafe_ptr__ign, struct task_struct *tsk) { return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign, diff --git a/lib/Makefile b/lib/Makefile index 392ff808c9b90..4761885228fae 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -40,7 +40,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ nmi_backtrace.o win_minmax.o memcat_p.o \ - buildid.o objpool.o iomem_copy.o sys_info.o + buildid.o objpool.o iomem_copy.o sys_info.o freader.o lib-$(CONFIG_UNION_FIND) += union_find.o lib-$(CONFIG_PRINTK) += dump_stack.o diff --git a/lib/buildid.c b/lib/buildid.c index c4b0f376fb341..e770dc4b73d33 100644 --- a/lib/buildid.c +++ b/lib/buildid.c @@ -6,155 +6,12 @@ #include #include #include +#include #define BUILD_ID 3 #define MAX_PHDR_CNT 256 -struct freader { - void *buf; - u32 buf_sz; - int err; - union { - struct { - struct file *file; - struct folio *folio; - void *addr; - loff_t folio_off; - bool may_fault; - }; - struct { - const char *data; - u64 data_sz; - }; - }; -}; - -static void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz, - struct file *file, bool may_fault) -{ - memset(r, 0, sizeof(*r)); - r->buf = buf; - r->buf_sz = buf_sz; - r->file = file; - r->may_fault = may_fault; -} - -static void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz) -{ - memset(r, 0, sizeof(*r)); - r->data = data; - r->data_sz = data_sz; -} - -static void freader_put_folio(struct freader *r) -{ - if (!r->folio) - return; - kunmap_local(r->addr); - folio_put(r->folio); - r->folio = NULL; -} - -static int freader_get_folio(struct freader *r, loff_t file_off) -{ - /* check if we can just reuse current folio */ - if (r->folio && file_off >= r->folio_off && - file_off < r->folio_off + folio_size(r->folio)) - return 0; - - freader_put_folio(r); - - /* reject secretmem folios created with memfd_secret() */ - if (secretmem_mapping(r->file->f_mapping)) - return -EFAULT; - - r->folio = filemap_get_folio(r->file->f_mapping, file_off >> PAGE_SHIFT); - - /* if sleeping is allowed, wait for the page, if necessary */ - if (r->may_fault && (IS_ERR(r->folio) || !folio_test_uptodate(r->folio))) { - filemap_invalidate_lock_shared(r->file->f_mapping); - r->folio = read_cache_folio(r->file->f_mapping, file_off >> PAGE_SHIFT, - NULL, r->file); - filemap_invalidate_unlock_shared(r->file->f_mapping); - } - - if (IS_ERR(r->folio) || !folio_test_uptodate(r->folio)) { - if (!IS_ERR(r->folio)) - folio_put(r->folio); - r->folio = NULL; - return -EFAULT; - } - - r->folio_off = folio_pos(r->folio); - r->addr = kmap_local_folio(r->folio, 0); - - return 0; -} - -static const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz) -{ - size_t folio_sz; - - /* provided internal temporary buffer should be sized correctly */ - if (WARN_ON(r->buf && sz > r->buf_sz)) { - r->err = -E2BIG; - return NULL; - } - - if (unlikely(file_off + sz < file_off)) { - r->err = -EOVERFLOW; - return NULL; - } - - /* working with memory buffer is much more straightforward */ - if (!r->buf) { - if (file_off + sz > r->data_sz) { - r->err = -ERANGE; - return NULL; - } - return r->data + file_off; - } - - /* fetch or reuse folio for given file offset */ - r->err = freader_get_folio(r, file_off); - if (r->err) - return NULL; - - /* if requested data is crossing folio boundaries, we have to copy - * everything into our local buffer to keep a simple linear memory - * access interface - */ - folio_sz = folio_size(r->folio); - if (file_off + sz > r->folio_off + folio_sz) { - int part_sz = r->folio_off + folio_sz - file_off; - - /* copy the part that resides in the current folio */ - memcpy(r->buf, r->addr + (file_off - r->folio_off), part_sz); - - /* fetch next folio */ - r->err = freader_get_folio(r, r->folio_off + folio_sz); - if (r->err) - return NULL; - - /* copy the rest of requested data */ - memcpy(r->buf + part_sz, r->addr, sz - part_sz); - - return r->buf; - } - - /* if data fits in a single folio, just return direct pointer */ - return r->addr + (file_off - r->folio_off); -} - -static void freader_cleanup(struct freader *r) -{ - if (!r->buf) - return; /* non-file-backed mode */ - - freader_put_folio(r); -} - /* * Parse build id from the note segment. This logic can be shared between * 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are diff --git a/lib/freader.c b/lib/freader.c new file mode 100644 index 0000000000000..f73b594a137d9 --- /dev/null +++ b/lib/freader.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include + +void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz, + struct file *file, bool may_fault) +{ + memset(r, 0, sizeof(*r)); + r->buf = buf; + r->buf_sz = buf_sz; + r->file = file; + r->may_fault = may_fault; +} + +void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz) +{ + memset(r, 0, sizeof(*r)); + r->data = data; + r->data_sz = data_sz; +} + +static void freader_put_folio(struct freader *r) +{ + if (!r->folio) + return; + kunmap_local(r->addr); + folio_put(r->folio); + r->folio = NULL; +} + +static int freader_get_folio(struct freader *r, loff_t file_off) +{ + /* check if we can just reuse current folio */ + if (r->folio && file_off >= r->folio_off && + file_off < r->folio_off + folio_size(r->folio)) + return 0; + + freader_put_folio(r); + + /* reject secretmem folios created with memfd_secret() */ + if (secretmem_mapping(r->file->f_mapping)) + return -EFAULT; + + r->folio = filemap_get_folio(r->file->f_mapping, file_off >> PAGE_SHIFT); + + /* if sleeping is allowed, wait for the page, if necessary */ + if (r->may_fault && (IS_ERR(r->folio) || !folio_test_uptodate(r->folio))) { + filemap_invalidate_lock_shared(r->file->f_mapping); + r->folio = read_cache_folio(r->file->f_mapping, file_off >> PAGE_SHIFT, + NULL, r->file); + filemap_invalidate_unlock_shared(r->file->f_mapping); + } + + if (IS_ERR(r->folio) || !folio_test_uptodate(r->folio)) { + if (!IS_ERR(r->folio)) + folio_put(r->folio); + r->folio = NULL; + return -EFAULT; + } + + r->folio_off = folio_pos(r->folio); + r->addr = kmap_local_folio(r->folio, 0); + + return 0; +} + +const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz) +{ + size_t folio_sz; + + /* provided internal temporary buffer should be sized correctly */ + if (WARN_ON(r->buf && sz > r->buf_sz)) { + r->err = -E2BIG; + return NULL; + } + + if (unlikely(file_off + sz < file_off)) { + r->err = -EOVERFLOW; + return NULL; + } + + /* working with memory buffer is much more straightforward */ + if (!r->buf) { + if (file_off + sz > r->data_sz) { + r->err = -ERANGE; + return NULL; + } + return r->data + file_off; + } + + /* fetch or reuse folio for given file offset */ + r->err = freader_get_folio(r, file_off); + if (r->err) + return NULL; + + /* if requested data is crossing folio boundaries, we have to copy + * everything into our local buffer to keep a simple linear memory + * access interface + */ + folio_sz = folio_size(r->folio); + if (file_off + sz > r->folio_off + folio_sz) { + int part_sz = r->folio_off + folio_sz - file_off; + size_t dst_off = 0, src_off = file_off - r->folio_off; + + do { + memcpy(r->buf + dst_off, r->addr + src_off, part_sz); + sz -= part_sz; + if (sz == 0) + break; + /* fetch next folio */ + r->err = freader_get_folio(r, r->folio_off + folio_sz); + if (r->err) + return NULL; + folio_sz = folio_size(r->folio); + src_off = 0; /* read from the beginning, starting second folio */ + dst_off += part_sz; + part_sz = min_t(u64, sz, folio_sz); + } while (sz); + + return r->buf; + } + + /* if data fits in a single folio, just return direct pointer */ + return r->addr + (file_off - r->folio_off); +} + +void freader_cleanup(struct freader *r) +{ + if (!r->buf) + return; /* non-file-backed mode */ + + freader_put_folio(r); +} diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 794d44d19c886..60405477deb61 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -28,8 +28,8 @@ extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags, * Either a direct pointer to the dynptr data or a pointer to the user-provided * buffer if unable to obtain a direct pointer */ -extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset, - void *buffer, __u32 buffer__szk) __ksym __weak; +extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u64 offset, + void *buffer, __u64 buffer__szk) __ksym __weak; /* Description * Obtain a read-write pointer to the dynptr's data @@ -37,13 +37,13 @@ extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset, * Either a direct pointer to the dynptr data or a pointer to the user-provided * buffer if unable to obtain a direct pointer */ -extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset, - void *buffer, __u32 buffer__szk) __ksym __weak; +extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u64 offset, + void *buffer, __u64 buffer__szk) __ksym __weak; -extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym __weak; +extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u64 start, __u64 end) __ksym __weak; extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym __weak; extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym __weak; -extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak; +extern __u64 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak; extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym __weak; /* Description diff --git a/tools/testing/selftests/bpf/prog_tests/file_reader.c b/tools/testing/selftests/bpf/prog_tests/file_reader.c new file mode 100644 index 0000000000000..b314ad4326f18 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/file_reader.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include "file_reader.skel.h" +#include "file_reader_fail.skel.h" + +__thread int tls_counter; +const char *user_ptr = "hello world"; +char file_contents[256000]; + +enum file_reader_test { + VALIDATE_LARGE_FILE = 1, + SEARCH_ELF = 2, +}; + +static int initialize_file_contents(void) +{ + int fd; + ssize_t n; + int err = 0; + + fd = open("/proc/self/exe", O_RDONLY); + if (!ASSERT_GT(fd, 0, "Open /proc/self/exe\n")) + return 1; + + n = read(fd, file_contents, sizeof(file_contents)); + if (!ASSERT_EQ(n, sizeof(file_contents), "Read /proc/self/exe\n")) + err = 1; + + posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED); + close(fd); + return err; +} + +static void run_test(enum file_reader_test test_type) +{ + struct file_reader *skel; + int err; + char data[256]; + LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, .repeat = 1, + .data_size_in = sizeof(data)); + + skel = file_reader__open(); + if (!ASSERT_OK_PTR(skel, "file_reader__open")) + return; + + skel->bss->user_ptr = (void *)user_ptr; + skel->bss->user_buf = file_contents; + skel->rodata->user_buf_sz = sizeof(file_contents); + skel->rodata->test_type = test_type; + + err = file_reader__load(skel); + if (!ASSERT_OK(err, "file_reader__load")) + return; + + err = initialize_file_contents(); + if (!ASSERT_OK(err, "initialize file contents")) + goto cleanup; + + err = file_reader__attach(skel); + if (!ASSERT_OK(err, "file_reader__attach")) + goto cleanup; + + getpid(); + + ASSERT_EQ(skel->bss->err, 0, "err"); +cleanup: + file_reader__destroy(skel); +} + +void test_file_reader(void) +{ + if (test__start_subtest("test_large_file")) + run_test(VALIDATE_LARGE_FILE); + if (test__start_subtest("test_search_elf")) + run_test(SEARCH_ELF); + + RUN_TESTS(file_reader_fail); +} diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index 127dea342e5a6..e0d672d93adf8 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -914,8 +914,8 @@ void *user_ptr; char expected_str[384]; __u32 test_len[7] = {0/* placeholder */, 0, 1, 2, 255, 256, 257}; -typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u32 off, - u32 size, const void *unsafe_ptr); +typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u64 off, + u64 size, const void *unsafe_ptr); /* Returns the offset just before the end of the maximum sized xdp fragment. * Any write larger than 32 bytes will be split between 2 fragments. @@ -1106,16 +1106,16 @@ int test_copy_from_user_str_dynptr(void *ctx) return 0; } -static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u32 off, - u32 size, const void *unsafe_ptr) +static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u64 off, + u64 size, const void *unsafe_ptr) { struct task_struct *task = bpf_get_current_task_btf(); return bpf_copy_from_user_task_dynptr(dptr, off, size, unsafe_ptr, task); } -static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u32 off, - u32 size, const void *unsafe_ptr) +static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u64 off, + u64 size, const void *unsafe_ptr) { struct task_struct *task = bpf_get_current_task_btf(); diff --git a/tools/testing/selftests/bpf/progs/file_reader.c b/tools/testing/selftests/bpf/progs/file_reader.c new file mode 100644 index 0000000000000..9dd9a68f3563a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/file_reader.c @@ -0,0 +1,241 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include +#include "bpf_misc.h" + +#define ELFMAG "\177ELF" +#define SELFMAG 4 +#define ET_NONE 0 +#define ET_REL 1 +#define ET_EXEC 2 +#define ET_DYN 3 +#define ET_CORE 4 +#define ET_LOPROC 0xff00 +#define ET_HIPROC 0xffff +#define EI_CLASS 4 +#define ELFCLASS32 1 +#define ELFCLASS64 2 +#define STT_TLS 6 + +#define ELF_ST_BIND(x) ((x) >> 4) +#define ELF_ST_TYPE(x) ((x) & 0xf) +#define ELF32_ST_BIND(x) ELF_ST_BIND(x) +#define ELF32_ST_TYPE(x) ELF_ST_TYPE(x) +#define ELF64_ST_BIND(x) ELF_ST_BIND(x) +#define ELF64_ST_TYPE(x) ELF_ST_TYPE(x) + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} arrmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 10000000); +} ringbuf SEC(".maps"); + +struct elem { + struct file *file; + struct bpf_task_work tw; +}; + +enum file_reader_test { + VALIDATE_LARGE_FILE = 1, + SEARCH_ELF = 2, +}; + +int err; +void *user_ptr; +char buf[1024]; +char *user_buf; +volatile const __u32 user_buf_sz; +volatile const __s32 test_type = -1; + +static int process_vma(struct task_struct *task, struct vm_area_struct *vma, void *data); +static int search_elf(struct file *file); +static int validate_large_file_read(struct file *file); +static int task_work_callback(struct bpf_map *map, void *key, void *value); + +SEC("raw_tp/sys_enter") +int on_getpid(void *ctx) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct elem *work; + int key = 0; + + work = bpf_map_lookup_elem(&arrmap, &key); + if (!work) { + err = 1; + return 0; + } + bpf_task_work_schedule_signal(task, &work->tw, &arrmap, task_work_callback, NULL); + return 0; +} + +static int task_work_callback(struct bpf_map *map, void *key, void *value) +{ + struct task_struct *task = bpf_get_current_task_btf(); + + bpf_find_vma(task, (unsigned long)user_ptr, process_vma, NULL, 0); + return 0; +} + +static int process_vma(struct task_struct *task, struct vm_area_struct *vma, void *data) +{ + switch (test_type) { + case VALIDATE_LARGE_FILE: + err = validate_large_file_read(vma->vm_file); + break; + case SEARCH_ELF: + err = search_elf(vma->vm_file); + break; + default: + err = 1; + } + return err; +} + +static int validate_large_file_read(struct file *file) +{ + struct bpf_dynptr dynptr; + int err, i; + char *rbuf1 = NULL, *rbuf2 = NULL; + + if (!file) { + err = 1; + return 1; + } + + err = bpf_dynptr_from_file(file, 0, &dynptr); + if (err) + goto cleanup_file; + + rbuf1 = bpf_ringbuf_reserve(&ringbuf, user_buf_sz, 0); + if (!rbuf1) + goto cleanup_file; + + rbuf2 = bpf_ringbuf_reserve(&ringbuf, user_buf_sz, 0); + if (!rbuf2) + goto cleanup_all; + + bpf_dynptr_read(rbuf1, user_buf_sz, &dynptr, 0, 0); + bpf_copy_from_user(rbuf2, user_buf_sz, user_buf); + /* Verify file contents read from BPF is the same as the one read from userspace */ + bpf_for(i, 0, user_buf_sz) { + if (i >= 256000 || rbuf1[i] != rbuf2[i]) { + err = 1; + break; + } + } + +cleanup_all: + if (rbuf1) + bpf_ringbuf_discard(rbuf1, 0); + if (rbuf2) + bpf_ringbuf_discard(rbuf2, 0); +cleanup_file: + bpf_dynptr_file_discard(&dynptr); + return err ? 1 : 0; +} + +/* Finds thread local variable `tls_counter` in this executable's ELF */ +static int search_elf(struct file *file) +{ + Elf64_Ehdr ehdr; + Elf64_Shdr shdrs; + Elf64_Shdr symtab, strtab, tmp; + const Elf64_Sym *symbol; + int count, off, i, e_shnum, e_shoff, e_shentsize, sections = 0; + const char *string; + struct bpf_dynptr dynptr; + const __u32 slen = 11; + static const char *needle = "tls_counter"; + + if (!file) { + err = 1; + return 1; + } + + err = bpf_dynptr_from_file(file, 0, &dynptr); + if (err) + goto fail; + + err = bpf_dynptr_read(&ehdr, sizeof(ehdr), &dynptr, 0, 0); + if (err) + goto fail; + + if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0) + goto fail; + + if (ehdr.e_type != ET_EXEC && ehdr.e_type != ET_DYN) + goto fail; + + if (ehdr.e_ident[EI_CLASS] != ELFCLASS64) + goto fail; + + e_shnum = ehdr.e_shnum; + e_shoff = ehdr.e_shoff; + e_shentsize = ehdr.e_shentsize; + + err = bpf_dynptr_read(&shdrs, sizeof(shdrs), &dynptr, + e_shoff + e_shentsize * ehdr.e_shstrndx, 0); + if (err) + goto fail; + + off = shdrs.sh_offset; + + __builtin_memset(&symtab, 0, sizeof(symtab)); + __builtin_memset(&strtab, 0, sizeof(strtab)); + bpf_for(i, 0, e_shnum) + { + err = bpf_dynptr_read(&tmp, sizeof(Elf64_Shdr), &dynptr, e_shoff + e_shentsize * i, + 0); + if (err) + goto fail; + + string = bpf_dynptr_slice(&dynptr, off + tmp.sh_name, buf, slen); + if (!string) + goto fail; + + if (bpf_strncmp(string, slen, ".symtab") == 0) { + symtab = tmp; + ++sections; + } else if (bpf_strncmp(string, slen, ".strtab") == 0) { + strtab = tmp; + ++sections; + } + if (sections == 2) + break; + } + if (sections != 2) + goto fail; + + count = symtab.sh_size / sizeof(Elf64_Sym); + bpf_for(i, 0, count) + { + symbol = bpf_dynptr_slice(&dynptr, symtab.sh_offset + sizeof(Elf64_Sym) * i, buf, + sizeof(Elf64_Sym)); + if (!symbol) + goto fail; + if (symbol->st_name == 0 || ELF64_ST_TYPE(symbol->st_info) != STT_TLS) + continue; + string = bpf_dynptr_slice(&dynptr, strtab.sh_offset + symbol->st_name, buf, slen); + if (!string) + goto fail; + if (bpf_strncmp(string, slen, needle) == 0) + goto success; + } +fail: + err = 1; +success: + bpf_dynptr_file_discard(&dynptr); + return err ? 1 : 0; +} diff --git a/tools/testing/selftests/bpf/progs/file_reader_fail.c b/tools/testing/selftests/bpf/progs/file_reader_fail.c new file mode 100644 index 0000000000000..449c4f9a1c745 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/file_reader_fail.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +int err; +void *user_ptr; + +char buf[256]; + +static long process_vma_unreleased_ref(struct task_struct *task, struct vm_area_struct *vma, + void *data) +{ + struct bpf_dynptr dynptr; + + if (!vma->vm_file) + return 1; + + err = bpf_dynptr_from_file(vma->vm_file, 0, &dynptr); + return err ? 1 : 0; +} + +SEC("fentry.s/" SYS_PREFIX "sys_nanosleep") +__failure __msg("Unreleased reference id=") int on_nanosleep_unreleased_ref(void *ctx) +{ + struct task_struct *task = bpf_get_current_task_btf(); + + bpf_find_vma(task, (unsigned long)user_ptr, process_vma_unreleased_ref, NULL, 0); + return 0; +} + +SEC("xdp") +__failure __msg("Expected a dynptr of type file as arg #0") +int xdp_wrong_dynptr_type(struct xdp_md *xdp) +{ + struct bpf_dynptr dynptr; + + bpf_dynptr_from_xdp(xdp, 0, &dynptr); + bpf_dynptr_file_discard(&dynptr); + return 0; +} + +SEC("xdp") +__failure __msg("Expected an initialized dynptr as arg #0") +int xdp_no_dynptr_type(struct xdp_md *xdp) +{ + struct bpf_dynptr dynptr; + + bpf_dynptr_file_discard(&dynptr); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/ip_check_defrag.c b/tools/testing/selftests/bpf/progs/ip_check_defrag.c index 645b2c9f7867c..0e87ad1ebcfa5 100644 --- a/tools/testing/selftests/bpf/progs/ip_check_defrag.c +++ b/tools/testing/selftests/bpf/progs/ip_check_defrag.c @@ -12,11 +12,6 @@ #define IP_OFFSET 0x1FFF #define NEXTHDR_FRAGMENT 44 -extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, - struct bpf_dynptr *ptr__uninit) __ksym; -extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset, - void *buffer, uint32_t buffer__sz) __ksym; - volatile int shootdowns = 0; static bool is_frag_v4(struct iphdr *iph) diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c index ab9f9f2620edc..e2cbc5bda65ea 100644 --- a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c +++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c @@ -79,11 +79,6 @@ int with_invalid_ctx_access_test5(struct bpf_nf_ctx *ctx) return NF_ACCEPT; } -extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, - struct bpf_dynptr *ptr__uninit) __ksym; -extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset, - void *buffer, uint32_t buffer__sz) __ksym; - SEC("netfilter") __description("netfilter test prog with skb and state read access") __success __failure_unpriv