
Commit bf08a60

bpf, arm64: Support up to 12 function arguments
JIRA: https://issues.redhat.com/browse/RHEL-78203

commit 9014cf5
Author: Xu Kuohai <[email protected]>
Date: Tue May 27 12:06:03 2025 +0200

    bpf, arm64: Support up to 12 function arguments

    Currently the ARM64 bpf trampoline supports up to 8 function arguments.
    According to the statistics from commit 473e315 ("bpf, x86: allow function
    arguments up to 12 for TRACING"), there are about 200 functions that accept
    9 to 12 arguments, so add support for up to 12 function arguments.

    Because bpf only supports function arguments up to 16 bytes, each argument
    occupies at most two 8-byte slots. Following the AAPCS64, starting from the
    first argument, each argument is first attempted to be passed in the lowest
    1 or 2 still-available registers from x0-x7; if there are not enough
    registers left to hold the entire argument, then all remaining arguments,
    starting from this one, are pushed to the stack for passing.

    There are some non-trivial cases for which it is not possible to correctly
    read arguments from/write arguments to the stack: for example, struct
    variables may have custom packing/alignment attributes that are invisible
    in BTF info. Such cases are denied for now to make sure not to read
    incorrect values.

    Signed-off-by: Xu Kuohai <[email protected]>
    Co-developed-by: Alexis Lothoré (eBPF Foundation) <[email protected]>
    Signed-off-by: Alexis Lothoré (eBPF Foundation) <[email protected]>
    Link: https://lore.kernel.org/r/[email protected]
    Signed-off-by: Alexei Starovoitov <[email protected]>

Signed-off-by: Viktor Malik <[email protected]>
Parent: 232920d
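The register/stack split described above can be illustrated with a small stand-alone C sketch (hypothetical user-space code, not part of the patch): every argument occupies one or two 8-byte slots, whole arguments are assigned to x0-x7 for as long as they fit, and the first argument that no longer fits, together with everything after it, is passed on the stack.

#include <stdio.h>

/* Minimal sketch of the register/stack split described above.
 * "arg_size" holds each argument's size in bytes (<= 16 per BPF),
 * mirroring btf_func_model::arg_size. Names are illustrative only.
 */
static void split_args(const int *arg_size, int nr_args)
{
        int nregs = 0;              /* 8-byte registers (x0-x7) consumed so far */
        int args_in_regs = nr_args; /* index of first argument that goes on the stack */
        int stack_bytes = 0;
        int i;

        for (i = 0; i < nr_args; i++) {
                int slots = (arg_size[i] + 7) / 8;   /* 1 or 2 slots per argument */

                if (nregs + slots <= 8) {
                        nregs += slots;              /* whole argument fits in registers */
                } else {
                        args_in_regs = i;            /* this and all later args go on the stack */
                        break;
                }
        }

        for (i = args_in_regs; i < nr_args; i++)
                stack_bytes += ((arg_size[i] + 7) / 8) * 8;

        printf("%d args in %d registers, %d args on stack (%d bytes)\n",
               args_in_regs, nregs, nr_args - args_in_regs, stack_bytes);
}

int main(void)
{
        /* e.g. a traced function with ten 8-byte scalar arguments:
         * the first eight land in x0-x7, the last two take 16 stack bytes.
         */
        int ten_scalars[10] = { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 };

        split_args(ten_scalars, 10);
        return 0;
}

For a traced function with ten 8-byte scalar arguments this prints a split of 8 arguments in 8 registers plus 2 arguments in 16 stack bytes, which matches the split that calc_arg_aux() in the diff below computes.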


arch/arm64/net/bpf_jit_comp.c

Lines changed: 171 additions & 54 deletions
@@ -2113,7 +2113,7 @@ bool bpf_jit_supports_subprog_tailcalls(void)
 }
 
 static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
-                            int args_off, int retval_off, int run_ctx_off,
+                            int bargs_off, int retval_off, int run_ctx_off,
                             bool save_ret)
 {
         __le32 *branch;
@@ -2155,7 +2155,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
         branch = ctx->image + ctx->idx;
         emit(A64_NOP, ctx);
 
-        emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx);
+        emit(A64_ADD_I(1, A64_R(0), A64_SP, bargs_off), ctx);
         if (!p->jited)
                 emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx);
 
@@ -2180,7 +2180,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
 }
 
 static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
-                               int args_off, int retval_off, int run_ctx_off,
+                               int bargs_off, int retval_off, int run_ctx_off,
                                __le32 **branches)
 {
         int i;
@@ -2190,7 +2190,7 @@ static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
          */
         emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx);
         for (i = 0; i < tl->nr_links; i++) {
-                invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off,
+                invoke_bpf_prog(ctx, tl->links[i], bargs_off, retval_off,
                                 run_ctx_off, true);
                 /* if (*(u64 *)(sp + retval_off) != 0)
                  *      goto do_fexit;
@@ -2204,23 +2204,125 @@ static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
         }
 }
 
-static void save_args(struct jit_ctx *ctx, int args_off, int nregs)
+struct arg_aux {
+        /* how many args are passed through registers, the rest of the args are
+         * passed through stack
+         */
+        int args_in_regs;
+        /* how many registers are used to pass arguments */
+        int regs_for_args;
+        /* how much stack is used for additional args passed to bpf program
+         * that did not fit in original function registers
+         */
+        int bstack_for_args;
+        /* home much stack is used for additional args passed to the
+         * original function when called from trampoline (this one needs
+         * arguments to be properly aligned)
+         */
+        int ostack_for_args;
+};
+
+static int calc_arg_aux(const struct btf_func_model *m,
+                        struct arg_aux *a)
 {
-        int i;
+        int stack_slots, nregs, slots, i;
+
+        /* verifier ensures m->nr_args <= MAX_BPF_FUNC_ARGS */
+        for (i = 0, nregs = 0; i < m->nr_args; i++) {
+                slots = (m->arg_size[i] + 7) / 8;
+                if (nregs + slots <= 8) /* passed through register ? */
+                        nregs += slots;
+                else
+                        break;
+        }
 
-        for (i = 0; i < nregs; i++) {
-                emit(A64_STR64I(i, A64_SP, args_off), ctx);
-                args_off += 8;
+        a->args_in_regs = i;
+        a->regs_for_args = nregs;
+        a->ostack_for_args = 0;
+        a->bstack_for_args = 0;
+
+        /* the rest arguments are passed through stack */
+        for (; i < m->nr_args; i++) {
+                /* We can not know for sure about exact alignment needs for
+                 * struct passed on stack, so deny those
+                 */
+                if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
+                        return -ENOTSUPP;
+                stack_slots = (m->arg_size[i] + 7) / 8;
+                a->bstack_for_args += stack_slots * 8;
+                a->ostack_for_args = a->ostack_for_args + stack_slots * 8;
+        }
+
+        return 0;
+}
+
+static void clear_garbage(struct jit_ctx *ctx, int reg, int effective_bytes)
+{
+        if (effective_bytes) {
+                int garbage_bits = 64 - 8 * effective_bytes;
+#ifdef CONFIG_CPU_BIG_ENDIAN
+                /* garbage bits are at the right end */
+                emit(A64_LSR(1, reg, reg, garbage_bits), ctx);
+                emit(A64_LSL(1, reg, reg, garbage_bits), ctx);
+#else
+                /* garbage bits are at the left end */
+                emit(A64_LSL(1, reg, reg, garbage_bits), ctx);
+                emit(A64_LSR(1, reg, reg, garbage_bits), ctx);
+#endif
         }
 }
 
-static void restore_args(struct jit_ctx *ctx, int args_off, int nregs)
+static void save_args(struct jit_ctx *ctx, int bargs_off, int oargs_off,
+                      const struct btf_func_model *m,
+                      const struct arg_aux *a,
+                      bool for_call_origin)
 {
         int i;
+        int reg;
+        int doff;
+        int soff;
+        int slots;
+        u8 tmp = bpf2a64[TMP_REG_1];
+
+        /* store arguments to the stack for the bpf program, or restore
+         * arguments from stack for the original function
+         */
+        for (reg = 0; reg < a->regs_for_args; reg++) {
+                emit(for_call_origin ?
+                     A64_LDR64I(reg, A64_SP, bargs_off) :
+                     A64_STR64I(reg, A64_SP, bargs_off),
+                     ctx);
+                bargs_off += 8;
+        }
+
+        soff = 32; /* on stack arguments start from FP + 32 */
+        doff = (for_call_origin ? oargs_off : bargs_off);
+
+        /* save on stack arguments */
+        for (i = a->args_in_regs; i < m->nr_args; i++) {
+                slots = (m->arg_size[i] + 7) / 8;
+                /* verifier ensures arg_size <= 16, so slots equals 1 or 2 */
+                while (slots-- > 0) {
+                        emit(A64_LDR64I(tmp, A64_FP, soff), ctx);
+                        /* if there is unused space in the last slot, clear
+                         * the garbage contained in the space.
+                         */
+                        if (slots == 0 && !for_call_origin)
+                                clear_garbage(ctx, tmp, m->arg_size[i] % 8);
+                        emit(A64_STR64I(tmp, A64_SP, doff), ctx);
+                        soff += 8;
+                        doff += 8;
+                }
+        }
+}
 
-        for (i = 0; i < nregs; i++) {
-                emit(A64_LDR64I(i, A64_SP, args_off), ctx);
-                args_off += 8;
+static void restore_args(struct jit_ctx *ctx, int bargs_off, int nregs)
+{
+        int reg;
+
+        for (reg = 0; reg < nregs; reg++) {
+                emit(A64_LDR64I(reg, A64_SP, bargs_off), ctx);
+                bargs_off += 8;
         }
 }
 
@@ -2243,17 +2345,21 @@ static bool is_struct_ops_tramp(const struct bpf_tramp_links *fentry_links)
  */
 static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
                               struct bpf_tramp_links *tlinks, void *func_addr,
-                              int nregs, u32 flags)
+                              const struct btf_func_model *m,
+                              const struct arg_aux *a,
+                              u32 flags)
 {
         int i;
         int stack_size;
         int retaddr_off;
         int regs_off;
         int retval_off;
-        int args_off;
-        int nregs_off;
+        int bargs_off;
+        int nfuncargs_off;
         int ip_off;
         int run_ctx_off;
+        int oargs_off;
+        int nfuncargs;
         struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
         struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
         struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
@@ -2262,31 +2368,38 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
         bool is_struct_ops = is_struct_ops_tramp(fentry);
 
         /* trampoline stack layout:
-         *                  [ parent ip         ]
-         *                  [ FP                ]
-         * SP + retaddr_off [ self ip           ]
-         *                  [ FP                ]
+         *                     [ parent ip         ]
+         *                     [ FP                ]
+         * SP + retaddr_off    [ self ip           ]
+         *                     [ FP                ]
          *
-         *                  [ padding           ] align SP to multiples of 16
+         *                     [ padding           ] align SP to multiples of 16
          *
-         *                  [ x20               ] callee saved reg x20
-         * SP + regs_off    [ x19               ] callee saved reg x19
+         *                     [ x20               ] callee saved reg x20
+         * SP + regs_off       [ x19               ] callee saved reg x19
          *
-         * SP + retval_off  [ return value      ] BPF_TRAMP_F_CALL_ORIG or
-         *                                        BPF_TRAMP_F_RET_FENTRY_RET
+         * SP + retval_off     [ return value      ] BPF_TRAMP_F_CALL_ORIG or
+         *                                            BPF_TRAMP_F_RET_FENTRY_RET
+         *                     [ arg reg N         ]
+         *                     [ ...               ]
+         * SP + bargs_off      [ arg reg 1         ] for bpf
          *
-         *                  [ arg reg N         ]
-         *                  [ ...               ]
-         * SP + args_off    [ arg reg 1         ]
+         * SP + nfuncargs_off  [ arg regs count    ]
          *
-         * SP + nregs_off   [ arg regs count    ]
+         * SP + ip_off         [ traced function   ] BPF_TRAMP_F_IP_ARG flag
          *
-         * SP + ip_off      [ traced function   ] BPF_TRAMP_F_IP_ARG flag
+         * SP + run_ctx_off    [ bpf_tramp_run_ctx ]
          *
-         * SP + run_ctx_off [ bpf_tramp_run_ctx ]
+         *                     [ stack arg N       ]
+         *                     [ ...               ]
+         * SP + oargs_off      [ stack arg 1       ] for original func
          */
 
         stack_size = 0;
+        oargs_off = stack_size;
+        if (flags & BPF_TRAMP_F_CALL_ORIG)
+                stack_size += a->ostack_for_args;
+
         run_ctx_off = stack_size;
         /* room for bpf_tramp_run_ctx */
         stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
@@ -2296,13 +2409,14 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
         if (flags & BPF_TRAMP_F_IP_ARG)
                 stack_size += 8;
 
-        nregs_off = stack_size;
+        nfuncargs_off = stack_size;
         /* room for args count */
         stack_size += 8;
 
-        args_off = stack_size;
+        bargs_off = stack_size;
         /* room for args */
-        stack_size += nregs * 8;
+        nfuncargs = a->regs_for_args + a->bstack_for_args / 8;
+        stack_size += 8 * nfuncargs;
 
         /* room for return value */
         retval_off = stack_size;
@@ -2349,11 +2463,11 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
         }
 
         /* save arg regs count*/
-        emit(A64_MOVZ(1, A64_R(10), nregs, 0), ctx);
-        emit(A64_STR64I(A64_R(10), A64_SP, nregs_off), ctx);
+        emit(A64_MOVZ(1, A64_R(10), nfuncargs, 0), ctx);
+        emit(A64_STR64I(A64_R(10), A64_SP, nfuncargs_off), ctx);
 
-        /* save arg regs */
-        save_args(ctx, args_off, nregs);
+        /* save args for bpf */
+        save_args(ctx, bargs_off, oargs_off, m, a, false);
 
         /* save callee saved registers */
         emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx);
@@ -2369,7 +2483,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
         }
 
         for (i = 0; i < fentry->nr_links; i++)
-                invoke_bpf_prog(ctx, fentry->links[i], args_off,
+                invoke_bpf_prog(ctx, fentry->links[i], bargs_off,
                                 retval_off, run_ctx_off,
                                 flags & BPF_TRAMP_F_RET_FENTRY_RET);
 
@@ -2379,12 +2493,13 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
                 if (!branches)
                         return -ENOMEM;
 
-                invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off,
+                invoke_bpf_mod_ret(ctx, fmod_ret, bargs_off, retval_off,
                                    run_ctx_off, branches);
         }
 
         if (flags & BPF_TRAMP_F_CALL_ORIG) {
-                restore_args(ctx, args_off, nregs);
+                /* save args for original func */
+                save_args(ctx, bargs_off, oargs_off, m, a, true);
                 /* call original func */
                 emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx);
                 emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx);
@@ -2403,7 +2518,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
         }
 
         for (i = 0; i < fexit->nr_links; i++)
-                invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off,
+                invoke_bpf_prog(ctx, fexit->links[i], bargs_off, retval_off,
                                 run_ctx_off, false);
 
         if (flags & BPF_TRAMP_F_CALL_ORIG) {
@@ -2417,7 +2532,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
         }
 
         if (flags & BPF_TRAMP_F_RESTORE_REGS)
-                restore_args(ctx, args_off, nregs);
+                restore_args(ctx, bargs_off, a->regs_for_args);
 
         /* restore callee saved register x19 and x20 */
         emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx);
@@ -2477,14 +2592,16 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
                 .idx = 0,
         };
         struct bpf_tramp_image im;
+        struct arg_aux aaux;
         int nregs, ret;
 
         nregs = btf_func_model_nregs(m);
-        /* the first 8 registers are used for arguments */
-        if (nregs > 8)
-                return -ENOTSUPP;
 
-        ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, nregs, flags);
+        ret = calc_arg_aux(m, &aaux);
+        if (ret < 0)
+                return ret;
+
+        ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, m, &aaux, flags);
         if (ret < 0)
                 return ret;
 
@@ -2511,9 +2628,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
                                 u32 flags, struct bpf_tramp_links *tlinks,
                                 void *func_addr)
 {
-        int ret, nregs;
-        void *image, *tmp;
         u32 size = ro_image_end - ro_image;
+        struct arg_aux aaux;
+        void *image, *tmp;
+        int ret;
 
         /* image doesn't need to be in module memory range, so we can
          * use kvmalloc.
@@ -2529,13 +2647,12 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
                 .write = true,
         };
 
-        nregs = btf_func_model_nregs(m);
-        /* the first 8 registers are used for arguments */
-        if (nregs > 8)
-                return -ENOTSUPP;
 
         jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
-        ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags);
+        ret = calc_arg_aux(m, &aaux);
+        if (ret)
+                goto out;
+        ret = prepare_trampoline(&ctx, im, tlinks, func_addr, m, &aaux, flags);
 
         if (ret > 0 && validate_code(&ctx) < 0) {
                 ret = -EINVAL;
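A closing note on clear_garbage() in the diff above: when a stack-passed argument does not fill its last 8-byte slot (arg_size % 8 != 0), the unused bytes of that slot hold whatever the caller happened to leave on its stack, so the trampoline masks them off before handing the slot to the bpf program. Below is a rough user-space sketch of that masking for the little-endian case (illustrative only; the JIT emits an LSL/LSR shift pair rather than calling a helper):

#include <stdint.h>
#include <stdio.h>

/* Illustration of the masking done by clear_garbage(): keep only the
 * "effective_bytes" low-order bytes of a little-endian 8-byte slot and
 * zero the rest. The JIT achieves the same with a left-then-right shift
 * pair; on big-endian the garbage sits in the low bits, so the shifts
 * are applied in the opposite order.
 */
static uint64_t drop_garbage_le(uint64_t slot, int effective_bytes)
{
        int garbage_bits = 64 - 8 * effective_bytes;

        if (!effective_bytes || !garbage_bits)
                return slot;            /* slot fully used, nothing to clear */
        return (slot << garbage_bits) >> garbage_bits;
}

int main(void)
{
        /* e.g. a 12-byte argument: its second slot only uses 4 bytes,
         * so the upper 4 bytes of that slot are garbage and get cleared.
         */
        uint64_t raw = 0xdeadbeef12345678ull;

        printf("0x%016llx\n",
               (unsigned long long)drop_garbage_le(raw, 12 % 8));
        return 0;
}

On big-endian kernels the garbage ends up in the low-order bits instead, which is why the emitted code swaps the order of the two shifts.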
