Skip to content

Commit 473e315

Browse files
menglongdong authored and
Alexei Starovoitov committed
bpf, x86: allow function arguments up to 12 for TRACING
For now, the BPF program of type BPF_PROG_TYPE_TRACING can only be used on kernel functions whose argument count is less than or equal to 6, if not considering '> 8 bytes' struct arguments. This is not friendly at all, as too many functions have an argument count of more than 6. According to the current kernel version, below are statistics of function argument counts: argument count | function count 7 | 704 8 | 270 9 | 84 10 | 47 11 | 47 12 | 27 13 | 22 14 | 5 15 | 0 16 | 1 Therefore, let's enhance it by increasing the function argument count allowed in arch_prepare_bpf_trampoline(), for now, only for x86_64. For the case that we don't need to call the origin function, which means without BPF_TRAMP_F_CALL_ORIG, we need only copy the function arguments that are stored in the frame of the caller to the current frame. The 7th and later arguments are stored at "$rbp + 0x18", and they will be copied to the stack area following where register values are saved. For the case with BPF_TRAMP_F_CALL_ORIG, we need to prepare the arguments on the stack before calling the origin function, which means we need to allocate an extra "8 * (arg_count - 6)" bytes at the top of the stack. Note, there should not be any data pushed to the stack before calling the origin function. So the 'rbx' value will be stored at a stack position higher than where the stack arguments are stored for BPF_TRAMP_F_CALL_ORIG. According to the research of Yonghong, struct members should be all in registers or all on the stack. Meanwhile, the compiler will pass the argument in regs if the remaining regs can hold the argument. Therefore, we need to save the arguments in order. Otherwise, disorder of the args can happen. For example: struct foo_struct { long a; int b; }; int foo(char, char, char, char, char, struct foo_struct, char); the arg1-5,arg7 will be passed by regs, and arg6 will be passed by stack. Therefore, we should save/restore the arguments in the same order as the declaration of foo(). 
And the args used as ctx on the stack will be like this: reg_arg6 -- copy from regs stack_arg2 -- copy from stack stack_arg1 reg_arg5 -- copy from regs reg_arg4 reg_arg3 reg_arg2 reg_arg1 We use EMIT3_off32() or EMIT4() for "lea" and "sub". The range of the imm in "lea" and "sub" is [-128, 127] if EMIT4() is used. Therefore, we use EMIT3_off32() instead if the imm is out of that range. It works well for FENTRY/FEXIT/MODIFY_RETURN. Signed-off-by: Menglong Dong <[email protected]> Acked-by: Yonghong Song <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent 02a6dfa commit 473e315

File tree

1 file changed

+209
-26
lines changed

1 file changed

+209
-26
lines changed

arch/x86/net/bpf_jit_comp.c

Lines changed: 209 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1857,37 +1857,178 @@ st: if (is_imm8(insn->off))
18571857
return proglen;
18581858
}
18591859

1860-
static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
1861-
int stack_size)
1860+
static void clean_stack_garbage(const struct btf_func_model *m,
1861+
u8 **pprog, int nr_stack_slots,
1862+
int stack_size)
18621863
{
1863-
int i;
1864+
int arg_size, off;
1865+
u8 *prog;
1866+
1867+
/* Generally speaking, the compiler will pass the arguments
1868+
* on-stack with "push" instruction, which will take 8-byte
1869+
* on the stack. In this case, there won't be garbage values
1870+
* while we copy the arguments from origin stack frame to current
1871+
* in BPF_DW.
1872+
*
1873+
* However, sometimes the compiler will only allocate 4-byte on
1874+
* the stack for the arguments. For now, this case will only
1875+
* happen if there is only one argument on-stack and its size
1876+
* not more than 4 byte. In this case, there will be garbage
1877+
* values on the upper 4-byte where we store the argument on
1878+
* current stack frame.
1879+
*
1880+
* arguments on origin stack:
1881+
*
1882+
* stack_arg_1(4-byte) xxx(4-byte)
1883+
*
1884+
* what we copy:
1885+
*
1886+
* stack_arg_1(8-byte): stack_arg_1(origin) xxx
1887+
*
1888+
* and the xxx is the garbage values which we should clean here.
1889+
*/
1890+
if (nr_stack_slots != 1)
1891+
return;
1892+
1893+
/* the size of the last argument */
1894+
arg_size = m->arg_size[m->nr_args - 1];
1895+
if (arg_size <= 4) {
1896+
off = -(stack_size - 4);
1897+
prog = *pprog;
1898+
/* mov DWORD PTR [rbp + off], 0 */
1899+
if (!is_imm8(off))
1900+
EMIT2_off32(0xC7, 0x85, off);
1901+
else
1902+
EMIT3(0xC7, 0x45, off);
1903+
EMIT(0, 4);
1904+
*pprog = prog;
1905+
}
1906+
}
1907+
1908+
/* get the count of the regs that are used to pass arguments */
1909+
static int get_nr_used_regs(const struct btf_func_model *m)
1910+
{
1911+
int i, arg_regs, nr_used_regs = 0;
1912+
1913+
for (i = 0; i < min_t(int, m->nr_args, MAX_BPF_FUNC_ARGS); i++) {
1914+
arg_regs = (m->arg_size[i] + 7) / 8;
1915+
if (nr_used_regs + arg_regs <= 6)
1916+
nr_used_regs += arg_regs;
1917+
1918+
if (nr_used_regs >= 6)
1919+
break;
1920+
}
1921+
1922+
return nr_used_regs;
1923+
}
1924+
1925+
static void save_args(const struct btf_func_model *m, u8 **prog,
1926+
int stack_size, bool for_call_origin)
1927+
{
1928+
int arg_regs, first_off, nr_regs = 0, nr_stack_slots = 0;
1929+
int i, j;
18641930

18651931
/* Store function arguments to stack.
18661932
* For a function that accepts two pointers the sequence will be:
18671933
* mov QWORD PTR [rbp-0x10],rdi
18681934
* mov QWORD PTR [rbp-0x8],rsi
18691935
*/
1870-
for (i = 0; i < min(nr_regs, 6); i++)
1871-
emit_stx(prog, BPF_DW, BPF_REG_FP,
1872-
i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
1873-
-(stack_size - i * 8));
1936+
for (i = 0; i < min_t(int, m->nr_args, MAX_BPF_FUNC_ARGS); i++) {
1937+
arg_regs = (m->arg_size[i] + 7) / 8;
1938+
1939+
/* According to the research of Yonghong, struct members
1940+
* should be all in register or all on the stack.
1941+
* Meanwhile, the compiler will pass the argument on regs
1942+
* if the remaining regs can hold the argument.
1943+
*
1944+
* Disorder of the args can happen. For example:
1945+
*
1946+
* struct foo_struct {
1947+
* long a;
1948+
* int b;
1949+
* };
1950+
* int foo(char, char, char, char, char, struct foo_struct,
1951+
* char);
1952+
*
1953+
* the arg1-5,arg7 will be passed by regs, and arg6 will
1954+
* by stack.
1955+
*/
1956+
if (nr_regs + arg_regs > 6) {
1957+
/* copy function arguments from origin stack frame
1958+
* into current stack frame.
1959+
*
1960+
* The starting address of the arguments on-stack
1961+
* is:
1962+
* rbp + 8(push rbp) +
1963+
* 8(return addr of origin call) +
1964+
* 8(return addr of the caller)
1965+
* which means: rbp + 24
1966+
*/
1967+
for (j = 0; j < arg_regs; j++) {
1968+
emit_ldx(prog, BPF_DW, BPF_REG_0, BPF_REG_FP,
1969+
nr_stack_slots * 8 + 0x18);
1970+
emit_stx(prog, BPF_DW, BPF_REG_FP, BPF_REG_0,
1971+
-stack_size);
1972+
1973+
if (!nr_stack_slots)
1974+
first_off = stack_size;
1975+
stack_size -= 8;
1976+
nr_stack_slots++;
1977+
}
1978+
} else {
1979+
/* Only copy the arguments on-stack to current
1980+
* 'stack_size' and ignore the regs, used to
1981+
* prepare the arguments on-stack for origin call.
1982+
*/
1983+
if (for_call_origin) {
1984+
nr_regs += arg_regs;
1985+
continue;
1986+
}
1987+
1988+
/* copy the arguments from regs into stack */
1989+
for (j = 0; j < arg_regs; j++) {
1990+
emit_stx(prog, BPF_DW, BPF_REG_FP,
1991+
nr_regs == 5 ? X86_REG_R9 : BPF_REG_1 + nr_regs,
1992+
-stack_size);
1993+
stack_size -= 8;
1994+
nr_regs++;
1995+
}
1996+
}
1997+
}
1998+
1999+
clean_stack_garbage(m, prog, nr_stack_slots, first_off);
18742000
}
18752001

1876-
static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
2002+
static void restore_regs(const struct btf_func_model *m, u8 **prog,
18772003
int stack_size)
18782004
{
1879-
int i;
2005+
int i, j, arg_regs, nr_regs = 0;
18802006

18812007
/* Restore function arguments from stack.
18822008
* For a function that accepts two pointers the sequence will be:
18832009
* EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10]
18842010
* EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8]
2011+
*
2012+
* The logic here is similar to what we do in save_args()
18852013
*/
1886-
for (i = 0; i < min(nr_regs, 6); i++)
1887-
emit_ldx(prog, BPF_DW,
1888-
i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
1889-
BPF_REG_FP,
1890-
-(stack_size - i * 8));
2014+
for (i = 0; i < min_t(int, m->nr_args, MAX_BPF_FUNC_ARGS); i++) {
2015+
arg_regs = (m->arg_size[i] + 7) / 8;
2016+
if (nr_regs + arg_regs <= 6) {
2017+
for (j = 0; j < arg_regs; j++) {
2018+
emit_ldx(prog, BPF_DW,
2019+
nr_regs == 5 ? X86_REG_R9 : BPF_REG_1 + nr_regs,
2020+
BPF_REG_FP,
2021+
-stack_size);
2022+
stack_size -= 8;
2023+
nr_regs++;
2024+
}
2025+
} else {
2026+
stack_size -= 8 * arg_regs;
2027+
}
2028+
2029+
if (nr_regs >= 6)
2030+
break;
2031+
}
18912032
}
18922033

18932034
static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
@@ -1915,7 +2056,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
19152056
/* arg1: mov rdi, progs[i] */
19162057
emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
19172058
/* arg2: lea rsi, [rbp - ctx_cookie_off] */
1918-
EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
2059+
if (!is_imm8(-run_ctx_off))
2060+
EMIT3_off32(0x48, 0x8D, 0xB5, -run_ctx_off);
2061+
else
2062+
EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
19192063

19202064
if (emit_rsb_call(&prog, bpf_trampoline_enter(p), prog))
19212065
return -EINVAL;
@@ -1931,7 +2075,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
19312075
emit_nops(&prog, 2);
19322076

19332077
/* arg1: lea rdi, [rbp - stack_size] */
1934-
EMIT4(0x48, 0x8D, 0x7D, -stack_size);
2078+
if (!is_imm8(-stack_size))
2079+
EMIT3_off32(0x48, 0x8D, 0xBD, -stack_size);
2080+
else
2081+
EMIT4(0x48, 0x8D, 0x7D, -stack_size);
19352082
/* arg2: progs[i]->insnsi for interpreter */
19362083
if (!p->jited)
19372084
emit_mov_imm64(&prog, BPF_REG_2,
@@ -1961,7 +2108,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
19612108
/* arg2: mov rsi, rbx <- start time in nsec */
19622109
emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
19632110
/* arg3: lea rdx, [rbp - run_ctx_off] */
1964-
EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
2111+
if (!is_imm8(-run_ctx_off))
2112+
EMIT3_off32(0x48, 0x8D, 0x95, -run_ctx_off);
2113+
else
2114+
EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
19652115
if (emit_rsb_call(&prog, bpf_trampoline_exit(p), prog))
19662116
return -EINVAL;
19672117

@@ -2113,7 +2263,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
21132263
void *func_addr)
21142264
{
21152265
int i, ret, nr_regs = m->nr_args, stack_size = 0;
2116-
int regs_off, nregs_off, ip_off, run_ctx_off;
2266+
int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
21172267
struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
21182268
struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
21192269
struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
@@ -2127,8 +2277,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
21272277
if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
21282278
nr_regs += (m->arg_size[i] + 7) / 8 - 1;
21292279

2130-
/* x86-64 supports up to 6 arguments. 7+ can be added in the future */
2131-
if (nr_regs > 6)
2280+
/* x86-64 supports up to MAX_BPF_FUNC_ARGS arguments. 1-6
2281+
* are passed through regs, the remains are through stack.
2282+
*/
2283+
if (nr_regs > MAX_BPF_FUNC_ARGS)
21322284
return -ENOTSUPP;
21332285

21342286
/* Generated trampoline stack layout:
@@ -2147,7 +2299,14 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
21472299
*
21482300
* RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag
21492301
*
2302+
* RBP - rbx_off [ rbx value ] always
2303+
*
21502304
* RBP - run_ctx_off [ bpf_tramp_run_ctx ]
2305+
*
2306+
* [ stack_argN ] BPF_TRAMP_F_CALL_ORIG
2307+
* [ ... ]
2308+
* [ stack_arg2 ]
2309+
* RBP - arg_stack_off [ stack_arg1 ]
21512310
*/
21522311

21532312
/* room for return value of orig_call or fentry prog */
@@ -2167,9 +2326,26 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
21672326

21682327
ip_off = stack_size;
21692328

2329+
stack_size += 8;
2330+
rbx_off = stack_size;
2331+
21702332
stack_size += (sizeof(struct bpf_tramp_run_ctx) + 7) & ~0x7;
21712333
run_ctx_off = stack_size;
21722334

2335+
if (nr_regs > 6 && (flags & BPF_TRAMP_F_CALL_ORIG)) {
2336+
/* the space that is used to pass arguments on-stack */
2337+
stack_size += (nr_regs - get_nr_used_regs(m)) * 8;
2338+
/* make sure the stack pointer is 16-byte aligned if we
2339+
* need pass arguments on stack, which means
2340+
* [stack_size + 8(rbp) + 8(rip) + 8(origin rip)]
2341+
* should be 16-byte aligned. The following code depends on
2342+
* that stack_size is already 8-byte aligned.
2343+
*/
2344+
stack_size += (stack_size % 16) ? 0 : 8;
2345+
}
2346+
2347+
arg_stack_off = stack_size;
2348+
21732349
if (flags & BPF_TRAMP_F_SKIP_FRAME) {
21742350
/* skip patched call instruction and point orig_call to actual
21752351
* body of the kernel function.
@@ -2189,8 +2365,14 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
21892365
x86_call_depth_emit_accounting(&prog, NULL);
21902366
EMIT1(0x55); /* push rbp */
21912367
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
2192-
EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
2193-
EMIT1(0x53); /* push rbx */
2368+
if (!is_imm8(stack_size))
2369+
/* sub rsp, stack_size */
2370+
EMIT3_off32(0x48, 0x81, 0xEC, stack_size);
2371+
else
2372+
/* sub rsp, stack_size */
2373+
EMIT4(0x48, 0x83, 0xEC, stack_size);
2374+
/* mov QWORD PTR [rbp - rbx_off], rbx */
2375+
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_6, -rbx_off);
21942376

21952377
/* Store number of argument registers of the traced function:
21962378
* mov rax, nr_regs
@@ -2208,7 +2390,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
22082390
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ip_off);
22092391
}
22102392

2211-
save_regs(m, &prog, nr_regs, regs_off);
2393+
save_args(m, &prog, regs_off, false);
22122394

22132395
if (flags & BPF_TRAMP_F_CALL_ORIG) {
22142396
/* arg1: mov rdi, im */
@@ -2238,7 +2420,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
22382420
}
22392421

22402422
if (flags & BPF_TRAMP_F_CALL_ORIG) {
2241-
restore_regs(m, &prog, nr_regs, regs_off);
2423+
restore_regs(m, &prog, regs_off);
2424+
save_args(m, &prog, arg_stack_off, true);
22422425

22432426
if (flags & BPF_TRAMP_F_ORIG_STACK) {
22442427
emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8);
@@ -2279,7 +2462,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
22792462
}
22802463

22812464
if (flags & BPF_TRAMP_F_RESTORE_REGS)
2282-
restore_regs(m, &prog, nr_regs, regs_off);
2465+
restore_regs(m, &prog, regs_off);
22832466

22842467
/* This needs to be done regardless. If there were fmod_ret programs,
22852468
* the return value is only updated on the stack and still needs to be
@@ -2298,7 +2481,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
22982481
if (save_ret)
22992482
emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
23002483

2301-
EMIT1(0x5B); /* pop rbx */
2484+
emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off);
23022485
EMIT1(0xC9); /* leave */
23032486
if (flags & BPF_TRAMP_F_SKIP_FRAME)
23042487
/* skip our return address and return to parent */

0 commit comments

Comments
 (0)