@@ -1857,37 +1857,178 @@ st: if (is_imm8(insn->off))
 	return proglen;
 }
 
-static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
-		      int stack_size)
+static void clean_stack_garbage(const struct btf_func_model *m,
+				u8 **pprog, int nr_stack_slots,
+				int stack_size)
 {
-	int i;
+	int arg_size, off;
+	u8 *prog;
+
+	/* Generally speaking, the compiler will pass the arguments
+	 * on the stack with "push" instructions, each of which takes
+	 * 8 bytes. In that case there are no garbage values when we
+	 * copy the arguments from the origin stack frame to the
+	 * current one in BPF_DW.
+	 *
+	 * However, sometimes the compiler only allocates 4 bytes on
+	 * the stack for an argument. For now, this can only happen
+	 * when there is a single on-stack argument whose size is no
+	 * more than 4 bytes. In that case the upper 4 bytes of the
+	 * slot where we store the argument in the current stack frame
+	 * hold garbage.
+	 *
+	 * arguments on the origin stack:
+	 *
+	 *	stack_arg_1(4-byte) xxx(4-byte)
+	 *
+	 * what we copy:
+	 *
+	 *	stack_arg_1(8-byte): stack_arg_1(origin) xxx
+	 *
+	 * and xxx is the garbage value that we clean up here.
+	 */
+	if (nr_stack_slots != 1)
+		return;
+
+	/* the size of the last argument */
+	arg_size = m->arg_size[m->nr_args - 1];
+	if (arg_size <= 4) {
+		off = -(stack_size - 4);
+		prog = *pprog;
+		/* mov DWORD PTR [rbp + off], 0 */
+		if (!is_imm8(off))
+			EMIT2_off32(0xC7, 0x85, off);
+		else
+			EMIT3(0xC7, 0x45, off);
+		EMIT(0, 4);
+		*pprog = prog;
+	}
+}
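To make the single-4-byte-slot case above concrete, a traced function that would need this cleanup could look like the following sketch (hypothetical signature, not taken from the patch or its selftests):

/* The six 8-byte arguments consume rdi, rsi, rdx, rcx, r8 and r9, so
 * "int g" ends up on the caller's stack.  The caller may write only
 * its 4 meaningful bytes into that slot, so when the trampoline copies
 * the slot with an 8-byte BPF_DW load/store, the upper half would be
 * garbage unless clean_stack_garbage() zeroes it.
 */
int example_traced_func(long a, long b, long c, long d, long e, long f,
			int g);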
+
+/* get the count of the regs that are used to pass arguments */
+static int get_nr_used_regs(const struct btf_func_model *m)
+{
+	int i, arg_regs, nr_used_regs = 0;
+
+	for (i = 0; i < min_t(int, m->nr_args, MAX_BPF_FUNC_ARGS); i++) {
+		arg_regs = (m->arg_size[i] + 7) / 8;
+		if (nr_used_regs + arg_regs <= 6)
+			nr_used_regs += arg_regs;
+
+		if (nr_used_regs >= 6)
+			break;
+	}
+
+	return nr_used_regs;
+}
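As a rough sanity check, the counting logic above can be replicated in a stand-alone user-space sketch. The arg_size values below are assumptions for the foo_struct example used in the save_args() comment further down (1 byte per char, 16 bytes for the struct); MAX_BPF_FUNC_ARGS is 12 in the kernel headers.

#include <stdio.h>

int main(void)
{
	/* int foo(char, char, char, char, char, struct foo_struct, char) */
	int arg_size[] = { 1, 1, 1, 1, 1, 16, 1 };
	int nr_args = 7, i, arg_regs, nr_used_regs = 0;

	for (i = 0; i < nr_args; i++) {
		arg_regs = (arg_size[i] + 7) / 8;
		if (nr_used_regs + arg_regs <= 6)
			nr_used_regs += arg_regs;
		if (nr_used_regs >= 6)
			break;
	}
	/* Prints 6: arg1-arg5 and arg7 fit in registers, arg6 does not. */
	printf("%d\n", nr_used_regs);
	return 0;
}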
+
+static void save_args(const struct btf_func_model *m, u8 **prog,
+		      int stack_size, bool for_call_origin)
+{
+	int arg_regs, first_off, nr_regs = 0, nr_stack_slots = 0;
+	int i, j;
 
 	/* Store function arguments to stack.
 	 * For a function that accepts two pointers the sequence will be:
 	 * mov QWORD PTR [rbp-0x10],rdi
 	 * mov QWORD PTR [rbp-0x8],rsi
 	 */
-	for (i = 0; i < min(nr_regs, 6); i++)
-		emit_stx(prog, BPF_DW, BPF_REG_FP,
-			 i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
-			 -(stack_size - i * 8));
+	for (i = 0; i < min_t(int, m->nr_args, MAX_BPF_FUNC_ARGS); i++) {
+		arg_regs = (m->arg_size[i] + 7) / 8;
+
+		/* According to the research of Yonghong, struct members
+		 * are either all passed in registers or all on the stack.
+		 * Meanwhile, the compiler passes an argument in registers
+		 * only if the remaining registers can hold it.
+		 *
+		 * Therefore the arguments can appear out of order. For example:
+		 *
+		 *	struct foo_struct {
+		 *		long a;
+		 *		int b;
+		 *	};
+		 *	int foo(char, char, char, char, char, struct foo_struct,
+		 *		char);
+		 *
+		 * here arg1-arg5 and arg7 are passed in registers, while
+		 * arg6 is passed on the stack.
+		 */
+		if (nr_regs + arg_regs > 6) {
+			/* copy function arguments from origin stack frame
+			 * into current stack frame.
+			 *
+			 * The starting address of the arguments on-stack
+			 * is:
+			 *	rbp + 8(push rbp) +
+			 *	8(return addr of origin call) +
+			 *	8(return addr of the caller)
+			 * which means: rbp + 24
+			 */
+			for (j = 0; j < arg_regs; j++) {
+				emit_ldx(prog, BPF_DW, BPF_REG_0, BPF_REG_FP,
+					 nr_stack_slots * 8 + 0x18);
+				emit_stx(prog, BPF_DW, BPF_REG_FP, BPF_REG_0,
+					 -stack_size);
+
+				if (!nr_stack_slots)
+					first_off = stack_size;
+				stack_size -= 8;
+				nr_stack_slots++;
+			}
+		} else {
+			/* Only copy the arguments on-stack to the current
+			 * 'stack_size' and ignore the regs; this is used to
+			 * prepare the on-stack arguments for the origin call.
+			 */
+			if (for_call_origin) {
+				nr_regs += arg_regs;
+				continue;
+			}
+
+			/* copy the arguments from regs into stack */
+			for (j = 0; j < arg_regs; j++) {
+				emit_stx(prog, BPF_DW, BPF_REG_FP,
+					 nr_regs == 5 ? X86_REG_R9 : BPF_REG_1 + nr_regs,
+					 -stack_size);
+				stack_size -= 8;
+				nr_regs++;
+			}
+		}
+	}
+
+	clean_stack_garbage(m, prog, nr_stack_slots, first_off);
 }
 
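For the foo() example from the comment above, and with stack_size == regs_off and for_call_origin == false (how save_args() is first called in arch_prepare_bpf_trampoline() below), the emitted sequence would roughly be the following; this is hand-derived from the loop above, not captured from a real run:

/*	mov QWORD PTR [rbp - regs_off +  0], rdi	// arg1
 *	mov QWORD PTR [rbp - regs_off +  8], rsi	// arg2
 *	mov QWORD PTR [rbp - regs_off + 16], rdx	// arg3
 *	mov QWORD PTR [rbp - regs_off + 24], rcx	// arg4
 *	mov QWORD PTR [rbp - regs_off + 32], r8		// arg5
 *	mov rax, QWORD PTR [rbp + 0x18]			// arg6, low 8 bytes, from origin stack
 *	mov QWORD PTR [rbp - regs_off + 40], rax
 *	mov rax, QWORD PTR [rbp + 0x20]			// arg6, high 8 bytes
 *	mov QWORD PTR [rbp - regs_off + 48], rax
 *	mov QWORD PTR [rbp - regs_off + 56], r9		// arg7
 */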
-static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
+static void restore_regs(const struct btf_func_model *m, u8 **prog,
 			 int stack_size)
 {
-	int i;
+	int i, j, arg_regs, nr_regs = 0;
 
 	/* Restore function arguments from stack.
 	 * For a function that accepts two pointers the sequence will be:
 	 * EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10]
 	 * EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8]
+	 *
+	 * The logic here is similar to what we do in save_args().
 	 */
-	for (i = 0; i < min(nr_regs, 6); i++)
-		emit_ldx(prog, BPF_DW,
-			 i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
-			 BPF_REG_FP,
-			 -(stack_size - i * 8));
+	for (i = 0; i < min_t(int, m->nr_args, MAX_BPF_FUNC_ARGS); i++) {
+		arg_regs = (m->arg_size[i] + 7) / 8;
+		if (nr_regs + arg_regs <= 6) {
+			for (j = 0; j < arg_regs; j++) {
+				emit_ldx(prog, BPF_DW,
+					 nr_regs == 5 ? X86_REG_R9 : BPF_REG_1 + nr_regs,
+					 BPF_REG_FP,
+					 -stack_size);
+				stack_size -= 8;
+				nr_regs++;
+			}
+		} else {
+			stack_size -= 8 * arg_regs;
+		}
+
+		if (nr_regs >= 6)
+			break;
+	}
 }
 
 static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
@@ -1915,7 +2056,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	/* arg1: mov rdi, progs[i] */
 	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
 	/* arg2: lea rsi, [rbp - ctx_cookie_off] */
-	EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
+	if (!is_imm8(-run_ctx_off))
+		EMIT3_off32(0x48, 0x8D, 0xB5, -run_ctx_off);
+	else
+		EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
 
 	if (emit_rsb_call(&prog, bpf_trampoline_enter(p), prog))
 		return -EINVAL;
@@ -1931,7 +2075,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	emit_nops(&prog, 2);
 
 	/* arg1: lea rdi, [rbp - stack_size] */
-	EMIT4(0x48, 0x8D, 0x7D, -stack_size);
+	if (!is_imm8(-stack_size))
+		EMIT3_off32(0x48, 0x8D, 0xBD, -stack_size);
+	else
+		EMIT4(0x48, 0x8D, 0x7D, -stack_size);
 	/* arg2: progs[i]->insnsi for interpreter */
 	if (!p->jited)
 		emit_mov_imm64(&prog, BPF_REG_2,
@@ -1961,7 +2108,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	/* arg2: mov rsi, rbx <- start time in nsec */
 	emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
 	/* arg3: lea rdx, [rbp - run_ctx_off] */
-	EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
+	if (!is_imm8(-run_ctx_off))
+		EMIT3_off32(0x48, 0x8D, 0x95, -run_ctx_off);
+	else
+		EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
 	if (emit_rsb_call(&prog, bpf_trampoline_exit(p), prog))
 		return -EINVAL;
 
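All three hunks in invoke_bpf_prog() follow the same pattern: with arguments now spilled into the trampoline frame, run_ctx_off and stack_size can exceed 127, so the fixed disp8 form of lea no longer always fits. Hand-assembled encodings for two example offsets (0x10 and 0x90 are arbitrary illustrative values):

/* lea rsi, [rbp - 0x10]  ->  48 8D 75 F0               (disp8,  EMIT4)
 * lea rsi, [rbp - 0x90]  ->  48 8D B5 70 FF FF FF      (disp32, EMIT3_off32)
 *
 * The disp32 form changes the ModRM byte from 0x75 to 0xB5 and emits a
 * 4-byte little-endian displacement; the rdi (0x7D/0xBD) and rdx
 * (0x55/0x95) cases above follow the same pattern.
 */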
@@ -2113,7 +2263,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 				void *func_addr)
 {
 	int i, ret, nr_regs = m->nr_args, stack_size = 0;
-	int regs_off, nregs_off, ip_off, run_ctx_off;
+	int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
 	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
 	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
 	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
@@ -2127,8 +2277,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 		if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
 			nr_regs += (m->arg_size[i] + 7) / 8 - 1;
 
-	/* x86-64 supports up to 6 arguments. 7+ can be added in the future */
-	if (nr_regs > 6)
+	/* x86-64 supports up to MAX_BPF_FUNC_ARGS arguments. Arguments 1-6
+	 * are passed in registers, the rest are passed on the stack.
+	 */
+	if (nr_regs > MAX_BPF_FUNC_ARGS)
 		return -ENOTSUPP;
 
 	/* Generated trampoline stack layout:
@@ -2147,7 +2299,14 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	 *
 	 * RBP - ip_off      [ traced function ]  BPF_TRAMP_F_IP_ARG flag
 	 *
+	 * RBP - rbx_off     [ rbx value       ]  always
+	 *
 	 * RBP - run_ctx_off [ bpf_tramp_run_ctx ]
+	 *
+	 *                   [ stack_argN ]  BPF_TRAMP_F_CALL_ORIG
+	 *                   [ ...        ]
+	 *                   [ stack_arg2 ]
+	 * RBP - arg_stack_off [ stack_arg1 ]
 	 */
 
 	/* room for return value of orig_call or fentry prog */
@@ -2167,9 +2326,26 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 
 	ip_off = stack_size;
 
+	stack_size += 8;
+	rbx_off = stack_size;
+
 	stack_size += (sizeof(struct bpf_tramp_run_ctx) + 7) & ~0x7;
 	run_ctx_off = stack_size;
 
+	if (nr_regs > 6 && (flags & BPF_TRAMP_F_CALL_ORIG)) {
+		/* the space that is used to pass arguments on the stack */
+		stack_size += (nr_regs - get_nr_used_regs(m)) * 8;
+		/* make sure the stack pointer is 16-byte aligned if we
+		 * need to pass arguments on the stack, which means
+		 * [stack_size + 8(rbp) + 8(rip) + 8(origin rip)]
+		 * should be 16-byte aligned. The following code depends on
+		 * stack_size already being 8-byte aligned.
+		 */
+		stack_size += (stack_size % 16) ? 0 : 8;
+	}
+
+	arg_stack_off = stack_size;
+
 	if (flags & BPF_TRAMP_F_SKIP_FRAME) {
 		/* skip patched call instruction and point orig_call to actual
 		 * body of the kernel function.
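The padding rule a few lines above can be checked in isolation: given that stack_size is already 8-byte aligned, adding 8 exactly when stack_size % 16 == 0 makes stack_size + 24 (rbp, rip and the origin rip, 8 bytes each) a multiple of 16. A stand-alone sketch of that invariant, not part of the patch:

#include <assert.h>

int main(void)
{
	int stack_size;

	for (stack_size = 0; stack_size <= 256; stack_size += 8) {
		int adjusted = stack_size + ((stack_size % 16) ? 0 : 8);

		/* frame + saved rbp + rip + origin rip stays 16-byte aligned */
		assert((adjusted + 24) % 16 == 0);
	}
	return 0;
}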
@@ -2189,8 +2365,14 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	x86_call_depth_emit_accounting(&prog, NULL);
 	EMIT1(0x55);		 /* push rbp */
 	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
-	EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
-	EMIT1(0x53);		 /* push rbx */
+	if (!is_imm8(stack_size))
+		/* sub rsp, stack_size */
+		EMIT3_off32(0x48, 0x81, 0xEC, stack_size);
+	else
+		/* sub rsp, stack_size */
+		EMIT4(0x48, 0x83, 0xEC, stack_size);
+	/* mov QWORD PTR [rbp - rbx_off], rbx */
+	emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_6, -rbx_off);
 
 	/* Store number of argument registers of the traced function:
 	 * mov rax, nr_regs
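The prologue change serves two purposes: the frame can now exceed 127 bytes, so the imm32 form of sub may be needed, and rbx is saved to a fixed rbp-relative slot instead of being pushed, which (as the stack layout above suggests) keeps the area at the bottom of the frame free for the on-stack arguments prepared before calling the origin function. A hand-assembled sketch of the new prologue, assuming stack_size = 0x98 and rbx_off = 0x20 (example values only):

/*	push rbp			55
 *	mov rbp, rsp			48 89 E5
 *	sub rsp, 0x98			48 81 EC 98 00 00 00
 *	mov QWORD PTR [rbp-0x20], rbx	48 89 5D E0
 */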
@@ -2208,7 +2390,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ip_off);
 	}
 
-	save_regs(m, &prog, nr_regs, regs_off);
+	save_args(m, &prog, regs_off, false);
 
 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
 		/* arg1: mov rdi, im */
@@ -2238,7 +2420,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	}
 
 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
-		restore_regs(m, &prog, nr_regs, regs_off);
+		restore_regs(m, &prog, regs_off);
+		save_args(m, &prog, arg_stack_off, true);
 
 		if (flags & BPF_TRAMP_F_ORIG_STACK) {
 			emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8);
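Before calling the origin function, restore_regs() reloads the register arguments and the second save_args() call, with for_call_origin == true, copies only the on-stack arguments down to arg_stack_off. For the foo() example this would roughly emit (hand-derived, not from a real run):

/*	mov rax, QWORD PTR [rbp + 0x18]		// arg6, low 8 bytes
 *	mov QWORD PTR [rbp - arg_stack_off], rax
 *	mov rax, QWORD PTR [rbp + 0x20]		// arg6, high 8 bytes
 *	mov QWORD PTR [rbp - arg_stack_off + 8], rax
 *
 * Since rsp == rbp - stack_size and arg_stack_off == stack_size here,
 * stack_arg1 sits at [rsp] when the origin function is called, which
 * is where the x86-64 calling convention expects it.
 */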
@@ -2279,7 +2462,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	}
 
 	if (flags & BPF_TRAMP_F_RESTORE_REGS)
-		restore_regs(m, &prog, nr_regs, regs_off);
+		restore_regs(m, &prog, regs_off);
 
 	/* This needs to be done regardless. If there were fmod_ret programs,
 	 * the return value is only updated on the stack and still needs to be
@@ -2298,7 +2481,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	if (save_ret)
 		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
 
-	EMIT1(0x5B); /* pop rbx */
+	emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off);
 	EMIT1(0xC9); /* leave */
 	if (flags & BPF_TRAMP_F_SKIP_FRAME)
 		/* skip our return address and return to parent */
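Taken together, the trampoline no longer rejects functions that need more than six argument registers: the first six 8-byte slots travel in rdi, rsi, rdx, rcx, r8 and r9, and the remainder is shuttled through the trampoline's own stack area. A hypothetical signature that previously failed with -ENOTSUPP and is now traceable (illustrative only, not from the patch or its selftests):

/* 8 argument slots: the first six in registers, the last two copied
 * from the caller's stack by save_args() and passed back on the stack
 * for the origin call.
 */
int example_many_args(u64 a, u64 b, u64 c, u64 d, u64 e, u64 f,
		      u64 g, u64 h);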