
Commit a86538a

Author: Alexei Starovoitov (committed)
Merge branch 'bpf-prevent-userspace-memory-access'
Puranjay Mohan says:

====================
bpf: prevent userspace memory access

V5: https://lore.kernel.org/bpf/[email protected]/
Changes in V6:
- Disable the verifier's instrumentation on x86-64 and update the JIT to
  take care of the vsyscall page in addition to userspace addresses.
- Update bpf_testmod to test for vsyscall addresses.

V4: https://lore.kernel.org/bpf/[email protected]/
Changes in V5:
- Use TASK_SIZE_MAX + PAGE_SIZE and VSYSCALL_ADDR as the userspace boundary
  in the x86-64 JIT.
- Added Acked-by: Ilya Leoshkevich <[email protected]>

V3: https://lore.kernel.org/bpf/[email protected]/
Changes in V4:
- Disable this feature on architectures that don't define
  CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE.
- By doing the above, we don't need anything explicitly for s390x.

V2: https://lore.kernel.org/bpf/[email protected]/
Changes in V3:
- Return 0 from bpf_arch_uaddress_limit() in the disabled case because it
  returns u64.
- Modify the check in the verifier to not do instrumentation when
  uaddress_limit is 0.

V1: https://lore.kernel.org/bpf/[email protected]/
Changes in V2:
- Disable this feature on s390x.

With BPF_PROBE_MEM, BPF allows de-referencing an untrusted pointer. To
thwart invalid memory accesses, the JITs add an exception table entry for
all such accesses. But in case src_reg + offset is a userspace address,
the BPF program might read that memory if the user has mapped it.

The x86-64 JIT already instruments BPF_PROBE_MEM-based loads with checks
to skip loads from userspace addresses, but it doesn't check for the
vsyscall page, which falls in the kernel address space yet is considered
a userspace page. The second patch in this series fixes the x86-64 JIT to
also skip loads from the vsyscall page. The last patch updates bpf_testmod
so this address can be checked as part of the selftests.

Other architectures don't have the complexity of the vsyscall address and
just need to skip loads from userspace. To make this more scalable and
robust, the verifier is updated in the first patch to instrument
BPF_PROBE_MEM loads so that loads from userspace addresses are skipped.
====================

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents 5bcf0dc + 7cd6750 commit a86538a
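For context on the scenario the cover letter describes, the sketch below shows the kind of tracing program whose pointer loads are emitted as BPF_PROBE_MEM. It is not part of this series; the attach point and the field being read are illustrative only.

	// SPDX-License-Identifier: GPL-2.0
	/* Illustrative only: a tracing program that dereferences a BTF pointer
	 * directly.  The verifier marks such loads as BPF_PROBE_MEM, and this
	 * series ensures that if the effective address turns out to be a
	 * userspace address (or the vsyscall page on x86-64), the load is
	 * skipped and the destination register reads back as 0.
	 */
	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	char LICENSE[] SEC("license") = "GPL";

	SEC("fentry/vfs_read")
	int BPF_PROG(trace_vfs_read, struct file *file)
	{
		unsigned int flags = file->f_flags;	/* compiled to a BPF_PROBE_MEM load */

		bpf_printk("vfs_read f_flags: %x", flags);
		return 0;
	}

Such loads are already guarded by an exception table entry so a faulting address is handled, but before this series a load whose address happened to land in user-mapped memory (or the vsyscall page on x86-64) would simply succeed; the patches below make those loads return 0 instead.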

File tree (5 files changed, +74 / -32 lines changed)

- arch/x86/net/bpf_jit_comp.c
- include/linux/filter.h
- kernel/bpf/core.c
- kernel/bpf/verifier.c
- tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c


arch/x86/net/bpf_jit_comp.c

Lines changed: 31 additions & 32 deletions
@@ -1807,36 +1807,41 @@ st: if (is_imm8(insn->off))
 		if (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
 		    BPF_MODE(insn->code) == BPF_PROBE_MEMSX) {
 			/* Conservatively check that src_reg + insn->off is a kernel address:
-			 * src_reg + insn->off >= TASK_SIZE_MAX + PAGE_SIZE
-			 * src_reg is used as scratch for src_reg += insn->off and restored
-			 * after emit_ldx if necessary
+			 * src_reg + insn->off > TASK_SIZE_MAX + PAGE_SIZE
+			 * and
+			 * src_reg + insn->off < VSYSCALL_ADDR
 			 */

-			u64 limit = TASK_SIZE_MAX + PAGE_SIZE;
+			u64 limit = TASK_SIZE_MAX + PAGE_SIZE - VSYSCALL_ADDR;
 			u8 *end_of_jmp;

-			/* At end of these emitted checks, insn->off will have been added
-			 * to src_reg, so no need to do relative load with insn->off offset
-			 */
-			insn_off = 0;
+			/* movabsq r10, VSYSCALL_ADDR */
+			emit_mov_imm64(&prog, BPF_REG_AX, (long)VSYSCALL_ADDR >> 32,
+				       (u32)(long)VSYSCALL_ADDR);

-			/* movabsq r11, limit */
-			EMIT2(add_1mod(0x48, AUX_REG), add_1reg(0xB8, AUX_REG));
-			EMIT((u32)limit, 4);
-			EMIT(limit >> 32, 4);
+			/* mov src_reg, r11 */
+			EMIT_mov(AUX_REG, src_reg);

 			if (insn->off) {
-				/* add src_reg, insn->off */
-				maybe_emit_1mod(&prog, src_reg, true);
-				EMIT2_off32(0x81, add_1reg(0xC0, src_reg), insn->off);
+				/* add r11, insn->off */
+				maybe_emit_1mod(&prog, AUX_REG, true);
+				EMIT2_off32(0x81, add_1reg(0xC0, AUX_REG), insn->off);
 			}

-			/* cmp src_reg, r11 */
-			maybe_emit_mod(&prog, src_reg, AUX_REG, true);
-			EMIT2(0x39, add_2reg(0xC0, src_reg, AUX_REG));
+			/* sub r11, r10 */
+			maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true);
+			EMIT2(0x29, add_2reg(0xC0, AUX_REG, BPF_REG_AX));
+
+			/* movabsq r10, limit */
+			emit_mov_imm64(&prog, BPF_REG_AX, (long)limit >> 32,
+				       (u32)(long)limit);
+
+			/* cmp r10, r11 */
+			maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true);
+			EMIT2(0x39, add_2reg(0xC0, AUX_REG, BPF_REG_AX));

-			/* if unsigned '>=', goto load */
-			EMIT2(X86_JAE, 0);
+			/* if unsigned '>', goto load */
+			EMIT2(X86_JA, 0);
 			end_of_jmp = prog;

 			/* xor dst_reg, dst_reg */
@@ -1862,18 +1867,6 @@ st: if (is_imm8(insn->off))
 			/* populate jmp_offset for JMP above */
 			start_of_ldx[-1] = prog - start_of_ldx;

-			if (insn->off && src_reg != dst_reg) {
-				/* sub src_reg, insn->off
-				 * Restore src_reg after "add src_reg, insn->off" in prev
-				 * if statement. But if src_reg == dst_reg, emit_ldx
-				 * above already clobbered src_reg, so no need to restore.
-				 * If add src_reg, insn->off was unnecessary, no need to
-				 * restore either.
-				 */
-				maybe_emit_1mod(&prog, src_reg, true);
-				EMIT2_off32(0x81, add_1reg(0xE8, src_reg), insn->off);
-			}
-
 			if (!bpf_prog->aux->extable)
 				break;

@@ -3473,3 +3466,9 @@ bool bpf_jit_supports_ptr_xchg(void)
 {
 	return true;
 }
+
+/* x86-64 JIT emits its own code to filter user addresses so return 0 here */
+u64 bpf_arch_uaddress_limit(void)
+{
+	return 0;
+}
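For readability, the boundary check that the new JIT sequence performs can be written as plain C. This is a sketch of the predicate only; the helper name is made up and is not code from this patch, and the constants are the ones used above.

	/* Sketch: the condition under which the emitted code performs the
	 * PROBE_MEM load.  A single unsigned comparison covers both bounds
	 * because "limit" wraps around, exactly as in the JIT above.
	 */
	static bool probe_mem_addr_is_kernel(u64 addr)
	{
		u64 limit = TASK_SIZE_MAX + PAGE_SIZE - VSYSCALL_ADDR;

		/* True only for TASK_SIZE_MAX + PAGE_SIZE < addr < VSYSCALL_ADDR.
		 * Userspace addresses and the vsyscall page both fall into the
		 * wrapped interval, fail the test, and get dst_reg zeroed
		 * instead of a load.
		 */
		return addr - VSYSCALL_ADDR > limit;
	}

The subtraction trick is why the JIT now needs two movabsq immediates (VSYSCALL_ADDR and the wrapped limit) but still only a single conditional branch, and why the old restore of src_reg after the check is no longer needed: the address computation now happens in the scratch registers r10/r11 instead of clobbering src_reg.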

include/linux/filter.h

Lines changed: 1 addition & 0 deletions
@@ -963,6 +963,7 @@ bool bpf_jit_supports_far_kfunc_call(void);
 bool bpf_jit_supports_exceptions(void);
 bool bpf_jit_supports_ptr_xchg(void);
 bool bpf_jit_supports_arena(void);
+u64 bpf_arch_uaddress_limit(void);
 void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie);
 bool bpf_helper_changes_pkt_data(void *func);

kernel/bpf/core.c

Lines changed: 9 additions & 0 deletions
@@ -2942,6 +2942,15 @@ bool __weak bpf_jit_supports_arena(void)
 	return false;
 }

+u64 __weak bpf_arch_uaddress_limit(void)
+{
+#if defined(CONFIG_64BIT) && defined(CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE)
+	return TASK_SIZE;
+#else
+	return 0;
+#endif
+}
+
 /* Return TRUE if the JIT backend satisfies the following two conditions:
  * 1) JIT backend supports atomic_xchg() on pointer-sized words.
  * 2) Under the specific arch, the implementation of xchg() is the same

kernel/bpf/verifier.c

Lines changed: 30 additions & 0 deletions
@@ -19675,6 +19675,36 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			goto next_insn;
 		}

+		/* Make it impossible to de-reference a userspace address */
+		if (BPF_CLASS(insn->code) == BPF_LDX &&
+		    (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
+		     BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) {
+			struct bpf_insn *patch = &insn_buf[0];
+			u64 uaddress_limit = bpf_arch_uaddress_limit();
+
+			if (!uaddress_limit)
+				goto next_insn;
+
+			*patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
+			if (insn->off)
+				*patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off);
+			*patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32);
+			*patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2);
+			*patch++ = *insn;
+			*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+			*patch++ = BPF_MOV64_IMM(insn->dst_reg, 0);
+
+			cnt = patch - insn_buf;
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta += cnt - 1;
+			env->prog = prog = new_prog;
+			insn = new_prog->insnsi + i + delta;
+			goto next_insn;
+		}
+
 		/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
 		if (BPF_CLASS(insn->code) == BPF_LD &&
 		    (BPF_MODE(insn->code) == BPF_ABS ||
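The seven patched instructions are easier to follow as ordinary C. The sketch below mirrors their control flow for a load "dst = *(src + off)"; the function name and parameters are illustrative only, and BPF_REG_AX plays the role of the local scratch variable.

	/* Sketch of the instrumentation the verifier patches around one
	 * PROBE_MEM/PROBE_MEMSX load (illustrative, not verifier output).
	 */
	static u64 probe_mem_load_sketch(u64 src, s16 off, u64 uaddress_limit)
	{
		u64 ax = src + off;			/* BPF_MOV64_REG + optional BPF_ADD */
		u64 dst;

		ax >>= 32;				/* BPF_ALU64_IMM(BPF_RSH, ..., 32)  */
		if (ax <= (uaddress_limit >> 32))	/* BPF_JMP_IMM(BPF_JLE, ...) taken  */
			dst = 0;			/* BPF_MOV64_IMM(insn->dst_reg, 0)  */
		else
			dst = *(u64 *)(src + off);	/* the original PROBE_MEM load      */
		return dst;
	}

Comparing only the upper 32 bits of the address against the upper 32 bits of bpf_arch_uaddress_limit() keeps the patch to a handful of cheap ALU instructions; it is deliberately coarse, treating everything at or below that 4 GiB-granular boundary as a userspace address that reads back as 0.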

tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c

Lines changed: 3 additions & 0 deletions
@@ -205,6 +205,9 @@ __weak noinline struct file *bpf_testmod_return_ptr(int arg)
 	case 5: return (void *)~(1ull << 30);	/* trigger extable */
 	case 6: return &f;			/* valid addr */
 	case 7: return (void *)((long)&f | 1);	/* kernel tricks */
+#ifdef CONFIG_X86_64
+	case 8: return (void *)VSYSCALL_ADDR;	/* vsyscall page address */
+#endif
 	default: return NULL;
 	}
 }
