
Commit 931ab63

Authored and committed by Peter Zijlstra
x86/ibt: Implement FineIBT
Implement an alternative CFI scheme that merges the fine-grained
nature of kCFI with the coarse-grained hardware CFI provided by IBT.

To contrast:

  kCFI is a pure software CFI scheme. It relies on being able to read
  text -- specifically the instruction *before* the target symbol --
  and does the hash validation *before* doing the call (otherwise
  control flow is compromised already).

  FineIBT is a software and hardware hybrid scheme; by ensuring every
  branch target starts with a hash validation it is possible to place
  the hash validation after the branch. This has several advantages:

   o the (hash) load is avoided; no memop; no RX requirement.

   o IBT WAIT-FOR-ENDBR state is a speculation stop; by placing the
     hash validation in the immediate instruction after the branch
     target there is a minimal speculation window and the whole is a
     viable defence against SpectreBHB.

   o Kees feels obliged to mention it is slightly more vulnerable
     when the attacker can write code.

Obviously this patch relies on kCFI, but additionally it also relies
on the padding from the call-depth-tracking patches. It uses this
padding to place the hash validation, while the call sites are
rewritten to aim the indirect target 16 bytes in front of the
original target, thus hitting this new preamble.

Notably, no hardware both needs call-depth tracking (Skylake) and
supports IBT (Tigerlake and onwards).

Suggested-by: Joao Moreira (Intel) <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent 9a479f7 commit 931ab63
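
To make the contrast concrete, the check placement can be sketched in
plain C. This is a minimal illustration, not kernel code: the function
name kcfi_style_call, the fixed -4 offset and the __builtin_trap() are
stand-ins for the real movl-immediate decoding and ud2.

#include <stdint.h>

typedef void (*fptr)(void);

/* kCFI-style: the caller reads the hash from the text immediately
 * before the target and validates it *before* the indirect call;
 * this needs a load from (readable) text at every call site. */
static void kcfi_style_call(fptr fn, uint32_t expected)
{
	uint32_t hash = *(const uint32_t *)((const char *)fn - 4);
	if (hash != expected)
		__builtin_trap();	/* ud2 */
	fn();				/* control flow already committed */
}

/* FineIBT-style: the caller merely loads the expected hash into a
 * register and branches 16 bytes in front of the target; ENDBR plus
 * the hash compare sit in the callee preamble, *after* the branch,
 * so the call site needs no text read and no RX mapping. */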

File tree

11 files changed: +294 -21


arch/um/kernel/um_arch.c

Lines changed: 5 additions & 0 deletions
@@ -444,6 +444,11 @@ void apply_returns(s32 *start, s32 *end)
 {
 }
 
+void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
+		   s32 *start_cfi, s32 *end_cfi)
+{
+}
+
 void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
 {
 }

arch/x86/Kconfig

Lines changed: 12 additions & 2 deletions
@@ -2463,17 +2463,27 @@ config FUNCTION_PADDING_BYTES
 	default FUNCTION_PADDING_CFI if CFI_CLANG
 	default FUNCTION_ALIGNMENT
 
+config CALL_PADDING
+	def_bool n
+	depends on CC_HAS_ENTRY_PADDING && OBJTOOL
+	select FUNCTION_ALIGNMENT_16B
+
+config FINEIBT
+	def_bool y
+	depends on X86_KERNEL_IBT && CFI_CLANG && RETPOLINE
+	select CALL_PADDING
+
 config HAVE_CALL_THUNKS
 	def_bool y
 	depends on CC_HAS_ENTRY_PADDING && RETHUNK && OBJTOOL
 
 config CALL_THUNKS
 	def_bool n
-	select FUNCTION_ALIGNMENT_16B
+	select CALL_PADDING
 
 config PREFIX_SYMBOLS
 	def_bool y
-	depends on CALL_THUNKS && !CFI_CLANG
+	depends on CALL_PADDING && !CFI_CLANG
 
 menuconfig SPECULATION_MITIGATIONS
 	bool "Mitigations for speculative execution vulnerabilities"

arch/x86/Makefile

Lines changed: 1 addition & 1 deletion
@@ -208,7 +208,7 @@ ifdef CONFIG_SLS
 	KBUILD_CFLAGS += -mharden-sls=all
 endif
 
-ifdef CONFIG_CALL_THUNKS
+ifdef CONFIG_CALL_PADDING
 PADDING_CFLAGS := -fpatchable-function-entry=$(CONFIG_FUNCTION_PADDING_BYTES),$(CONFIG_FUNCTION_PADDING_BYTES)
 KBUILD_CFLAGS += $(PADDING_CFLAGS)
 export PADDING_CFLAGS

arch/x86/include/asm/alternative.h

Lines changed: 2 additions & 0 deletions
@@ -78,6 +78,8 @@ extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
 extern void apply_retpolines(s32 *start, s32 *end);
 extern void apply_returns(s32 *start, s32 *end);
 extern void apply_ibt_endbr(s32 *start, s32 *end);
+extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
+			  s32 *start_cfi, s32 *end_cfi);
 
 struct module;
 struct paravirt_patch_site;

arch/x86/include/asm/linkage.h

Lines changed: 3 additions & 3 deletions
@@ -15,7 +15,7 @@
 #define __ALIGN		.balign CONFIG_FUNCTION_ALIGNMENT, 0x90;
 #define __ALIGN_STR	__stringify(__ALIGN)
 
-#if defined(CONFIG_CALL_THUNKS) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+#if defined(CONFIG_CALL_PADDING) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
 #define FUNCTION_PADDING	.skip CONFIG_FUNCTION_ALIGNMENT, 0x90;
 #else
 #define FUNCTION_PADDING
@@ -57,7 +57,7 @@
 #endif /* __ASSEMBLY__ */
 
 /*
- * Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_THUNKS) the
+ * Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_PADDING) the
  * CFI symbol layout changes.
  *
  * Without CALL_THUNKS:
@@ -81,7 +81,7 @@
  * In both cases the whole thing is FUNCTION_ALIGNMENT aligned and sized.
  */
 
-#ifdef CONFIG_CALL_THUNKS
+#ifdef CONFIG_CALL_PADDING
 #define CFI_PRE_PADDING
 #define CFI_POST_PADDING	.skip CONFIG_FUNCTION_PADDING_BYTES, 0x90;
 #else
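
For reference, the layout change this comment refers to looks roughly
as follows; this is an illustrative sketch (assuming the usual 5-byte
movl and 11 padding bytes), not the verbatim header text:

/*
 * Without CALL_PADDING:		With CALL_PADDING:
 *
 *	.align FUNCTION_ALIGNMENT		.align FUNCTION_ALIGNMENT
 * __cfi_func:				__cfi_func:
 *	.skip 11, 0x90				movl $0x12345678, %eax
 *	movl $0x12345678, %eax			.skip 11, 0x90
 * func:				func:
 *
 * With CALL_PADDING the hash instruction and padding occupy the 16
 * bytes directly in front of func -- exactly the slot the FineIBT
 * preamble in alternative.c is later patched into.
 */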

arch/x86/kernel/alternative.c

Lines changed: 240 additions & 13 deletions
@@ -116,6 +116,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
 
 extern s32 __retpoline_sites[], __retpoline_sites_end[];
 extern s32 __return_sites[], __return_sites_end[];
+extern s32 __cfi_sites[], __cfi_sites_end[];
 extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[];
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 extern s32 __smp_locks[], __smp_locks_end[];
@@ -656,6 +657,28 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
 
 #ifdef CONFIG_X86_KERNEL_IBT
 
+static void poison_endbr(void *addr, bool warn)
+{
+	u32 endbr, poison = gen_endbr_poison();
+
+	if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr)))
+		return;
+
+	if (!is_endbr(endbr)) {
+		WARN_ON_ONCE(warn);
+		return;
+	}
+
+	DPRINTK("ENDBR at: %pS (%px)", addr, addr);
+
+	/*
+	 * When we have IBT, the lack of ENDBR will trigger #CP
+	 */
+	DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr);
+	DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr);
+	text_poke_early(addr, &poison, 4);
+}
+
 /*
  * Generated by: objtool --ibt
  */
@@ -664,31 +687,232 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end)
 	s32 *s;
 
 	for (s = start; s < end; s++) {
-		u32 endbr, poison = gen_endbr_poison();
 		void *addr = (void *)s + *s;
 
-		if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr)))
-			continue;
+		poison_endbr(addr, true);
+		if (IS_ENABLED(CONFIG_FINEIBT))
+			poison_endbr(addr - 16, false);
+	}
+}
+
+#else
+
+void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) { }
+
+#endif /* CONFIG_X86_KERNEL_IBT */
+
+#ifdef CONFIG_FINEIBT
+/*
+ * kCFI					FineIBT
+ *
+ * __cfi_\func:				__cfi_\func:
+ *	movl   $0x12345678,%eax	// 5	     endbr64			// 4
+ *	nop				     subl   $0x12345678,%r10d	// 7
+ *	nop				     jz     1f			// 2
+ *	nop				     ud2			// 2
+ *	nop				1:   nop			// 1
+ *	nop
+ *	nop
+ *	nop
+ *	nop
+ *	nop
+ *	nop
+ *	nop
+ *
+ *
+ * caller:				caller:
+ *	movl	$(-0x12345678),%r10d	// 6	movl   $0x12345678,%r10d // 6
+ *	addl	$-15(%r11),%r10d	// 4	sub    $16,%r11		 // 4
+ *	je	1f			// 2	nop4			 // 4
+ *	ud2				// 2
+ * 1:	call	__x86_indirect_thunk_r11 // 5	call   *%r11; nop2;	 // 5
+ *
+ */
+
+asm(	".pushsection .rodata			\n"
+	"fineibt_preamble_start:		\n"
+	"	endbr64				\n"
+	"	subl	$0x12345678, %r10d	\n"
+	"	je	fineibt_preamble_end	\n"
+	"	ud2				\n"
+	"	nop				\n"
+	"fineibt_preamble_end:			\n"
+	".popsection\n"
+);
+
+extern u8 fineibt_preamble_start[];
+extern u8 fineibt_preamble_end[];
+
+#define fineibt_preamble_size (fineibt_preamble_end - fineibt_preamble_start)
+#define fineibt_preamble_hash 7
+
+asm(	".pushsection .rodata			\n"
+	"fineibt_caller_start:			\n"
+	"	movl	$0x12345678, %r10d	\n"
+	"	sub	$16, %r11		\n"
+	ASM_NOP4
+	"fineibt_caller_end:			\n"
+	".popsection				\n"
+);
+
+extern u8 fineibt_caller_start[];
+extern u8 fineibt_caller_end[];
+
+#define fineibt_caller_size (fineibt_caller_end - fineibt_caller_start)
+#define fineibt_caller_hash 2
+
+#define fineibt_caller_jmp (fineibt_caller_size - 2)
+
+static u32 decode_preamble_hash(void *addr)
+{
+	u8 *p = addr;
+
+	/* b8 78 56 34 12		mov    $0x12345678,%eax */
+	if (p[0] == 0xb8)
+		return *(u32 *)(addr + 1);
+
+	return 0; /* invalid hash value */
+}
+
+static u32 decode_caller_hash(void *addr)
+{
+	u8 *p = addr;
+
+	/* 41 ba 78 56 34 12		mov    $0x12345678,%r10d */
+	if (p[0] == 0x41 && p[1] == 0xba)
+		return -*(u32 *)(addr + 2);
+
+	/* eb 0c 78 56 34 12		jmp.d8 +12 */
+	if (p[0] == JMP8_INSN_OPCODE && p[1] == fineibt_caller_jmp)
+		return -*(u32 *)(addr + 2);
+
+	return 0; /* invalid hash value */
+}
+
+/* .retpoline_sites */
+static int cfi_disable_callers(s32 *start, s32 *end)
+{
+	/*
+	 * Disable kCFI by patching in a JMP.d8, this leaves the hash immediate
+	 * intact for later usage. Also see decode_caller_hash() and
+	 * cfi_rewrite_callers().
+	 */
+	const u8 jmp[] = { JMP8_INSN_OPCODE, fineibt_caller_jmp };
+	s32 *s;
 
-		if (WARN_ON_ONCE(!is_endbr(endbr)))
+	for (s = start; s < end; s++) {
+		void *addr = (void *)s + *s;
+		u32 hash;
+
+		addr -= fineibt_caller_size;
+		hash = decode_caller_hash(addr);
+		if (!hash) /* nocfi callers */
 			continue;
 
-		DPRINTK("ENDBR at: %pS (%px)", addr, addr);
+		text_poke_early(addr, jmp, 2);
+	}
 
-		/*
-		 * When we have IBT, the lack of ENDBR will trigger #CP
-		 */
-		DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr);
-		DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr);
-		text_poke_early(addr, &poison, 4);
+	return 0;
+}
+
+/* .cfi_sites */
+static int cfi_rewrite_preamble(s32 *start, s32 *end)
+{
+	s32 *s;
+
+	for (s = start; s < end; s++) {
+		void *addr = (void *)s + *s;
+		u32 hash;
+
+		hash = decode_preamble_hash(addr);
+		if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n",
+			 addr, addr, 5, addr))
+			return -EINVAL;
+
+		text_poke_early(addr, fineibt_preamble_start, fineibt_preamble_size);
+		WARN_ON(*(u32 *)(addr + fineibt_preamble_hash) != 0x12345678);
+		text_poke_early(addr + fineibt_preamble_hash, &hash, 4);
 	}
+
+	return 0;
+}
+
+/* .retpoline_sites */
+static int cfi_rewrite_callers(s32 *start, s32 *end)
+{
+	s32 *s;
+
+	for (s = start; s < end; s++) {
+		void *addr = (void *)s + *s;
+		u32 hash;
+
+		addr -= fineibt_caller_size;
+		hash = decode_caller_hash(addr);
+		if (hash) {
+			text_poke_early(addr, fineibt_caller_start, fineibt_caller_size);
+			WARN_ON(*(u32 *)(addr + fineibt_caller_hash) != 0x12345678);
+			text_poke_early(addr + fineibt_caller_hash, &hash, 4);
+		}
+		/* rely on apply_retpolines() */
+	}
+
+	return 0;
+}
+
+static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
+			    s32 *start_cfi, s32 *end_cfi, bool builtin)
+{
+	int ret;
+
+	if (WARN_ONCE(fineibt_preamble_size != 16,
+		      "FineIBT preamble wrong size: %ld", fineibt_preamble_size))
+		return;
+
+	if (!HAS_KERNEL_IBT || !cpu_feature_enabled(X86_FEATURE_IBT))
+		return;
+
+	/*
+	 * Rewrite the callers to not use the __cfi_ stubs, such that we might
+	 * rewrite them. This disables all CFI. If this succeeds but any of the
+	 * later stages fails, we're without CFI.
+	 */
+	ret = cfi_disable_callers(start_retpoline, end_retpoline);
+	if (ret)
+		goto err;
+
+	ret = cfi_rewrite_preamble(start_cfi, end_cfi);
+	if (ret)
+		goto err;
+
+	ret = cfi_rewrite_callers(start_retpoline, end_retpoline);
+	if (ret)
+		goto err;
+
+	if (builtin)
+		pr_info("Using FineIBT CFI\n");
+
+	return;
+
+err:
+	pr_err("Something went horribly wrong trying to rewrite the CFI implementation.\n");
 }
 
 #else
 
-void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) { }
+static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
+			    s32 *start_cfi, s32 *end_cfi, bool builtin)
+{
+}
 
-#endif /* CONFIG_X86_KERNEL_IBT */
+#endif
+
+void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
+		   s32 *start_cfi, s32 *end_cfi)
+{
+	return __apply_fineibt(start_retpoline, end_retpoline,
+			       start_cfi, end_cfi,
+			       /* .builtin = */ false);
+}
 
 #ifdef CONFIG_SMP
 static void alternatives_smp_lock(const s32 *start, const s32 *end,
@@ -996,6 +1220,9 @@ void __init alternative_instructions(void)
 	 */
 	apply_paravirt(__parainstructions, __parainstructions_end);
 
+	__apply_fineibt(__retpoline_sites, __retpoline_sites_end,
+			__cfi_sites, __cfi_sites_end, true);
+
 	/*
 	 * Rewrite the retpolines, must be done before alternatives since
 	 * those can rewrite the retpoline thunks.
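
A detail worth spelling out from decode_caller_hash() above: kCFI call
sites store the *negated* hash, so the inline check is a single addl
that sets ZF on a match, and the FineIBT rewrite negates the stored
immediate again to recover the positive hash it pokes into preamble
and caller. A self-contained illustration (userspace C, example hash
value only):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t hash   = 0x12345678;	/* per-prototype hash (example) */
	uint32_t caller = -hash;	/* immediate loaded by the kCFI call site */

	/* kCFI check: adding the callee-side hash yields 0 (sets ZF)
	 * iff caller and callee agree. */
	assert((uint32_t)(caller + hash) == 0);

	/* decode_caller_hash() returns -imm, recovering the positive
	 * hash that cfi_rewrite_callers() then pokes back in. */
	assert((uint32_t)-caller == hash);
	return 0;
}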

arch/x86/kernel/cpu/common.c

Lines changed: 1 addition & 0 deletions
@@ -609,6 +609,7 @@ static __always_inline void setup_cet(struct cpuinfo_x86 *c)
 
 	if (!ibt_selftest()) {
 		pr_err("IBT selftest: Failed!\n");
+		wrmsrl(MSR_IA32_S_CET, 0);
 		setup_clear_cpu_cap(X86_FEATURE_IBT);
 		return;
 	}
