Skip to content

Commit 60a5317

Browse files
htejunIngo Molnar
authored andcommitted
x86: implement x86_32 stack protector
Impact: stack protector for x86_32 Implement stack protector for x86_32. GDT entry 28 is used for it. It's set to point to stack_canary-20 and have the length of 24 bytes. CONFIG_CC_STACKPROTECTOR turns off CONFIG_X86_32_LAZY_GS and sets %gs to the stack canary segment on entry. As %gs is otherwise unused by the kernel, the canary can be anywhere. It's defined as a percpu variable. x86_32 exception handlers take register frame on stack directly as struct pt_regs. With -fstack-protector turned on, gcc copies the whole structure after the stack canary and (of course) doesn't copy back on return thus losing all changed. For now, -fno-stack-protector is added to all files which contain those functions. We definitely need something better. Signed-off-by: Tejun Heo <[email protected]> Signed-off-by: Ingo Molnar <[email protected]>
1 parent ccbeed3 commit 60a5317

File tree

12 files changed

+180
-16
lines changed

12 files changed

+180
-16
lines changed

arch/x86/Kconfig

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ config X86_TRAMPOLINE
209209

210210
config X86_32_LAZY_GS
211211
def_bool y
212-
depends on X86_32
212+
depends on X86_32 && !CC_STACKPROTECTOR
213213

214214
config KTIME_SCALAR
215215
def_bool X86_32
@@ -1356,7 +1356,6 @@ config CC_STACKPROTECTOR_ALL
13561356

13571357
config CC_STACKPROTECTOR
13581358
bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
1359-
depends on X86_64
13601359
select CC_STACKPROTECTOR_ALL
13611360
help
13621361
This option turns on the -fstack-protector GCC feature. This

arch/x86/include/asm/processor.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,11 @@ DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
396396
DECLARE_INIT_PER_CPU(irq_stack_union);
397397

398398
DECLARE_PER_CPU(char *, irq_stack_ptr);
399+
#else /* X86_64 */
400+
#ifdef CONFIG_CC_STACKPROTECTOR
401+
DECLARE_PER_CPU(unsigned long, stack_canary);
399402
#endif
403+
#endif /* X86_64 */
400404

401405
extern void print_cpu_info(struct cpuinfo_x86 *);
402406
extern unsigned int xstate_size;

arch/x86/include/asm/segment.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
*
6262
* 26 - ESPFIX small SS
6363
* 27 - per-cpu [ offset to per-cpu data area ]
64-
* 28 - unused
64+
* 28 - stack_canary-20 [ for stack protector ]
6565
* 29 - unused
6666
* 30 - unused
6767
* 31 - TSS for double fault handler
@@ -95,6 +95,13 @@
9595
#define __KERNEL_PERCPU 0
9696
#endif
9797

98+
#define GDT_ENTRY_STACK_CANARY (GDT_ENTRY_KERNEL_BASE + 16)
99+
#ifdef CONFIG_CC_STACKPROTECTOR
100+
#define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY * 8)
101+
#else
102+
#define __KERNEL_STACK_CANARY 0
103+
#endif
104+
98105
#define GDT_ENTRY_DOUBLEFAULT_TSS 31
99106

100107
/*

arch/x86/include/asm/stackprotector.h

Lines changed: 86 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,35 @@
1+
/*
2+
* GCC stack protector support.
3+
*
4+
* Stack protector works by putting predefined pattern at the start of
5+
* the stack frame and verifying that it hasn't been overwritten when
6+
* returning from the function. The pattern is called stack canary
7+
* and unfortunately gcc requires it to be at a fixed offset from %gs.
8+
* On x86_64, the offset is 40 bytes and on x86_32 20 bytes. x86_64
9+
* and x86_32 use segment registers differently and thus handles this
10+
* requirement differently.
11+
*
12+
* On x86_64, %gs is shared by percpu area and stack canary. All
13+
* percpu symbols are zero based and %gs points to the base of percpu
14+
* area. The first occupant of the percpu area is always
15+
* irq_stack_union which contains stack_canary at offset 40. Userland
16+
* %gs is always saved and restored on kernel entry and exit using
17+
* swapgs, so stack protector doesn't add any complexity there.
18+
*
19+
* On x86_32, it's slightly more complicated. As in x86_64, %gs is
20+
* used for userland TLS. Unfortunately, some processors are much
21+
* slower at loading segment registers with different value when
22+
* entering and leaving the kernel, so the kernel uses %fs for percpu
23+
* area and manages %gs lazily so that %gs is switched only when
24+
* necessary, usually during task switch.
25+
*
26+
* As gcc requires the stack canary at %gs:20, %gs can't be managed
27+
* lazily if stack protector is enabled, so the kernel saves and
28+
* restores userland %gs on kernel entry and exit. This behavior is
29+
* controlled by CONFIG_X86_32_LAZY_GS and accessors are defined in
30+
* system.h to hide the details.
31+
*/
32+
133
#ifndef _ASM_STACKPROTECTOR_H
234
#define _ASM_STACKPROTECTOR_H 1
335

@@ -6,8 +38,18 @@
638
#include <asm/tsc.h>
739
#include <asm/processor.h>
840
#include <asm/percpu.h>
41+
#include <asm/system.h>
42+
#include <asm/desc.h>
943
#include <linux/random.h>
1044

45+
/*
46+
* 24 byte read-only segment initializer for stack canary. Linker
47+
* can't handle the address bit shifting. Address will be set in
48+
* head_32 for boot CPU and setup_per_cpu_areas() for others.
49+
*/
50+
#define GDT_STACK_CANARY_INIT \
51+
[GDT_ENTRY_STACK_CANARY] = { { { 0x00000018, 0x00409000 } } },
52+
1153
/*
1254
* Initialize the stackprotector canary value.
1355
*
@@ -19,12 +61,9 @@ static __always_inline void boot_init_stack_canary(void)
1961
u64 canary;
2062
u64 tsc;
2163

22-
/*
23-
* Build time only check to make sure the stack_canary is at
24-
* offset 40 in the pda; this is a gcc ABI requirement
25-
*/
64+
#ifdef CONFIG_X86_64
2665
BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
27-
66+
#endif
2867
/*
2968
* We both use the random pool and the current TSC as a source
3069
* of randomness. The TSC only matters for very early init,
@@ -36,7 +75,49 @@ static __always_inline void boot_init_stack_canary(void)
3675
canary += tsc + (tsc << 32UL);
3776

3877
current->stack_canary = canary;
78+
#ifdef CONFIG_X86_64
3979
percpu_write(irq_stack_union.stack_canary, canary);
80+
#else
81+
percpu_write(stack_canary, canary);
82+
#endif
83+
}
84+
85+
static inline void setup_stack_canary_segment(int cpu)
86+
{
87+
#ifdef CONFIG_X86_32
88+
unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
89+
struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
90+
struct desc_struct desc;
91+
92+
desc = gdt_table[GDT_ENTRY_STACK_CANARY];
93+
desc.base0 = canary & 0xffff;
94+
desc.base1 = (canary >> 16) & 0xff;
95+
desc.base2 = (canary >> 24) & 0xff;
96+
write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S);
97+
#endif
98+
}
99+
100+
static inline void load_stack_canary_segment(void)
101+
{
102+
#ifdef CONFIG_X86_32
103+
asm("mov %0, %%gs" : : "r" (__KERNEL_STACK_CANARY) : "memory");
104+
#endif
105+
}
106+
107+
#else /* CC_STACKPROTECTOR */
108+
109+
#define GDT_STACK_CANARY_INIT
110+
111+
/* dummy boot_init_stack_canary() is defined in linux/stackprotector.h */
112+
113+
static inline void setup_stack_canary_segment(int cpu)
114+
{ }
115+
116+
static inline void load_stack_canary_segment(void)
117+
{
118+
#ifdef CONFIG_X86_32
119+
asm volatile ("mov %0, %%gs" : : "r" (0));
120+
#endif
40121
}
41122

42123
#endif /* CC_STACKPROTECTOR */

arch/x86/include/asm/system.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,22 @@ struct task_struct *__switch_to(struct task_struct *prev,
2323

2424
#ifdef CONFIG_X86_32
2525

26+
#ifdef CONFIG_CC_STACKPROTECTOR
27+
#define __switch_canary \
28+
"movl "__percpu_arg([current_task])",%%ebx\n\t" \
29+
"movl %P[task_canary](%%ebx),%%ebx\n\t" \
30+
"movl %%ebx,"__percpu_arg([stack_canary])"\n\t"
31+
#define __switch_canary_oparam \
32+
, [stack_canary] "=m" (per_cpu_var(stack_canary))
33+
#define __switch_canary_iparam \
34+
, [current_task] "m" (per_cpu_var(current_task)) \
35+
, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
36+
#else /* CC_STACKPROTECTOR */
37+
#define __switch_canary
38+
#define __switch_canary_oparam
39+
#define __switch_canary_iparam
40+
#endif /* CC_STACKPROTECTOR */
41+
2642
/*
2743
* Saving eflags is important. It switches not only IOPL between tasks,
2844
* it also protects other tasks from NT leaking through sysenter etc.
@@ -46,6 +62,7 @@ do { \
4662
"pushl %[next_ip]\n\t" /* restore EIP */ \
4763
"jmp __switch_to\n" /* regparm call */ \
4864
"1:\t" \
65+
__switch_canary \
4966
"popl %%ebp\n\t" /* restore EBP */ \
5067
"popfl\n" /* restore flags */ \
5168
\
@@ -58,6 +75,8 @@ do { \
5875
"=b" (ebx), "=c" (ecx), "=d" (edx), \
5976
"=S" (esi), "=D" (edi) \
6077
\
78+
__switch_canary_oparam \
79+
\
6180
/* input parameters: */ \
6281
: [next_sp] "m" (next->thread.sp), \
6382
[next_ip] "m" (next->thread.ip), \
@@ -66,6 +85,8 @@ do { \
6685
[prev] "a" (prev), \
6786
[next] "d" (next) \
6887
\
88+
__switch_canary_iparam \
89+
\
6990
: /* reloaded segment registers */ \
7091
"memory"); \
7192
} while (0)

arch/x86/kernel/Makefile

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,24 @@ CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
2424
CFLAGS_hpet.o := $(nostackp)
2525
CFLAGS_tsc.o := $(nostackp)
2626
CFLAGS_paravirt.o := $(nostackp)
27+
#
28+
# On x86_32, register frame is passed verbatim on stack as struct
29+
# pt_regs. gcc considers the parameter to belong to the callee and
30+
# with -fstack-protector it copies pt_regs to the callee's stack frame
31+
# to put the structure after the stack canary causing changes made by
32+
# the exception handlers to be lost. Turn off stack protector for all
33+
# files containing functions which take struct pt_regs from register
34+
# frame.
35+
#
36+
# The proper way to fix this is to teach gcc that the argument belongs
37+
# to the caller for these functions, oh well...
38+
#
39+
ifdef CONFIG_X86_32
40+
CFLAGS_process_32.o := $(nostackp)
41+
CFLAGS_vm86_32.o := $(nostackp)
42+
CFLAGS_signal.o := $(nostackp)
43+
CFLAGS_traps.o := $(nostackp)
44+
endif
2745

2846
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
2947
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o

arch/x86/kernel/cpu/common.c

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include <asm/sections.h>
4040
#include <asm/setup.h>
4141
#include <asm/hypervisor.h>
42+
#include <asm/stackprotector.h>
4243

4344
#include "cpu.h"
4445

@@ -122,6 +123,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
122123

123124
[GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
124125
[GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
126+
GDT_STACK_CANARY_INIT
125127
#endif
126128
} };
127129
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
@@ -261,6 +263,7 @@ void load_percpu_segment(int cpu)
261263
loadsegment(gs, 0);
262264
wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
263265
#endif
266+
load_stack_canary_segment();
264267
}
265268

266269
/* Current gdt points %fs at the "master" per-cpu area: after this,
@@ -946,16 +949,21 @@ unsigned long kernel_eflags;
946949
*/
947950
DEFINE_PER_CPU(struct orig_ist, orig_ist);
948951

949-
#else
952+
#else /* x86_64 */
953+
954+
#ifdef CONFIG_CC_STACKPROTECTOR
955+
DEFINE_PER_CPU(unsigned long, stack_canary);
956+
#endif
950957

951-
/* Make sure %fs is initialized properly in idle threads */
958+
/* Make sure %fs and %gs are initialized properly in idle threads */
952959
struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
953960
{
954961
memset(regs, 0, sizeof(struct pt_regs));
955962
regs->fs = __KERNEL_PERCPU;
963+
regs->gs = __KERNEL_STACK_CANARY;
956964
return regs;
957965
}
958-
#endif
966+
#endif /* x86_64 */
959967

960968
/*
961969
* cpu_init() initializes state that is per-CPU. Some data is already
@@ -1120,9 +1128,6 @@ void __cpuinit cpu_init(void)
11201128
__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
11211129
#endif
11221130

1123-
/* Clear %gs. */
1124-
asm volatile ("mov %0, %%gs" : : "r" (0));
1125-
11261131
/* Clear all 6 debug registers: */
11271132
set_debugreg(0, 0);
11281133
set_debugreg(0, 1);

arch/x86/kernel/entry_32.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@
186186
/*CFI_REL_OFFSET gs, PT_GS*/
187187
.endm
188188
.macro SET_KERNEL_GS reg
189-
xorl \reg, \reg
189+
movl $(__KERNEL_STACK_CANARY), \reg
190190
movl \reg, %gs
191191
.endm
192192

arch/x86/kernel/head_32.S

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <asm/asm-offsets.h>
2020
#include <asm/setup.h>
2121
#include <asm/processor-flags.h>
22+
#include <asm/percpu.h>
2223

2324
/* Physical address */
2425
#define pa(X) ((X) - __PAGE_OFFSET)
@@ -437,8 +438,25 @@ is386: movl $2,%ecx # set MP
437438
movl $(__KERNEL_PERCPU), %eax
438439
movl %eax,%fs # set this cpu's percpu
439440

440-
xorl %eax,%eax # Clear GS and LDT
441+
#ifdef CONFIG_CC_STACKPROTECTOR
442+
/*
443+
* The linker can't handle this by relocation. Manually set
444+
* base address in stack canary segment descriptor.
445+
*/
446+
cmpb $0,ready
447+
jne 1f
448+
movl $per_cpu__gdt_page,%eax
449+
movl $per_cpu__stack_canary,%ecx
450+
movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
451+
shrl $16, %ecx
452+
movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
453+
movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)
454+
1:
455+
#endif
456+
movl $(__KERNEL_STACK_CANARY),%eax
441457
movl %eax,%gs
458+
459+
xorl %eax,%eax # Clear LDT
442460
lldt %ax
443461

444462
cld # gcc2 wants the direction flag cleared at all times

arch/x86/kernel/process_32.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
212212
regs.ds = __USER_DS;
213213
regs.es = __USER_DS;
214214
regs.fs = __KERNEL_PERCPU;
215+
regs.gs = __KERNEL_STACK_CANARY;
215216
regs.orig_ax = -1;
216217
regs.ip = (unsigned long) kernel_thread_helper;
217218
regs.cs = __KERNEL_CS | get_kernel_rpl();

0 commit comments

Comments
 (0)