Skip to content

Commit ccbeed3

Browse files
htejun authored and Ingo Molnar committed
x86: make lazy %gs optional on x86_32
Impact: pt_regs changed, lazy gs handling made optional, add slight overhead to SAVE_ALL, simplifies error_code path a bit On x86_32, %gs hasn't been used by kernel and handled lazily. pt_regs doesn't have a place for it and gs is saved/loaded only when necessary. In preparation for stack protector support, this patch makes lazy %gs handling optional by doing the following. * Add CONFIG_X86_32_LAZY_GS and place for gs in pt_regs. * Save and restore %gs along with other registers in entry_32.S unless LAZY_GS. Note that this unfortunately adds "pushl $0" on SAVE_ALL even when LAZY_GS. However, it adds no overhead to common exit path and simplifies entry path with error code. * Define different user_gs accessors depending on LAZY_GS and add lazy_save_gs() and lazy_load_gs() which are noop if !LAZY_GS. The lazy_*_gs() ops are used to save, load and clear %gs lazily. * Define ELF_CORE_COPY_KERNEL_REGS() which always reads %gs directly. xen and lguest changes need to be verified. Signed-off-by: Tejun Heo <[email protected]> Cc: Jeremy Fitzhardinge <[email protected]> Cc: Rusty Russell <[email protected]> Signed-off-by: Ingo Molnar <[email protected]>
1 parent d9a89a2 commit ccbeed3

File tree

11 files changed

+158
-40
lines changed

11 files changed

+158
-40
lines changed

arch/x86/Kconfig

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,10 @@ config X86_TRAMPOLINE
207207
depends on X86_SMP || (X86_VOYAGER && SMP) || (64BIT && ACPI_SLEEP)
208208
default y
209209

210+
config X86_32_LAZY_GS
211+
def_bool y
212+
depends on X86_32
213+
210214
config KTIME_SCALAR
211215
def_bool X86_32
212216
source "init/Kconfig"

arch/x86/include/asm/elf.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ extern unsigned int vdso_enabled;
112112
* now struct_user_regs, they are different)
113113
*/
114114

115-
#define ELF_CORE_COPY_REGS(pr_reg, regs) \
115+
#define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs) \
116116
do { \
117117
pr_reg[0] = regs->bx; \
118118
pr_reg[1] = regs->cx; \
@@ -124,7 +124,6 @@ do { \
124124
pr_reg[7] = regs->ds & 0xffff; \
125125
pr_reg[8] = regs->es & 0xffff; \
126126
pr_reg[9] = regs->fs & 0xffff; \
127-
pr_reg[10] = get_user_gs(regs); \
128127
pr_reg[11] = regs->orig_ax; \
129128
pr_reg[12] = regs->ip; \
130129
pr_reg[13] = regs->cs & 0xffff; \
@@ -133,6 +132,18 @@ do { \
133132
pr_reg[16] = regs->ss & 0xffff; \
134133
} while (0);
135134

135+
#define ELF_CORE_COPY_REGS(pr_reg, regs) \
136+
do { \
137+
ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
138+
pr_reg[10] = get_user_gs(regs); \
139+
} while (0);
140+
141+
#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs) \
142+
do { \
143+
ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
144+
savesegment(gs, pr_reg[10]); \
145+
} while (0);
146+
136147
#define ELF_PLATFORM (utsname()->machine)
137148
#define set_personality_64bit() do { } while (0)
138149

arch/x86/include/asm/mmu_context.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ do { \
7979
#ifdef CONFIG_X86_32
8080
#define deactivate_mm(tsk, mm) \
8181
do { \
82-
set_user_gs(task_pt_regs(tsk), 0); \
82+
lazy_load_gs(0); \
8383
} while (0)
8484
#else
8585
#define deactivate_mm(tsk, mm) \

arch/x86/include/asm/ptrace.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ struct pt_regs {
2828
int xds;
2929
int xes;
3030
int xfs;
31-
/* int gs; */
31+
int xgs;
3232
long orig_eax;
3333
long eip;
3434
int xcs;
@@ -50,7 +50,7 @@ struct pt_regs {
5050
unsigned long ds;
5151
unsigned long es;
5252
unsigned long fs;
53-
/* int gs; */
53+
unsigned long gs;
5454
unsigned long orig_ax;
5555
unsigned long ip;
5656
unsigned long cs;

arch/x86/include/asm/system.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,10 +186,20 @@ extern void native_load_gs_index(unsigned);
186186
* x86_32 user gs accessors.
187187
*/
188188
#ifdef CONFIG_X86_32
189+
#ifdef CONFIG_X86_32_LAZY_GS
189190
#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;})
190191
#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
191192
#define task_user_gs(tsk) ((tsk)->thread.gs)
192-
#endif
193+
#define lazy_save_gs(v) savesegment(gs, (v))
194+
#define lazy_load_gs(v) loadsegment(gs, (v))
195+
#else /* X86_32_LAZY_GS */
196+
#define get_user_gs(regs) (u16)((regs)->gs)
197+
#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
198+
#define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
199+
#define lazy_save_gs(v) do { } while (0)
200+
#define lazy_load_gs(v) do { } while (0)
201+
#endif /* X86_32_LAZY_GS */
202+
#endif /* X86_32 */
193203

194204
static inline unsigned long get_limit(unsigned long segment)
195205
{

arch/x86/kernel/asm-offsets_32.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ void foo(void)
7575
OFFSET(PT_DS, pt_regs, ds);
7676
OFFSET(PT_ES, pt_regs, es);
7777
OFFSET(PT_FS, pt_regs, fs);
78+
OFFSET(PT_GS, pt_regs, gs);
7879
OFFSET(PT_ORIG_EAX, pt_regs, orig_ax);
7980
OFFSET(PT_EIP, pt_regs, ip);
8081
OFFSET(PT_CS, pt_regs, cs);

arch/x86/kernel/entry_32.S

Lines changed: 113 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,13 @@
3030
* 1C(%esp) - %ds
3131
* 20(%esp) - %es
3232
* 24(%esp) - %fs
33-
* 28(%esp) - orig_eax
34-
* 2C(%esp) - %eip
35-
* 30(%esp) - %cs
36-
* 34(%esp) - %eflags
37-
* 38(%esp) - %oldesp
38-
* 3C(%esp) - %oldss
33+
* 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS
34+
* 2C(%esp) - orig_eax
35+
* 30(%esp) - %eip
36+
* 34(%esp) - %cs
37+
* 38(%esp) - %eflags
38+
* 3C(%esp) - %oldesp
39+
* 40(%esp) - %oldss
3940
*
4041
* "current" is in register %ebx during any slow entries.
4142
*/
@@ -101,8 +102,99 @@
101102
#define resume_userspace_sig resume_userspace
102103
#endif
103104

105+
/*
106+
* User gs save/restore
107+
*
108+
* %gs is used for userland TLS and kernel only uses it for stack
109+
* canary which is required to be at %gs:20 by gcc. Read the comment
110+
* at the top of stackprotector.h for more info.
111+
*
112+
* Local labels 98 and 99 are used.
113+
*/
114+
#ifdef CONFIG_X86_32_LAZY_GS
115+
116+
/* unfortunately push/pop can't be no-op */
117+
.macro PUSH_GS
118+
pushl $0
119+
CFI_ADJUST_CFA_OFFSET 4
120+
.endm
121+
.macro POP_GS pop=0
122+
addl $(4 + \pop), %esp
123+
CFI_ADJUST_CFA_OFFSET -(4 + \pop)
124+
.endm
125+
.macro POP_GS_EX
126+
.endm
127+
128+
/* all the rest are no-op */
129+
.macro PTGS_TO_GS
130+
.endm
131+
.macro PTGS_TO_GS_EX
132+
.endm
133+
.macro GS_TO_REG reg
134+
.endm
135+
.macro REG_TO_PTGS reg
136+
.endm
137+
.macro SET_KERNEL_GS reg
138+
.endm
139+
140+
#else /* CONFIG_X86_32_LAZY_GS */
141+
142+
.macro PUSH_GS
143+
pushl %gs
144+
CFI_ADJUST_CFA_OFFSET 4
145+
/*CFI_REL_OFFSET gs, 0*/
146+
.endm
147+
148+
.macro POP_GS pop=0
149+
98: popl %gs
150+
CFI_ADJUST_CFA_OFFSET -4
151+
/*CFI_RESTORE gs*/
152+
.if \pop <> 0
153+
add $\pop, %esp
154+
CFI_ADJUST_CFA_OFFSET -\pop
155+
.endif
156+
.endm
157+
.macro POP_GS_EX
158+
.pushsection .fixup, "ax"
159+
99: movl $0, (%esp)
160+
jmp 98b
161+
.section __ex_table, "a"
162+
.align 4
163+
.long 98b, 99b
164+
.popsection
165+
.endm
166+
167+
.macro PTGS_TO_GS
168+
98: mov PT_GS(%esp), %gs
169+
.endm
170+
.macro PTGS_TO_GS_EX
171+
.pushsection .fixup, "ax"
172+
99: movl $0, PT_GS(%esp)
173+
jmp 98b
174+
.section __ex_table, "a"
175+
.align 4
176+
.long 98b, 99b
177+
.popsection
178+
.endm
179+
180+
.macro GS_TO_REG reg
181+
movl %gs, \reg
182+
/*CFI_REGISTER gs, \reg*/
183+
.endm
184+
.macro REG_TO_PTGS reg
185+
movl \reg, PT_GS(%esp)
186+
/*CFI_REL_OFFSET gs, PT_GS*/
187+
.endm
188+
.macro SET_KERNEL_GS reg
189+
xorl \reg, \reg
190+
movl \reg, %gs
191+
.endm
192+
193+
#endif /* CONFIG_X86_32_LAZY_GS */
194+
104195
.macro SAVE_ALL
105196
cld
197+
PUSH_GS
106198
pushl %fs
107199
CFI_ADJUST_CFA_OFFSET 4
108200
/*CFI_REL_OFFSET fs, 0;*/
@@ -138,6 +230,7 @@
138230
movl %edx, %es
139231
movl $(__KERNEL_PERCPU), %edx
140232
movl %edx, %fs
233+
SET_KERNEL_GS %edx
141234
.endm
142235

143236
.macro RESTORE_INT_REGS
@@ -164,7 +257,7 @@
164257
CFI_RESTORE eax
165258
.endm
166259

167-
.macro RESTORE_REGS
260+
.macro RESTORE_REGS pop=0
168261
RESTORE_INT_REGS
169262
1: popl %ds
170263
CFI_ADJUST_CFA_OFFSET -4
@@ -175,6 +268,7 @@
175268
3: popl %fs
176269
CFI_ADJUST_CFA_OFFSET -4
177270
/*CFI_RESTORE fs;*/
271+
POP_GS \pop
178272
.pushsection .fixup, "ax"
179273
4: movl $0, (%esp)
180274
jmp 1b
@@ -188,6 +282,7 @@
188282
.long 2b, 5b
189283
.long 3b, 6b
190284
.popsection
285+
POP_GS_EX
191286
.endm
192287

193288
.macro RING0_INT_FRAME
@@ -368,6 +463,7 @@ sysenter_exit:
368463
xorl %ebp,%ebp
369464
TRACE_IRQS_ON
370465
1: mov PT_FS(%esp), %fs
466+
PTGS_TO_GS
371467
ENABLE_INTERRUPTS_SYSEXIT
372468

373469
#ifdef CONFIG_AUDITSYSCALL
@@ -416,6 +512,7 @@ sysexit_audit:
416512
.align 4
417513
.long 1b,2b
418514
.popsection
515+
PTGS_TO_GS_EX
419516
ENDPROC(ia32_sysenter_target)
420517

421518
# system call handler stub
@@ -458,8 +555,7 @@ restore_all:
458555
restore_nocheck:
459556
TRACE_IRQS_IRET
460557
restore_nocheck_notrace:
461-
RESTORE_REGS
462-
addl $4, %esp # skip orig_eax/error_code
558+
RESTORE_REGS 4 # skip orig_eax/error_code
463559
CFI_ADJUST_CFA_OFFSET -4
464560
irq_return:
465561
INTERRUPT_RETURN
@@ -1078,7 +1174,10 @@ ENTRY(page_fault)
10781174
CFI_ADJUST_CFA_OFFSET 4
10791175
ALIGN
10801176
error_code:
1081-
/* the function address is in %fs's slot on the stack */
1177+
/* the function address is in %gs's slot on the stack */
1178+
pushl %fs
1179+
CFI_ADJUST_CFA_OFFSET 4
1180+
/*CFI_REL_OFFSET fs, 0*/
10821181
pushl %es
10831182
CFI_ADJUST_CFA_OFFSET 4
10841183
/*CFI_REL_OFFSET es, 0*/
@@ -1107,20 +1206,15 @@ error_code:
11071206
CFI_ADJUST_CFA_OFFSET 4
11081207
CFI_REL_OFFSET ebx, 0
11091208
cld
1110-
pushl %fs
1111-
CFI_ADJUST_CFA_OFFSET 4
1112-
/*CFI_REL_OFFSET fs, 0*/
11131209
movl $(__KERNEL_PERCPU), %ecx
11141210
movl %ecx, %fs
11151211
UNWIND_ESPFIX_STACK
1116-
popl %ecx
1117-
CFI_ADJUST_CFA_OFFSET -4
1118-
/*CFI_REGISTER es, ecx*/
1119-
movl PT_FS(%esp), %edi # get the function address
1212+
GS_TO_REG %ecx
1213+
movl PT_GS(%esp), %edi # get the function address
11201214
movl PT_ORIG_EAX(%esp), %edx # get the error code
11211215
movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
1122-
mov %ecx, PT_FS(%esp)
1123-
/*CFI_REL_OFFSET fs, ES*/
1216+
REG_TO_PTGS %ecx
1217+
SET_KERNEL_GS %ecx
11241218
movl $(__USER_DS), %ecx
11251219
movl %ecx, %ds
11261220
movl %ecx, %es

arch/x86/kernel/process_32.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
539539
* used %fs or %gs (it does not today), or if the kernel is
540540
* running inside of a hypervisor layer.
541541
*/
542-
savesegment(gs, prev->gs);
542+
lazy_save_gs(prev->gs);
543543

544544
/*
545545
* Load the per-thread Thread-Local Storage descriptor.
@@ -585,7 +585,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
585585
* Restore %gs if needed (which is common)
586586
*/
587587
if (prev->gs | next->gs)
588-
loadsegment(gs, next->gs);
588+
lazy_load_gs(next->gs);
589589

590590
percpu_write(current_task, next_p);
591591

arch/x86/kernel/ptrace.c

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,10 +75,7 @@ static inline bool invalid_selector(u16 value)
7575
static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
7676
{
7777
BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
78-
regno >>= 2;
79-
if (regno > FS)
80-
--regno;
81-
return &regs->bx + regno;
78+
return &regs->bx + (regno >> 2);
8279
}
8380

8481
static u16 get_segment_reg(struct task_struct *task, unsigned long offset)

arch/x86/lguest/boot.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
283283
/* There's one problem which normal hardware doesn't have: the Host
284284
* can't handle us removing entries we're currently using. So we clear
285285
* the GS register here: if it's needed it'll be reloaded anyway. */
286-
loadsegment(gs, 0);
286+
lazy_load_gs(0);
287287
lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0);
288288
}
289289

0 commit comments

Comments (0)