Skip to content

Commit 2722146

Browse files
Sebastian Andrzej Siewior and suryasaimadhu
authored and committed
x86/fpu: Remove fpu->initialized
The struct fpu.initialized member is always set to one for user tasks and zero for kernel tasks. This avoids saving/restoring the FPU registers for kernel threads. The ->initialized = 0 case for user tasks has been removed in previous changes, for instance, by doing an explicit unconditional init at fork() time for FPU-less systems which was otherwise delayed until the emulated opcode. The context switch code (switch_fpu_prepare() + switch_fpu_finish()) can't unconditionally save/restore registers for kernel threads. Not only would it slow down the switch but also load a zeroed xcomp_bv for XSAVES. For kernel_fpu_begin() (+end) the situation is similar: EFI with runtime services uses this before alternatives_patched is true. Which means that this function is used too early and it wasn't the case before. For those two cases, use current->mm to distinguish between user and kernel thread. For kernel_fpu_begin() skip save/restore of the FPU registers. During the context switch into a kernel thread don't do anything. There is no reason to save the FPU state of a kernel thread. The reordering in __switch_to() is important because the current() pointer needs to be valid before switch_fpu_finish() is invoked so ->mm is seen of the new task instead the old one. N.B.: fpu__save() doesn't need to check ->mm because it is called by user tasks only. [ bp: Massage. ] Signed-off-by: Sebastian Andrzej Siewior <[email protected]> Signed-off-by: Borislav Petkov <[email protected]> Reviewed-by: Dave Hansen <[email protected]> Reviewed-by: Thomas Gleixner <[email protected]> Cc: Andy Lutomirski <[email protected]> Cc: Aubrey Li <[email protected]> Cc: Babu Moger <[email protected]> Cc: "Chang S. Bae" <[email protected]> Cc: Dmitry Safonov <[email protected]> Cc: "H. Peter Anvin" <[email protected]> Cc: Ingo Molnar <[email protected]> Cc: Jann Horn <[email protected]> Cc: "Jason A. 
Donenfeld" <[email protected]> Cc: Joerg Roedel <[email protected]> Cc: kvm ML <[email protected]> Cc: Masami Hiramatsu <[email protected]> Cc: Mathieu Desnoyers <[email protected]> Cc: Nicolai Stange <[email protected]> Cc: Paolo Bonzini <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Radim Krčmář <[email protected]> Cc: Rik van Riel <[email protected]> Cc: Sergey Senozhatsky <[email protected]> Cc: Will Deacon <[email protected]> Cc: x86-ml <[email protected]> Link: https://lkml.kernel.org/r/[email protected]
1 parent 39388e8 commit 2722146

File tree

12 files changed

+53
-121
lines changed

12 files changed

+53
-121
lines changed

arch/x86/ia32/ia32_signal.c

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -216,8 +216,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
216216
size_t frame_size,
217217
void __user **fpstate)
218218
{
219-
struct fpu *fpu = &current->thread.fpu;
220-
unsigned long sp;
219+
unsigned long sp, fx_aligned, math_size;
221220

222221
/* Default to using normal stack */
223222
sp = regs->sp;
@@ -231,15 +230,11 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
231230
ksig->ka.sa.sa_restorer)
232231
sp = (unsigned long) ksig->ka.sa.sa_restorer;
233232

234-
if (fpu->initialized) {
235-
unsigned long fx_aligned, math_size;
236-
237-
sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
238-
*fpstate = (struct _fpstate_32 __user *) sp;
239-
if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned,
240-
math_size) < 0)
241-
return (void __user *) -1L;
242-
}
233+
sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
234+
*fpstate = (struct _fpstate_32 __user *) sp;
235+
if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned,
236+
math_size) < 0)
237+
return (void __user *) -1L;
243238

244239
sp -= frame_size;
245240
/* Align the stack pointer according to the i386 ABI,

arch/x86/include/asm/fpu/internal.h

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -494,20 +494,22 @@ static inline void fpregs_activate(struct fpu *fpu)
494494
*
495495
* - switch_fpu_finish() restores the new state as
496496
* necessary.
497+
*
498+
* The FPU context is only stored/restored for a user task and
499+
* ->mm is used to distinguish between kernel and user threads.
497500
*/
498501
static inline void
499502
switch_fpu_prepare(struct fpu *old_fpu, int cpu)
500503
{
501-
if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) {
504+
if (static_cpu_has(X86_FEATURE_FPU) && current->mm) {
502505
if (!copy_fpregs_to_fpstate(old_fpu))
503506
old_fpu->last_cpu = -1;
504507
else
505508
old_fpu->last_cpu = cpu;
506509

507510
/* But leave fpu_fpregs_owner_ctx! */
508511
trace_x86_fpu_regs_deactivated(old_fpu);
509-
} else
510-
old_fpu->last_cpu = -1;
512+
}
511513
}
512514

513515
/*
@@ -520,12 +522,12 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu)
520522
*/
521523
static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
522524
{
523-
bool preload = static_cpu_has(X86_FEATURE_FPU) &&
524-
new_fpu->initialized;
525+
if (static_cpu_has(X86_FEATURE_FPU)) {
526+
if (!fpregs_state_valid(new_fpu, cpu)) {
527+
if (current->mm)
528+
copy_kernel_to_fpregs(&new_fpu->state);
529+
}
525530

526-
if (preload) {
527-
if (!fpregs_state_valid(new_fpu, cpu))
528-
copy_kernel_to_fpregs(&new_fpu->state);
529531
fpregs_activate(new_fpu);
530532
}
531533
}

arch/x86/include/asm/fpu/types.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -293,15 +293,6 @@ struct fpu {
293293
*/
294294
unsigned int last_cpu;
295295

296-
/*
297-
* @initialized:
298-
*
299-
* This flag indicates whether this context is initialized: if the task
300-
* is not running then we can restore from this context, if the task
301-
* is running then we should save into this context.
302-
*/
303-
unsigned char initialized;
304-
305296
/*
306297
* @avx512_timestamp:
307298
*

arch/x86/include/asm/trace/fpu.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,19 @@ DECLARE_EVENT_CLASS(x86_fpu,
1313

1414
TP_STRUCT__entry(
1515
__field(struct fpu *, fpu)
16-
__field(bool, initialized)
1716
__field(u64, xfeatures)
1817
__field(u64, xcomp_bv)
1918
),
2019

2120
TP_fast_assign(
2221
__entry->fpu = fpu;
23-
__entry->initialized = fpu->initialized;
2422
if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
2523
__entry->xfeatures = fpu->state.xsave.header.xfeatures;
2624
__entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv;
2725
}
2826
),
29-
TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx",
27+
TP_printk("x86/fpu: %p xfeatures: %llx xcomp_bv: %llx",
3028
__entry->fpu,
31-
__entry->initialized,
3229
__entry->xfeatures,
3330
__entry->xcomp_bv
3431
)

arch/x86/kernel/fpu/core.c

Lines changed: 20 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ static void __kernel_fpu_begin(void)
101101

102102
kernel_fpu_disable();
103103

104-
if (fpu->initialized) {
104+
if (current->mm) {
105105
/*
106106
* Ignore return value -- we don't care if reg state
107107
* is clobbered.
@@ -116,7 +116,7 @@ static void __kernel_fpu_end(void)
116116
{
117117
struct fpu *fpu = &current->thread.fpu;
118118

119-
if (fpu->initialized)
119+
if (current->mm)
120120
copy_kernel_to_fpregs(&fpu->state);
121121

122122
kernel_fpu_enable();
@@ -147,11 +147,10 @@ void fpu__save(struct fpu *fpu)
147147

148148
preempt_disable();
149149
trace_x86_fpu_before_save(fpu);
150-
if (fpu->initialized) {
151-
if (!copy_fpregs_to_fpstate(fpu)) {
152-
copy_kernel_to_fpregs(&fpu->state);
153-
}
154-
}
150+
151+
if (!copy_fpregs_to_fpstate(fpu))
152+
copy_kernel_to_fpregs(&fpu->state);
153+
155154
trace_x86_fpu_after_save(fpu);
156155
preempt_enable();
157156
}
@@ -190,7 +189,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
190189
{
191190
dst_fpu->last_cpu = -1;
192191

193-
if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU))
192+
if (!static_cpu_has(X86_FEATURE_FPU))
194193
return 0;
195194

196195
WARN_ON_FPU(src_fpu != &current->thread.fpu);
@@ -227,14 +226,10 @@ static void fpu__initialize(struct fpu *fpu)
227226
{
228227
WARN_ON_FPU(fpu != &current->thread.fpu);
229228

230-
if (!fpu->initialized) {
231-
fpstate_init(&fpu->state);
232-
trace_x86_fpu_init_state(fpu);
229+
fpstate_init(&fpu->state);
230+
trace_x86_fpu_init_state(fpu);
233231

234-
trace_x86_fpu_activate_state(fpu);
235-
/* Safe to do for the current task: */
236-
fpu->initialized = 1;
237-
}
232+
trace_x86_fpu_activate_state(fpu);
238233
}
239234

240235
/*
@@ -247,32 +242,20 @@ static void fpu__initialize(struct fpu *fpu)
247242
*
248243
* - or it's called for stopped tasks (ptrace), in which case the
249244
* registers were already saved by the context-switch code when
250-
* the task scheduled out - we only have to initialize the registers
251-
* if they've never been initialized.
245+
* the task scheduled out.
252246
*
253247
* If the task has used the FPU before then save it.
254248
*/
255249
void fpu__prepare_read(struct fpu *fpu)
256250
{
257-
if (fpu == &current->thread.fpu) {
251+
if (fpu == &current->thread.fpu)
258252
fpu__save(fpu);
259-
} else {
260-
if (!fpu->initialized) {
261-
fpstate_init(&fpu->state);
262-
trace_x86_fpu_init_state(fpu);
263-
264-
trace_x86_fpu_activate_state(fpu);
265-
/* Safe to do for current and for stopped child tasks: */
266-
fpu->initialized = 1;
267-
}
268-
}
269253
}
270254

271255
/*
272256
* This function must be called before we write a task's fpstate.
273257
*
274-
* If the task has used the FPU before then invalidate any cached FPU registers.
275-
* If the task has not used the FPU before then initialize its fpstate.
258+
* Invalidate any cached FPU registers.
276259
*
277260
* After this function call, after registers in the fpstate are
278261
* modified and the child task has woken up, the child task will
@@ -289,17 +272,8 @@ void fpu__prepare_write(struct fpu *fpu)
289272
*/
290273
WARN_ON_FPU(fpu == &current->thread.fpu);
291274

292-
if (fpu->initialized) {
293-
/* Invalidate any cached state: */
294-
__fpu_invalidate_fpregs_state(fpu);
295-
} else {
296-
fpstate_init(&fpu->state);
297-
trace_x86_fpu_init_state(fpu);
298-
299-
trace_x86_fpu_activate_state(fpu);
300-
/* Safe to do for stopped child tasks: */
301-
fpu->initialized = 1;
302-
}
275+
/* Invalidate any cached state: */
276+
__fpu_invalidate_fpregs_state(fpu);
303277
}
304278

305279
/*
@@ -316,17 +290,13 @@ void fpu__drop(struct fpu *fpu)
316290
preempt_disable();
317291

318292
if (fpu == &current->thread.fpu) {
319-
if (fpu->initialized) {
320-
/* Ignore delayed exceptions from user space */
321-
asm volatile("1: fwait\n"
322-
"2:\n"
323-
_ASM_EXTABLE(1b, 2b));
324-
fpregs_deactivate(fpu);
325-
}
293+
/* Ignore delayed exceptions from user space */
294+
asm volatile("1: fwait\n"
295+
"2:\n"
296+
_ASM_EXTABLE(1b, 2b));
297+
fpregs_deactivate(fpu);
326298
}
327299

328-
fpu->initialized = 0;
329-
330300
trace_x86_fpu_dropped(fpu);
331301

332302
preempt_enable();

arch/x86/kernel/fpu/init.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,8 +239,6 @@ static void __init fpu__init_system_ctx_switch(void)
239239

240240
WARN_ON_FPU(!on_boot_cpu);
241241
on_boot_cpu = 0;
242-
243-
WARN_ON_FPU(current->thread.fpu.initialized);
244242
}
245243

246244
/*

arch/x86/kernel/fpu/regset.c

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,12 @@
1515
*/
1616
int regset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
1717
{
18-
struct fpu *target_fpu = &target->thread.fpu;
19-
20-
return target_fpu->initialized ? regset->n : 0;
18+
return regset->n;
2119
}
2220

2321
int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
2422
{
25-
struct fpu *target_fpu = &target->thread.fpu;
26-
27-
if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->initialized)
23+
if (boot_cpu_has(X86_FEATURE_FXSR))
2824
return regset->n;
2925
else
3026
return 0;
@@ -370,16 +366,9 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
370366
int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu)
371367
{
372368
struct task_struct *tsk = current;
373-
struct fpu *fpu = &tsk->thread.fpu;
374-
int fpvalid;
375-
376-
fpvalid = fpu->initialized;
377-
if (fpvalid)
378-
fpvalid = !fpregs_get(tsk, NULL,
379-
0, sizeof(struct user_i387_ia32_struct),
380-
ufpu, NULL);
381369

382-
return fpvalid;
370+
return !fpregs_get(tsk, NULL, 0, sizeof(struct user_i387_ia32_struct),
371+
ufpu, NULL);
383372
}
384373
EXPORT_SYMBOL(dump_fpu);
385374

arch/x86/kernel/fpu/xstate.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -892,8 +892,6 @@ const void *get_xsave_field_ptr(int xsave_state)
892892
{
893893
struct fpu *fpu = &current->thread.fpu;
894894

895-
if (!fpu->initialized)
896-
return NULL;
897895
/*
898896
* fpu__save() takes the CPU's xstate registers
899897
* and saves them off to the 'fpu memory buffer.

arch/x86/kernel/process_32.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -288,10 +288,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
288288
if (prev->gs | next->gs)
289289
lazy_load_gs(next->gs);
290290

291-
switch_fpu_finish(next_fpu, cpu);
292-
293291
this_cpu_write(current_task, next_p);
294292

293+
switch_fpu_finish(next_fpu, cpu);
294+
295295
/* Load the Intel cache allocation PQR MSR. */
296296
resctrl_sched_in();
297297

arch/x86/kernel/process_64.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -566,14 +566,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
566566

567567
x86_fsgsbase_load(prev, next);
568568

569-
switch_fpu_finish(next_fpu, cpu);
570-
571569
/*
572570
* Switch the PDA and FPU contexts.
573571
*/
574572
this_cpu_write(current_task, next_p);
575573
this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
576574

575+
switch_fpu_finish(next_fpu, cpu);
576+
577577
/* Reload sp0. */
578578
update_task_stack(next_p);
579579

0 commit comments

Comments
 (0)