
Commit 8c09871

s390/fpu: limit save and restore to used registers
The first invocation of kernel_fpu_begin() after switching from user to kernel context will save all vector registers, even if only parts of the vector registers are used within the kernel fpu context. Given that save and restore of all vector registers is quite expensive, change the current approach in several ways:

- Instead of saving and restoring all user registers, limit this to those registers which are actually used within a kernel fpu context.

- On context switch, save all remaining user fpu registers, so they can be restored when the task is rescheduled.

- Saving user registers within kernel_fpu_begin() is done without disabling and enabling interrupts, which also slightly reduces runtime. In the worst case (e.g. an interrupt context uses the same registers) this may lead to registers being saved several times; however, the assumption is that this will not happen frequently, so the new method is faster in nearly all cases.

- save_user_fpu_regs() can still be called from all contexts and saves all (or all remaining) user registers to a task's ufpu user fpu save area.

Overall this reduces the time required to save and restore the user fpu context in nearly all cases.

Signed-off-by: Heiko Carstens <[email protected]>
1 parent 066c409 commit 8c09871
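
For illustration only (not part of this commit), a minimal sketch of a caller that benefits from the change: a kernel fpu section which only touches V0-V7 now causes only those registers (plus the FPC) of the user context to be saved, instead of all 32 vector registers. The kernel_fpu_begin()/kernel_fpu_end() calls and the KERNEL_* flags are taken from asm/fpu.h as changed below; the DECLARE_KERNEL_FPU_ONSTACK8() on-stack declaration is an assumption about the surrounding patch series.

	{
		/* assumed on-stack save area sized for 8 vector registers */
		DECLARE_KERNEL_FPU_ONSTACK8(vxstate);

		/*
		 * Saves the user contents of the FPC and V0-V7 (unless they
		 * were already saved), and preserves any overlapping registers
		 * used by an outer kernel fpu context in vxstate.
		 */
		kernel_fpu_begin(&vxstate, KERNEL_FPC | KERNEL_VXR_V0V7);
		/* ... use the fpc and vector registers V0-V7 here ... */
		kernel_fpu_end(&vxstate, KERNEL_FPC | KERNEL_VXR_V0V7);
	}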

File tree

4 files changed (+128 −69 lines changed):
- arch/s390/include/asm/entry-common.h
- arch/s390/include/asm/fpu.h
- arch/s390/include/asm/processor.h
- arch/s390/kernel/fpu.c


arch/s390/include/asm/entry-common.h

Lines changed: 1 addition & 2 deletions
@@ -41,8 +41,7 @@ static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
 
 static __always_inline void arch_exit_to_user_mode(void)
 {
-	if (test_thread_flag(TIF_FPU))
-		__load_user_fpu_regs();
+	load_user_fpu_regs();
 
 	if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
 		debug_user_asce(1);
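
Note on this hunk: the explicit TIF_FPU test is gone because the new load_user_fpu_regs(), added in asm/fpu.h below, performs the equivalent check itself and returns early when thread->ufpu_flags is zero.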

arch/s390/include/asm/fpu.h

Lines changed: 49 additions & 32 deletions
@@ -58,10 +58,6 @@ static inline bool cpu_has_vx(void)
 	return likely(test_facility(129));
 }
 
-void save_user_fpu_regs(void);
-void load_user_fpu_regs(void);
-void __load_user_fpu_regs(void);
-
 enum {
 	KERNEL_FPC_BIT = 0,
 	KERNEL_VXR_V0V7_BIT,
@@ -83,6 +79,8 @@ enum {
 #define KERNEL_VXR		(KERNEL_VXR_LOW | KERNEL_VXR_HIGH)
 #define KERNEL_FPR		(KERNEL_FPC | KERNEL_VXR_LOW)
 
+void load_fpu_state(struct fpu *state, int flags);
+void save_fpu_state(struct fpu *state, int flags);
 void __kernel_fpu_begin(struct kernel_fpu *state, int flags);
 void __kernel_fpu_end(struct kernel_fpu *state, int flags);
 
@@ -162,26 +160,57 @@ static __always_inline void load_fp_regs_vx(__vector128 *vxrs)
 	__load_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t));
 }
 
-static inline void _kernel_fpu_begin(struct kernel_fpu *state, int flags)
+static inline void load_user_fpu_regs(void)
+{
+	struct thread_struct *thread = &current->thread;
+
+	if (!thread->ufpu_flags)
+		return;
+	load_fpu_state(&thread->ufpu, thread->ufpu_flags);
+	thread->ufpu_flags = 0;
+}
+
+static __always_inline void __save_user_fpu_regs(struct thread_struct *thread, int flags)
 {
-	state->hdr.mask = READ_ONCE(current->thread.kfpu_flags);
-	if (!test_thread_flag(TIF_FPU)) {
-		/* Save user space FPU state and register contents */
-		save_user_fpu_regs();
-	} else if (state->hdr.mask & flags) {
-		/* Save FPU/vector register in-use by the kernel */
+	save_fpu_state(&thread->ufpu, flags);
+	__atomic_or(flags, &thread->ufpu_flags);
+}
+
+static inline void save_user_fpu_regs(void)
+{
+	struct thread_struct *thread = &current->thread;
+	int mask, flags;
+
+	mask = __atomic_or(KERNEL_FPC | KERNEL_VXR, &thread->kfpu_flags);
+	flags = ~READ_ONCE(thread->ufpu_flags) & (KERNEL_FPC | KERNEL_VXR);
+	if (flags)
+		__save_user_fpu_regs(thread, flags);
+	barrier();
+	WRITE_ONCE(thread->kfpu_flags, mask);
+}
+
+static __always_inline void _kernel_fpu_begin(struct kernel_fpu *state, int flags)
+{
+	struct thread_struct *thread = &current->thread;
+	int mask, uflags;
+
+	mask = __atomic_or(flags, &thread->kfpu_flags);
+	state->hdr.mask = mask;
+	uflags = READ_ONCE(thread->ufpu_flags);
+	if ((uflags & flags) != flags)
+		__save_user_fpu_regs(thread, ~uflags & flags);
+	if (mask & flags)
 		__kernel_fpu_begin(state, flags);
-	}
-	__atomic_or(flags, &current->thread.kfpu_flags);
 }
 
-static inline void _kernel_fpu_end(struct kernel_fpu *state, int flags)
+static __always_inline void _kernel_fpu_end(struct kernel_fpu *state, int flags)
 {
-	WRITE_ONCE(current->thread.kfpu_flags, state->hdr.mask);
-	if (state->hdr.mask & flags) {
-		/* Restore FPU/vector register in-use by the kernel */
+	int mask = state->hdr.mask;
+
+	if (mask & flags)
 		__kernel_fpu_end(state, flags);
-	}
+	barrier();
+	WRITE_ONCE(current->thread.kfpu_flags, mask);
 }
 
 void __kernel_fpu_invalid_size(void);
@@ -222,28 +251,16 @@ static __always_inline void kernel_fpu_check_size(int flags, unsigned int size)
 
 static inline void save_kernel_fpu_regs(struct thread_struct *thread)
 {
-	struct fpu *state = &thread->kfpu;
-
 	if (!thread->kfpu_flags)
 		return;
-	fpu_stfpc(&state->fpc);
-	if (likely(cpu_has_vx()))
-		save_vx_regs(state->vxrs);
-	else
-		save_fp_regs_vx(state->vxrs);
+	save_fpu_state(&thread->kfpu, thread->kfpu_flags);
 }
 
 static inline void restore_kernel_fpu_regs(struct thread_struct *thread)
 {
-	struct fpu *state = &thread->kfpu;
-
 	if (!thread->kfpu_flags)
 		return;
-	fpu_lfpc(&state->fpc);
-	if (likely(cpu_has_vx()))
-		load_vx_regs(state->vxrs);
-	else
-		load_fp_regs_vx(state->vxrs);
+	load_fpu_state(&thread->kfpu, thread->kfpu_flags);
 }
 
 static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
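
To make the flag arithmetic in _kernel_fpu_begin() above concrete, here is a small worked example; the values are chosen purely for illustration and are not taken from the commit itself.

	/*
	 * Suppose a caller requests flags = KERNEL_FPC | KERNEL_VXR_LOW while
	 * the task's ufpu_flags already contain KERNEL_FPC (the fpc was saved
	 * earlier, e.g. by an interrupt handler's kernel fpu section):
	 *
	 *	uflags           = KERNEL_FPC
	 *	uflags & flags   = KERNEL_FPC		(!= flags)
	 *	~uflags & flags  = KERNEL_VXR_LOW
	 *
	 * so __save_user_fpu_regs() saves only V0-V15 of the user context and
	 * the fpc is not saved a second time. The old kfpu_flags value
	 * returned by __atomic_or() shows which of the requested registers
	 * are already in use by an outer kernel fpu context; only those are
	 * additionally preserved in *state by __kernel_fpu_begin().
	 */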

arch/s390/include/asm/processor.h

Lines changed: 1 addition & 0 deletions
@@ -166,6 +166,7 @@ struct thread_struct {
 	unsigned int gmap_write_flag;		/* gmap fault write indication */
 	unsigned int gmap_int_code;		/* int code of last gmap fault */
 	unsigned int gmap_pfault;		/* signal of a pending guest pfault */
+	int ufpu_flags;				/* user fpu flags */
 	int kfpu_flags;				/* kernel fpu flags */
 
 	/* Per-thread information related to debugging */

arch/s390/kernel/fpu.c

Lines changed: 77 additions & 35 deletions
@@ -107,45 +107,87 @@ void __kernel_fpu_end(struct kernel_fpu *state, int flags)
 }
 EXPORT_SYMBOL(__kernel_fpu_end);
 
-void __load_user_fpu_regs(void)
+void load_fpu_state(struct fpu *state, int flags)
 {
-	struct fpu *state = &current->thread.ufpu;
-
-	fpu_lfpc_safe(&state->fpc);
-	if (likely(cpu_has_vx()))
-		load_vx_regs(state->vxrs);
-	else
-		load_fp_regs_vx(state->vxrs);
-	clear_thread_flag(TIF_FPU);
-}
+	__vector128 *vxrs = &state->vxrs[0];
+	int mask;
 
-void load_user_fpu_regs(void)
-{
-	raw_local_irq_disable();
-	__load_user_fpu_regs();
-	raw_local_irq_enable();
+	if (flags & KERNEL_FPC)
+		fpu_lfpc(&state->fpc);
+	if (!cpu_has_vx()) {
+		if (flags & KERNEL_VXR_V0V7)
+			load_fp_regs_vx(state->vxrs);
+		return;
+	}
+	mask = flags & KERNEL_VXR;
+	if (mask == KERNEL_VXR) {
+		fpu_vlm(0, 15, &vxrs[0]);
+		fpu_vlm(16, 31, &vxrs[16]);
+		return;
+	}
+	if (mask == KERNEL_VXR_MID) {
+		fpu_vlm(8, 23, &vxrs[8]);
+		return;
+	}
+	mask = flags & KERNEL_VXR_LOW;
+	if (mask) {
+		if (mask == KERNEL_VXR_LOW)
+			fpu_vlm(0, 15, &vxrs[0]);
+		else if (mask == KERNEL_VXR_V0V7)
+			fpu_vlm(0, 7, &vxrs[0]);
+		else
+			fpu_vlm(8, 15, &vxrs[8]);
+	}
+	mask = flags & KERNEL_VXR_HIGH;
+	if (mask) {
+		if (mask == KERNEL_VXR_HIGH)
+			fpu_vlm(16, 31, &vxrs[16]);
+		else if (mask == KERNEL_VXR_V16V23)
+			fpu_vlm(16, 23, &vxrs[16]);
+		else
+			fpu_vlm(24, 31, &vxrs[24]);
+	}
 }
-EXPORT_SYMBOL(load_user_fpu_regs);
 
-void save_user_fpu_regs(void)
+void save_fpu_state(struct fpu *state, int flags)
 {
-	unsigned long flags;
-	struct fpu *state;
-
-	local_irq_save(flags);
-
-	if (test_thread_flag(TIF_FPU))
-		goto out;
-
-	state = &current->thread.ufpu;
+	__vector128 *vxrs = &state->vxrs[0];
+	int mask;
 
-	fpu_stfpc(&state->fpc);
-	if (likely(cpu_has_vx()))
-		save_vx_regs(state->vxrs);
-	else
-		save_fp_regs_vx(state->vxrs);
-	set_thread_flag(TIF_FPU);
-out:
-	local_irq_restore(flags);
+	if (flags & KERNEL_FPC)
+		fpu_stfpc(&state->fpc);
+	if (!cpu_has_vx()) {
+		if (flags & KERNEL_VXR_LOW)
+			save_fp_regs_vx(state->vxrs);
+		return;
+	}
+	mask = flags & KERNEL_VXR;
+	if (mask == KERNEL_VXR) {
+		fpu_vstm(0, 15, &vxrs[0]);
+		fpu_vstm(16, 31, &vxrs[16]);
+		return;
+	}
+	if (mask == KERNEL_VXR_MID) {
+		fpu_vstm(8, 23, &vxrs[8]);
+		return;
+	}
+	mask = flags & KERNEL_VXR_LOW;
+	if (mask) {
+		if (mask == KERNEL_VXR_LOW)
+			fpu_vstm(0, 15, &vxrs[0]);
+		else if (mask == KERNEL_VXR_V0V7)
+			fpu_vstm(0, 7, &vxrs[0]);
+		else
+			fpu_vstm(8, 15, &vxrs[8]);
+	}
+	mask = flags & KERNEL_VXR_HIGH;
+	if (mask) {
+		if (mask == KERNEL_VXR_HIGH)
+			fpu_vstm(16, 31, &vxrs[16]);
+		else if (mask == KERNEL_VXR_V16V23)
+			fpu_vstm(16, 23, &vxrs[16]);
+		else
+			fpu_vstm(24, 31, &vxrs[24]);
+	}
 }
-EXPORT_SYMBOL(save_user_fpu_regs);
+EXPORT_SYMBOL(save_fpu_state);
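
The new save_fpu_state()/load_fpu_state() dispatch on the flag mask so that common register subsets are handled with as few vector load/store multiple instructions as possible. Two illustrative cases (not taken from the commit itself):

	/*
	 * flags = KERNEL_VXR_V8V15 | KERNEL_VXR_V16V23 (i.e. KERNEL_VXR_MID)
	 *	-> a single fpu_vstm(8, 23, ...) stores V8-V23.
	 *
	 * flags = KERNEL_FPC | KERNEL_VXR_V0V7
	 *	-> fpu_stfpc() plus fpu_vstm(0, 7, ...) store the fpc and V0-V7.
	 *
	 * Without the vector facility only the KERNEL_VXR_LOW half exists as
	 * floating point registers, hence the early save_fp_regs_vx() exit.
	 */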
