Skip to content

Commit b5510d9

Browse files
hbrueckner authored and
Martin Schwidefsky committed
s390/fpu: always enable the vector facility if it is available
If the kernel detects that the s390 hardware supports the vector facility, it is enabled by default at an early stage. To force it off, use the novx kernel parameter. Note that there is a small time window in which the vector facility is enabled before it is forced off. With the vector facility enabled by default, the FPU save and restore functions can be improved: they no longer need to perform expensive control register updates to enable or disable the vector enablement control for particular processes. Signed-off-by: Hendrik Brueckner <[email protected]> Reviewed-by: Heiko Carstens <[email protected]> Signed-off-by: Martin Schwidefsky <[email protected]>
1 parent 395e6aa commit b5510d9

File tree

13 files changed

+70
-199
lines changed

13 files changed

+70
-199
lines changed

arch/s390/include/asm/ctl_reg.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,6 @@ static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
4646
__ctl_load(reg, cr, cr);
4747
}
4848

49-
void __ctl_set_vx(void);
50-
5149
void smp_ctl_set_bit(int cr, int bit);
5250
void smp_ctl_clear_bit(int cr, int bit);
5351

arch/s390/include/asm/fpu-internal.h

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,6 @@
88
#ifndef _ASM_S390_FPU_INTERNAL_H
99
#define _ASM_S390_FPU_INTERNAL_H
1010

11-
#define FPU_USE_VX 1 /* Vector extension is active */
12-
13-
#ifndef __ASSEMBLY__
14-
1511
#include <linux/errno.h>
1612
#include <linux/string.h>
1713
#include <asm/linkage.h>
@@ -20,7 +16,6 @@
2016

2117
struct fpu {
2218
__u32 fpc; /* Floating-point control */
23-
__u32 flags;
2419
union {
2520
void *regs;
2621
freg_t *fprs; /* Floating-point register save area */
@@ -30,9 +25,6 @@ struct fpu {
3025

3126
void save_fpu_regs(void);
3227

33-
#define is_vx_fpu(fpu) (!!((fpu)->flags & FPU_USE_VX))
34-
#define is_vx_task(tsk) (!!((tsk)->thread.fpu.flags & FPU_USE_VX))
35-
3628
/* VX array structure for address operand constraints in inline assemblies */
3729
struct vx_array { __vector128 _[__NUM_VXRS]; };
3830

@@ -89,7 +81,7 @@ static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
8981
static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
9082
{
9183
fpregs->pad = 0;
92-
if (is_vx_fpu(fpu))
84+
if (MACHINE_HAS_VX)
9385
convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
9486
else
9587
memcpy((freg_t *)&fpregs->fprs, fpu->fprs,
@@ -98,13 +90,11 @@ static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
9890

9991
static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
10092
{
101-
if (is_vx_fpu(fpu))
93+
if (MACHINE_HAS_VX)
10294
convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
10395
else
10496
memcpy(fpu->fprs, (freg_t *)&fpregs->fprs,
10597
sizeof(fpregs->fprs));
10698
}
10799

108-
#endif
109-
110100
#endif /* _ASM_S390_FPU_INTERNAL_H */

arch/s390/kernel/asm-offsets.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ int main(void)
2929
BLANK();
3030
DEFINE(__THREAD_ksp, offsetof(struct thread_struct, ksp));
3131
DEFINE(__THREAD_FPU_fpc, offsetof(struct thread_struct, fpu.fpc));
32-
DEFINE(__THREAD_FPU_flags, offsetof(struct thread_struct, fpu.flags));
3332
DEFINE(__THREAD_FPU_regs, offsetof(struct thread_struct, fpu.regs));
3433
DEFINE(__THREAD_per_cause, offsetof(struct thread_struct, per_event.cause));
3534
DEFINE(__THREAD_per_address, offsetof(struct thread_struct, per_event.address));

arch/s390/kernel/compat_signal.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ static int save_sigregs_ext32(struct pt_regs *regs,
249249
return -EFAULT;
250250

251251
/* Save vector registers to signal stack */
252-
if (is_vx_task(current)) {
252+
if (MACHINE_HAS_VX) {
253253
for (i = 0; i < __NUM_VXRS_LOW; i++)
254254
vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
255255
if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
@@ -277,7 +277,7 @@ static int restore_sigregs_ext32(struct pt_regs *regs,
277277
*(__u32 *)&regs->gprs[i] = gprs_high[i];
278278

279279
/* Restore vector registers from signal stack */
280-
if (is_vx_task(current)) {
280+
if (MACHINE_HAS_VX) {
281281
if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
282282
sizeof(sregs_ext->vxrs_low)) ||
283283
__copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
@@ -470,8 +470,7 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
470470
*/
471471
uc_flags = UC_GPRS_HIGH;
472472
if (MACHINE_HAS_VX) {
473-
if (is_vx_task(current))
474-
uc_flags |= UC_VXRS;
473+
uc_flags |= UC_VXRS;
475474
} else
476475
frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) +
477476
sizeof(frame->uc.uc_mcontext_ext.vxrs_high);

arch/s390/kernel/early.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,9 +329,19 @@ static __init void detect_machine_facilities(void)
329329
S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
330330
if (test_facility(51))
331331
S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
332-
if (test_facility(129))
332+
if (test_facility(129)) {
333333
S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
334+
__ctl_set_bit(0, 17);
335+
}
336+
}
337+
338+
static int __init disable_vector_extension(char *str)
339+
{
340+
S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
341+
__ctl_clear_bit(0, 17);
342+
return 1;
334343
}
344+
early_param("novx", disable_vector_extension);
335345

336346
static int __init cad_setup(char *str)
337347
{

arch/s390/kernel/entry.S

Lines changed: 21 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
#include <asm/page.h>
2121
#include <asm/sigp.h>
2222
#include <asm/irq.h>
23-
#include <asm/fpu-internal.h>
2423
#include <asm/vx-insn.h>
2524

2625
__PT_R0 = __PT_GPRS
@@ -748,15 +747,12 @@ ENTRY(psw_idle)
748747
br %r14
749748
.Lpsw_idle_end:
750749

751-
/* Store floating-point controls and floating-point or vector extension
752-
* registers instead. A critical section cleanup assures that the registers
753-
* are stored even if interrupted for some other work. The register %r2
754-
* designates a struct fpu to store register contents. If the specified
755-
* structure does not contain a register save area, the register store is
756-
* omitted (see also comments in arch_dup_task_struct()).
757-
*
758-
* The CIF_FPU flag is set in any case. The CIF_FPU triggers a lazy restore
759-
* of the register contents at system call or io return.
750+
/*
751+
* Store floating-point controls and floating-point or vector register
752+
* depending whether the vector facility is available. A critical section
753+
* cleanup assures that the registers are stored even if interrupted for
754+
* some other work. The CIF_FPU flag is set to trigger a lazy restore
755+
* of the register contents at return from io or a system call.
760756
*/
761757
ENTRY(save_fpu_regs)
762758
lg %r2,__LC_CURRENT
@@ -768,7 +764,7 @@ ENTRY(save_fpu_regs)
768764
lg %r3,__THREAD_FPU_regs(%r2)
769765
ltgr %r3,%r3
770766
jz .Lsave_fpu_regs_done # no save area -> set CIF_FPU
771-
tm __THREAD_FPU_flags+3(%r2),FPU_USE_VX
767+
tm __LC_MACHINE_FLAGS+5,4 # MACHINE_HAS_VX
772768
jz .Lsave_fpu_regs_fp # no -> store FP regs
773769
.Lsave_fpu_regs_vx_low:
774770
VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3)
@@ -797,41 +793,30 @@ ENTRY(save_fpu_regs)
797793
br %r14
798794
.Lsave_fpu_regs_end:
799795

800-
/* Load floating-point controls and floating-point or vector extension
801-
* registers. A critical section cleanup assures that the register contents
802-
* are loaded even if interrupted for some other work. Depending on the saved
803-
* FP/VX state, the vector-enablement control, CR0.46, is either set or cleared.
796+
/*
797+
* Load floating-point controls and floating-point or vector registers.
798+
* A critical section cleanup assures that the register contents are
799+
* loaded even if interrupted for some other work.
804800
*
805801
* There are special calling conventions to fit into sysc and io return work:
806802
* %r15: <kernel stack>
807803
* The function requires:
808-
* %r4 and __SF_EMPTY+32(%r15)
804+
* %r4
809805
*/
810806
load_fpu_regs:
811807
lg %r4,__LC_CURRENT
812808
aghi %r4,__TASK_thread
813809
tm __LC_CPU_FLAGS+7,_CIF_FPU
814810
bnor %r14
815811
lfpc __THREAD_FPU_fpc(%r4)
816-
stctg %c0,%c0,__SF_EMPTY+32(%r15) # store CR0
817-
tm __THREAD_FPU_flags+3(%r4),FPU_USE_VX # VX-enabled task ?
812+
tm __LC_MACHINE_FLAGS+5,4 # MACHINE_HAS_VX
818813
lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area
819-
jz .Lload_fpu_regs_fp_ctl # -> no VX, load FP regs
820-
.Lload_fpu_regs_vx_ctl:
821-
tm __SF_EMPTY+32+5(%r15),2 # test VX control
822-
jo .Lload_fpu_regs_vx
823-
oi __SF_EMPTY+32+5(%r15),2 # set VX control
824-
lctlg %c0,%c0,__SF_EMPTY+32(%r15)
814+
jz .Lload_fpu_regs_fp # -> no VX, load FP regs
825815
.Lload_fpu_regs_vx:
826816
VLM %v0,%v15,0,%r4
827817
.Lload_fpu_regs_vx_high:
828818
VLM %v16,%v31,256,%r4
829819
j .Lload_fpu_regs_done
830-
.Lload_fpu_regs_fp_ctl:
831-
tm __SF_EMPTY+32+5(%r15),2 # test VX control
832-
jz .Lload_fpu_regs_fp
833-
ni __SF_EMPTY+32+5(%r15),253 # clear VX control
834-
lctlg %c0,%c0,__SF_EMPTY+32(%r15)
835820
.Lload_fpu_regs_fp:
836821
ld 0,0(%r4)
837822
ld 1,8(%r4)
@@ -854,16 +839,6 @@ load_fpu_regs:
854839
br %r14
855840
.Lload_fpu_regs_end:
856841

857-
/* Test and set the vector enablement control in CR0.46 */
858-
ENTRY(__ctl_set_vx)
859-
stctg %c0,%c0,__SF_EMPTY(%r15)
860-
tm __SF_EMPTY+5(%r15),2
861-
bor %r14
862-
oi __SF_EMPTY+5(%r15),2
863-
lctlg %c0,%c0,__SF_EMPTY(%r15)
864-
br %r14
865-
.L__ctl_set_vx_end:
866-
867842
.L__critical_end:
868843

869844
/*
@@ -1019,10 +994,6 @@ cleanup_critical:
1019994
jl 0f
1020995
clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end
1021996
jl .Lcleanup_load_fpu_regs
1022-
clg %r9,BASED(.Lcleanup_table+112) # __ctl_set_vx
1023-
jl 0f
1024-
clg %r9,BASED(.Lcleanup_table+120) # .L__ctl_set_vx_end
1025-
jl .Lcleanup___ctl_set_vx
1026997
0: br %r14
1027998

1028999
.align 8
@@ -1041,8 +1012,6 @@ cleanup_critical:
10411012
.quad .Lsave_fpu_regs_end
10421013
.quad load_fpu_regs
10431014
.quad .Lload_fpu_regs_end
1044-
.quad __ctl_set_vx
1045-
.quad .L__ctl_set_vx_end
10461015

10471016
#if IS_ENABLED(CONFIG_KVM)
10481017
.Lcleanup_table_sie:
@@ -1226,7 +1195,7 @@ cleanup_critical:
12261195
lg %r3,__THREAD_FPU_regs(%r2)
12271196
ltgr %r3,%r3
12281197
jz 5f # no save area -> set CIF_FPU
1229-
tm __THREAD_FPU_flags+3(%r2),FPU_USE_VX
1198+
tm __LC_MACHINE_FLAGS+5,4 # MACHINE_HAS_VX
12301199
jz 4f # no VX -> store FP regs
12311200
2: # Store vector registers (V0-V15)
12321201
VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3)
@@ -1272,37 +1241,21 @@ cleanup_critical:
12721241
jhe 1f
12731242
clg %r9,BASED(.Lcleanup_load_fpu_regs_fp)
12741243
jhe 2f
1275-
clg %r9,BASED(.Lcleanup_load_fpu_regs_fp_ctl)
1276-
jhe 3f
12771244
clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_high)
1278-
jhe 4f
1245+
jhe 3f
12791246
clg %r9,BASED(.Lcleanup_load_fpu_regs_vx)
1280-
jhe 5f
1281-
clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_ctl)
1282-
jhe 6f
1247+
jhe 4f
12831248
lg %r4,__LC_CURRENT
12841249
aghi %r4,__TASK_thread
12851250
lfpc __THREAD_FPU_fpc(%r4)
1286-
tm __THREAD_FPU_flags+3(%r4),FPU_USE_VX # VX-enabled task ?
1251+
tm __LC_MACHINE_FLAGS+5,4 # MACHINE_HAS_VX
12871252
lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area
1288-
jz 3f # -> no VX, load FP regs
1289-
6: # Set VX-enablement control
1290-
stctg %c0,%c0,__SF_EMPTY+32(%r15) # store CR0
1291-
tm __SF_EMPTY+32+5(%r15),2 # test VX control
1292-
jo 5f
1293-
oi __SF_EMPTY+32+5(%r15),2 # set VX control
1294-
lctlg %c0,%c0,__SF_EMPTY+32(%r15)
1295-
5: # Load V0 ..V15 registers
1253+
jz 2f # -> no VX, load FP regs
1254+
4: # Load V0 ..V15 registers
12961255
VLM %v0,%v15,0,%r4
1297-
4: # Load V16..V31 registers
1256+
3: # Load V16..V31 registers
12981257
VLM %v16,%v31,256,%r4
12991258
j 1f
1300-
3: # Clear VX-enablement control for FP
1301-
stctg %c0,%c0,__SF_EMPTY+32(%r15) # store CR0
1302-
tm __SF_EMPTY+32+5(%r15),2 # test VX control
1303-
jz 2f
1304-
ni __SF_EMPTY+32+5(%r15),253 # clear VX control
1305-
lctlg %c0,%c0,__SF_EMPTY+32(%r15)
13061259
2: # Load floating-point registers
13071260
ld 0,0(%r4)
13081261
ld 1,8(%r4)
@@ -1324,28 +1277,15 @@ cleanup_critical:
13241277
ni __LC_CPU_FLAGS+7,255-_CIF_FPU
13251278
lg %r9,48(%r11) # return from load_fpu_regs
13261279
br %r14
1327-
.Lcleanup_load_fpu_regs_vx_ctl:
1328-
.quad .Lload_fpu_regs_vx_ctl
13291280
.Lcleanup_load_fpu_regs_vx:
13301281
.quad .Lload_fpu_regs_vx
13311282
.Lcleanup_load_fpu_regs_vx_high:
13321283
.quad .Lload_fpu_regs_vx_high
1333-
.Lcleanup_load_fpu_regs_fp_ctl:
1334-
.quad .Lload_fpu_regs_fp_ctl
13351284
.Lcleanup_load_fpu_regs_fp:
13361285
.quad .Lload_fpu_regs_fp
13371286
.Lcleanup_load_fpu_regs_done:
13381287
.quad .Lload_fpu_regs_done
13391288

1340-
.Lcleanup___ctl_set_vx:
1341-
stctg %c0,%c0,__SF_EMPTY(%r15)
1342-
tm __SF_EMPTY+5(%r15),2
1343-
bor %r14
1344-
oi __SF_EMPTY+5(%r15),2
1345-
lctlg %c0,%c0,__SF_EMPTY(%r15)
1346-
lg %r9,48(%r11) # return from __ctl_set_vx
1347-
br %r14
1348-
13491289
/*
13501290
* Integer constants
13511291
*/

arch/s390/kernel/entry.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@ void psw_idle(struct s390_idle_data *, unsigned long);
2121
asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
2222
asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
2323

24-
int alloc_vector_registers(struct task_struct *tsk);
25-
2624
void do_protection_exception(struct pt_regs *regs);
2725
void do_dat_exception(struct pt_regs *regs);
2826

arch/s390/kernel/process.c

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -87,31 +87,29 @@ void arch_release_task_struct(struct task_struct *tsk)
8787

8888
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
8989
{
90+
size_t fpu_regs_size;
91+
9092
*dst = *src;
9193

92-
/* Set up a new floating-point register save area */
93-
dst->thread.fpu.fpc = 0;
94-
dst->thread.fpu.flags = 0; /* Always start with VX disabled */
95-
dst->thread.fpu.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
96-
GFP_KERNEL|__GFP_REPEAT);
97-
if (!dst->thread.fpu.fprs)
94+
/*
95+
* If the vector extension is available, it is enabled for all tasks,
96+
* and, thus, the FPU register save area must be allocated accordingly.
97+
*/
98+
fpu_regs_size = MACHINE_HAS_VX ? sizeof(__vector128) * __NUM_VXRS
99+
: sizeof(freg_t) * __NUM_FPRS;
100+
dst->thread.fpu.regs = kzalloc(fpu_regs_size, GFP_KERNEL|__GFP_REPEAT);
101+
if (!dst->thread.fpu.regs)
98102
return -ENOMEM;
99103

100104
/*
101105
* Save the floating-point or vector register state of the current
102-
* task. The state is not saved for early kernel threads, for example,
103-
* the init_task, which do not have an allocated save area.
104-
* The CIF_FPU flag is set in any case to lazy clear or restore a saved
105-
* state when switching to a different task or returning to user space.
106+
* task and set the CIF_FPU flag to lazy restore the FPU register
107+
* state when returning to user space.
106108
*/
107109
save_fpu_regs();
108110
dst->thread.fpu.fpc = current->thread.fpu.fpc;
109-
if (is_vx_task(current))
110-
convert_vx_to_fp(dst->thread.fpu.fprs,
111-
current->thread.fpu.vxrs);
112-
else
113-
memcpy(dst->thread.fpu.fprs, current->thread.fpu.fprs,
114-
sizeof(freg_t) * __NUM_FPRS);
111+
memcpy(dst->thread.fpu.regs, current->thread.fpu.regs, fpu_regs_size);
112+
115113
return 0;
116114
}
117115

@@ -199,7 +197,7 @@ int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
199197
save_fpu_regs();
200198
fpregs->fpc = current->thread.fpu.fpc;
201199
fpregs->pad = 0;
202-
if (is_vx_task(current))
200+
if (MACHINE_HAS_VX)
203201
convert_vx_to_fp((freg_t *)&fpregs->fprs,
204202
current->thread.fpu.vxrs);
205203
else

0 commit comments

Comments
 (0)