Skip to content

Commit 066c409

Browse files
committed
s390/fpu: decrease stack usage for some cases
The kernel_fpu structure has a quite large size of 520 bytes. In order to reduce the stack footprint, introduce several kernel fpu structures with different and also smaller sizes. This way every kernel fpu user must use the correct variant. A compile time check verifies that the correct variant is used.

There are several users which use only 16 instead of all 32 vector registers. For those users the new kernel_fpu_16 structure with a size of only 264 bytes can be used.

Signed-off-by: Heiko Carstens <[email protected]>
1 parent cad8c3a commit 066c409

File tree

8 files changed

+96
-42
lines changed

8 files changed

+96
-42
lines changed

arch/s390/crypto/chacha-glue.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ static void chacha20_crypt_s390(u32 *state, u8 *dst, const u8 *src,
2222
unsigned int nbytes, const u32 *key,
2323
u32 *counter)
2424
{
25-
DECLARE_KERNEL_FPU_ONSTACK(vxstate);
25+
DECLARE_KERNEL_FPU_ONSTACK32(vxstate);
2626

2727
kernel_fpu_begin(&vxstate, KERNEL_VXR);
2828
chacha20_vx(dst, src, nbytes, key, counter);

arch/s390/crypto/crc32-vx.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
5050
unsigned char const *data, size_t datalen) \
5151
{ \
5252
unsigned long prealign, aligned, remaining; \
53-
DECLARE_KERNEL_FPU_ONSTACK(vxstate); \
53+
DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \
5454
\
5555
if (datalen < VX_MIN_LEN + VX_ALIGN_MASK) \
5656
return ___crc32_sw(crc, data, datalen); \

arch/s390/include/asm/fpu-types.h

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,32 @@ struct fpu {
1616
__vector128 vxrs[__NUM_VXRS] __aligned(8);
1717
};
1818

19-
/* In-kernel FPU state structure */
19+
struct kernel_fpu_hdr {
20+
int mask;
21+
u32 fpc;
22+
};
23+
2024
struct kernel_fpu {
21-
int mask;
22-
u32 fpc;
23-
__vector128 vxrs[__NUM_VXRS] __aligned(8);
25+
struct kernel_fpu_hdr hdr;
26+
__vector128 vxrs[] __aligned(8);
2427
};
2528

26-
#define DECLARE_KERNEL_FPU_ONSTACK(name) \
27-
struct kernel_fpu name __uninitialized
29+
#define KERNEL_FPU_STRUCT(vxr_size) \
30+
struct kernel_fpu_##vxr_size { \
31+
struct kernel_fpu_hdr hdr; \
32+
__vector128 vxrs[vxr_size] __aligned(8); \
33+
}
34+
35+
KERNEL_FPU_STRUCT(16);
36+
KERNEL_FPU_STRUCT(32);
37+
38+
#define DECLARE_KERNEL_FPU_ONSTACK(vxr_size, name) \
39+
struct kernel_fpu_##vxr_size name __uninitialized
40+
41+
#define DECLARE_KERNEL_FPU_ONSTACK16(name) \
42+
DECLARE_KERNEL_FPU_ONSTACK(16, name)
43+
44+
#define DECLARE_KERNEL_FPU_ONSTACK32(name) \
45+
DECLARE_KERNEL_FPU_ONSTACK(32, name)
2846

2947
#endif /* _ASM_S390_FPU_TYPES_H */

arch/s390/include/asm/fpu.h

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -162,28 +162,64 @@ static __always_inline void load_fp_regs_vx(__vector128 *vxrs)
162162
__load_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t));
163163
}
164164

165-
static inline void kernel_fpu_begin(struct kernel_fpu *state, int flags)
165+
static inline void _kernel_fpu_begin(struct kernel_fpu *state, int flags)
166166
{
167-
state->mask = READ_ONCE(current->thread.kfpu_flags);
167+
state->hdr.mask = READ_ONCE(current->thread.kfpu_flags);
168168
if (!test_thread_flag(TIF_FPU)) {
169169
/* Save user space FPU state and register contents */
170170
save_user_fpu_regs();
171-
} else if (state->mask & flags) {
171+
} else if (state->hdr.mask & flags) {
172172
/* Save FPU/vector register in-use by the kernel */
173173
__kernel_fpu_begin(state, flags);
174174
}
175175
__atomic_or(flags, &current->thread.kfpu_flags);
176176
}
177177

178-
static inline void kernel_fpu_end(struct kernel_fpu *state, int flags)
178+
static inline void _kernel_fpu_end(struct kernel_fpu *state, int flags)
179179
{
180-
WRITE_ONCE(current->thread.kfpu_flags, state->mask);
181-
if (state->mask & flags) {
180+
WRITE_ONCE(current->thread.kfpu_flags, state->hdr.mask);
181+
if (state->hdr.mask & flags) {
182182
/* Restore FPU/vector register in-use by the kernel */
183183
__kernel_fpu_end(state, flags);
184184
}
185185
}
186186

187+
void __kernel_fpu_invalid_size(void);
188+
189+
static __always_inline void kernel_fpu_check_size(int flags, unsigned int size)
190+
{
191+
unsigned int cnt = 0;
192+
193+
if (flags & KERNEL_VXR_V0V7)
194+
cnt += 8;
195+
if (flags & KERNEL_VXR_V8V15)
196+
cnt += 8;
197+
if (flags & KERNEL_VXR_V16V23)
198+
cnt += 8;
199+
if (flags & KERNEL_VXR_V24V31)
200+
cnt += 8;
201+
if (cnt != size)
202+
__kernel_fpu_invalid_size();
203+
}
204+
205+
#define kernel_fpu_begin(state, flags) \
206+
{ \
207+
typeof(state) s = (state); \
208+
int _flags = (flags); \
209+
\
210+
kernel_fpu_check_size(_flags, ARRAY_SIZE(s->vxrs)); \
211+
_kernel_fpu_begin((struct kernel_fpu *)s, _flags); \
212+
}
213+
214+
#define kernel_fpu_end(state, flags) \
215+
{ \
216+
typeof(state) s = (state); \
217+
int _flags = (flags); \
218+
\
219+
kernel_fpu_check_size(_flags, ARRAY_SIZE(s->vxrs)); \
220+
_kernel_fpu_end((struct kernel_fpu *)s, _flags); \
221+
}
222+
187223
static inline void save_kernel_fpu_regs(struct thread_struct *thread)
188224
{
189225
struct fpu *state = &thread->kfpu;

arch/s390/kernel/fpu.c

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -19,41 +19,41 @@ void __kernel_fpu_begin(struct kernel_fpu *state, int flags)
1919
* Limit the save to the FPU/vector registers already
2020
* in use by the previous context.
2121
*/
22-
flags &= state->mask;
22+
flags &= state->hdr.mask;
2323
if (flags & KERNEL_FPC)
24-
fpu_stfpc(&state->fpc);
24+
fpu_stfpc(&state->hdr.fpc);
2525
if (!cpu_has_vx()) {
2626
if (flags & KERNEL_VXR_LOW)
27-
save_fp_regs_vx(state->vxrs);
27+
save_fp_regs_vx(vxrs);
2828
return;
2929
}
3030
mask = flags & KERNEL_VXR;
3131
if (mask == KERNEL_VXR) {
32-
fpu_vstm(0, 15, &vxrs[0]);
33-
fpu_vstm(16, 31, &vxrs[16]);
32+
vxrs += fpu_vstm(0, 15, vxrs);
33+
vxrs += fpu_vstm(16, 31, vxrs);
3434
return;
3535
}
3636
if (mask == KERNEL_VXR_MID) {
37-
fpu_vstm(8, 23, &vxrs[8]);
37+
vxrs += fpu_vstm(8, 23, vxrs);
3838
return;
3939
}
4040
mask = flags & KERNEL_VXR_LOW;
4141
if (mask) {
4242
if (mask == KERNEL_VXR_LOW)
43-
fpu_vstm(0, 15, &vxrs[0]);
43+
vxrs += fpu_vstm(0, 15, vxrs);
4444
else if (mask == KERNEL_VXR_V0V7)
45-
fpu_vstm(0, 7, &vxrs[0]);
45+
vxrs += fpu_vstm(0, 7, vxrs);
4646
else
47-
fpu_vstm(8, 15, &vxrs[8]);
47+
vxrs += fpu_vstm(8, 15, vxrs);
4848
}
4949
mask = flags & KERNEL_VXR_HIGH;
5050
if (mask) {
5151
if (mask == KERNEL_VXR_HIGH)
52-
fpu_vstm(16, 31, &vxrs[16]);
52+
vxrs += fpu_vstm(16, 31, vxrs);
5353
else if (mask == KERNEL_VXR_V16V23)
54-
fpu_vstm(16, 23, &vxrs[16]);
54+
vxrs += fpu_vstm(16, 23, vxrs);
5555
else
56-
fpu_vstm(24, 31, &vxrs[24]);
56+
vxrs += fpu_vstm(24, 31, vxrs);
5757
}
5858
}
5959
EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -68,41 +68,41 @@ void __kernel_fpu_end(struct kernel_fpu *state, int flags)
6868
* previous context that have been overwritten by the
6969
* current context.
7070
*/
71-
flags &= state->mask;
71+
flags &= state->hdr.mask;
7272
if (flags & KERNEL_FPC)
73-
fpu_lfpc(&state->fpc);
73+
fpu_lfpc(&state->hdr.fpc);
7474
if (!cpu_has_vx()) {
7575
if (flags & KERNEL_VXR_LOW)
76-
load_fp_regs_vx(state->vxrs);
76+
load_fp_regs_vx(vxrs);
7777
return;
7878
}
7979
mask = flags & KERNEL_VXR;
8080
if (mask == KERNEL_VXR) {
81-
fpu_vlm(0, 15, &vxrs[0]);
82-
fpu_vlm(16, 31, &vxrs[16]);
81+
vxrs += fpu_vlm(0, 15, vxrs);
82+
vxrs += fpu_vlm(16, 31, vxrs);
8383
return;
8484
}
8585
if (mask == KERNEL_VXR_MID) {
86-
fpu_vlm(8, 23, &vxrs[8]);
86+
vxrs += fpu_vlm(8, 23, vxrs);
8787
return;
8888
}
8989
mask = flags & KERNEL_VXR_LOW;
9090
if (mask) {
9191
if (mask == KERNEL_VXR_LOW)
92-
fpu_vlm(0, 15, &vxrs[0]);
92+
vxrs += fpu_vlm(0, 15, vxrs);
9393
else if (mask == KERNEL_VXR_V0V7)
94-
fpu_vlm(0, 7, &vxrs[0]);
94+
vxrs += fpu_vlm(0, 7, vxrs);
9595
else
96-
fpu_vlm(8, 15, &vxrs[8]);
96+
vxrs += fpu_vlm(8, 15, vxrs);
9797
}
9898
mask = flags & KERNEL_VXR_HIGH;
9999
if (mask) {
100100
if (mask == KERNEL_VXR_HIGH)
101-
fpu_vlm(16, 31, &vxrs[16]);
101+
vxrs += fpu_vlm(16, 31, vxrs);
102102
else if (mask == KERNEL_VXR_V16V23)
103-
fpu_vlm(16, 23, &vxrs[16]);
103+
vxrs += fpu_vlm(16, 23, vxrs);
104104
else
105-
fpu_vlm(24, 31, &vxrs[24]);
105+
vxrs += fpu_vlm(24, 31, vxrs);
106106
}
107107
}
108108
EXPORT_SYMBOL(__kernel_fpu_end);

arch/s390/kernel/sysinfo.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,7 @@ subsys_initcall(create_proc_service_level);
426426
*/
427427
void s390_adjust_jiffies(void)
428428
{
429-
DECLARE_KERNEL_FPU_ONSTACK(fpu);
429+
DECLARE_KERNEL_FPU_ONSTACK16(fpu);
430430
struct sysinfo_1_2_2 *info;
431431
unsigned long capability;
432432

arch/s390/kvm/kvm-s390.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5026,7 +5026,7 @@ static void store_regs(struct kvm_vcpu *vcpu)
50265026
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
50275027
{
50285028
struct kvm_run *kvm_run = vcpu->run;
5029-
DECLARE_KERNEL_FPU_ONSTACK(fpu);
5029+
DECLARE_KERNEL_FPU_ONSTACK32(fpu);
50305030
int rc;
50315031

50325032
/*

lib/raid6/s390vx.uc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ static inline void COPY_VEC(int x, int y)
8080

8181
static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
8282
{
83-
DECLARE_KERNEL_FPU_ONSTACK(vxstate);
83+
DECLARE_KERNEL_FPU_ONSTACK32(vxstate);
8484
u8 **dptr, *p, *q;
8585
int d, z, z0;
8686

@@ -113,7 +113,7 @@ static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
113113
static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop,
114114
size_t bytes, void **ptrs)
115115
{
116-
DECLARE_KERNEL_FPU_ONSTACK(vxstate);
116+
DECLARE_KERNEL_FPU_ONSTACK32(vxstate);
117117
u8 **dptr, *p, *q;
118118
int d, z, z0;
119119

0 commit comments

Comments
 (0)