Skip to content

Commit 7b9896c

Browse files
committed
ARM: percpu: add SMP_ON_UP support
Permit the use of the TPIDRPRW system register for carrying the per-CPU offset in generic SMP configurations that also target non-SMP capable ARMv6 cores. This uses the SMP_ON_UP code patching framework to turn all TPIDRPRW accesses into reads/writes of entry #0 in the __per_cpu_offset array.

While at it, switch over some existing direct TPIDRPRW accesses in asm code to invocations of a new helper that is patched in the same way when necessary.

Note that CPU_V6+SMP without SMP_ON_UP results in a kernel that does not boot on v6 CPUs without SMP extensions, so add this dependency to Kconfig as well.

Acked-by: Linus Walleij <[email protected]>
Acked-by: Nicolas Pitre <[email protected]>
Signed-off-by: Ard Biesheuvel <[email protected]>
Tested-by: Marc Zyngier <[email protected]>
Tested-by: Vladimir Murzin <[email protected]> # ARMv7M
1 parent 4e918ab commit 7b9896c

File tree

6 files changed

+107
-22
lines changed

6 files changed

+107
-22
lines changed

arch/arm/include/asm/assembler.h

Lines changed: 56 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -220,9 +220,7 @@ THUMB( fpreg .req r7 )
220220

221221
.macro reload_current, t1:req, t2:req
222222
#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
223-
adr_l \t1, __entry_task @ get __entry_task base address
224-
mrc p15, 0, \t2, c13, c0, 4 @ get per-CPU offset
225-
ldr \t1, [\t1, \t2] @ load variable
223+
ldr_this_cpu \t1, __entry_task, \t1, \t2
226224
mcr p15, 0, \t1, c13, c0, 3 @ store in TPIDRURO
227225
#endif
228226
.endm
@@ -312,6 +310,26 @@ THUMB( fpreg .req r7 )
312310
#define ALT_UP_B(label) b label
313311
#endif
314312

313+
/*
314+
* this_cpu_offset - load the per-CPU offset of this CPU into
315+
* register 'rd'
316+
*/
317+
.macro this_cpu_offset, rd:req
318+
#ifdef CONFIG_SMP
319+
ALT_SMP(mrc p15, 0, \rd, c13, c0, 4)
320+
#ifdef CONFIG_CPU_V6
321+
ALT_UP_B(.L1_\@)
322+
.L0_\@:
323+
.subsection 1
324+
.L1_\@: ldr_va \rd, __per_cpu_offset
325+
b .L0_\@
326+
.previous
327+
#endif
328+
#else
329+
mov \rd, #0
330+
#endif
331+
.endm
332+
315333
/*
316334
* Instruction barrier
317335
*/
@@ -648,6 +666,41 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
648666
__ldst_va str, \rn, \tmp, \sym, \cond
649667
.endm
650668

669+
/*
670+
* ldr_this_cpu_armv6 - Load a 32-bit word from the per-CPU variable 'sym',
671+
* without using a temp register. Supported in ARM mode
672+
* only.
673+
*/
674+
.macro ldr_this_cpu_armv6, rd:req, sym:req
675+
this_cpu_offset \rd
676+
.globl \sym
677+
.reloc .L0_\@, R_ARM_ALU_PC_G0_NC, \sym
678+
.reloc .L1_\@, R_ARM_ALU_PC_G1_NC, \sym
679+
.reloc .L2_\@, R_ARM_LDR_PC_G2, \sym
680+
add \rd, \rd, pc
681+
.L0_\@: sub \rd, \rd, #4
682+
.L1_\@: sub \rd, \rd, #0
683+
.L2_\@: ldr \rd, [\rd, #4]
684+
.endm
685+
686+
/*
687+
* ldr_this_cpu - Load a 32-bit word from the per-CPU variable 'sym'
688+
* into register 'rd', which may be the stack pointer,
689+
* using 't1' and 't2' as general temp registers. These
690+
* are permitted to overlap with 'rd' if != sp
691+
*/
692+
.macro ldr_this_cpu, rd:req, sym:req, t1:req, t2:req
693+
#if __LINUX_ARM_ARCH__ >= 7 || \
694+
(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) || \
695+
(defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
696+
this_cpu_offset \t1
697+
mov_l \t2, \sym
698+
ldr \rd, [\t1, \t2]
699+
#else
700+
ldr_this_cpu_armv6 \rd, \sym
701+
#endif
702+
.endm
703+
651704
/*
652705
* rev_l - byte-swap a 32-bit value
653706
*

arch/arm/include/asm/insn.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,30 @@
22
#ifndef __ASM_ARM_INSN_H
33
#define __ASM_ARM_INSN_H
44

5+
#include <linux/types.h>
6+
7+
/*
8+
* Avoid a literal load by emitting a sequence of ADD/LDR instructions with the
9+
* appropriate relocations. The combined sequence has a range of -/+ 256 MiB,
10+
* which should be sufficient for the core kernel as well as modules loaded
11+
* into the module region. (Not supported by LLD before release 14)
12+
*/
13+
#if !(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) && \
14+
!(defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
15+
#define LOAD_SYM_ARMV6(reg, sym) \
16+
" .globl " #sym " \n\t" \
17+
" .reloc 10f, R_ARM_ALU_PC_G0_NC, " #sym " \n\t" \
18+
" .reloc 11f, R_ARM_ALU_PC_G1_NC, " #sym " \n\t" \
19+
" .reloc 12f, R_ARM_LDR_PC_G2, " #sym " \n\t" \
20+
"10: sub " #reg ", pc, #8 \n\t" \
21+
"11: sub " #reg ", " #reg ", #4 \n\t" \
22+
"12: ldr " #reg ", [" #reg ", #0] \n\t"
23+
#else
24+
#define LOAD_SYM_ARMV6(reg, sym) \
25+
" ldr " #reg ", =" #sym " \n\t" \
26+
" ldr " #reg ", [" #reg "] \n\t"
27+
#endif
28+
529
static inline unsigned long
630
arm_gen_nop(void)
731
{

arch/arm/include/asm/percpu.h

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,22 @@
55
#ifndef _ASM_ARM_PERCPU_H_
66
#define _ASM_ARM_PERCPU_H_
77

8+
#include <asm/insn.h>
9+
810
register unsigned long current_stack_pointer asm ("sp");
911

1012
/*
1113
* Same as asm-generic/percpu.h, except that we store the per cpu offset
1214
* in the TPIDRPRW. TPIDRPRW only exists on V6K and V7
1315
*/
14-
#if defined(CONFIG_SMP) && !defined(CONFIG_CPU_V6)
16+
#ifdef CONFIG_SMP
1517
static inline void set_my_cpu_offset(unsigned long off)
1618
{
19+
extern unsigned int smp_on_up;
20+
21+
if (IS_ENABLED(CONFIG_CPU_V6) && !smp_on_up)
22+
return;
23+
1724
/* Set TPIDRPRW */
1825
asm volatile("mcr p15, 0, %0, c13, c0, 4" : : "r" (off) : "memory");
1926
}
@@ -27,8 +34,20 @@ static inline unsigned long __my_cpu_offset(void)
2734
* We want to allow caching the value, so avoid using volatile and
2835
* instead use a fake stack read to hazard against barrier().
2936
*/
30-
asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off)
31-
: "Q" (*(const unsigned long *)current_stack_pointer));
37+
asm("0: mrc p15, 0, %0, c13, c0, 4 \n\t"
38+
#ifdef CONFIG_CPU_V6
39+
"1: \n\t"
40+
" .subsection 1 \n\t"
41+
"2: " LOAD_SYM_ARMV6(%0, __per_cpu_offset) " \n\t"
42+
" b 1b \n\t"
43+
" .previous \n\t"
44+
" .pushsection \".alt.smp.init\", \"a\" \n\t"
45+
" .long 0b - . \n\t"
46+
" b . + (2b - 0b) \n\t"
47+
" .popsection \n\t"
48+
#endif
49+
: "=r" (off)
50+
: "Q" (*(const unsigned long *)current_stack_pointer));
3251

3352
return off;
3453
}

arch/arm/kernel/entry-armv.S

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -35,15 +35,14 @@
3535
.macro irq_handler, from_user:req
3636
mov r0, sp
3737
#ifdef CONFIG_IRQSTACKS
38-
mov_l r2, irq_stack_ptr @ Take base address
39-
mrc p15, 0, r3, c13, c0, 4 @ Get CPU offset
4038
#ifdef CONFIG_UNWINDER_ARM
4139
mov fpreg, sp @ Preserve original SP
4240
#else
4341
mov r8, fp @ Preserve original FP
4442
mov r9, sp @ Preserve original SP
4543
#endif
46-
ldr sp, [r2, r3] @ Load SP from per-CPU var
44+
ldr_this_cpu sp, irq_stack_ptr, r2, r3
45+
4746
.if \from_user == 0
4847
UNWIND( .setfp fpreg, sp )
4948
@
@@ -876,16 +875,7 @@ __bad_stack:
876875
THUMB( bx pc )
877876
THUMB( nop )
878877
THUMB( .arm )
879-
mrc p15, 0, ip, c13, c0, 4 @ Get per-CPU offset
880-
881-
.globl overflow_stack_ptr
882-
.reloc 0f, R_ARM_ALU_PC_G0_NC, overflow_stack_ptr
883-
.reloc 1f, R_ARM_ALU_PC_G1_NC, overflow_stack_ptr
884-
.reloc 2f, R_ARM_LDR_PC_G2, overflow_stack_ptr
885-
add ip, ip, pc
886-
0: add ip, ip, #-4
887-
1: add ip, ip, #0
888-
2: ldr ip, [ip, #4]
878+
ldr_this_cpu_armv6 ip, overflow_stack_ptr
889879

890880
str sp, [ip, #-4]! @ Preserve original SP value
891881
mov sp, ip @ Switch to overflow stack

arch/arm/kernel/sleep.S

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,7 @@ ENTRY(__cpu_suspend)
7171
@ Run the suspend code from the overflow stack so we don't have to rely
7272
@ on vmalloc-to-phys conversions anywhere in the arch suspend code.
7373
@ The original SP value captured in R5 will be restored on the way out.
74-
mov_l r6, overflow_stack_ptr @ Base pointer
75-
mrc p15, 0, r7, c13, c0, 4 @ Get per-CPU offset
76-
ldr sp, [r6, r7] @ Address of this CPU's overflow stack
74+
ldr_this_cpu sp, overflow_stack_ptr, r6, r7
7775
#endif
7876
add r4, r4, #12 @ Space for pgd, virt sp, phys resume fn
7977
sub sp, sp, r4 @ allocate CPU state on stack

arch/arm/mm/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,7 @@ config CPU_V6
386386
select CPU_PABRT_V6
387387
select CPU_THUMB_CAPABLE
388388
select CPU_TLB_V6 if MMU
389+
select SMP_ON_UP if SMP
389390

390391
# ARMv6k
391392
config CPU_V6K

0 commit comments

Comments (0)