
Commit 1e02ce4

amluto authored and Ingo Molnar committed
x86: Store a per-cpu shadow copy of CR4
Context switches and TLB flushes can change individual bits of CR4.
CR4 reads take several cycles, so store a shadow copy of CR4 in a
per-cpu variable.

To avoid wasting a cache line, I added the CR4 shadow to
cpu_tlbstate, which is already touched in switch_mm. The heaviest
users of the cr4 shadow will be switch_mm and __switch_to_xtra, and
__switch_to_xtra is called shortly after switch_mm during context
switch, so the cacheline is likely to be hot.

Signed-off-by: Andy Lutomirski <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Vince Weaver <[email protected]>
Cc: "hillf.zj" <[email protected]>
Cc: Valdis Kletnieks <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Linus Torvalds <[email protected]>
Link: http://lkml.kernel.org/r/3a54dd3353fffbf84804398e00dfdc5b7c1afd7d.1414190806.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <[email protected]>
1 parent 375074c commit 1e02ce4
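
The pattern the commit message describes can be modeled outside the kernel: cache the value of an expensive-to-read register in a cheap variable, update the cache and the hardware together, and skip the hardware write when no bits actually change. The user-space C sketch below is illustrative only; slow_read_reg()/slow_write_reg() stand in for the real CR4 accessors, and a plain global stands in for the per-cpu cpu_tlbstate.cr4 field.

#include <stdio.h>

/* Stand-ins for the expensive hardware register accessors (illustrative only). */
static unsigned long hw_reg;
static unsigned long slow_read_reg(void)    { return hw_reg; }
static void slow_write_reg(unsigned long v) { hw_reg = v; }

/* Cheap shadow copy, analogous to cpu_tlbstate.cr4 (one per CPU in the kernel). */
static unsigned long reg_shadow;

static void shadow_init(void)
{
	reg_shadow = slow_read_reg();
}

/* Set bits, touching the "hardware" only when something actually changes. */
static void shadow_set_bits(unsigned long mask)
{
	unsigned long v = reg_shadow;

	if ((v | mask) != v) {
		v |= mask;
		reg_shadow = v;
		slow_write_reg(v);
	}
}

/* Clear bits, again skipping the write if nothing changes. */
static void shadow_clear_bits(unsigned long mask)
{
	unsigned long v = reg_shadow;

	if ((v & ~mask) != v) {
		v &= ~mask;
		reg_shadow = v;
		slow_write_reg(v);
	}
}

int main(void)
{
	shadow_init();
	shadow_set_bits(1UL << 7);	/* first call writes the register */
	shadow_set_bits(1UL << 7);	/* second call is a no-op */
	shadow_clear_bits(1UL << 7);	/* clears it again */
	printf("reg = %#lx\n", slow_read_reg());
	return 0;
}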

File tree

20 files changed (+85, -46 lines)


arch/x86/include/asm/paravirt.h

Lines changed: 3 additions & 3 deletions
@@ -80,16 +80,16 @@ static inline void write_cr3(unsigned long x)
 	PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
 }
 
-static inline unsigned long read_cr4(void)
+static inline unsigned long __read_cr4(void)
 {
 	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
 }
-static inline unsigned long read_cr4_safe(void)
+static inline unsigned long __read_cr4_safe(void)
 {
 	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
 }
 
-static inline void write_cr4(unsigned long x)
+static inline void __write_cr4(unsigned long x)
 {
 	PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
 }

arch/x86/include/asm/special_insns.h

Lines changed: 3 additions & 3 deletions
@@ -137,17 +137,17 @@ static inline void write_cr3(unsigned long x)
 	native_write_cr3(x);
 }
 
-static inline unsigned long read_cr4(void)
+static inline unsigned long __read_cr4(void)
 {
 	return native_read_cr4();
 }
 
-static inline unsigned long read_cr4_safe(void)
+static inline unsigned long __read_cr4_safe(void)
 {
 	return native_read_cr4_safe();
 }
 
-static inline void write_cr4(unsigned long x)
+static inline void __write_cr4(unsigned long x)
 {
 	native_write_cr4(x);
 }

arch/x86/include/asm/tlbflush.h

Lines changed: 39 additions & 13 deletions
@@ -15,24 +15,56 @@
 #define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
 #endif
 
+struct tlb_state {
+#ifdef CONFIG_SMP
+	struct mm_struct *active_mm;
+	int state;
+#endif
+
+	/*
+	 * Access to this CR4 shadow and to H/W CR4 is protected by
+	 * disabling interrupts when modifying either one.
+	 */
+	unsigned long cr4;
+};
+DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
+
+/* Initialize cr4 shadow for this CPU. */
+static inline void cr4_init_shadow(void)
+{
+	this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
+}
+
 /* Set in this cpu's CR4. */
 static inline void cr4_set_bits(unsigned long mask)
 {
 	unsigned long cr4;
 
-	cr4 = read_cr4();
-	cr4 |= mask;
-	write_cr4(cr4);
+	cr4 = this_cpu_read(cpu_tlbstate.cr4);
+	if ((cr4 | mask) != cr4) {
+		cr4 |= mask;
+		this_cpu_write(cpu_tlbstate.cr4, cr4);
+		__write_cr4(cr4);
+	}
 }
 
 /* Clear in this cpu's CR4. */
 static inline void cr4_clear_bits(unsigned long mask)
 {
 	unsigned long cr4;
 
-	cr4 = read_cr4();
-	cr4 &= ~mask;
-	write_cr4(cr4);
+	cr4 = this_cpu_read(cpu_tlbstate.cr4);
+	if ((cr4 & ~mask) != cr4) {
+		cr4 &= ~mask;
+		this_cpu_write(cpu_tlbstate.cr4, cr4);
+		__write_cr4(cr4);
+	}
+}
+
+/* Read the CR4 shadow. */
+static inline unsigned long cr4_read_shadow(void)
+{
+	return this_cpu_read(cpu_tlbstate.cr4);
 }
 
 /*
@@ -61,7 +93,7 @@ static inline void __native_flush_tlb_global_irq_disabled(void)
 {
 	unsigned long cr4;
 
-	cr4 = native_read_cr4();
+	cr4 = this_cpu_read(cpu_tlbstate.cr4);
 	/* clear PGE */
 	native_write_cr4(cr4 & ~X86_CR4_PGE);
 	/* write old PGE again and flush TLBs */
@@ -221,12 +253,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 #define TLBSTATE_OK	1
 #define TLBSTATE_LAZY	2
 
-struct tlb_state {
-	struct mm_struct *active_mm;
-	int state;
-};
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
-
 static inline void reset_lazy_tlbstate(void)
 {
 	this_cpu_write(cpu_tlbstate.state, 0);
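
With these helpers in place, code that toggles CR4 bits is expected to call cr4_set_bits()/cr4_clear_bits() rather than open-coded read_cr4()/write_cr4() pairs, so the hardware register is only written when a bit actually changes. A hypothetical caller (illustrative only, not part of this commit) might look like:

/* Hypothetical example, not part of this commit: toggling a CR4 feature bit. */
static void example_enable_smep(void)
{
	cr4_set_bits(X86_CR4_SMEP);	/* writes CR4 only if SMEP was clear */
}

static void example_disable_smep(void)
{
	cr4_clear_bits(X86_CR4_SMEP);	/* writes CR4 only if SMEP was set */
}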

arch/x86/include/asm/virtext.h

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@ static inline void cpu_vmxoff(void)
 
 static inline int cpu_vmx_enabled(void)
 {
-	return read_cr4() & X86_CR4_VMXE;
+	return __read_cr4() & X86_CR4_VMXE;
 }
 
 /** Disable VMX if it is enabled on the current CPU

arch/x86/kernel/acpi/sleep.c

Lines changed: 1 addition & 1 deletion
@@ -78,7 +78,7 @@ int x86_acpi_suspend_lowlevel(void)
 
 	header->pmode_cr0 = read_cr0();
 	if (__this_cpu_read(cpu_info.cpuid_level) >= 0) {
-		header->pmode_cr4 = read_cr4();
+		header->pmode_cr4 = __read_cr4();
 		header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_CR4);
 	}
 	if (!rdmsr_safe(MSR_IA32_MISC_ENABLE,

arch/x86/kernel/cpu/common.c

Lines changed: 7 additions & 0 deletions
@@ -19,6 +19,7 @@
 #include <asm/archrandom.h>
 #include <asm/hypervisor.h>
 #include <asm/processor.h>
+#include <asm/tlbflush.h>
 #include <asm/debugreg.h>
 #include <asm/sections.h>
 #include <asm/vsyscall.h>
@@ -1293,6 +1294,12 @@ void cpu_init(void)
 
 	wait_for_master_cpu(cpu);
 
+	/*
+	 * Initialize the CR4 shadow before doing anything that could
+	 * try to read it.
+	 */
+	cr4_init_shadow();
+
 	/*
 	 * Load microcode on this cpu if a valid microcode is available.
 	 * This is early microcode loading procedure.

arch/x86/kernel/cpu/mtrr/cyrix.c

Lines changed: 3 additions & 3 deletions
@@ -138,8 +138,8 @@ static void prepare_set(void)
 
 	/* Save value of CR4 and clear Page Global Enable (bit 7) */
 	if (cpu_has_pge) {
-		cr4 = read_cr4();
-		write_cr4(cr4 & ~X86_CR4_PGE);
+		cr4 = __read_cr4();
+		__write_cr4(cr4 & ~X86_CR4_PGE);
 	}
 
 	/*
@@ -171,7 +171,7 @@ static void post_set(void)
 
 	/* Restore value of CR4 */
 	if (cpu_has_pge)
-		write_cr4(cr4);
+		__write_cr4(cr4);
 }
 
 static void cyrix_set_arr(unsigned int reg, unsigned long base,

arch/x86/kernel/cpu/mtrr/generic.c

Lines changed: 3 additions & 3 deletions
@@ -678,8 +678,8 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
 
 	/* Save value of CR4 and clear Page Global Enable (bit 7) */
 	if (cpu_has_pge) {
-		cr4 = read_cr4();
-		write_cr4(cr4 & ~X86_CR4_PGE);
+		cr4 = __read_cr4();
+		__write_cr4(cr4 & ~X86_CR4_PGE);
 	}
 
 	/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
@@ -708,7 +708,7 @@ static void post_set(void) __releases(set_atomicity_lock)
 
 	/* Restore value of CR4 */
 	if (cpu_has_pge)
-		write_cr4(cr4);
+		__write_cr4(cr4);
 	raw_spin_unlock(&set_atomicity_lock);
 }
 

arch/x86/kernel/head32.c

Lines changed: 1 addition & 0 deletions
@@ -31,6 +31,7 @@ static void __init i386_default_early_setup(void)
 
 asmlinkage __visible void __init i386_start_kernel(void)
 {
+	cr4_init_shadow();
 	sanitize_boot_params(&boot_params);
 
 	/* Call the subarch specific early setup function */

arch/x86/kernel/head64.c

Lines changed: 2 additions & 0 deletions
@@ -155,6 +155,8 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 					       (__START_KERNEL & PGDIR_MASK)));
 	BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
 
+	cr4_init_shadow();
+
 	/* Kill off the identity-map trampoline */
 	reset_early_page_tables();
 
