Commit 2711b8e

KAGA-KOKO (Thomas Gleixner) authored and Peter Zijlstra committed
x86/smpboot: Switch to hotplug core state synchronization
The new AP state tracking and synchronization mechanism in the CPU hotplug
core code allows a fair amount of x86-specific code to be removed:

 1) The AP alive synchronization based on cpumasks

 2) The decision whether an AP can be brought up again

Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Tested-by: Michael Kelley <[email protected]>
Tested-by: Oleksandr Natalenko <[email protected]>
Tested-by: Helge Deller <[email protected]> # parisc
Tested-by: Guilherme G. Piccoli <[email protected]> # Steam Deck
Link: https://lore.kernel.org/r/[email protected]
1 parent 6f06212 commit 2711b8e
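The handshake this commit adopts pairs an AP-side report with a control-side wait inside the hotplug core. As a rough illustration only — a standalone userspace analogy, not kernel code, with invented state names — the pattern looks like this:

/*
 * Userspace analogy of the alive/release handshake now provided by the
 * hotplug core. The "AP" thread reports in and spins until the
 * "control" thread releases it, mirroring cpuhp_ap_sync_alive() in the
 * smpboot.c diff below. State names are illustrative, not the kernel's.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

enum sync_state { STATE_DEAD, STATE_KICKED, STATE_ALIVE, STATE_RELEASED };

static _Atomic int state = STATE_DEAD;

static void *ap_thread(void *arg)
{
	(void)arg;
	/* AP: report in, then spin until the control side releases us. */
	atomic_store(&state, STATE_ALIVE);
	while (atomic_load(&state) != STATE_RELEASED)
		;			/* the kernel uses cpu_relax() here */
	puts("AP: released, continuing bringup");
	return NULL;
}

int main(void)
{
	pthread_t ap;

	atomic_store(&state, STATE_KICKED);	/* "INIT/SIPI sent" */
	pthread_create(&ap, NULL, ap_thread, NULL);

	/* Control side: wait for the first sign of life, then release. */
	while (atomic_load(&state) != STATE_ALIVE)
		;
	puts("control: AP alive, releasing");
	atomic_store(&state, STATE_RELEASED);

	pthread_join(ap, NULL);
	return 0;
}

Build with cc -std=c11 -pthread. The kernel version additionally bounds the control-side wait with a timeout, which previously lived in the x86 code removed below.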

File tree: 6 files changed, +75 −154 lines

arch/x86/Kconfig

Lines changed: 1 addition & 0 deletions
@@ -274,6 +274,7 @@ config X86
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_USER_RETURN_NOTIFIER
 	select HAVE_GENERIC_VDSO
+	select HOTPLUG_CORE_SYNC_FULL		if SMP
 	select HOTPLUG_SMT			if SMP
 	select IRQ_FORCED_THREADING
 	select NEED_PER_CPU_EMBED_FIRST_CHUNK
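HOTPLUG_CORE_SYNC_FULL is the architecture opt-in added earlier in this series: per the preceding core patches it enables the full handshake — the control CPU waits for the AP to report alive on bringup and dead on teardown — which is what allows the cpumask-based synchronization to be deleted from smpboot.c below.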

arch/x86/include/asm/smp.h

Lines changed: 4 additions & 3 deletions
@@ -38,6 +38,8 @@ struct smp_ops {
 	void (*crash_stop_other_cpus)(void);
 	void (*smp_send_reschedule)(int cpu);
 
+	void (*cleanup_dead_cpu)(unsigned cpu);
+	void (*poll_sync_state)(void);
 	int (*cpu_up)(unsigned cpu, struct task_struct *tidle);
 	int (*cpu_disable)(void);
 	void (*cpu_die)(unsigned int cpu);
@@ -90,7 +92,8 @@ static inline int __cpu_disable(void)
 
 static inline void __cpu_die(unsigned int cpu)
 {
-	smp_ops.cpu_die(cpu);
+	if (smp_ops.cpu_die)
+		smp_ops.cpu_die(cpu);
 }
 
 static inline void __noreturn play_dead(void)
@@ -123,8 +126,6 @@ void native_smp_cpus_done(unsigned int max_cpus);
 int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_disable(void);
-int common_cpu_die(unsigned int cpu);
-void native_cpu_die(unsigned int cpu);
 void __noreturn hlt_play_dead(void);
 void native_play_dead(void);
 void play_dead_common(void);
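The two new optional callbacks split what used to be a single cpu_die() hook: cleanup_dead_cpu() runs on the control CPU after the hotplug core has confirmed death, and poll_sync_state() lets a platform do its own polling while the core waits for a state transition (both are invoked via the arch_cpuhp_* helpers added in the smpboot.c diff below). A hypothetical platform — all myplat_* names are invented for illustration; the real in-tree user is the Xen HVM change at the end of this commit — would wire them up roughly like so:

/*
 * Hypothetical wiring for a platform "myplat" (names invented for
 * illustration only):
 */
static void myplat_cleanup_dead_cpu(unsigned int cpu)
{
	/* Control-CPU side teardown, after the core confirmed death. */
	myplat_free_ipi_irqs(cpu);	/* invented helper */
	myplat_teardown_timer(cpu);	/* invented helper */
}

static void __init myplat_smp_init(void)
{
	smp_ops.cleanup_dead_cpu = myplat_cleanup_dead_cpu;
	/* .cpu_die may stay NULL; __cpu_die() above now tolerates that. */
}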

arch/x86/kernel/smp.c

Lines changed: 0 additions & 1 deletion
@@ -269,7 +269,6 @@ struct smp_ops smp_ops = {
 	.smp_send_reschedule	= native_smp_send_reschedule,
 
 	.cpu_up			= native_cpu_up,
-	.cpu_die		= native_cpu_die,
 	.cpu_disable		= native_cpu_disable,
 	.play_dead		= native_play_dead,
 
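Note that no replacement is registered here: for native CPUs the wait-for-death now happens entirely in the hotplug core, and the NULL check added to __cpu_die() in smp.h makes the empty .cpu_die slot safe.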

arch/x86/kernel/smpboot.c

Lines changed: 41 additions & 124 deletions
@@ -57,6 +57,7 @@
 #include <linux/pgtable.h>
 #include <linux/overflow.h>
 #include <linux/stackprotector.h>
+#include <linux/cpuhotplug.h>
 
 #include <asm/acpi.h>
 #include <asm/cacheinfo.h>
@@ -101,9 +102,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_die_map);
 DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
 EXPORT_PER_CPU_SYMBOL(cpu_info);
 
-/* All of these masks are initialized in setup_cpu_local_masks() */
-static cpumask_var_t cpu_initialized_mask;
-static cpumask_var_t cpu_callout_mask;
 /* Representing CPUs for which sibling maps can be computed */
 static cpumask_var_t cpu_sibling_setup_mask;
 
@@ -166,10 +164,10 @@ static void ap_starting(void)
 	int cpuid = smp_processor_id();
 
 	/*
-	 * If woken up by an INIT in an 82489DX configuration
-	 * cpu_callout_mask guarantees the CPU does not reach this point
-	 * before an INIT_deassert IPI reaches the local APIC, so it is now
-	 * safe to touch the local APIC.
+	 * If woken up by an INIT in an 82489DX configuration the alive
+	 * synchronization guarantees that the CPU does not reach this
+	 * point before an INIT_deassert IPI reaches the local APIC, so it
+	 * is now safe to touch the local APIC.
 	 *
 	 * Set up this CPU, first the APIC, which is probably redundant on
 	 * most boards.
@@ -213,17 +211,6 @@ static void ap_calibrate_delay(void)
 	cpu_data(smp_processor_id()).loops_per_jiffy = loops_per_jiffy;
 }
 
-static void wait_for_master_cpu(int cpu)
-{
-	/*
-	 * Wait for release by control CPU before continuing with AP
-	 * initialization.
-	 */
-	WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask));
-	while (!cpumask_test_cpu(cpu, cpu_callout_mask))
-		cpu_relax();
-}
-
 /*
  * Activate a secondary processor.
  */
@@ -244,11 +231,11 @@ static void notrace start_secondary(void *unused)
 	cpu_init_exception_handling();
 
 	/*
-	 * Sync point with wait_cpu_initialized(). Sets AP in
-	 * cpu_initialized_mask and then waits for the control CPU
-	 * to release it.
+	 * Synchronization point with the hotplug core. Sets the
+	 * synchronization state to ALIVE and waits for the control CPU to
+	 * release this CPU for further bringup.
 	 */
-	wait_for_master_cpu(raw_smp_processor_id());
+	cpuhp_ap_sync_alive();
 
 	cpu_init();
 	rcu_cpu_starting(raw_smp_processor_id());
@@ -278,7 +265,6 @@ static void notrace start_secondary(void *unused)
 	set_cpu_online(smp_processor_id(), true);
 	lapic_online();
 	unlock_vector_lock();
-	cpu_set_state_online(smp_processor_id());
 	x86_platform.nmi_init();
 
 	/* enable local interrupts */
@@ -729,9 +715,9 @@ static void impress_friends(void)
 	 * Allow the user to impress friends.
 	 */
 	pr_debug("Before bogomips\n");
-	for_each_possible_cpu(cpu)
-		if (cpumask_test_cpu(cpu, cpu_callout_mask))
-			bogosum += cpu_data(cpu).loops_per_jiffy;
+	for_each_online_cpu(cpu)
+		bogosum += cpu_data(cpu).loops_per_jiffy;
+
 	pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n",
 		num_online_cpus(),
 		bogosum/(500000/HZ),
@@ -1003,6 +989,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
 static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 {
 	unsigned long start_ip = real_mode_header->trampoline_start;
+	int ret;
 
 #ifdef CONFIG_X86_64
 	/* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */
@@ -1043,13 +1030,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 		}
 	}
 
-	/*
-	 * AP might wait on cpu_callout_mask in cpu_init() with
-	 * cpu_initialized_mask set if previous attempt to online
-	 * it timed-out. Clear cpu_initialized_mask so that after
-	 * INIT/SIPI it could start with a clean state.
-	 */
-	cpumask_clear_cpu(cpu, cpu_initialized_mask);
 	smp_mb();
 
 	/*
@@ -1060,47 +1040,16 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 	 * - Use an INIT boot APIC message
 	 */
 	if (apic->wakeup_secondary_cpu_64)
-		return apic->wakeup_secondary_cpu_64(apicid, start_ip);
+		ret = apic->wakeup_secondary_cpu_64(apicid, start_ip);
 	else if (apic->wakeup_secondary_cpu)
-		return apic->wakeup_secondary_cpu(apicid, start_ip);
-
-	return wakeup_secondary_cpu_via_init(apicid, start_ip);
-}
-
-static int wait_cpu_cpumask(unsigned int cpu, const struct cpumask *mask)
-{
-	unsigned long timeout;
-
-	/*
-	 * Wait up to 10s for the CPU to report in.
-	 */
-	timeout = jiffies + 10*HZ;
-	while (time_before(jiffies, timeout)) {
-		if (cpumask_test_cpu(cpu, mask))
-			return 0;
-
-		schedule();
-	}
-	return -1;
-}
-
-/*
- * Bringup step two: Wait for the target AP to reach cpu_init_secondary()
- * and thus wait_for_master_cpu(), then set cpu_callout_mask to allow it
- * to proceed. The AP will then proceed past setting its 'callin' bit
- * and end up waiting in check_tsc_sync_target() until we reach
- * wait_cpu_online() to tend to it.
- */
-static int wait_cpu_initialized(unsigned int cpu)
-{
-	/*
-	 * Wait for first sign of life from AP.
-	 */
-	if (wait_cpu_cpumask(cpu, cpu_initialized_mask))
-		return -1;
+		ret = apic->wakeup_secondary_cpu(apicid, start_ip);
+	else
+		ret = wakeup_secondary_cpu_via_init(apicid, start_ip);
 
-	cpumask_set_cpu(cpu, cpu_callout_mask);
-	return 0;
+	/* If the wakeup mechanism failed, cleanup the warm reset vector */
+	if (ret)
+		arch_cpuhp_cleanup_kick_cpu(cpu);
+	return ret;
 }
 
 static int native_kick_ap(unsigned int cpu, struct task_struct *tidle)
@@ -1125,11 +1074,6 @@ static int native_kick_ap(unsigned int cpu, struct task_struct *tidle)
 	 */
 	mtrr_save_state();
 
-	/* x86 CPUs take themselves offline, so delayed offline is OK. */
-	err = cpu_check_up_prepare(cpu);
-	if (err && err != -EBUSY)
-		return err;
-
 	/* the FPU context is blank, nobody can own it */
 	per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
 
@@ -1146,17 +1090,29 @@ static int native_kick_ap(unsigned int cpu, struct task_struct *tidle)
 
 int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
-	int ret;
-
-	ret = native_kick_ap(cpu, tidle);
-	if (!ret)
-		ret = wait_cpu_initialized(cpu);
+	return native_kick_ap(cpu, tidle);
+}
 
+void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu)
+{
 	/* Cleanup possible dangling ends... */
-	if (x86_platform.legacy.warm_reset)
+	if (smp_ops.cpu_up == native_cpu_up && x86_platform.legacy.warm_reset)
 		smpboot_restore_warm_reset_vector();
+}
 
-	return ret;
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
+{
+	if (smp_ops.cleanup_dead_cpu)
+		smp_ops.cleanup_dead_cpu(cpu);
+
+	if (system_state == SYSTEM_RUNNING)
+		pr_info("CPU %u is now offline\n", cpu);
+}
+
+void arch_cpuhp_sync_state_poll(void)
+{
+	if (smp_ops.poll_sync_state)
+		smp_ops.poll_sync_state();
 }
 
 /**
@@ -1348,9 +1304,6 @@ void __init native_smp_prepare_boot_cpu(void)
 	if (!IS_ENABLED(CONFIG_SMP))
 		switch_gdt_and_percpu_base(me);
 
-	/* already set me in cpu_online_mask in boot_cpu_init() */
-	cpumask_set_cpu(me, cpu_callout_mask);
-	cpu_set_state_online(me);
 	native_pv_lock_init();
 }
 
@@ -1477,8 +1430,6 @@ __init void prefill_possible_map(void)
 /* correctly size the local cpu masks */
 void __init setup_cpu_local_masks(void)
 {
-	alloc_bootmem_cpumask_var(&cpu_initialized_mask);
-	alloc_bootmem_cpumask_var(&cpu_callout_mask);
 	alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
 }
 
@@ -1540,9 +1491,6 @@ static void remove_siblinginfo(int cpu)
 static void remove_cpu_from_maps(int cpu)
 {
 	set_cpu_online(cpu, false);
-	cpumask_clear_cpu(cpu, cpu_callout_mask);
-	/* was set by cpu_init() */
-	cpumask_clear_cpu(cpu, cpu_initialized_mask);
 	numa_remove_cpu(cpu);
 }
 
@@ -1593,36 +1541,11 @@ int native_cpu_disable(void)
 	return 0;
 }
 
-int common_cpu_die(unsigned int cpu)
-{
-	int ret = 0;
-
-	/* We don't do anything here: idle task is faking death itself. */
-
-	/* They ack this in play_dead() by setting CPU_DEAD */
-	if (cpu_wait_death(cpu, 5)) {
-		if (system_state == SYSTEM_RUNNING)
-			pr_info("CPU %u is now offline\n", cpu);
-	} else {
-		pr_err("CPU %u didn't die...\n", cpu);
-		ret = -1;
-	}
-
-	return ret;
-}
-
-void native_cpu_die(unsigned int cpu)
-{
-	common_cpu_die(cpu);
-}
-
 void play_dead_common(void)
 {
 	idle_task_exit();
 
-	/* Ack it */
-	(void)cpu_report_death();
-
+	cpuhp_ap_report_dead();
 	/*
 	 * With physical CPU hotplug, we should halt the cpu
 	 */
@@ -1724,12 +1647,6 @@ int native_cpu_disable(void)
 	return -ENOSYS;
 }
 
-void native_cpu_die(unsigned int cpu)
-{
-	/* We said "no" in __cpu_disable */
-	BUG();
-}
-
 void native_play_dead(void)
 {
 	BUG();
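The net effect of the smpboot.c changes: native_cpu_up() shrinks to the kick, the 10-second wait_cpu_initialized() poll loop moves into the hotplug core, and the death acknowledgment becomes cpuhp_ap_report_dead(). A schematic of the resulting split of responsibilities (illustrative only; the control-side waits live in the hotplug core, kernel/cpu.c):

/*
 * Bringup:
 *
 *   control CPU                           AP
 *   -----------                           --
 *   native_cpu_up()
 *     native_kick_ap()     INIT/SIPI -->  start_secondary()
 *   [core waits for ALIVE]                cpuhp_ap_sync_alive()
 *   [core releases the AP]                cpu_init(), set_cpu_online()
 *
 * Teardown:
 *
 *   [core waits for DEAD]                 play_dead_common()
 *   arch_cpuhp_cleanup_dead_cpu()           cpuhp_ap_report_dead()
 */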

arch/x86/xen/smp_hvm.c

Lines changed: 7 additions & 9 deletions
@@ -55,18 +55,16 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static void xen_hvm_cpu_die(unsigned int cpu)
+static void xen_hvm_cleanup_dead_cpu(unsigned int cpu)
 {
-	if (common_cpu_die(cpu) == 0) {
-		if (xen_have_vector_callback) {
-			xen_smp_intr_free(cpu);
-			xen_uninit_lock_cpu(cpu);
-			xen_teardown_timer(cpu);
-		}
+	if (xen_have_vector_callback) {
+		xen_smp_intr_free(cpu);
+		xen_uninit_lock_cpu(cpu);
+		xen_teardown_timer(cpu);
 	}
 }
 #else
-static void xen_hvm_cpu_die(unsigned int cpu)
+static void xen_hvm_cleanup_dead_cpu(unsigned int cpu)
 {
 	BUG();
 }
@@ -77,7 +75,7 @@ void __init xen_hvm_smp_init(void)
 	smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu;
 	smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
 	smp_ops.smp_cpus_done = xen_smp_cpus_done;
-	smp_ops.cpu_die = xen_hvm_cpu_die;
+	smp_ops.cleanup_dead_cpu = xen_hvm_cleanup_dead_cpu;
 
 	if (!xen_have_vector_callback) {
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
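The Xen HVM callback loses its common_cpu_die() call because waiting for the vCPU to die is now the hotplug core's job; what remains is purely the Xen-side resource teardown, so it is re-registered under the new cleanup_dead_cpu slot.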
