@@ -193,6 +193,10 @@ static void smp_callin(void)
 
 	wmb();
 
+	/*
+	 * This runs the AP through all the cpuhp states to its target
+	 * state CPUHP_ONLINE.
+	 */
 	notify_cpu_starting(cpuid);
 
 	/*
@@ -233,12 +237,28 @@ static void notrace start_secondary(void *unused)
 	load_cr3(swapper_pg_dir);
 	__flush_tlb_all();
 #endif
+	/*
+	 * Sync point with wait_cpu_initialized(). Before proceeding through
+	 * cpu_init(), the AP will call wait_for_master_cpu() which sets its
+	 * own bit in cpu_initialized_mask and then waits for the BSP to set
+	 * its bit in cpu_callout_mask to release it.
+	 */
 	cpu_init_secondary();
 	rcu_cpu_starting(raw_smp_processor_id());
 	x86_cpuinit.early_percpu_clock_init();
+
+	/*
+	 * Sync point with wait_cpu_callin(). The AP doesn't wait here
+	 * but just sets the bit to let the controlling CPU (BSP) know that
+	 * it's got this far.
+	 */
 	smp_callin();
 
-	/* Check TSC synchronization with the control CPU: */
+	/*
+	 * Check TSC synchronization with the control CPU, which will do
+	 * its part of this from wait_cpu_online(), making it an implicit
+	 * synchronization point.
+	 */
 	check_tsc_sync_target();
 
 	/*
@@ -257,6 +277,7 @@ static void notrace start_secondary(void *unused)
 	 * half valid vector space.
 	 */
 	lock_vector_lock();
+	/* Sync point with do_wait_cpu_online() */
 	set_cpu_online(smp_processor_id(), true);
 	lapic_online();
 	unlock_vector_lock();
@@ -979,17 +1000,13 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
  * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
- * Returns zero if CPU booted OK, else error code from
+ * Returns zero if startup was successfully sent, else error code from
  * ->wakeup_secondary_cpu.
  */
 static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 {
-	/* start_ip had better be page-aligned! */
 	unsigned long start_ip = real_mode_header->trampoline_start;
 
-	unsigned long boot_error = 0;
-	unsigned long timeout;
-
 #ifdef CONFIG_X86_64
 	/* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */
 	if (apic->wakeup_secondary_cpu_64)
@@ -1046,60 +1063,89 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 	 * - Use an INIT boot APIC message
 	 */
 	if (apic->wakeup_secondary_cpu_64)
-		boot_error = apic->wakeup_secondary_cpu_64(apicid, start_ip);
+		return apic->wakeup_secondary_cpu_64(apicid, start_ip);
 	else if (apic->wakeup_secondary_cpu)
-		boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
-	else
-		boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
+		return apic->wakeup_secondary_cpu(apicid, start_ip);
 
-	if (!boot_error) {
-		/*
-		 * Wait 10s total for first sign of life from AP
-		 */
-		boot_error = -1;
-		timeout = jiffies + 10 * HZ;
-		while (time_before(jiffies, timeout)) {
-			if (cpumask_test_cpu(cpu, cpu_initialized_mask)) {
-				/*
-				 * Tell AP to proceed with initialization
-				 */
-				cpumask_set_cpu(cpu, cpu_callout_mask);
-				boot_error = 0;
-				break;
-			}
-			schedule();
-		}
-	}
+	return wakeup_secondary_cpu_via_init(apicid, start_ip);
+}
 
-	if (!boot_error) {
-		/*
-		 * Wait till AP completes initial initialization
-		 */
-		while (!cpumask_test_cpu(cpu, cpu_callin_mask)) {
-			/*
-			 * Allow other tasks to run while we wait for the
-			 * AP to come online. This also gives a chance
-			 * for the MTRR work(triggered by the AP coming online)
-			 * to be completed in the stop machine context.
-			 */
-			schedule();
-		}
-	}
+static int wait_cpu_cpumask(unsigned int cpu, const struct cpumask *mask)
+{
+	unsigned long timeout;
 
-	if (x86_platform.legacy.warm_reset) {
-		/*
-		 * Cleanup possible dangling ends...
-		 */
-		smpboot_restore_warm_reset_vector();
+	/*
+	 * Wait up to 10s for the CPU to report in.
+	 */
+	timeout = jiffies + 10 * HZ;
+	while (time_before(jiffies, timeout)) {
+		if (cpumask_test_cpu(cpu, mask))
+			return 0;
+
+		schedule();
 	}
+	return -1;
+}
 
-	return boot_error;
+/*
+ * Bringup step two: Wait for the target AP to reach cpu_init_secondary()
+ * and thus wait_for_master_cpu(), then set cpu_callout_mask to allow it
+ * to proceed. The AP will then proceed past setting its 'callin' bit
+ * and end up waiting in check_tsc_sync_target() until we reach
+ * do_wait_cpu_online() to tend to it.
+ */
+static int wait_cpu_initialized(unsigned int cpu)
+{
+	/*
+	 * Wait for first sign of life from AP.
+	 */
+	if (wait_cpu_cpumask(cpu, cpu_initialized_mask))
+		return -1;
+
+	cpumask_set_cpu(cpu, cpu_callout_mask);
+	return 0;
 }
 
-int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
+/*
+ * Bringup step three: Wait for the target AP to reach smp_callin().
+ * The AP is not waiting for us here so we don't need to parallelise
+ * this step. Not entirely clear why we care about this, since we just
+ * proceed directly to TSC synchronization which is the next sync
+ * point with the AP anyway.
+ */
+static void wait_cpu_callin(unsigned int cpu)
+{
+	while (!cpumask_test_cpu(cpu, cpu_callin_mask))
+		schedule();
+}
+
+/*
+ * Bringup step four: Synchronize the TSC and wait for the target AP
+ * to reach set_cpu_online() in start_secondary().
+ */
+static void wait_cpu_online(unsigned int cpu)
 {
-	int apicid = apic->cpu_present_to_apicid(cpu);
 	unsigned long flags;
+
+	/*
+	 * Check TSC synchronization with the AP (keep irqs disabled
+	 * while doing so):
+	 */
+	local_irq_save(flags);
+	check_tsc_sync_source(cpu);
+	local_irq_restore(flags);
+
+	/*
+	 * Wait for the AP to mark itself online, so the core caller
+	 * can drop sparse_irq_lock.
+	 */
+	while (!cpu_online(cpu))
+		schedule();
+}
+
+static int native_kick_ap(unsigned int cpu, struct task_struct *tidle)
+{
+	int apicid = apic->cpu_present_to_apicid(cpu);
 	int err;
 
 	lockdep_assert_irqs_enabled();
@@ -1140,25 +1186,33 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 		return err;
 
 	err = do_boot_cpu(apicid, cpu, tidle);
-	if (err) {
+	if (err)
 		pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
-		return err;
-	}
 
-	/*
-	 * Check TSC synchronization with the AP (keep irqs disabled
-	 * while doing so):
-	 */
-	local_irq_save(flags);
-	check_tsc_sync_source(cpu);
-	local_irq_restore(flags);
+	return err;
+}
 
-	while (!cpu_online(cpu)) {
-		cpu_relax();
-		touch_nmi_watchdog();
-	}
+int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
+{
+	int ret;
 
-	return 0;
+	ret = native_kick_ap(cpu, tidle);
+	if (ret)
+		goto out;
+
+	ret = wait_cpu_initialized(cpu);
+	if (ret)
+		goto out;
+
+	wait_cpu_callin(cpu);
+	wait_cpu_online(cpu);
+
+out:
+	/* Cleanup possible dangling ends... */
+	if (x86_platform.legacy.warm_reset)
+		smpboot_restore_warm_reset_vector();
+
+	return ret;
 }
 
 /**
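
For orientation, the BSP/AP handshake that the new comments describe can be modelled in ordinary userspace C. The sketch below is purely illustrative: the thread, flag and function names are made up, and C11 atomics plus sched_yield() stand in for the kernel's cpumask bits and schedule(). It only mirrors the ordering of the sync points (initialized, then callout, then callin, then online), not the real bringup code.

/*
 * Illustrative model of the bringup handshake, not kernel code.
 * Build with: cc -pthread handshake.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <sched.h>

static atomic_int initialized, callout, callin, online;

/* "AP" side: mirrors the ordering in start_secondary()/smp_callin(). */
static void *ap_thread(void *arg)
{
	/* wait_for_master_cpu(): announce ourselves, wait to be released. */
	atomic_store(&initialized, 1);
	while (!atomic_load(&callout))
		sched_yield();

	/* smp_callin(): tell the controlling CPU we got this far, no wait. */
	atomic_store(&callin, 1);

	/* ... TSC sync and the rest of bringup would happen here ... */

	/* set_cpu_online(): the final point the controlling CPU waits for. */
	atomic_store(&online, 1);
	return NULL;
}

/* "BSP" side: mirrors wait_cpu_initialized()/wait_cpu_callin()/wait_cpu_online(). */
int main(void)
{
	pthread_t ap;

	pthread_create(&ap, NULL, ap_thread, NULL);

	while (!atomic_load(&initialized))	/* wait_cpu_initialized() */
		sched_yield();
	atomic_store(&callout, 1);		/* release the AP */

	while (!atomic_load(&callin))		/* wait_cpu_callin() */
		sched_yield();

	while (!atomic_load(&online))		/* wait_cpu_online() */
		sched_yield();

	pthread_join(ap, NULL);
	printf("AP online\n");
	return 0;
}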