@@ -1732,15 +1732,26 @@ void migrate_enable(void)
 {
 	struct task_struct *p = current;
 
-	if (--p->migration_disabled)
+	if (p->migration_disabled > 1) {
+		p->migration_disabled--;
 		return;
+	}
 
+	/*
+	 * Ensure stop_task runs either before or after this, and that
+	 * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
+	 */
+	preempt_disable();
+	if (p->cpus_ptr != &p->cpus_mask)
+		__set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE);
+	/*
+	 * Mustn't clear migration_disabled() until cpus_ptr points back at the
+	 * regular cpus_mask, otherwise things that race (eg.
+	 * select_fallback_rq) get confused.
+	 */
 	barrier();
-
-	if (p->cpus_ptr == &p->cpus_mask)
-		return;
-
-	__set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE);
+	p->migration_disabled = 0;
+	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(migrate_enable);
 
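The rewritten migrate_enable() only does real work at the outermost nesting level: inner levels just decrement the counter, while the outermost level first restores cpus_ptr to the full cpus_mask and only then clears migration_disabled, all under preempt_disable() so the stopper cannot run between those two steps. Racing observers such as select_fallback_rq() must never see migration_disabled == 0 while cpus_ptr still points at the temporary mask. A minimal userspace sketch of that ordering discipline (invented names, no real preemption or concurrency involved) might look like this:

#include <stdio.h>

/* Hypothetical per-task state mirroring the kernel fields used above. */
struct fake_task {
	int		migration_disabled;	/* nesting depth */
	const char	*cpus_ptr;		/* currently effective mask */
	const char	*cpus_mask;		/* the real, full mask */
};

static void fake_migrate_enable(struct fake_task *p)
{
	if (p->migration_disabled > 1) {	/* inner level: just unwind */
		p->migration_disabled--;
		return;
	}

	/* Outermost level: restore the full mask first... */
	if (p->cpus_ptr != p->cpus_mask)
		p->cpus_ptr = p->cpus_mask;

	/* ...and only then clear the flag, so nobody sees "enabled" with a stale mask. */
	p->migration_disabled = 0;
}

int main(void)
{
	struct fake_task t = {
		.migration_disabled = 2,
		.cpus_ptr = "cpu3 only",
		.cpus_mask = "all cpus",
	};

	fake_migrate_enable(&t);	/* depth 2 -> 1, nothing else happens */
	fake_migrate_enable(&t);	/* depth 1 -> 0, mask restored first */
	printf("disabled=%d, effective mask: %s\n", t.migration_disabled, t.cpus_ptr);
	return 0;
}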
@@ -1805,8 +1816,16 @@ static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf,
 }
 
 struct migration_arg {
-	struct task_struct *task;
-	int dest_cpu;
+	struct task_struct		*task;
+	int				dest_cpu;
+	struct set_affinity_pending	*pending;
+};
+
+struct set_affinity_pending {
+	refcount_t		refs;
+	struct completion	done;
+	struct cpu_stop_work	stop_work;
+	struct migration_arg	arg;
 };
 
 /*
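struct set_affinity_pending lives on the stack of the set_cpus_allowed_ptr() caller, so two mechanisms guard its lifetime: a completion signalling that the affinity change took effect, and a refcount counting every context that still holds a pointer into that stack frame (the waiter itself plus any queued stopper work via pending->arg and pending->stop_work). The frame may only be unwound once the refcount drops to zero. A rough userspace analogue of that pattern, using pthreads in place of the kernel's completion and wait_var_event() machinery (all names below are invented for illustration; build with cc -pthread), could be:

#include <pthread.h>
#include <stdio.h>

/* Userspace stand-in for set_affinity_pending: completion + refcount. */
struct pending {
	pthread_mutex_t	lock;
	pthread_cond_t	cond;
	int		done;	/* ~ struct completion */
	int		refs;	/* ~ refcount_t: contexts pointing into this frame */
};

static void pending_put(struct pending *p)
{
	pthread_mutex_lock(&p->lock);
	if (--p->refs == 0)
		pthread_cond_broadcast(&p->cond);	/* ~ wake_up_var() */
	pthread_mutex_unlock(&p->lock);
}

static void pending_complete(struct pending *p)
{
	pthread_mutex_lock(&p->lock);
	p->done = 1;					/* ~ complete_all() */
	pthread_cond_broadcast(&p->cond);
	pthread_mutex_unlock(&p->lock);
}

static void *worker(void *arg)	/* plays the role of the stopper */
{
	struct pending *p = arg;

	pending_complete(p);	/* the "affinity change" is done */
	pending_put(p);		/* drop the reference handed to us */
	return NULL;
}

int main(void)
{
	/* Stack-allocated, like my_pending in affine_move_task(). */
	struct pending my_pending = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.cond = PTHREAD_COND_INITIALIZER,
		.done = 0,
		.refs = 2,	/* one for us, one for the worker */
	};
	pthread_t tid;

	pthread_create(&tid, NULL, worker, &my_pending);

	pthread_mutex_lock(&my_pending.lock);
	while (!my_pending.done)		/* ~ wait_for_completion() */
		pthread_cond_wait(&my_pending.cond, &my_pending.lock);
	pthread_mutex_unlock(&my_pending.lock);

	pending_put(&my_pending);		/* drop our own reference */

	pthread_mutex_lock(&my_pending.lock);
	while (my_pending.refs)			/* ~ wait_var_event(refs == 0) */
		pthread_cond_wait(&my_pending.cond, &my_pending.lock);
	pthread_mutex_unlock(&my_pending.lock);

	pthread_join(tid, NULL);
	puts("safe to let my_pending go out of scope");
	return 0;
}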
@@ -1838,16 +1857,19 @@ static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
  */
 static int migration_cpu_stop(void *data)
 {
+	struct set_affinity_pending *pending;
 	struct migration_arg *arg = data;
 	struct task_struct *p = arg->task;
+	int dest_cpu = arg->dest_cpu;
 	struct rq *rq = this_rq();
+	bool complete = false;
 	struct rq_flags rf;
 
 	/*
 	 * The original target CPU might have gone down and we might
 	 * be on another CPU but it doesn't matter.
 	 */
-	local_irq_disable();
+	local_irq_save(rf.flags);
 	/*
 	 * We need to explicitly wake pending tasks before running
 	 * __migrate_task() such that we will not miss enforcing cpus_ptr
@@ -1857,21 +1879,83 @@ static int migration_cpu_stop(void *data)
 
 	raw_spin_lock(&p->pi_lock);
 	rq_lock(rq, &rf);
+
+	pending = p->migration_pending;
 	/*
 	 * If task_rq(p) != rq, it cannot be migrated here, because we're
 	 * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
 	 * we're holding p->pi_lock.
 	 */
 	if (task_rq(p) == rq) {
+		if (is_migration_disabled(p))
+			goto out;
+
+		if (pending) {
+			p->migration_pending = NULL;
+			complete = true;
+		}
+
+		/* migrate_enable() -- we must not race against SCA */
+		if (dest_cpu < 0) {
+			/*
+			 * When this was migrate_enable() but we no longer
+			 * have a @pending, a concurrent SCA 'fixed' things
+			 * and we should be valid again. Nothing to do.
+			 */
+			if (!pending) {
+				WARN_ON_ONCE(!is_cpu_allowed(p, cpu_of(rq)));
+				goto out;
+			}
+
+			dest_cpu = cpumask_any_distribute(&p->cpus_mask);
+		}
+
 		if (task_on_rq_queued(p))
-			rq = __migrate_task(rq, &rf, p, arg->dest_cpu);
+			rq = __migrate_task(rq, &rf, p, dest_cpu);
 		else
-			p->wake_cpu = arg->dest_cpu;
+			p->wake_cpu = dest_cpu;
+
+	} else if (dest_cpu < 0) {
+		/*
+		 * This happens when we get migrated between migrate_enable()'s
+		 * preempt_enable() and scheduling the stopper task. At that
+		 * point we're a regular task again and not current anymore.
+		 *
+		 * A !PREEMPT kernel has a giant hole here, which makes it far
+		 * more likely.
+		 */
+
+		/*
+		 * When this was migrate_enable() but we no longer have an
+		 * @pending, a concurrent SCA 'fixed' things and we should be
+		 * valid again. Nothing to do.
+		 */
+		if (!pending) {
+			WARN_ON_ONCE(!is_cpu_allowed(p, cpu_of(rq)));
+			goto out;
+		}
+
+		/*
+		 * When migrate_enable() hits a rq mis-match we can't reliably
+		 * determine is_migration_disabled() and so have to chase after
+		 * it.
+		 */
+		task_rq_unlock(rq, p, &rf);
+		stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop,
+				    &pending->arg, &pending->stop_work);
+		return 0;
 	}
-	rq_unlock(rq, &rf);
-	raw_spin_unlock(&p->pi_lock);
+out:
+	task_rq_unlock(rq, p, &rf);
+
+	if (complete)
+		complete_all(&pending->done);
+
+	/* For pending->{arg,stop_work} */
+	pending = arg->pending;
+	if (pending && refcount_dec_and_test(&pending->refs))
+		wake_up_var(&pending->refs);
 
-	local_irq_enable();
 	return 0;
 }
 
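In migration_cpu_stop(), dest_cpu < 0 now marks the migrate_enable() case: there is no specific target CPU, any CPU left in p->cpus_mask will do, and cpumask_any_distribute() spreads successive picks across the mask instead of always returning the lowest set bit. If the task has meanwhile moved to a different runqueue, the work is re-posted with stop_one_cpu_nowait() to chase it, reusing the preallocated pending->stop_work so the chase allocates nothing. A tiny, purely illustrative userspace sketch of the "pick any allowed CPU, with distribution" idea over a 64-bit mask (not the kernel's implementation, which keeps its cursor in a per-CPU variable) might be:

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative only: pick a set bit from @mask, starting the search just
 * past the previously returned bit so repeated picks rotate through the
 * allowed CPUs instead of always hitting the lowest one.
 */
static int pick_any_distribute(uint64_t mask)
{
	static int prev = -1;
	int i;

	if (!mask)
		return -1;

	for (i = 1; i <= 64; i++) {
		int cpu = (prev + i) % 64;

		if (mask & (1ULL << cpu)) {
			prev = cpu;
			return cpu;
		}
	}
	return -1;
}

int main(void)
{
	uint64_t allowed = (1ULL << 1) | (1ULL << 4) | (1ULL << 9);

	for (int i = 0; i < 5; i++)
		printf("picked cpu %d\n", pick_any_distribute(allowed));
	return 0;
}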
@@ -1940,6 +2024,112 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 	__do_set_cpus_allowed(p, new_mask, 0);
 }
 
+/*
+ * This function is wildly self concurrent, consider at least 3 times.
+ */
+static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
+			    int dest_cpu, unsigned int flags)
+{
+	struct set_affinity_pending my_pending = { }, *pending = NULL;
+	struct migration_arg arg = {
+		.task = p,
+		.dest_cpu = dest_cpu,
+	};
+	bool complete = false;
+
+	/* Can the task run on the task's current CPU? If so, we're done */
+	if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
+		pending = p->migration_pending;
+		if (pending) {
+			refcount_inc(&pending->refs);
+			p->migration_pending = NULL;
+			complete = true;
+		}
+		task_rq_unlock(rq, p, rf);
+
+		if (complete)
+			goto do_complete;
+
+		return 0;
+	}
+
+	if (!(flags & SCA_MIGRATE_ENABLE)) {
+		/* serialized by p->pi_lock */
+		if (!p->migration_pending) {
+			refcount_set(&my_pending.refs, 1);
+			init_completion(&my_pending.done);
+			p->migration_pending = &my_pending;
+		} else {
+			pending = p->migration_pending;
+			refcount_inc(&pending->refs);
+		}
+	}
+	pending = p->migration_pending;
+	/*
+	 * - !MIGRATE_ENABLE:
+	 *   we'll have installed a pending if there wasn't one already.
+	 *
+	 * - MIGRATE_ENABLE:
+	 *   we're here because the current CPU isn't matching anymore,
+	 *   the only way that can happen is because of a concurrent
+	 *   set_cpus_allowed_ptr() call, which should then still be
+	 *   pending completion.
+	 *
+	 * Either way, we really should have a @pending here.
+	 */
+	if (WARN_ON_ONCE(!pending)) {
+		task_rq_unlock(rq, p, rf);
+		return -EINVAL;
+	}
+
+	if (flags & SCA_MIGRATE_ENABLE) {
+
+		refcount_inc(&pending->refs); /* pending->{arg,stop_work} */
+		task_rq_unlock(rq, p, rf);
+
+		pending->arg = (struct migration_arg) {
+			.task = p,
+			.dest_cpu = -1,
+			.pending = pending,
+		};
+
+		stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
+				    &pending->arg, &pending->stop_work);
+
+		return 0;
+	}
+
+	if (task_running(rq, p) || p->state == TASK_WAKING) {
+
+		task_rq_unlock(rq, p, rf);
+		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
+
+	} else {
+
+		if (!is_migration_disabled(p)) {
+			if (task_on_rq_queued(p))
+				rq = move_queued_task(rq, rf, p, dest_cpu);
+
+			p->migration_pending = NULL;
+			complete = true;
+		}
+		task_rq_unlock(rq, p, rf);
+
+do_complete:
+		if (complete)
+			complete_all(&pending->done);
+	}
+
+	wait_for_completion(&pending->done);
+
+	if (refcount_dec_and_test(&pending->refs))
+		wake_up_var(&pending->refs);
+
+	wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs));
+
+	return 0;
+}
+
 /*
  * Change a given task's CPU affinity. Migrate the thread to a
  * proper CPU and schedule it away if the CPU it's executing on
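affine_move_task() resolves to one of three paths: the task already sits on an allowed CPU (complete any pending request and return), the call came from migrate_enable() via SCA_MIGRATE_ENABLE (queue the preallocated stopper work with stop_one_cpu_nowait() and return without waiting), or it is an ordinary set_cpus_allowed_ptr() (kick the stopper with stop_one_cpu(), or move the queued task directly, then block on pending->done). The difference between the two stopper paths is a synchronous versus a fire-and-forget handoff. A small userspace illustration of that split, with plain threads standing in for the stopper and invented names throughout, might be:

#include <pthread.h>
#include <stdio.h>

/* Preallocated work item, loosely like pending->stop_work: nothing is allocated at queue time. */
struct work {
	pthread_t	thread;
	void		(*fn)(void *);
	void		*arg;
};

static void *work_thread(void *w_)
{
	struct work *w = w_;

	w->fn(w->arg);
	return NULL;
}

/* ~ stop_one_cpu(): hand off and wait for the helper to finish. */
static void run_sync(struct work *w)
{
	pthread_create(&w->thread, NULL, work_thread, w);
	pthread_join(w->thread, NULL);
}

/* ~ stop_one_cpu_nowait(): hand off and return immediately. */
static void run_async(struct work *w)
{
	pthread_create(&w->thread, NULL, work_thread, w);
	pthread_detach(w->thread);
}

static void say(void *msg)
{
	puts(msg);
}

int main(void)
{
	static struct work a = { .fn = say, .arg = "sync: caller waited for me" };
	static struct work b = { .fn = say, .arg = "async: caller did not wait" };

	run_sync(&a);	/* like the ordinary set_cpus_allowed_ptr() path */
	run_async(&b);	/* like the SCA_MIGRATE_ENABLE path */

	/* Exit only the main thread so the detached helper can still print. */
	pthread_exit(NULL);
}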
@@ -2009,23 +2199,8 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 			  p->nr_cpus_allowed != 1);
 	}
 
-	/* Can the task run on the task's current CPU? If so, we're done */
-	if (cpumask_test_cpu(task_cpu(p), new_mask))
-		goto out;
+	return affine_move_task(rq, p, &rf, dest_cpu, flags);
 
-	if (task_running(rq, p) || p->state == TASK_WAKING) {
-		struct migration_arg arg = { p, dest_cpu };
-		/* Need help from migration thread: drop lock and wait. */
-		task_rq_unlock(rq, p, &rf);
-		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
-		return 0;
-	} else if (task_on_rq_queued(p)) {
-		/*
-		 * OK, since we're going to drop the lock immediately
-		 * afterwards anyway.
-		 */
-		rq = move_queued_task(rq, &rf, p, dest_cpu);
-	}
 out:
 	task_rq_unlock(rq, p, &rf);
 
@@ -3205,6 +3380,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	init_numa_balancing(clone_flags, p);
 #ifdef CONFIG_SMP
 	p->wake_entry.u_flags = CSD_TYPE_TTWU;
+	p->migration_pending = NULL;
 #endif
 }
 