
Commit e9d867a

Peter Zijlstra (Intel) authored and KAGA-KOKO committed
sched: Allow per-cpu kernel threads to run on online && !active
In order to enable symmetric hotplug, we must mirror the online &&
!active state of cpu-down on the cpu-up side. However, to retain
sanity, limit this state to per-cpu kthreads.

Aside from the change to set_cpus_allowed_ptr(), which allows moving
per-cpu kthreads on, the other critical piece is the cpu selection for
pinned tasks in select_task_rq(). This avoids dropping into
select_fallback_rq().

select_fallback_rq() cannot be allowed to select !active cpus, because
it is used to migrate user tasks away, and we do not want to move user
tasks onto cpus that are in transition.

Requested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Tested-by: Thomas Gleixner <[email protected]>
Cc: Lai Jiangshan <[email protected]>
Cc: Jan H. Schönherr <[email protected]>
Cc: Oleg Nesterov <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Thomas Gleixner <[email protected]>
1 parent 04974df commit e9d867a
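In condensed form, the rule the message above describes: affinity
requests from kernel threads are validated against the online mask,
everything else against the active mask. A minimal sketch of that rule
(the helper name is ours, for illustration only; the real check lives
inline in __set_cpus_allowed_ptr() in the diff below):

	/* Illustrative helper, not part of the patch: which cpumask an
	 * affinity request is validated against after this change. */
	static const struct cpumask *valid_mask_for(struct task_struct *p)
	{
		if (p->flags & PF_KTHREAD)
			return cpu_online_mask;	/* may include online && !active */
		return cpu_active_mask;		/* user tasks stay on active cpus */
	}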

File tree

4 files changed, +46 -13 lines changed


arch/powerpc/kernel/smp.c

Lines changed: 1 addition & 1 deletion
@@ -565,7 +565,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 		smp_ops->give_timebase();
 
 	/* Wait until cpu puts itself in the online & active maps */
-	while (!cpu_online(cpu) || !cpu_active(cpu))
+	while (!cpu_online(cpu))
 		cpu_relax();
 
 	return 0;

arch/s390/kernel/smp.c

Lines changed: 1 addition & 1 deletion
@@ -832,7 +832,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 	pcpu_attach_task(pcpu, tidle);
 	pcpu_start_fn(pcpu, smp_start_secondary, NULL);
 	/* Wait until cpu puts itself in the online & active maps */
-	while (!cpu_online(cpu) || !cpu_active(cpu))
+	while (!cpu_online(cpu))
 		cpu_relax();
 	return 0;
 }
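Both architecture hunks drop the cpu_active() test for the same reason:
with the cpumask.h change below, the starting CPU no longer marks
itself active. The active bit is raised by the scheduler's pre-existing
CPU_ONLINE notifier, which runs on the controlling CPU only after
__cpu_up() returns, so spinning on cpu_active() here could never make
progress. Roughly, paraphrased from kernel/sched/core.c of this era
(pre-existing code, not part of this patch; details may differ):

	static int sched_cpu_active(struct notifier_block *nfb,
				    unsigned long action, void *hcpu)
	{
		int cpu = (long)hcpu;

		switch (action & ~CPU_TASKS_FROZEN) {
		case CPU_ONLINE:
			/* The starting CPU marked itself online; mark it
			 * active from the controlling CPU. */
			set_cpu_active(cpu, true);
			return NOTIFY_OK;
		default:
			return NOTIFY_DONE;
		}
	}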

include/linux/cpumask.h

Lines changed: 2 additions & 4 deletions
@@ -743,12 +743,10 @@ set_cpu_present(unsigned int cpu, bool present)
 static inline void
 set_cpu_online(unsigned int cpu, bool online)
 {
-	if (online) {
+	if (online)
 		cpumask_set_cpu(cpu, &__cpu_online_mask);
-		cpumask_set_cpu(cpu, &__cpu_active_mask);
-	} else {
+	else
 		cpumask_clear_cpu(cpu, &__cpu_online_mask);
-	}
 }
 
 static inline void
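For reference, the sibling helper that now solely owns the active bit,
as it appears in this same header around this kernel version (quoted
from that era to the best of our knowledge, so treat it as a sketch):

	static inline void
	set_cpu_active(unsigned int cpu, bool active)
	{
		if (active)
			cpumask_set_cpu(cpu, &__cpu_active_mask);
		else
			cpumask_clear_cpu(cpu, &__cpu_active_mask);
	}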

kernel/sched/core.c

Lines changed: 42 additions & 7 deletions
@@ -1082,13 +1082,21 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 static int __set_cpus_allowed_ptr(struct task_struct *p,
 				  const struct cpumask *new_mask, bool check)
 {
+	const struct cpumask *cpu_valid_mask = cpu_active_mask;
+	unsigned int dest_cpu;
 	unsigned long flags;
 	struct rq *rq;
-	unsigned int dest_cpu;
 	int ret = 0;
 
 	rq = task_rq_lock(p, &flags);
 
+	if (p->flags & PF_KTHREAD) {
+		/*
+		 * Kernel threads are allowed on online && !active CPUs
+		 */
+		cpu_valid_mask = cpu_online_mask;
+	}
+
 	/*
 	 * Must re-check here, to close a race against __kthread_bind(),
 	 * sched_setaffinity() is not guaranteed to observe the flag.
@@ -1101,18 +1109,28 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 	if (cpumask_equal(&p->cpus_allowed, new_mask))
 		goto out;
 
-	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
+	if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
 	do_set_cpus_allowed(p, new_mask);
 
+	if (p->flags & PF_KTHREAD) {
+		/*
+		 * For kernel threads that do indeed end up on online &&
+		 * !active we want to ensure they are strict per-cpu threads.
+		 */
+		WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) &&
+			!cpumask_intersects(new_mask, cpu_active_mask) &&
+			p->nr_cpus_allowed != 1);
+	}
+
 	/* Can the task run on the task's current CPU? If so, we're done */
 	if (cpumask_test_cpu(task_cpu(p), new_mask))
 		goto out;
 
-	dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
+	dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
 	if (task_running(rq, p) || p->state == TASK_WAKING) {
 		struct migration_arg arg = { p, dest_cpu };
 		/* Need help from migration thread: drop lock and wait. */
@@ -1431,6 +1449,25 @@ EXPORT_SYMBOL_GPL(kick_process);
 
 /*
  * ->cpus_allowed is protected by both rq->lock and p->pi_lock
+ *
+ * A few notes on cpu_active vs cpu_online:
+ *
+ *  - cpu_active must be a subset of cpu_online
+ *
+ *  - on cpu-up we allow per-cpu kthreads on the online && !active cpu,
+ *    see __set_cpus_allowed_ptr(). At this point the newly online
+ *    cpu isn't yet part of the sched domains, and balancing will not
+ *    see it.
+ *
+ *  - on cpu-down we clear cpu_active() to mask the sched domains and
+ *    avoid the load balancer to place new tasks on the to be removed
+ *    cpu. Existing tasks will remain running there and will be taken
+ *    off.
+ *
+ * This means that fallback selection must not select !active CPUs.
+ * And can assume that any active CPU must be online. Conversely
+ * select_task_rq() below may allow selection of !active CPUs in order
+ * to satisfy the above rules.
  */
 static int select_fallback_rq(int cpu, struct task_struct *p)
 {
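Since cpu_active must be a subset of cpu_online, a single cpu_active()
test subsumes the online check, which is exactly why the two hunks
below can drop their cpu_online() tests. As a sketch (hypothetical
helper, not in the patch):

	/* A fallback candidate only needs the active test;
	 * active implies online by the invariant above. */
	static inline bool fallback_candidate(int cpu)
	{
		return cpu_active(cpu);
	}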
@@ -1449,8 +1486,6 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 
 	/* Look for allowed, online CPU in same node. */
 	for_each_cpu(dest_cpu, nodemask) {
-		if (!cpu_online(dest_cpu))
-			continue;
 		if (!cpu_active(dest_cpu))
 			continue;
 		if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
@@ -1461,8 +1496,6 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 	for (;;) {
 		/* Any allowed, online CPU? */
 		for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) {
-			if (!cpu_online(dest_cpu))
-				continue;
 			if (!cpu_active(dest_cpu))
 				continue;
 			goto out;
@@ -1514,6 +1547,8 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
 
 	if (p->nr_cpus_allowed > 1)
 		cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
+	else
+		cpu = cpumask_any(tsk_cpus_allowed(p));
 
 	/*
 	 * In order not to call set_task_cpu() on a blocking task we need
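Taken together, the changes let the classic per-cpu kthread pattern
work while its CPU is still online && !active; a hedged sketch of a
consumer (names and error handling are illustrative, not from this
patch):

	#include <linux/kthread.h>

	/* Illustrative only: pin a kthread to a cpu that is online but
	 * not yet active. With this patch, set_cpus_allowed_ptr() accepts
	 * the pin and select_task_rq() honours it on wakeup. */
	static struct task_struct *start_percpu_worker(int (*fn)(void *), int cpu)
	{
		struct task_struct *t = kthread_create(fn, NULL, "pcpu_worker/%d", cpu);

		if (IS_ERR(t))
			return t;
		kthread_bind(t, cpu);	/* nr_cpus_allowed == 1 */
		wake_up_process(t);	/* runs on 'cpu' even while !active */
		return t;
	}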
