@@ -416,6 +416,41 @@ static inline bool is_in_v2_mode(void)
416416 (cpuset_cgrp_subsys .root -> flags & CGRP_ROOT_CPUSET_V2_MODE );
417417}
418418
419+ /**
420+ * partition_is_populated - check if partition has tasks
421+ * @cs: partition root to be checked
422+ * @excluded_child: a child cpuset to be excluded in task checking
423+ * Return: true if there are tasks, false otherwise
424+ *
425+ * It is assumed that @cs is a valid partition root. @excluded_child should
426+ * be non-NULL when this cpuset is going to become a partition itself.
427+ */
428+ static inline bool partition_is_populated (struct cpuset * cs ,
429+ struct cpuset * excluded_child )
430+ {
431+ struct cgroup_subsys_state * css ;
432+ struct cpuset * child ;
433+
434+ if (cs -> css .cgroup -> nr_populated_csets )
435+ return true;
436+ if (!excluded_child && !cs -> nr_subparts_cpus )
437+ return cgroup_is_populated (cs -> css .cgroup );
438+
439+ rcu_read_lock ();
440+ cpuset_for_each_child (child , css , cs ) {
441+ if (child == excluded_child )
442+ continue ;
443+ if (is_partition_valid (child ))
444+ continue ;
445+ if (cgroup_is_populated (child -> css .cgroup )) {
446+ rcu_read_unlock ();
447+ return true;
448+ }
449+ }
450+ rcu_read_unlock ();
451+ return false;
452+ }
453+
419454/*
420455 * Return in pmask the portion of a task's cpusets's cpus_allowed that
421456 * are online and are capable of running the task. If none are found,
@@ -1257,22 +1292,27 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
12571292 return - EBUSY ;
12581293
12591294 /*
1260- * Enabling partition root is not allowed if not all the CPUs
1261- * can be granted from parent's effective_cpus or at least one
1262- * CPU will be left after that.
1263- */
1264- if ((cmd == partcmd_enable ) &&
1265- (!cpumask_subset (cs -> cpus_allowed , parent -> effective_cpus ) ||
1266- cpumask_equal (cs -> cpus_allowed , parent -> effective_cpus )))
1267- return - EINVAL ;
1268-
1269- /*
1270- * A cpumask update cannot make parent's effective_cpus become empty.
12711295 * new_prs will only be changed for the partcmd_update command.
12721296 */
12731297 adding = deleting = false;
12741298 old_prs = new_prs = cs -> partition_root_state ;
12751299 if (cmd == partcmd_enable ) {
1300+ /*
1301+ * Enabling partition root is not allowed if not all the CPUs
1302+ * can be granted from parent's effective_cpus.
1303+ */
1304+ if (!cpumask_subset (cs -> cpus_allowed , parent -> effective_cpus ))
1305+ return - EINVAL ;
1306+
1307+ /*
1308+ * A parent can be left with no CPU as long as there is no
1309+ * task directly associated with the parent partition. For
1310+ * such a parent, no new task can be moved into it.
1311+ */
1312+ if (cpumask_equal (cs -> cpus_allowed , parent -> effective_cpus ) &&
1313+ partition_is_populated (parent , cs ))
1314+ return - EINVAL ;
1315+
12761316 cpumask_copy (tmp -> addmask , cs -> cpus_allowed );
12771317 adding = true;
12781318 } else if (cmd == partcmd_disable ) {
@@ -1294,10 +1334,12 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
12941334 adding = cpumask_andnot (tmp -> addmask , tmp -> addmask ,
12951335 parent -> subparts_cpus );
12961336 /*
1297- * Return error if the new effective_cpus could become empty.
1337+ * Return error if the new effective_cpus could become empty
1338+ * and there are tasks in the parent.
12981339 */
12991340 if (adding &&
1300- cpumask_equal (parent -> effective_cpus , tmp -> addmask )) {
1341+ cpumask_equal (parent -> effective_cpus , tmp -> addmask ) &&
1342+ partition_is_populated (parent , cs )) {
13011343 if (!deleting )
13021344 return - EINVAL ;
13031345 /*
@@ -1322,8 +1364,8 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
13221364 */
13231365 adding = cpumask_and (tmp -> addmask , cs -> cpus_allowed ,
13241366 parent -> effective_cpus );
1325- part_error = cpumask_equal (tmp -> addmask ,
1326- parent -> effective_cpus );
1367+ part_error = cpumask_equal (tmp -> addmask , parent -> effective_cpus ) &&
1368+ partition_is_populated ( parent , cs );
13271369 }
13281370
13291371 if (cmd == partcmd_update ) {
@@ -1425,9 +1467,15 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
14251467
14261468 /*
14271469 * If it becomes empty, inherit the effective mask of the
1428- * parent, which is guaranteed to have some CPUs.
1470+ * parent, which is guaranteed to have some CPUs unless
1471+ * it is a partition root that has explicitly distributed
1472+ * out all its CPUs.
14291473 */
14301474 if (is_in_v2_mode () && cpumask_empty (tmp -> new_cpus )) {
1475+ if (is_partition_valid (cp ) &&
1476+ cpumask_equal (cp -> cpus_allowed , cp -> subparts_cpus ))
1477+ goto update_parent_subparts ;
1478+
14311479 cpumask_copy (tmp -> new_cpus , parent -> effective_cpus );
14321480 if (!cp -> use_parent_ecpus ) {
14331481 cp -> use_parent_ecpus = true;
@@ -1449,6 +1497,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
14491497 continue ;
14501498 }
14511499
1500+ update_parent_subparts :
14521501 /*
14531502 * update_parent_subparts_cpumask() should have been called
14541503 * for cs already in update_cpumask(). We should also call
@@ -2254,6 +2303,12 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
22542303 (cpumask_empty (cs -> cpus_allowed ) || nodes_empty (cs -> mems_allowed )))
22552304 goto out_unlock ;
22562305
2306+ /*
2307+ * Task cannot be moved to a cpuset with empty effective cpus.
2308+ */
2309+ if (cpumask_empty (cs -> effective_cpus ))
2310+ goto out_unlock ;
2311+
22572312 cgroup_taskset_for_each (task , css , tset ) {
22582313 ret = task_can_attach (task , cs -> effective_cpus );
22592314 if (ret )
@@ -3119,7 +3174,8 @@ hotplug_update_tasks(struct cpuset *cs,
31193174 struct cpumask * new_cpus , nodemask_t * new_mems ,
31203175 bool cpus_updated , bool mems_updated )
31213176{
3122- if (cpumask_empty (new_cpus ))
3177+ /* A partition root is allowed to have empty effective cpus */
3178+ if (cpumask_empty (new_cpus ) && !is_partition_valid (cs ))
31233179 cpumask_copy (new_cpus , parent_cs (cs )-> effective_cpus );
31243180 if (nodes_empty (* new_mems ))
31253181 * new_mems = parent_cs (cs )-> effective_mems ;
@@ -3188,10 +3244,11 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp)
31883244
31893245 /*
31903246 * In the unlikely event that a partition root has empty
3191- * effective_cpus or its parent becomes invalid, we have to
3192- * transition it to the invalid state.
3247+ * effective_cpus with tasks or its parent becomes invalid, we
3248+ * have to transition it to the invalid state.
31933249 */
3194- if (is_partition_valid (cs ) && (cpumask_empty (& new_cpus ) ||
3250+ if (is_partition_valid (cs ) &&
3251+ ((cpumask_empty (& new_cpus ) && partition_is_populated (cs , NULL )) ||
31953252 is_partition_invalid (parent ))) {
31963253 if (cs -> nr_subparts_cpus ) {
31973254 spin_lock_irq (& callback_lock );
@@ -3202,13 +3259,15 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp)
32023259 }
32033260
32043261 /*
3205- * If the effective_cpus is empty because the child
3206- * partitions take away all the CPUs, we can keep
3207- * the current partition and let the child partitions
3208- * fight for available CPUs.
3262+ * Force the partition to become invalid if either one of
3263+ * the following conditions holds:
3264+ * 1) empty effective cpus but not valid empty partition.
3265+ * 2) parent is invalid or doesn't grant any cpus to child
3266+ * partitions.
32093267 */
32103268 if (is_partition_invalid (parent ) ||
3211- cpumask_empty (& new_cpus )) {
3269+ (cpumask_empty (& new_cpus ) &&
3270+ partition_is_populated (cs , NULL ))) {
32123271 int old_prs ;
32133272
32143273 update_parent_subparts_cpumask (cs , partcmd_disable ,
0 commit comments