@@ -204,6 +204,11 @@ struct cpuset {
204204 */
205205static cpumask_var_t subpartitions_cpus ;
206206
207+ /*
208+ * Exclusive CPUs in isolated partitions
209+ */
210+ static cpumask_var_t isolated_cpus ;
211+
207212/* List of remote partition root children */
208213static struct list_head remote_children ;
209214
@@ -1317,6 +1322,7 @@ static void compute_effective_cpumask(struct cpumask *new_cpus,
13171322 */
13181323enum partition_cmd {
13191324 partcmd_enable , /* Enable partition root */
1325+ partcmd_enablei , /* Enable isolated partition root */
13201326 partcmd_disable , /* Disable partition root */
13211327 partcmd_update , /* Update parent's effective_cpus */
13221328 partcmd_invalidate , /* Make partition invalid */
@@ -1418,6 +1424,74 @@ static void reset_partition_data(struct cpuset *cs)
14181424 }
14191425}
14201426
1427+ /*
1428+ * partition_xcpus_newstate - Update isolated_cpus for an exclusive CPUs state change
1429+ * @old_prs: old partition_root_state (only compared against @new_prs here)
1430+ * @new_prs: new partition_root_state; PRS_ISOLATED adds @xcpus, any other value removes them
1431+ * @xcpus: exclusive CPUs with state change (not modified by this function)
1432+ */
1433+ static void partition_xcpus_newstate (int old_prs , int new_prs , struct cpumask * xcpus )
1434+ {
1435+ WARN_ON_ONCE (old_prs == new_prs ); /* a call without an actual state change is unexpected */
1436+ if (new_prs == PRS_ISOLATED )
1437+ cpumask_or (isolated_cpus , isolated_cpus , xcpus ); /* xcpus become isolated */
1438+ else
1439+ cpumask_andnot (isolated_cpus , isolated_cpus , xcpus ); /* xcpus are no longer isolated */
1440+ }
1441+
1442+ /*
1443+ * partition_xcpus_add - Add new exclusive CPUs to partition
1444+ * @new_prs: new partition_root_state (a negative value triggers a one-time warning)
1445+ * @parent: parent cpuset that gives up @xcpus; NULL is treated as top_cpuset
1446+ * @xcpus: exclusive CPUs to be added (not modified by this function)
1447+ *
1448+ * Remote partition if parent == NULL. The caller must hold callback_lock.
1449+ */
1450+ static void partition_xcpus_add (int new_prs , struct cpuset * parent ,
1451+ struct cpumask * xcpus )
1452+ {
1453+ WARN_ON_ONCE (new_prs < 0 );
1454+ lockdep_assert_held (& callback_lock );
1455+ if (!parent )
1456+ parent = & top_cpuset ;
1457+
1458+ if (parent == & top_cpuset ) /* subpartitions_cpus is only updated for children of top_cpuset */
1459+ cpumask_or (subpartitions_cpus , subpartitions_cpus , xcpus );
1460+
1461+ if (new_prs != parent -> partition_root_state ) /* state of xcpus differs from the parent's state */
1462+ partition_xcpus_newstate (parent -> partition_root_state , new_prs ,
1463+ xcpus );
1464+
1465+ cpumask_andnot (parent -> effective_cpus , parent -> effective_cpus , xcpus ); /* take xcpus out of parent's effective_cpus */
1466+ }
1467+
1468+ /*
1469+ * partition_xcpus_del - Remove exclusive CPUs from partition
1470+ * @old_prs: old partition_root_state (a negative value triggers a one-time warning)
1471+ * @parent: parent cpuset that gets @xcpus back; NULL is treated as top_cpuset
1472+ * @xcpus: exclusive CPUs to be removed; modified in place (ANDed with cpu_active_mask)
1473+ *
1474+ * Remote partition if parent == NULL. The caller must hold callback_lock.
1475+ */
1476+ static void partition_xcpus_del (int old_prs , struct cpuset * parent ,
1477+ struct cpumask * xcpus )
1478+ {
1479+ WARN_ON_ONCE (old_prs < 0 );
1480+ lockdep_assert_held (& callback_lock );
1481+ if (!parent )
1482+ parent = & top_cpuset ;
1483+
1484+ if (parent == & top_cpuset ) /* subpartitions_cpus is only updated for children of top_cpuset */
1485+ cpumask_andnot (subpartitions_cpus , subpartitions_cpus , xcpus );
1486+
1487+ if (old_prs != parent -> partition_root_state ) /* xcpus take on the parent's state */
1488+ partition_xcpus_newstate (old_prs , parent -> partition_root_state ,
1489+ xcpus );
1490+
1491+ cpumask_and (xcpus , xcpus , cpu_active_mask ); /* only CPUs in cpu_active_mask are handed back; this changes the caller's mask */
1492+ cpumask_or (parent -> effective_cpus , parent -> effective_cpus , xcpus );
1493+ }
1494+
14211495/*
14221496 * compute_effective_exclusive_cpumask - compute effective exclusive CPUs
14231497 * @cs: cpuset
@@ -1456,13 +1530,15 @@ static inline bool is_local_partition(struct cpuset *cs)
14561530/*
14571531 * remote_partition_enable - Enable current cpuset as a remote partition root
14581532 * @cs: the cpuset to update
1533+ * @new_prs: new partition_root_state
14591534 * @tmp: temporary masks
14601535 * Return: 1 if successful, 0 if error
14611536 *
14621537 * Enable the current cpuset to become a remote partition root taking CPUs
14631538 * directly from the top cpuset. cpuset_mutex must be held by the caller.
14641539 */
1465- static int remote_partition_enable (struct cpuset * cs , struct tmpmasks * tmp )
1540+ static int remote_partition_enable (struct cpuset * cs , int new_prs ,
1541+ struct tmpmasks * tmp )
14661542{
14671543 /*
14681544 * The user must have sysadmin privilege.
@@ -1485,18 +1561,14 @@ static int remote_partition_enable(struct cpuset *cs, struct tmpmasks *tmp)
14851561 return 0 ;
14861562
14871563 spin_lock_irq (& callback_lock );
1488- cpumask_andnot (top_cpuset .effective_cpus ,
1489- top_cpuset .effective_cpus , tmp -> new_cpus );
1490- cpumask_or (subpartitions_cpus ,
1491- subpartitions_cpus , tmp -> new_cpus );
1492-
1564+ partition_xcpus_add (new_prs , NULL , tmp -> new_cpus );
1565+ list_add (& cs -> remote_sibling , & remote_children );
14931566 if (cs -> use_parent_ecpus ) {
14941567 struct cpuset * parent = parent_cs (cs );
14951568
14961569 cs -> use_parent_ecpus = false;
14971570 parent -> child_ecpus_count -- ;
14981571 }
1499- list_add (& cs -> remote_sibling , & remote_children );
15001572 spin_unlock_irq (& callback_lock );
15011573
15021574 /*
@@ -1524,13 +1596,8 @@ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp)
15241596 WARN_ON_ONCE (!cpumask_subset (tmp -> new_cpus , subpartitions_cpus ));
15251597
15261598 spin_lock_irq (& callback_lock );
1527- cpumask_andnot (subpartitions_cpus ,
1528- subpartitions_cpus , tmp -> new_cpus );
1529- cpumask_and (tmp -> new_cpus ,
1530- tmp -> new_cpus , cpu_active_mask );
1531- cpumask_or (top_cpuset .effective_cpus ,
1532- top_cpuset .effective_cpus , tmp -> new_cpus );
15331599 list_del_init (& cs -> remote_sibling );
1600+ partition_xcpus_del (cs -> partition_root_state , NULL , tmp -> new_cpus );
15341601 cs -> partition_root_state = - cs -> partition_root_state ;
15351602 if (!cs -> prs_err )
15361603 cs -> prs_err = PERR_INVCPUS ;
@@ -1557,6 +1624,7 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask,
15571624 struct tmpmasks * tmp )
15581625{
15591626 bool adding , deleting ;
1627+ int prs = cs -> partition_root_state ;
15601628
15611629 if (WARN_ON_ONCE (!is_remote_partition (cs )))
15621630 return ;
@@ -1580,20 +1648,10 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask,
15801648 goto invalidate ;
15811649
15821650 spin_lock_irq (& callback_lock );
1583- if (adding ) {
1584- cpumask_or (subpartitions_cpus ,
1585- subpartitions_cpus , tmp -> addmask );
1586- cpumask_andnot (top_cpuset .effective_cpus ,
1587- top_cpuset .effective_cpus , tmp -> addmask );
1588- }
1589- if (deleting ) {
1590- cpumask_andnot (subpartitions_cpus ,
1591- subpartitions_cpus , tmp -> delmask );
1592- cpumask_and (tmp -> delmask ,
1593- tmp -> delmask , cpu_active_mask );
1594- cpumask_or (top_cpuset .effective_cpus ,
1595- top_cpuset .effective_cpus , tmp -> delmask );
1596- }
1651+ if (adding )
1652+ partition_xcpus_add (prs , NULL , tmp -> addmask );
1653+ if (deleting )
1654+ partition_xcpus_del (prs , NULL , tmp -> delmask );
15971655 spin_unlock_irq (& callback_lock );
15981656
15991657 /*
@@ -1676,11 +1734,11 @@ static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus)
16761734 * @tmp: Temporary addmask and delmask
16771735 * Return: 0 or a partition root state error code
16781736 *
1679- * For partcmd_enable, the cpuset is being transformed from a non-partition
1680- * root to a partition root. The effective_xcpus (cpus_allowed if effective_xcpus
1681- * not set) mask of the given cpuset will be taken away from parent's
1682- * effective_cpus. The function will return 0 if all the CPUs listed in
1683- * effective_xcpus can be granted or an error code will be returned.
1737+ * For partcmd_enable* , the cpuset is being transformed from a non-partition
1738+ * root to a partition root. The effective_xcpus (cpus_allowed if
1739+ * effective_xcpus not set) mask of the given cpuset will be taken away from
1740+ * parent's effective_cpus. The function will return 0 if all the CPUs listed
1741+ * in effective_xcpus can be granted or an error code will be returned.
16841742 *
16851743 * For partcmd_disable, the cpuset is being transformed from a partition
16861744 * root back to a non-partition root. Any CPUs in effective_xcpus will be
@@ -1695,7 +1753,7 @@ static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus)
16951753 *
16961754 * For partcmd_invalidate, the current partition will be made invalid.
16971755 *
1698- * The partcmd_enable and partcmd_disable commands are used by
1756+ * The partcmd_enable* and partcmd_disable commands are used by
16991757 * update_prstate(). An error code may be returned and the caller will check
17001758 * for error.
17011759 *
@@ -1760,7 +1818,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
17601818
17611819 nocpu = tasks_nocpu_error (parent , cs , xcpus );
17621820
1763- if (cmd == partcmd_enable ) {
1821+ if (( cmd == partcmd_enable ) || ( cmd == partcmd_enablei ) ) {
17641822 /*
17651823 * Enabling partition root is not allowed if its
17661824 * effective_xcpus is empty or doesn't overlap with
@@ -1783,6 +1841,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
17831841 cpumask_copy (tmp -> delmask , xcpus );
17841842 deleting = true;
17851843 subparts_delta ++ ;
1844+ new_prs = (cmd == partcmd_enable ) ? PRS_ROOT : PRS_ISOLATED ;
17861845 } else if (cmd == partcmd_disable ) {
17871846 /*
17881847 * May need to add cpus to parent's effective_cpus for
@@ -1792,6 +1851,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
17921851 cpumask_and (tmp -> addmask , xcpus , parent -> effective_xcpus );
17931852 if (adding )
17941853 subparts_delta -- ;
1854+ new_prs = PRS_MEMBER ;
17951855 } else if (newmask ) {
17961856 /*
17971857 * Empty cpumask is not allowed
@@ -1940,37 +2000,24 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
19402000 * newly deleted ones will be added back to effective_cpus.
19412001 */
19422002 spin_lock_irq (& callback_lock );
1943- if (adding ) {
1944- if (parent == & top_cpuset )
1945- cpumask_andnot (subpartitions_cpus ,
1946- subpartitions_cpus , tmp -> addmask );
1947- /*
1948- * Some of the CPUs in effective_xcpus might have been offlined.
1949- */
1950- cpumask_or (parent -> effective_cpus ,
1951- parent -> effective_cpus , tmp -> addmask );
1952- cpumask_and (parent -> effective_cpus ,
1953- parent -> effective_cpus , cpu_active_mask );
1954- }
1955- if (deleting ) {
1956- if (parent == & top_cpuset )
1957- cpumask_or (subpartitions_cpus ,
1958- subpartitions_cpus , tmp -> delmask );
1959- cpumask_andnot (parent -> effective_cpus ,
1960- parent -> effective_cpus , tmp -> delmask );
1961- }
1962-
1963- if (is_partition_valid (parent )) {
1964- parent -> nr_subparts += subparts_delta ;
1965- WARN_ON_ONCE (parent -> nr_subparts < 0 );
1966- }
1967-
19682003 if (old_prs != new_prs ) {
19692004 cs -> partition_root_state = new_prs ;
19702005 if (new_prs <= 0 )
19712006 cs -> nr_subparts = 0 ;
19722007 }
2008+ /*
2009+ * Adding to parent's effective_cpus means deleting CPUs from cs
2010+ * and vice versa.
2011+ */
2012+ if (adding )
2013+ partition_xcpus_del (old_prs , parent , tmp -> addmask );
2014+ if (deleting )
2015+ partition_xcpus_add (new_prs , parent , tmp -> delmask );
19732016
2017+ if (is_partition_valid (parent )) {
2018+ parent -> nr_subparts += subparts_delta ;
2019+ WARN_ON_ONCE (parent -> nr_subparts < 0 );
2020+ }
19742021 spin_unlock_irq (& callback_lock );
19752022
19762023 if ((old_prs != new_prs ) && (cmd == partcmd_update ))
@@ -2948,6 +2995,7 @@ static int update_prstate(struct cpuset *cs, int new_prs)
29482995 int err = PERR_NONE , old_prs = cs -> partition_root_state ;
29492996 struct cpuset * parent = parent_cs (cs );
29502997 struct tmpmasks tmpmask ;
2998+ bool new_xcpus_state = false;
29512999
29523000 if (old_prs == new_prs )
29533001 return 0 ;
@@ -2977,6 +3025,9 @@ static int update_prstate(struct cpuset *cs, int new_prs)
29773025 goto out ;
29783026
29793027 if (!old_prs ) {
3028+ enum partition_cmd cmd = (new_prs == PRS_ROOT )
3029+ ? partcmd_enable : partcmd_enablei ;
3030+
29803031 /*
29813032 * cpus_allowed cannot be empty.
29823033 */
@@ -2985,19 +3036,18 @@ static int update_prstate(struct cpuset *cs, int new_prs)
29853036 goto out ;
29863037 }
29873038
2988- err = update_parent_effective_cpumask (cs , partcmd_enable ,
2989- NULL , & tmpmask );
3039+ err = update_parent_effective_cpumask (cs , cmd , NULL , & tmpmask );
29903040 /*
29913041 * If an attempt to become local partition root fails,
29923042 * try to become a remote partition root instead.
29933043 */
2994- if (err && remote_partition_enable (cs , & tmpmask ))
3044+ if (err && remote_partition_enable (cs , new_prs , & tmpmask ))
29953045 err = 0 ;
29963046 } else if (old_prs && new_prs ) {
29973047 /*
29983048 * A change in load balance state only, no change in cpumasks.
29993049 */
3000- ;
3050+ new_xcpus_state = true ;
30013051 } else {
30023052 /*
30033053 * Switching back to member is always allowed even if it
@@ -3029,6 +3079,8 @@ static int update_prstate(struct cpuset *cs, int new_prs)
30293079 WRITE_ONCE (cs -> prs_err , err );
30303080 if (!is_partition_valid (cs ))
30313081 reset_partition_data (cs );
3082+ else if (new_xcpus_state )
3083+ partition_xcpus_newstate (old_prs , new_prs , cs -> effective_xcpus );
30323084 spin_unlock_irq (& callback_lock );
30333085
30343086 /* Force update if switching back to member */
@@ -3386,6 +3438,7 @@ typedef enum {
33863438 FILE_SUBPARTS_CPULIST ,
33873439 FILE_EXCLUSIVE_CPULIST ,
33883440 FILE_EFFECTIVE_XCPULIST ,
3441+ FILE_ISOLATED_CPULIST ,
33893442 FILE_CPU_EXCLUSIVE ,
33903443 FILE_MEM_EXCLUSIVE ,
33913444 FILE_MEM_HARDWALL ,
@@ -3582,6 +3635,9 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
35823635 case FILE_SUBPARTS_CPULIST :
35833636 seq_printf (sf , "%*pbl\n" , cpumask_pr_args (subpartitions_cpus ));
35843637 break ;
3638+ case FILE_ISOLATED_CPULIST :
3639+ seq_printf (sf , "%*pbl\n" , cpumask_pr_args (isolated_cpus ));
3640+ break ;
35853641 default :
35863642 ret = - EINVAL ;
35873643 }
@@ -3875,6 +3931,13 @@ static struct cftype dfl_files[] = {
38753931 .flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_DEBUG ,
38763932 },
38773933
3934+ {
3935+ .name = "cpus.isolated" ,
3936+ .seq_show = cpuset_common_seq_show ,
3937+ .private = FILE_ISOLATED_CPULIST ,
3938+ .flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_DEBUG ,
3939+ },
3940+
38783941 { } /* terminate */
38793942};
38803943
@@ -4194,6 +4257,7 @@ int __init cpuset_init(void)
41944257 BUG_ON (!alloc_cpumask_var (& top_cpuset .effective_xcpus , GFP_KERNEL ));
41954258 BUG_ON (!alloc_cpumask_var (& top_cpuset .exclusive_cpus , GFP_KERNEL ));
41964259 BUG_ON (!zalloc_cpumask_var (& subpartitions_cpus , GFP_KERNEL ));
4260+ BUG_ON (!zalloc_cpumask_var (& isolated_cpus , GFP_KERNEL ));
41974261
41984262 cpumask_setall (top_cpuset .cpus_allowed );
41994263 nodes_setall (top_cpuset .mems_allowed );
0 commit comments