Skip to content

Commit 11e5f40

Browse files
Waiman-Long authored and htejun committed
cgroup/cpuset: Keep track of CPUs in isolated partitions
Add a new internal isolated_cpus mask to keep track of the CPUs that are in isolated partitions. Expose that new cpumask as a new root-only control file ".cpuset.cpus.isolated". tj: Updated patch description to reflect dropping __DEBUG__ prefix. Signed-off-by: Waiman Long <[email protected]> Signed-off-by: Tejun Heo <[email protected]>
1 parent 14060df commit 11e5f40

File tree

1 file changed

+127
-63
lines changed

1 file changed

+127
-63
lines changed

kernel/cgroup/cpuset.c

Lines changed: 127 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,11 @@ struct cpuset {
204204
*/
205205
static cpumask_var_t subpartitions_cpus;
206206

207+
/*
208+
* Exclusive CPUs in isolated partitions
209+
*/
210+
static cpumask_var_t isolated_cpus;
211+
207212
/* List of remote partition root children */
208213
static struct list_head remote_children;
209214

@@ -1317,6 +1322,7 @@ static void compute_effective_cpumask(struct cpumask *new_cpus,
13171322
*/
13181323
enum partition_cmd {
13191324
partcmd_enable, /* Enable partition root */
1325+
partcmd_enablei, /* Enable isolated partition root */
13201326
partcmd_disable, /* Disable partition root */
13211327
partcmd_update, /* Update parent's effective_cpus */
13221328
partcmd_invalidate, /* Make partition invalid */
@@ -1418,6 +1424,74 @@ static void reset_partition_data(struct cpuset *cs)
14181424
}
14191425
}
14201426

1427+
/*
1428+
* partition_xcpus_newstate - Update isolated_cpus for an exclusive CPUs state change
1429+
* @old_prs: old partition_root_state (only compared against @new_prs)
1430+
* @new_prs: new partition_root_state
1431+
* @xcpus: exclusive CPUs with state change
*
* If @new_prs is PRS_ISOLATED, @xcpus are added to the isolated_cpus mask;
* for any other new state they are removed from it. @xcpus itself is only
* read.
1432+
*/
1433+
static void partition_xcpus_newstate(int old_prs, int new_prs, struct cpumask *xcpus)
1434+
{
1435+
/* Callers are expected to invoke this only on an actual state change. */
WARN_ON_ONCE(old_prs == new_prs);
1436+
if (new_prs == PRS_ISOLATED)
1437+
cpumask_or(isolated_cpus, isolated_cpus, xcpus);
1438+
else
1439+
/* Any non-isolated new state drops the CPUs from isolated_cpus. */
cpumask_andnot(isolated_cpus, isolated_cpus, xcpus);
1440+
}
1441+
1442+
/*
1443+
* partition_xcpus_add - Add new exclusive CPUs to partition
1444+
* @new_prs: new partition_root_state
1445+
* @parent: parent cpuset, or NULL to use top_cpuset
1446+
* @xcpus: exclusive CPUs to be added
1447+
*
1448+
* Remote partition if parent == NULL
*
* The CPUs in @xcpus are taken away from the parent's effective_cpus. When
* the parent is top_cpuset they are also recorded in subpartitions_cpus.
* isolated_cpus is updated via partition_xcpus_newstate() when @new_prs
* differs from the parent's partition_root_state.
*
* The caller must hold callback_lock.
1449+
*/
1450+
static void partition_xcpus_add(int new_prs, struct cpuset *parent,
1451+
struct cpumask *xcpus)
1452+
{
1453+
/* A negative partition state is not expected here. */
WARN_ON_ONCE(new_prs < 0);
1454+
lockdep_assert_held(&callback_lock);
1455+
/* A NULL parent means the CPUs are taken directly from top_cpuset. */
if (!parent)
1456+
parent = &top_cpuset;
1457+
1458+
if (parent == &top_cpuset)
1459+
cpumask_or(subpartitions_cpus, subpartitions_cpus, xcpus);
1460+
1461+
/* The CPUs move from the parent's state to @new_prs. */
if (new_prs != parent->partition_root_state)
1462+
partition_xcpus_newstate(parent->partition_root_state, new_prs,
1463+
xcpus);
1464+
1465+
cpumask_andnot(parent->effective_cpus, parent->effective_cpus, xcpus);
1466+
}
1467+
1468+
/*
1469+
* partition_xcpus_del - Remove exclusive CPUs from partition
1470+
* @old_prs: old partition_root_state
1471+
* @parent: parent cpuset, or NULL to use top_cpuset
1472+
* @xcpus: exclusive CPUs to be removed; modified in place, on return it
*         has been masked with cpu_active_mask
1473+
*
1474+
* Remote partition if parent == NULL
*
* The CPUs in @xcpus are given back to the parent's effective_cpus, limited
* to those in cpu_active_mask. When the parent is top_cpuset they are also
* cleared from subpartitions_cpus. isolated_cpus is updated via
* partition_xcpus_newstate() when @old_prs differs from the parent's
* partition_root_state.
*
* The caller must hold callback_lock.
1475+
*/
1476+
static void partition_xcpus_del(int old_prs, struct cpuset *parent,
1477+
struct cpumask *xcpus)
1478+
{
1479+
/* A negative partition state is not expected here. */
WARN_ON_ONCE(old_prs < 0);
1480+
lockdep_assert_held(&callback_lock);
1481+
/* A NULL parent means the CPUs are returned directly to top_cpuset. */
if (!parent)
1482+
parent = &top_cpuset;
1483+
1484+
if (parent == &top_cpuset)
1485+
cpumask_andnot(subpartitions_cpus, subpartitions_cpus, xcpus);
1486+
1487+
/* The CPUs move from @old_prs back to the parent's state. */
if (old_prs != parent->partition_root_state)
1488+
partition_xcpus_newstate(old_prs, parent->partition_root_state,
1489+
xcpus);
1490+
1491+
/*
* Some of the CPUs might have been offlined, so only active ones are
* returned to the parent. Note that this narrows the caller's @xcpus.
*/
cpumask_and(xcpus, xcpus, cpu_active_mask);
1492+
cpumask_or(parent->effective_cpus, parent->effective_cpus, xcpus);
1493+
}
1494+
14211495
/*
14221496
* compute_effective_exclusive_cpumask - compute effective exclusive CPUs
14231497
* @cs: cpuset
@@ -1456,13 +1530,15 @@ static inline bool is_local_partition(struct cpuset *cs)
14561530
/*
14571531
* remote_partition_enable - Enable current cpuset as a remote partition root
14581532
* @cs: the cpuset to update
1533+
* @new_prs: new partition_root_state
14591534
* @tmp: temporary masks
14601535
* Return: 1 if successful, 0 if error
14611536
*
14621537
* Enable the current cpuset to become a remote partition root taking CPUs
14631538
* directly from the top cpuset. cpuset_mutex must be held by the caller.
14641539
*/
1465-
static int remote_partition_enable(struct cpuset *cs, struct tmpmasks *tmp)
1540+
static int remote_partition_enable(struct cpuset *cs, int new_prs,
1541+
struct tmpmasks *tmp)
14661542
{
14671543
/*
14681544
* The user must have sysadmin privilege.
@@ -1485,18 +1561,14 @@ static int remote_partition_enable(struct cpuset *cs, struct tmpmasks *tmp)
14851561
return 0;
14861562

14871563
spin_lock_irq(&callback_lock);
1488-
cpumask_andnot(top_cpuset.effective_cpus,
1489-
top_cpuset.effective_cpus, tmp->new_cpus);
1490-
cpumask_or(subpartitions_cpus,
1491-
subpartitions_cpus, tmp->new_cpus);
1492-
1564+
partition_xcpus_add(new_prs, NULL, tmp->new_cpus);
1565+
list_add(&cs->remote_sibling, &remote_children);
14931566
if (cs->use_parent_ecpus) {
14941567
struct cpuset *parent = parent_cs(cs);
14951568

14961569
cs->use_parent_ecpus = false;
14971570
parent->child_ecpus_count--;
14981571
}
1499-
list_add(&cs->remote_sibling, &remote_children);
15001572
spin_unlock_irq(&callback_lock);
15011573

15021574
/*
@@ -1524,13 +1596,8 @@ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp)
15241596
WARN_ON_ONCE(!cpumask_subset(tmp->new_cpus, subpartitions_cpus));
15251597

15261598
spin_lock_irq(&callback_lock);
1527-
cpumask_andnot(subpartitions_cpus,
1528-
subpartitions_cpus, tmp->new_cpus);
1529-
cpumask_and(tmp->new_cpus,
1530-
tmp->new_cpus, cpu_active_mask);
1531-
cpumask_or(top_cpuset.effective_cpus,
1532-
top_cpuset.effective_cpus, tmp->new_cpus);
15331599
list_del_init(&cs->remote_sibling);
1600+
partition_xcpus_del(cs->partition_root_state, NULL, tmp->new_cpus);
15341601
cs->partition_root_state = -cs->partition_root_state;
15351602
if (!cs->prs_err)
15361603
cs->prs_err = PERR_INVCPUS;
@@ -1557,6 +1624,7 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask,
15571624
struct tmpmasks *tmp)
15581625
{
15591626
bool adding, deleting;
1627+
int prs = cs->partition_root_state;
15601628

15611629
if (WARN_ON_ONCE(!is_remote_partition(cs)))
15621630
return;
@@ -1580,20 +1648,10 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask,
15801648
goto invalidate;
15811649

15821650
spin_lock_irq(&callback_lock);
1583-
if (adding) {
1584-
cpumask_or(subpartitions_cpus,
1585-
subpartitions_cpus, tmp->addmask);
1586-
cpumask_andnot(top_cpuset.effective_cpus,
1587-
top_cpuset.effective_cpus, tmp->addmask);
1588-
}
1589-
if (deleting) {
1590-
cpumask_andnot(subpartitions_cpus,
1591-
subpartitions_cpus, tmp->delmask);
1592-
cpumask_and(tmp->delmask,
1593-
tmp->delmask, cpu_active_mask);
1594-
cpumask_or(top_cpuset.effective_cpus,
1595-
top_cpuset.effective_cpus, tmp->delmask);
1596-
}
1651+
if (adding)
1652+
partition_xcpus_add(prs, NULL, tmp->addmask);
1653+
if (deleting)
1654+
partition_xcpus_del(prs, NULL, tmp->delmask);
15971655
spin_unlock_irq(&callback_lock);
15981656

15991657
/*
@@ -1676,11 +1734,11 @@ static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus)
16761734
* @tmp: Temporary addmask and delmask
16771735
* Return: 0 or a partition root state error code
16781736
*
1679-
* For partcmd_enable, the cpuset is being transformed from a non-partition
1680-
* root to a partition root. The effective_xcpus (cpus_allowed if effective_xcpus
1681-
* not set) mask of the given cpuset will be taken away from parent's
1682-
* effective_cpus. The function will return 0 if all the CPUs listed in
1683-
* effective_xcpus can be granted or an error code will be returned.
1737+
* For partcmd_enable*, the cpuset is being transformed from a non-partition
1738+
* root to a partition root. The effective_xcpus (cpus_allowed if
1739+
* effective_xcpus not set) mask of the given cpuset will be taken away from
1740+
* parent's effective_cpus. The function will return 0 if all the CPUs listed
1741+
* in effective_xcpus can be granted or an error code will be returned.
16841742
*
16851743
* For partcmd_disable, the cpuset is being transformed from a partition
16861744
* root back to a non-partition root. Any CPUs in effective_xcpus will be
@@ -1695,7 +1753,7 @@ static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus)
16951753
*
16961754
* For partcmd_invalidate, the current partition will be made invalid.
16971755
*
1698-
* The partcmd_enable and partcmd_disable commands are used by
1756+
* The partcmd_enable* and partcmd_disable commands are used by
16991757
* update_prstate(). An error code may be returned and the caller will check
17001758
* for error.
17011759
*
@@ -1760,7 +1818,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
17601818

17611819
nocpu = tasks_nocpu_error(parent, cs, xcpus);
17621820

1763-
if (cmd == partcmd_enable) {
1821+
if ((cmd == partcmd_enable) || (cmd == partcmd_enablei)) {
17641822
/*
17651823
* Enabling partition root is not allowed if its
17661824
* effective_xcpus is empty or doesn't overlap with
@@ -1783,6 +1841,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
17831841
cpumask_copy(tmp->delmask, xcpus);
17841842
deleting = true;
17851843
subparts_delta++;
1844+
new_prs = (cmd == partcmd_enable) ? PRS_ROOT : PRS_ISOLATED;
17861845
} else if (cmd == partcmd_disable) {
17871846
/*
17881847
* May need to add cpus to parent's effective_cpus for
@@ -1792,6 +1851,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
17921851
cpumask_and(tmp->addmask, xcpus, parent->effective_xcpus);
17931852
if (adding)
17941853
subparts_delta--;
1854+
new_prs = PRS_MEMBER;
17951855
} else if (newmask) {
17961856
/*
17971857
* Empty cpumask is not allowed
@@ -1940,37 +2000,24 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
19402000
* newly deleted ones will be added back to effective_cpus.
19412001
*/
19422002
spin_lock_irq(&callback_lock);
1943-
if (adding) {
1944-
if (parent == &top_cpuset)
1945-
cpumask_andnot(subpartitions_cpus,
1946-
subpartitions_cpus, tmp->addmask);
1947-
/*
1948-
* Some of the CPUs in effective_xcpus might have been offlined.
1949-
*/
1950-
cpumask_or(parent->effective_cpus,
1951-
parent->effective_cpus, tmp->addmask);
1952-
cpumask_and(parent->effective_cpus,
1953-
parent->effective_cpus, cpu_active_mask);
1954-
}
1955-
if (deleting) {
1956-
if (parent == &top_cpuset)
1957-
cpumask_or(subpartitions_cpus,
1958-
subpartitions_cpus, tmp->delmask);
1959-
cpumask_andnot(parent->effective_cpus,
1960-
parent->effective_cpus, tmp->delmask);
1961-
}
1962-
1963-
if (is_partition_valid(parent)) {
1964-
parent->nr_subparts += subparts_delta;
1965-
WARN_ON_ONCE(parent->nr_subparts < 0);
1966-
}
1967-
19682003
if (old_prs != new_prs) {
19692004
cs->partition_root_state = new_prs;
19702005
if (new_prs <= 0)
19712006
cs->nr_subparts = 0;
19722007
}
2008+
/*
2009+
* Adding to parent's effective_cpus means deleting CPUs from cs
2010+
* and vice versa.
2011+
*/
2012+
if (adding)
2013+
partition_xcpus_del(old_prs, parent, tmp->addmask);
2014+
if (deleting)
2015+
partition_xcpus_add(new_prs, parent, tmp->delmask);
19732016

2017+
if (is_partition_valid(parent)) {
2018+
parent->nr_subparts += subparts_delta;
2019+
WARN_ON_ONCE(parent->nr_subparts < 0);
2020+
}
19742021
spin_unlock_irq(&callback_lock);
19752022

19762023
if ((old_prs != new_prs) && (cmd == partcmd_update))
@@ -2948,6 +2995,7 @@ static int update_prstate(struct cpuset *cs, int new_prs)
29482995
int err = PERR_NONE, old_prs = cs->partition_root_state;
29492996
struct cpuset *parent = parent_cs(cs);
29502997
struct tmpmasks tmpmask;
2998+
bool new_xcpus_state = false;
29512999

29523000
if (old_prs == new_prs)
29533001
return 0;
@@ -2977,6 +3025,9 @@ static int update_prstate(struct cpuset *cs, int new_prs)
29773025
goto out;
29783026

29793027
if (!old_prs) {
3028+
enum partition_cmd cmd = (new_prs == PRS_ROOT)
3029+
? partcmd_enable : partcmd_enablei;
3030+
29803031
/*
29813032
* cpus_allowed cannot be empty.
29823033
*/
@@ -2985,19 +3036,18 @@ static int update_prstate(struct cpuset *cs, int new_prs)
29853036
goto out;
29863037
}
29873038

2988-
err = update_parent_effective_cpumask(cs, partcmd_enable,
2989-
NULL, &tmpmask);
3039+
err = update_parent_effective_cpumask(cs, cmd, NULL, &tmpmask);
29903040
/*
29913041
* If an attempt to become local partition root fails,
29923042
* try to become a remote partition root instead.
29933043
*/
2994-
if (err && remote_partition_enable(cs, &tmpmask))
3044+
if (err && remote_partition_enable(cs, new_prs, &tmpmask))
29953045
err = 0;
29963046
} else if (old_prs && new_prs) {
29973047
/*
29983048
* A change in load balance state only, no change in cpumasks.
29993049
*/
3000-
;
3050+
new_xcpus_state = true;
30013051
} else {
30023052
/*
30033053
* Switching back to member is always allowed even if it
@@ -3029,6 +3079,8 @@ static int update_prstate(struct cpuset *cs, int new_prs)
30293079
WRITE_ONCE(cs->prs_err, err);
30303080
if (!is_partition_valid(cs))
30313081
reset_partition_data(cs);
3082+
else if (new_xcpus_state)
3083+
partition_xcpus_newstate(old_prs, new_prs, cs->effective_xcpus);
30323084
spin_unlock_irq(&callback_lock);
30333085

30343086
/* Force update if switching back to member */
@@ -3386,6 +3438,7 @@ typedef enum {
33863438
FILE_SUBPARTS_CPULIST,
33873439
FILE_EXCLUSIVE_CPULIST,
33883440
FILE_EFFECTIVE_XCPULIST,
3441+
FILE_ISOLATED_CPULIST,
33893442
FILE_CPU_EXCLUSIVE,
33903443
FILE_MEM_EXCLUSIVE,
33913444
FILE_MEM_HARDWALL,
@@ -3582,6 +3635,9 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
35823635
case FILE_SUBPARTS_CPULIST:
35833636
seq_printf(sf, "%*pbl\n", cpumask_pr_args(subpartitions_cpus));
35843637
break;
3638+
case FILE_ISOLATED_CPULIST:
3639+
seq_printf(sf, "%*pbl\n", cpumask_pr_args(isolated_cpus));
3640+
break;
35853641
default:
35863642
ret = -EINVAL;
35873643
}
@@ -3875,6 +3931,13 @@ static struct cftype dfl_files[] = {
38753931
.flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_DEBUG,
38763932
},
38773933

3934+
{
3935+
.name = "cpus.isolated",
3936+
.seq_show = cpuset_common_seq_show,
3937+
.private = FILE_ISOLATED_CPULIST,
3938+
.flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_DEBUG,
3939+
},
3940+
38783941
{ } /* terminate */
38793942
};
38803943

@@ -4194,6 +4257,7 @@ int __init cpuset_init(void)
41944257
BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_xcpus, GFP_KERNEL));
41954258
BUG_ON(!alloc_cpumask_var(&top_cpuset.exclusive_cpus, GFP_KERNEL));
41964259
BUG_ON(!zalloc_cpumask_var(&subpartitions_cpus, GFP_KERNEL));
4260+
BUG_ON(!zalloc_cpumask_var(&isolated_cpus, GFP_KERNEL));
41974261

41984262
cpumask_setall(top_cpuset.cpus_allowed);
41994263
nodes_setall(top_cpuset.mems_allowed);

0 commit comments

Comments
 (0)