Skip to content

Commit bf9e08c

Browse files
committed
Merge: cgroup: Backport upstream cgroup commits up to v6.5
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/3346 JIRA: https://issues.redhat.com/browse/RHEL-16027 MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/3346 This MR backports upstream RCU commits up to v6.5 with related fixes, if applicable. It also pulls a number of relevant kernfs commits because of the dependency of some of the cgroup commits. In particular, it also backports 6a010a4 ("cgroup: Make !percpu threadgroup_rwsem operations optional"). This commit may slow down workloads that perform frequent cgroup configuration changes like OCP. As a result, commit 9b81d3a ("cgroup: add cgroup_favordynmods= command-line option") from v6.7 is also backported to provide a way out by favoring dynamic cgroup modification using the "cgroup_favordynmods=y" boot command line option. Signed-off-by: Waiman Long <[email protected]> Approved-by: Phil Auld <[email protected]> Approved-by: Rafael Aquini <[email protected]> Signed-off-by: Scott Weaver <[email protected]>
2 parents c3628a0 + 6aa0b59 commit bf9e08c

File tree

22 files changed

+542
-299
lines changed

22 files changed

+542
-299
lines changed

Documentation/admin-guide/cgroup-v2.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,14 @@ cgroup v2 currently supports the following mount options.
184184
ignored on non-init namespace mounts. Please refer to the
185185
Delegation section for details.
186186

187+
favordynmods
188+
Reduce the latencies of dynamic cgroup modifications such as
189+
task migrations and controller on/offs at the cost of making
190+
hot path operations such as forks and exits more expensive.
191+
The static usage pattern of creating a cgroup, enabling
192+
controllers, and then seeding it with CLONE_INTO_CGROUP is
193+
not affected by this option.
194+
187195
memory_localevents
188196
Only populate memory.events with data for the current cgroup,
189197
and not any subtrees. This is legacy behaviour, the default

Documentation/admin-guide/kernel-parameters.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,10 @@
580580
named mounts. Specifying both "all" and "named" disables
581581
all v1 hierarchies.
582582

583+
cgroup_favordynmods= [KNL] Enable or Disable favordynmods.
584+
Format: { "true" | "false" }
585+
Defaults to the value of CONFIG_CGROUP_FAVOR_DYNMODS.
586+
583587
cgroup.memory= [KNL] Pass options to the cgroup memory controller.
584588
Format: <string>
585589
nosocket -- Disable socket memory accounting.

fs/kernfs/dir.c

Lines changed: 78 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,15 @@ static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */
2323

2424
#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
2525

26+
static bool __kernfs_active(struct kernfs_node *kn)
27+
{
28+
return atomic_read(&kn->active) >= 0;
29+
}
30+
2631
static bool kernfs_active(struct kernfs_node *kn)
2732
{
2833
lockdep_assert_held(&kernfs_root(kn)->kernfs_rwsem);
29-
return atomic_read(&kn->active) >= 0;
34+
return __kernfs_active(kn);
3035
}
3136

3237
static bool kernfs_lockdep(struct kernfs_node *kn)
@@ -464,6 +469,16 @@ static void kernfs_drain(struct kernfs_node *kn)
464469
lockdep_assert_held_write(&root->kernfs_rwsem);
465470
WARN_ON_ONCE(kernfs_active(kn));
466471

472+
/*
473+
* Skip draining if already fully drained. This avoids draining and its
474+
* lockdep annotations for nodes which have never been activated
475+
* allowing embedding kernfs_remove() in create error paths without
476+
* worrying about draining.
477+
*/
478+
if (atomic_read(&kn->active) == KN_DEACTIVATED_BIAS &&
479+
!kernfs_should_drain_open_files(kn))
480+
return;
481+
467482
up_write(&root->kernfs_rwsem);
468483

469484
if (kernfs_lockdep(kn)) {
@@ -472,7 +487,6 @@ static void kernfs_drain(struct kernfs_node *kn)
472487
lock_contended(&kn->dep_map, _RET_IP_);
473488
}
474489

475-
/* but everyone should wait for draining */
476490
wait_event(root->deactivate_waitq,
477491
atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);
478492

@@ -481,7 +495,8 @@ static void kernfs_drain(struct kernfs_node *kn)
481495
rwsem_release(&kn->dep_map, _RET_IP_);
482496
}
483497

484-
kernfs_drain_open_files(kn);
498+
if (kernfs_should_drain_open_files(kn))
499+
kernfs_drain_open_files(kn);
485500

486501
down_write(&root->kernfs_rwsem);
487502
}
@@ -688,12 +703,11 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
688703
}
689704

690705
/*
691-
* ACTIVATED is protected with kernfs_mutex but it was clear when
692-
* @kn was added to idr and we just wanna see it set. No need to
693-
* grab kernfs_mutex.
706+
* We should fail if @kn has never been activated and guarantee success
707+
* if the caller knows that @kn is active. Both can be achieved by
708+
* __kernfs_active() which tests @kn->active without kernfs_rwsem.
694709
*/
695-
if (unlikely(!(kn->flags & KERNFS_ACTIVATED) ||
696-
!atomic_inc_not_zero(&kn->count)))
710+
if (unlikely(!__kernfs_active(kn) || !atomic_inc_not_zero(&kn->count)))
697711
goto err_unlock;
698712

699713
spin_unlock(&kernfs_idr_lock);
@@ -735,10 +749,7 @@ int kernfs_add_one(struct kernfs_node *kn)
735749
goto out_unlock;
736750

737751
ret = -ENOENT;
738-
if (parent->flags & KERNFS_EMPTY_DIR)
739-
goto out_unlock;
740-
741-
if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent))
752+
if (parent->flags & (KERNFS_REMOVING | KERNFS_EMPTY_DIR))
742753
goto out_unlock;
743754

744755
kn->hash = kernfs_name_hash(kn->name, kn->ns);
@@ -1307,6 +1318,21 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
13071318
return pos->parent;
13081319
}
13091320

1321+
static void kernfs_activate_one(struct kernfs_node *kn)
1322+
{
1323+
lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem);
1324+
1325+
kn->flags |= KERNFS_ACTIVATED;
1326+
1327+
if (kernfs_active(kn) || (kn->flags & (KERNFS_HIDDEN | KERNFS_REMOVING)))
1328+
return;
1329+
1330+
WARN_ON_ONCE(kn->parent && RB_EMPTY_NODE(&kn->rb));
1331+
WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
1332+
1333+
atomic_sub(KN_DEACTIVATED_BIAS, &kn->active);
1334+
}
1335+
13101336
/**
13111337
* kernfs_activate - activate a node which started deactivated
13121338
* @kn: kernfs_node whose subtree is to be activated
@@ -1328,15 +1354,42 @@ void kernfs_activate(struct kernfs_node *kn)
13281354
down_write(&root->kernfs_rwsem);
13291355

13301356
pos = NULL;
1331-
while ((pos = kernfs_next_descendant_post(pos, kn))) {
1332-
if (pos->flags & KERNFS_ACTIVATED)
1333-
continue;
1357+
while ((pos = kernfs_next_descendant_post(pos, kn)))
1358+
kernfs_activate_one(pos);
13341359

1335-
WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb));
1336-
WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS);
1360+
up_write(&root->kernfs_rwsem);
1361+
}
1362+
1363+
/**
1364+
* kernfs_show - show or hide a node
1365+
* @kn: kernfs_node to show or hide
1366+
* @show: whether to show or hide
1367+
*
1368+
* If @show is %false, @kn is marked hidden and deactivated. A hidden node is
1369+
* ignored in future activations. If %true, the mark is removed and activation
1370+
* state is restored. This function won't implicitly activate a new node in a
1371+
* %KERNFS_ROOT_CREATE_DEACTIVATED root which hasn't been activated yet.
1372+
*
1373+
* To avoid recursion complexities, directories aren't supported for now.
1374+
*/
1375+
void kernfs_show(struct kernfs_node *kn, bool show)
1376+
{
1377+
struct kernfs_root *root = kernfs_root(kn);
1378+
1379+
if (WARN_ON_ONCE(kernfs_type(kn) == KERNFS_DIR))
1380+
return;
13371381

1338-
atomic_sub(KN_DEACTIVATED_BIAS, &pos->active);
1339-
pos->flags |= KERNFS_ACTIVATED;
1382+
down_write(&root->kernfs_rwsem);
1383+
1384+
if (show) {
1385+
kn->flags &= ~KERNFS_HIDDEN;
1386+
if (kn->flags & KERNFS_ACTIVATED)
1387+
kernfs_activate_one(kn);
1388+
} else {
1389+
kn->flags |= KERNFS_HIDDEN;
1390+
if (kernfs_active(kn))
1391+
atomic_add(KN_DEACTIVATED_BIAS, &kn->active);
1392+
kernfs_drain(kn);
13401393
}
13411394

13421395
up_write(&root->kernfs_rwsem);
@@ -1361,34 +1414,27 @@ static void __kernfs_remove(struct kernfs_node *kn)
13611414

13621415
pr_debug("kernfs %s: removing\n", kn->name);
13631416

1364-
/* prevent any new usage under @kn by deactivating all nodes */
1417+
/* prevent new usage by marking all nodes removing and deactivating */
13651418
pos = NULL;
1366-
while ((pos = kernfs_next_descendant_post(pos, kn)))
1419+
while ((pos = kernfs_next_descendant_post(pos, kn))) {
1420+
pos->flags |= KERNFS_REMOVING;
13671421
if (kernfs_active(pos))
13681422
atomic_add(KN_DEACTIVATED_BIAS, &pos->active);
1423+
}
13691424

13701425
/* deactivate and unlink the subtree node-by-node */
13711426
do {
13721427
pos = kernfs_leftmost_descendant(kn);
13731428

13741429
/*
1375-
* kernfs_drain() drops kernfs_rwsem temporarily and @pos's
1430+
* kernfs_drain() may drop kernfs_rwsem temporarily and @pos's
13761431
* base ref could have been put by someone else by the time
13771432
* the function returns. Make sure it doesn't go away
13781433
* underneath us.
13791434
*/
13801435
kernfs_get(pos);
13811436

1382-
/*
1383-
* Drain iff @kn was activated. This avoids draining and
1384-
* its lockdep annotations for nodes which have never been
1385-
* activated and allows embedding kernfs_remove() in create
1386-
* error paths without worrying about draining.
1387-
*/
1388-
if (kn->flags & KERNFS_ACTIVATED)
1389-
kernfs_drain(pos);
1390-
else
1391-
WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
1437+
kernfs_drain(pos);
13921438

13931439
/*
13941440
* kernfs_unlink_sibling() succeeds once per node. Use it

0 commit comments

Comments
 (0)