
Commit a9e7f65

htejun authored and Ingo Molnar committed
sched/fair: Fix O(nr_cgroups) in load balance path
Currently, rq->leaf_cfs_rq_list is a traversal-ordered list of all live cfs_rqs that have ever been active on the CPU; unfortunately, this makes update_blocked_averages() O(# total cgroups), which isn't scalable at all. It shows up as a small CPU-consumption and scheduling-latency increase in the load-balancing path on systems with the CPU controller enabled across most cgroups.

In an edge case where temporary cgroups were leaking, this caused the kernel to consume a good several tens of percent of CPU cycles running update_blocked_averages(), each run taking multiple milliseconds.

This patch fixes the issue by taking empty and fully decayed cfs_rqs off rq->leaf_cfs_rq_list.

Signed-off-by: Tejun Heo <[email protected]>
[ Added cfs_rq_is_decayed() ]
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Acked-by: Vincent Guittot <[email protected]>
Cc: Chris Mason <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Mike Galbraith <[email protected]>
Cc: Paul Turner <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
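For readers skimming the diff below, the core of the fix is a classic pattern: unlinking entries from an intrusive doubly-linked list while iterating it, which is why the patch trades list_for_each_entry_rcu() for the "safe" iterator variant. The following is a minimal, self-contained userspace sketch of that pattern; the names (toy_cfs_rq, decayed, the hand-rolled list helpers) are illustrative stand-ins, not the kernel's actual implementation.

/*
 * Sketch: walk an intrusive doubly-linked list and unlink "decayed"
 * entries as we go.  The successor is saved before the current node
 * may be unlinked, which is exactly what list_for_each_entry_safe()
 * does in the patch.  All names here are hypothetical.
 */
#include <stddef.h>
#include <stdio.h>
#include <stdbool.h>

struct list_node { struct list_node *prev, *next; };

struct toy_cfs_rq {                 /* hypothetical stand-in for cfs_rq */
        int id;
        bool decayed;               /* stand-in for cfs_rq_is_decayed() */
        struct list_node link;      /* stand-in for leaf_cfs_rq_list */
};

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

static void list_init(struct list_node *h) { h->prev = h->next = h; }

static void list_add_tail(struct list_node *h, struct list_node *n)
{
        n->prev = h->prev; n->next = h;
        h->prev->next = n; h->prev = n;
}

static void list_del(struct list_node *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
        n->prev = n->next = n;      /* node now points at itself */
}

int main(void)
{
        struct toy_cfs_rq rqs[] = {
                { .id = 1, .decayed = false },
                { .id = 2, .decayed = true  },
                { .id = 3, .decayed = true  },
                { .id = 4, .decayed = false },
        };
        struct list_node head, *cur, *nxt;
        size_t i;

        list_init(&head);
        for (i = 0; i < sizeof(rqs) / sizeof(rqs[0]); i++)
                list_add_tail(&head, &rqs[i].link);

        /* "safe" walk: remember the successor before unlinking cur */
        for (cur = head.next; cur != &head; cur = nxt) {
                struct toy_cfs_rq *rq =
                        container_of(cur, struct toy_cfs_rq, link);

                nxt = cur->next;
                if (rq->decayed)
                        list_del(cur);  /* mirrors list_del_leaf_cfs_rq() */
        }

        for (cur = head.next; cur != &head; cur = cur->next)
                printf("still on list: toy cfs_rq %d\n",
                       container_of(cur, struct toy_cfs_rq, link)->id);
        return 0;
}

A naive walk that read cur->next only after list_del() had already repointed the node at itself would spin on the dead entry forever; saving the successor up front is what makes removal during traversal sound.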
1 parent 502ce00 · commit a9e7f65

1 file changed: 34 additions (+), 8 deletions (−)


kernel/sched/fair.c

Lines changed: 34 additions & 8 deletions
@@ -369,8 +369,9 @@ static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 }
 
 /* Iterate thr' all leaf cfs_rq's on a runqueue */
-#define for_each_leaf_cfs_rq(rq, cfs_rq) \
-        list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
+#define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos)                     \
+        list_for_each_entry_safe(cfs_rq, pos, &rq->leaf_cfs_rq_list,   \
+                                 leaf_cfs_rq_list)
 
 /* Do the two (enqueued) entities belong to the same group ? */
 static inline struct cfs_rq *
@@ -463,8 +464,8 @@ static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 {
 }
 
-#define for_each_leaf_cfs_rq(rq, cfs_rq) \
-        for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
+#define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos)      \
+        for (cfs_rq = &rq->cfs, pos = NULL; cfs_rq; cfs_rq = pos)
 
 static inline struct sched_entity *parent_entity(struct sched_entity *se)
 {
@@ -6953,10 +6954,28 @@ static void attach_tasks(struct lb_env *env)
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
+
+static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
+{
+        if (cfs_rq->load.weight)
+                return false;
+
+        if (cfs_rq->avg.load_sum)
+                return false;
+
+        if (cfs_rq->avg.util_sum)
+                return false;
+
+        if (cfs_rq->runnable_load_sum)
+                return false;
+
+        return true;
+}
+
 static void update_blocked_averages(int cpu)
 {
         struct rq *rq = cpu_rq(cpu);
-        struct cfs_rq *cfs_rq;
+        struct cfs_rq *cfs_rq, *pos;
         struct rq_flags rf;
 
         rq_lock_irqsave(rq, &rf);
@@ -6966,7 +6985,7 @@ static void update_blocked_averages(int cpu)
          * Iterates the task_group tree in a bottom up fashion, see
          * list_add_leaf_cfs_rq() for details.
          */
-        for_each_leaf_cfs_rq(rq, cfs_rq) {
+        for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) {
                 struct sched_entity *se;
 
                 /* throttled entities do not contribute to load */
@@ -6980,6 +6999,13 @@ static void update_blocked_averages(int cpu)
                 se = cfs_rq->tg->se[cpu];
                 if (se && !skip_blocked_update(se))
                         update_load_avg(se, 0);
+
+                /*
+                 * There can be a lot of idle CPU cgroups.  Don't let fully
+                 * decayed cfs_rqs linger on the list.
+                 */
+                if (cfs_rq_is_decayed(cfs_rq))
+                        list_del_leaf_cfs_rq(cfs_rq);
         }
         rq_unlock_irqrestore(rq, &rf);
 }
@@ -9503,10 +9529,10 @@ const struct sched_class fair_sched_class = {
 #ifdef CONFIG_SCHED_DEBUG
 void print_cfs_stats(struct seq_file *m, int cpu)
 {
-        struct cfs_rq *cfs_rq;
+        struct cfs_rq *cfs_rq, *pos;
 
         rcu_read_lock();
-        for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
+        for_each_leaf_cfs_rq_safe(cpu_rq(cpu), cfs_rq, pos)
                 print_cfs_rq(m, cpu, cfs_rq);
         rcu_read_unlock();
 }
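Two details about the shape of the fix are worth noting. First, removing a decayed cfs_rq is safe only because the enqueue path puts it back via list_add_leaf_cfs_rq() the moment it becomes active again (the comment in update_blocked_averages() points there), so the list remains a superset of every cfs_rq with blocked load still left to decay. Second, in !CONFIG_FAIR_GROUP_SCHED builds the new for_each_leaf_cfs_rq_safe() degenerates to a single iteration over &rq->cfs with pos preset to NULL, matching the old stub while keeping the three-argument signature.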
