
Commit 82762d2

deggeman authored and Peter Zijlstra committed
sched/fair: Replace CFS internal cpu_util() with cpu_util_cfs()
cpu_util_cfs() was created by commit d4edd66 ("sched/cpufreq: Use the DEADLINE utilization signal") to enable access to CPU utilization from the Schedutil CPUfreq governor.

Commit a07630b ("sched/cpufreq/schedutil: Use util_est for OPP selection") added util_est support later.

The only thing cpu_util() does on top of what cpu_util_cfs() already does is clamp the return value to the [0..capacity_orig] capacity range of the CPU. Integrating this into cpu_util_cfs() does not harm the existing users (Schedutil, and CPUfreq cooling via the sched_cpu_util() wrapper).

For straightforwardness, keep `int cpu` as the function parameter rather than `struct rq *rq`, even though the latter might avoid some calls to cpu_rq(cpu) -> per_cpu(runqueues, cpu) -> RELOC_HIDE(). Update cpu_util()'s documentation and reuse it for cpu_util_cfs(). Remove cpu_util().

Signed-off-by: Dietmar Eggemann <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Vincent Guittot <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent ef8df97 · commit 82762d2
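In short, callers stop resolving a runqueue themselves and the [0..capacity_orig] clamp moves inside the helper. A condensed before/after of the call-site pattern, distilled from the hunks below (no additional change is implied):

	/* Before: cpu_util_cfs() took a struct rq and returned raw,
	 * unclamped utilization; CFS-internal code used a separate
	 * cpu_util(cpu) to get the [0..capacity_orig] clamp. */
	util = cpu_util_cfs(cpu_rq(cpu));

	/* After: one helper, keyed by CPU id, clamped for all users. */
	util = cpu_util_cfs(cpu);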

File tree

4 files changed: +50 −69 lines

kernel/sched/core.c

Lines changed: 1 addition & 1 deletion
@@ -7166,7 +7166,7 @@ unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
 
 unsigned long sched_cpu_util(int cpu, unsigned long max)
 {
-	return effective_cpu_util(cpu, cpu_util_cfs(cpu_rq(cpu)), max,
+	return effective_cpu_util(cpu, cpu_util_cfs(cpu), max,
 				  ENERGY_UTIL, NULL);
 }
 #endif /* CONFIG_SMP */
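For context, sched_cpu_util() is the wrapper the commit message credits for the CPUfreq cooling user; a minimal sketch of such an external consumer (the function name cpu_load_percent() and the percentage scaling are illustrative assumptions, not part of this diff):

	/* Illustrative consumer of sched_cpu_util(), modeled on the
	 * CPUfreq cooling use case named in the commit message. */
	static u32 cpu_load_percent(int cpu, unsigned long max_capacity)
	{
		/* Utilization in capacity units, now obtained via
		 * cpu_util_cfs(cpu) and pre-clamped to [0..capacity_orig]. */
		unsigned long util = sched_cpu_util(cpu, max_capacity);

		/* Scale to 0..100, e.g. for a thermal governor. */
		return (util * 100) / max_capacity;
	}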

kernel/sched/cpufreq_schedutil.c

Lines changed: 1 addition & 1 deletion
@@ -168,7 +168,7 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu)
 
 	sg_cpu->max = max;
 	sg_cpu->bw_dl = cpu_bw_dl(rq);
-	sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(rq), max,
+	sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), max,
 					  FREQUENCY_UTIL, NULL);
 }
 

kernel/sched/fair.c

Lines changed: 8 additions & 63 deletions
@@ -1502,7 +1502,6 @@ struct task_numa_env {
 
 static unsigned long cpu_load(struct rq *rq);
 static unsigned long cpu_runnable(struct rq *rq);
-static unsigned long cpu_util(int cpu);
 static inline long adjust_numa_imbalance(int imbalance,
 					int dst_running, int dst_weight);
 
@@ -1569,7 +1568,7 @@ static void update_numa_stats(struct task_numa_env *env,
 
 		ns->load += cpu_load(rq);
 		ns->runnable += cpu_runnable(rq);
-		ns->util += cpu_util(cpu);
+		ns->util += cpu_util_cfs(cpu);
 		ns->nr_running += rq->cfs.h_nr_running;
 		ns->compute_capacity += capacity_of(cpu);
 
@@ -3240,7 +3239,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq, int flags)
 		 * As is, the util number is not freq-invariant (we'd have to
 		 * implement arch_scale_freq_capacity() for that).
 		 *
-		 * See cpu_util().
+		 * See cpu_util_cfs().
 		 */
 		cpufreq_update_util(rq, flags);
 	}
@@ -5510,11 +5509,9 @@ static inline void hrtick_update(struct rq *rq)
 #endif
 
 #ifdef CONFIG_SMP
-static inline unsigned long cpu_util(int cpu);
-
 static inline bool cpu_overutilized(int cpu)
 {
-	return !fits_capacity(cpu_util(cpu), capacity_of(cpu));
+	return !fits_capacity(cpu_util_cfs(cpu), capacity_of(cpu));
 }
 
 static inline void update_overutilized_status(struct rq *rq)
@@ -6459,58 +6456,6 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	return target;
 }
 
-/**
- * cpu_util - Estimates the amount of capacity of a CPU used by CFS tasks.
- * @cpu: the CPU to get the utilization of
- *
- * The unit of the return value must be the one of capacity so we can compare
- * the utilization with the capacity of the CPU that is available for CFS task
- * (ie cpu_capacity).
- *
- * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the
- * recent utilization of currently non-runnable tasks on a CPU. It represents
- * the amount of utilization of a CPU in the range [0..capacity_orig] where
- * capacity_orig is the cpu_capacity available at the highest frequency
- * (arch_scale_freq_capacity()).
- * The utilization of a CPU converges towards a sum equal to or less than the
- * current capacity (capacity_curr <= capacity_orig) of the CPU because it is
- * the running time on this CPU scaled by capacity_curr.
- *
- * The estimated utilization of a CPU is defined to be the maximum between its
- * cfs_rq.avg.util_avg and the sum of the estimated utilization of the tasks
- * currently RUNNABLE on that CPU.
- * This allows to properly represent the expected utilization of a CPU which
- * has just got a big task running since a long sleep period. At the same time
- * however it preserves the benefits of the "blocked utilization" in
- * describing the potential for other tasks waking up on the same CPU.
- *
- * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even
- * higher than capacity_orig because of unfortunate rounding in
- * cfs.avg.util_avg or just after migrating tasks and new task wakeups until
- * the average stabilizes with the new running time. We need to check that the
- * utilization stays within the range of [0..capacity_orig] and cap it if
- * necessary. Without utilization capping, a group could be seen as overloaded
- * (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of
- * available capacity. We allow utilization to overshoot capacity_curr (but not
- * capacity_orig) as it useful for predicting the capacity required after task
- * migrations (scheduler-driven DVFS).
- *
- * Return: the (estimated) utilization for the specified CPU
- */
-static inline unsigned long cpu_util(int cpu)
-{
-	struct cfs_rq *cfs_rq;
-	unsigned int util;
-
-	cfs_rq = &cpu_rq(cpu)->cfs;
-	util = READ_ONCE(cfs_rq->avg.util_avg);
-
-	if (sched_feat(UTIL_EST))
-		util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued));
-
-	return min_t(unsigned long, util, capacity_orig_of(cpu));
-}
-
 /*
  * cpu_util_without: compute cpu utilization without any contributions from *p
  * @cpu: the CPU which utilization is requested
@@ -6531,7 +6476,7 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p)
 
 	/* Task has no contribution or is new */
 	if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
-		return cpu_util(cpu);
+		return cpu_util_cfs(cpu);
 
 	cfs_rq = &cpu_rq(cpu)->cfs;
 	util = READ_ONCE(cfs_rq->avg.util_avg);
@@ -6595,7 +6540,7 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p)
 	/*
 	 * Utilization (estimated) can exceed the CPU capacity, thus let's
 	 * clamp to the maximum CPU capacity to ensure consistency with
-	 * the cpu_util call.
+	 * cpu_util.
 	 */
 	return min_t(unsigned long, util, capacity_orig_of(cpu));
 }
@@ -6627,7 +6572,7 @@ static unsigned long cpu_util_next(int cpu, struct task_struct *p, int dst_cpu)
 		 * During wake-up, the task isn't enqueued yet and doesn't
 		 * appear in the cfs_rq->avg.util_est.enqueued of any rq,
 		 * so just add it (if needed) to "simulate" what will be
-		 * cpu_util() after the task has been enqueued.
+		 * cpu_util after the task has been enqueued.
 		 */
 		if (dst_cpu == cpu)
 			util_est += _task_util_est(p);
@@ -8689,7 +8634,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 		struct rq *rq = cpu_rq(i);
 
 		sgs->group_load += cpu_load(rq);
-		sgs->group_util += cpu_util(i);
+		sgs->group_util += cpu_util_cfs(i);
 		sgs->group_runnable += cpu_runnable(rq);
 		sgs->sum_h_nr_running += rq->cfs.h_nr_running;
 
@@ -9707,7 +9652,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 			break;
 
 		case migrate_util:
-			util = cpu_util(cpu_of(rq));
+			util = cpu_util_cfs(i);
 
 			/*
 			 * Don't try to pull utilization from a CPU with one
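Both the removed cpu_util() and the consolidated cpu_util_cfs() implement the same estimation rule described in the comment blocks above: take the PELT average, raise it to the util_est floor when UTIL_EST is enabled, and cap at capacity_orig. A standalone model of that rule (plain C with illustrative parameter types; sched_feat(), READ_ONCE() and the cfs_rq layout are kernel-only and elided here):

	/* Userspace model of the estimation rule, not kernel code. */
	static unsigned long util_estimate(unsigned long util_avg,
					   unsigned long util_est_enqueued,
					   unsigned long capacity_orig,
					   int util_est_enabled)
	{
		unsigned long util = util_avg;

		/* UTIL_EST: prefer the enqueued estimate when it is higher,
		 * so a freshly woken big task is not under-reported. */
		if (util_est_enabled && util_est_enqueued > util)
			util = util_est_enqueued;

		/* Clamp to [0..capacity_orig], the capacity at f_max. */
		return util < capacity_orig ? util : capacity_orig;
	}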

kernel/sched/sched.h

Lines changed: 40 additions & 4 deletions
@@ -2966,16 +2966,52 @@ static inline unsigned long cpu_util_dl(struct rq *rq)
 	return READ_ONCE(rq->avg_dl.util_avg);
 }
 
-static inline unsigned long cpu_util_cfs(struct rq *rq)
+/**
+ * cpu_util_cfs() - Estimates the amount of CPU capacity used by CFS tasks.
+ * @cpu: the CPU to get the utilization for.
+ *
+ * The unit of the return value must be the same as the one of CPU capacity
+ * so that CPU utilization can be compared with CPU capacity.
+ *
+ * CPU utilization is the sum of running time of runnable tasks plus the
+ * recent utilization of currently non-runnable tasks on that CPU.
+ * It represents the amount of CPU capacity currently used by CFS tasks in
+ * the range [0..max CPU capacity] with max CPU capacity being the CPU
+ * capacity at f_max.
+ *
+ * The estimated CPU utilization is defined as the maximum between CPU
+ * utilization and sum of the estimated utilization of the currently
+ * runnable tasks on that CPU. It preserves a utilization "snapshot" of
+ * previously-executed tasks, which helps better deduce how busy a CPU will
+ * be when a long-sleeping task wakes up. The contribution to CPU utilization
+ * of such a task would be significantly decayed at this point of time.
+ *
+ * CPU utilization can be higher than the current CPU capacity
+ * (f_curr/f_max * max CPU capacity) or even the max CPU capacity because
+ * of rounding errors as well as task migrations or wakeups of new tasks.
+ * CPU utilization has to be capped to fit into the [0..max CPU capacity]
+ * range. Otherwise a group of CPUs (CPU0 util = 121% + CPU1 util = 80%)
+ * could be seen as over-utilized even though CPU1 has 20% of spare CPU
+ * capacity. CPU utilization is allowed to overshoot current CPU capacity
+ * though since this is useful for predicting the CPU capacity required
+ * after task migrations (scheduler-driven DVFS).
+ *
+ * Return: (Estimated) utilization for the specified CPU.
+ */
+static inline unsigned long cpu_util_cfs(int cpu)
 {
-	unsigned long util = READ_ONCE(rq->cfs.avg.util_avg);
+	struct cfs_rq *cfs_rq;
+	unsigned long util;
+
+	cfs_rq = &cpu_rq(cpu)->cfs;
+	util = READ_ONCE(cfs_rq->avg.util_avg);
 
 	if (sched_feat(UTIL_EST)) {
 		util = max_t(unsigned long, util,
-			     READ_ONCE(rq->cfs.avg.util_est.enqueued));
+			     READ_ONCE(cfs_rq->avg.util_est.enqueued));
 	}
 
-	return util;
+	return min(util, capacity_orig_of(cpu));
 }
 
 static inline unsigned long cpu_util_rt(struct rq *rq)
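The over-utilization example in the new kernel-doc (CPU0 at 121% + CPU1 at 80%) can be made concrete. A small standalone sketch with illustrative numbers (a capacity_orig of 1024 is an assumption, not taken from this diff):

	/* Standalone sketch of why cpu_util_cfs() clamps; illustrative
	 * numbers, not kernel code. Compiles as plain C. */
	#include <stdio.h>

	#define CAPACITY_ORIG 1024UL	/* assumed max CPU capacity at f_max */

	static unsigned long clamp_util(unsigned long util)
	{
		return util < CAPACITY_ORIG ? util : CAPACITY_ORIG;
	}

	int main(void)
	{
		unsigned long cpu0 = 1239;	/* ~121%: overshoot after a migration */
		unsigned long cpu1 = 819;	/* ~80%: CPU1 still has ~20% spare */

		/* Unclamped, the group sum exceeds 2 * capacity and the pair
		 * looks over-utilized: 1239 + 819 = 2058 > 2048. */
		printf("raw sum:     %lu\n", cpu0 + cpu1);

		/* Clamped, CPU1's spare capacity stays visible:
		 * 1024 + 819 = 1843 <= 2048. */
		printf("clamped sum: %lu\n", clamp_util(cpu0) + clamp_util(cpu1));

		return 0;
	}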
