
Commit d741f29

Daniel Bristot de Oliveira authored and Peter Zijlstra committed
sched/fair: Fair server interface
Add an interface for fair server setup on debugfs.

Each CPU has two files under /debug/sched/fair_server/cpu{ID}:

 - runtime: set runtime in ns
 - period: set period in ns

This then leaves /proc/sys/kernel/sched_rt_{period,runtime}_us to set
bounds on admission control.

The interface also adds the server to the dl bandwidth accounting.

Signed-off-by: Daniel Bristot de Oliveira <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Tested-by: Juri Lelli <[email protected]>
Link: https://lore.kernel.org/r/a9ef9fc69bcedb44bddc9bc34f2b313296052819.1716811044.git.bristot@kernel.org
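Not part of the commit, but as a usage sketch: once the kernel exposes these files, the parameters can be set with a plain write. The example below assumes debugfs is mounted at /sys/kernel/debug (the /debug/ prefix in the message being the usual shorthand) and uses an illustrative 50 ms runtime.

```c
/*
 * Minimal userspace sketch (not part of this commit): set the fair
 * server runtime on CPU 0. Assumes debugfs is mounted at
 * /sys/kernel/debug; the 50 ms value is illustrative only.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/sched/fair_server/cpu0/runtime";
	const char *val = "50000000\n";		/* 50 ms, in ns */
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, val, strlen(val)) < 0)
		perror("write");
	close(fd);
	return 0;
}
```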
1 parent a110a81 commit d741f29

File tree: 4 files changed, +256 -17 lines changed


kernel/sched/deadline.c

Lines changed: 86 additions & 17 deletions
```diff
@@ -320,33 +320,38 @@ void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 		__sub_running_bw(dl_se->dl_bw, dl_rq);
 }
 
-static void dl_change_utilization(struct task_struct *p, u64 new_bw)
+static void dl_rq_change_utilization(struct rq *rq, struct sched_dl_entity *dl_se, u64 new_bw)
 {
-	struct rq *rq;
-
-	WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV);
-
-	if (task_on_rq_queued(p))
-		return;
+	if (dl_se->dl_non_contending) {
+		sub_running_bw(dl_se, &rq->dl);
+		dl_se->dl_non_contending = 0;
 
-	rq = task_rq(p);
-	if (p->dl.dl_non_contending) {
-		sub_running_bw(&p->dl, &rq->dl);
-		p->dl.dl_non_contending = 0;
 		/*
 		 * If the timer handler is currently running and the
 		 * timer cannot be canceled, inactive_task_timer()
 		 * will see that dl_not_contending is not set, and
 		 * will not touch the rq's active utilization,
 		 * so we are still safe.
 		 */
-		if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
-			put_task_struct(p);
+		if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1) {
+			if (!dl_server(dl_se))
+				put_task_struct(dl_task_of(dl_se));
+		}
 	}
-	__sub_rq_bw(p->dl.dl_bw, &rq->dl);
+	__sub_rq_bw(dl_se->dl_bw, &rq->dl);
 	__add_rq_bw(new_bw, &rq->dl);
 }
 
+static void dl_change_utilization(struct task_struct *p, u64 new_bw)
+{
+	WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV);
+
+	if (task_on_rq_queued(p))
+		return;
+
+	dl_rq_change_utilization(task_rq(p), &p->dl, new_bw);
+}
+
 static void __dl_clear_params(struct sched_dl_entity *dl_se);
 
 /*
@@ -1621,11 +1626,17 @@ void dl_server_start(struct sched_dl_entity *dl_se)
 {
 	struct rq *rq = dl_se->rq;
 
+	/*
+	 * XXX: the apply do not work fine at the init phase for the
+	 * fair server because things are not yet set. We need to improve
+	 * this before getting generic.
+	 */
 	if (!dl_server(dl_se)) {
 		/* Disabled */
-		dl_se->dl_runtime = 0;
-		dl_se->dl_deadline = 1000 * NSEC_PER_MSEC;
-		dl_se->dl_period = 1000 * NSEC_PER_MSEC;
+		u64 runtime = 0;
+		u64 period = 1000 * NSEC_PER_MSEC;
+
+		dl_server_apply_params(dl_se, runtime, period, 1);
 
 		dl_se->dl_server = 1;
 		dl_se->dl_defer = 1;
@@ -1660,6 +1671,64 @@ void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
 	dl_se->server_pick = pick;
 }
 
+void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq)
+{
+	u64 new_bw = dl_se->dl_bw;
+	int cpu = cpu_of(rq);
+	struct dl_bw *dl_b;
+
+	dl_b = dl_bw_of(cpu_of(rq));
+	guard(raw_spinlock)(&dl_b->lock);
+
+	if (!dl_bw_cpus(cpu))
+		return;
+
+	__dl_add(dl_b, new_bw, dl_bw_cpus(cpu));
+}
+
+int dl_server_apply_params(struct sched_dl_entity *dl_se, u64 runtime, u64 period, bool init)
+{
+	u64 old_bw = init ? 0 : to_ratio(dl_se->dl_period, dl_se->dl_runtime);
+	u64 new_bw = to_ratio(period, runtime);
+	struct rq *rq = dl_se->rq;
+	int cpu = cpu_of(rq);
+	struct dl_bw *dl_b;
+	unsigned long cap;
+	int retval = 0;
+	int cpus;
+
+	dl_b = dl_bw_of(cpu);
+	guard(raw_spinlock)(&dl_b->lock);
+
+	cpus = dl_bw_cpus(cpu);
+	cap = dl_bw_capacity(cpu);
+
+	if (__dl_overflow(dl_b, cap, old_bw, new_bw))
+		return -EBUSY;
+
+	if (init) {
+		__add_rq_bw(new_bw, &rq->dl);
+		__dl_add(dl_b, new_bw, cpus);
+	} else {
+		__dl_sub(dl_b, dl_se->dl_bw, cpus);
+		__dl_add(dl_b, new_bw, cpus);
+
+		dl_rq_change_utilization(rq, dl_se, new_bw);
+	}
+
+	dl_se->dl_runtime = runtime;
+	dl_se->dl_deadline = period;
+	dl_se->dl_period = period;
+
+	dl_se->runtime = 0;
+	dl_se->deadline = 0;
+
+	dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
+	dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
+
+	return retval;
+}
+
 /*
  * Update the current task's runtime statistics (provided it is still
  * a -deadline task and has not been removed from the dl_rq).
```
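For reference, dl_server_apply_params() above expresses both the old and the new reservation as a fixed-point utilization via to_ratio() before running the __dl_overflow() admission test. The sketch below restates that math in standalone C; it is an illustration only, using the kernel's BW_SHIFT of 20 but plain 64-bit division in place of div64_u64(), and omitting the RUNTIME_INF special case.

```c
#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT	20
#define BW_UNIT		(1 << BW_SHIFT)

/* Simplified restatement of the kernel's to_ratio(): runtime/period in Q20. */
static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
	if (period == 0)
		return 0;
	return (runtime << BW_SHIFT) / period;
}

int main(void)
{
	uint64_t runtime = 50000000ULL;		/* 50 ms, illustrative */
	uint64_t period  = 1000000000ULL;	/* 1 s */
	uint64_t bw = to_ratio(period, runtime);

	/* 50 ms / 1 s is 5% of a CPU: 0.05 * 2^20 = 52428 (rounded down) */
	printf("dl_bw = %llu of %d (%.2f%%)\n",
	       (unsigned long long) bw, BW_UNIT, 100.0 * bw / BW_UNIT);
	return 0;
}
```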

kernel/sched/debug.c

Lines changed: 159 additions & 0 deletions
```diff
@@ -333,8 +333,165 @@ static const struct file_operations sched_debug_fops = {
 	.release	= seq_release,
 };
 
+enum dl_param {
+	DL_RUNTIME = 0,
+	DL_PERIOD,
+};
+
+static unsigned long fair_server_period_max = (1 << 22) * NSEC_PER_USEC; /* ~4 seconds */
+static unsigned long fair_server_period_min = (100) * NSEC_PER_USEC;     /* 100 us */
+
+static ssize_t sched_fair_server_write(struct file *filp, const char __user *ubuf,
+				       size_t cnt, loff_t *ppos, enum dl_param param)
+{
+	long cpu = (long) ((struct seq_file *) filp->private_data)->private;
+	struct rq *rq = cpu_rq(cpu);
+	u64 runtime, period;
+	size_t err;
+	int retval;
+	u64 value;
+
+	err = kstrtoull_from_user(ubuf, cnt, 10, &value);
+	if (err)
+		return err;
+
+	scoped_guard (rq_lock_irqsave, rq) {
+		runtime = rq->fair_server.dl_runtime;
+		period = rq->fair_server.dl_period;
+
+		switch (param) {
+		case DL_RUNTIME:
+			if (runtime == value)
+				break;
+			runtime = value;
+			break;
+		case DL_PERIOD:
+			if (value == period)
+				break;
+			period = value;
+			break;
+		}
+
+		if (runtime > period ||
+		    period > fair_server_period_max ||
+		    period < fair_server_period_min) {
+			return -EINVAL;
+		}
+
+		if (rq->cfs.h_nr_running) {
+			update_rq_clock(rq);
+			dl_server_stop(&rq->fair_server);
+		}
+
+		retval = dl_server_apply_params(&rq->fair_server, runtime, period, 0);
+		if (retval)
+			cnt = retval;
+
+		if (!runtime)
+			printk_deferred("Fair server disabled in CPU %d, system may crash due to starvation.\n",
+					cpu_of(rq));
+
+		if (rq->cfs.h_nr_running)
+			dl_server_start(&rq->fair_server);
+	}
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static size_t sched_fair_server_show(struct seq_file *m, void *v, enum dl_param param)
+{
+	unsigned long cpu = (unsigned long) m->private;
+	struct rq *rq = cpu_rq(cpu);
+	u64 value;
+
+	switch (param) {
+	case DL_RUNTIME:
+		value = rq->fair_server.dl_runtime;
+		break;
+	case DL_PERIOD:
+		value = rq->fair_server.dl_period;
+		break;
+	}
+
+	seq_printf(m, "%llu\n", value);
+	return 0;
+}
+
+static ssize_t
+sched_fair_server_runtime_write(struct file *filp, const char __user *ubuf,
+				size_t cnt, loff_t *ppos)
+{
+	return sched_fair_server_write(filp, ubuf, cnt, ppos, DL_RUNTIME);
+}
+
+static int sched_fair_server_runtime_show(struct seq_file *m, void *v)
+{
+	return sched_fair_server_show(m, v, DL_RUNTIME);
+}
+
+static int sched_fair_server_runtime_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, sched_fair_server_runtime_show, inode->i_private);
+}
+
+static const struct file_operations fair_server_runtime_fops = {
+	.open		= sched_fair_server_runtime_open,
+	.write		= sched_fair_server_runtime_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static ssize_t
+sched_fair_server_period_write(struct file *filp, const char __user *ubuf,
+			       size_t cnt, loff_t *ppos)
+{
+	return sched_fair_server_write(filp, ubuf, cnt, ppos, DL_PERIOD);
+}
+
+static int sched_fair_server_period_show(struct seq_file *m, void *v)
+{
+	return sched_fair_server_show(m, v, DL_PERIOD);
+}
+
+static int sched_fair_server_period_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, sched_fair_server_period_show, inode->i_private);
+}
+
+static const struct file_operations fair_server_period_fops = {
+	.open		= sched_fair_server_period_open,
+	.write		= sched_fair_server_period_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 static struct dentry *debugfs_sched;
 
+static void debugfs_fair_server_init(void)
+{
+	struct dentry *d_fair;
+	unsigned long cpu;
+
+	d_fair = debugfs_create_dir("fair_server", debugfs_sched);
+	if (!d_fair)
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct dentry *d_cpu;
+		char buf[32];
+
+		snprintf(buf, sizeof(buf), "cpu%lu", cpu);
+		d_cpu = debugfs_create_dir(buf, d_fair);
+
+		debugfs_create_file("runtime", 0644, d_cpu, (void *) cpu, &fair_server_runtime_fops);
+		debugfs_create_file("period", 0644, d_cpu, (void *) cpu, &fair_server_period_fops);
+	}
+}
+
 static __init int sched_init_debug(void)
 {
 	struct dentry __maybe_unused *numa;
@@ -374,6 +531,8 @@ static __init int sched_init_debug(void)
 
 	debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops);
 
+	debugfs_fair_server_init();
+
 	return 0;
 }
 late_initcall(sched_init_debug);
```
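Seen from userspace, the checks in sched_fair_server_write() above map onto ordinary errno values: runtime > period, or a period outside [fair_server_period_min, fair_server_period_max], fails the write with EINVAL, while a combination that fails deadline admission control in dl_server_apply_params() fails with EBUSY. A minimal sketch of probing those two outcomes, again assuming debugfs is mounted at /sys/kernel/debug:

```c
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Write a decimal nanosecond value to a fair server file;
 * returns 0 on success, -errno on failure. */
static int set_param(const char *path, const char *val)
{
	int ret = 0;
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -errno;
	if (write(fd, val, strlen(val)) < 0)
		ret = -errno;
	close(fd);
	return ret;
}

int main(void)
{
	/* 50 us is below the 100 us minimum period: expect EINVAL. */
	int ret = set_param("/sys/kernel/debug/sched/fair_server/cpu0/period",
			    "50000\n");

	if (ret == -EINVAL)
		fprintf(stderr, "rejected by the range check\n");
	else if (ret == -EBUSY)
		fprintf(stderr, "rejected by dl admission control\n");
	return 0;
}
```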

kernel/sched/sched.h

Lines changed: 3 additions & 0 deletions
```diff
@@ -366,6 +366,9 @@ extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
 extern void dl_server_update_idle_time(struct rq *rq,
 		    struct task_struct *p);
 extern void fair_server_init(struct rq *rq);
+extern void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq);
+extern int dl_server_apply_params(struct sched_dl_entity *dl_se,
+		    u64 runtime, u64 period, bool init);
 
 #ifdef CONFIG_CGROUP_SCHED
```

kernel/sched/topology.c

Lines changed: 8 additions & 0 deletions
```diff
@@ -516,6 +516,14 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd)
 	if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
 		set_rq_online(rq);
 
+	/*
+	 * Because the rq is not a task, dl_add_task_root_domain() did not
+	 * move the fair server bw to the rd if it already started.
+	 * Add it now.
+	 */
+	if (rq->fair_server.dl_server)
+		__dl_server_attach_root(&rq->fair_server, rq);
+
 	rq_unlock_irqrestore(rq, &rf);
 
 	if (old_rd)
```
