
Commit 30d0a53

vingu-linaro authored and gregkh committed
sched/fair: Sanitize vruntime of entity being migrated
commit a53ce18 upstream.

Commit 829c165 ("sched/fair: sanitize vruntime of entity being placed") fixes an overflow bug, but ignores the case where se->exec_start is reset after a migration. To handle this case, delay the reset of se->exec_start until after placing the entity, which uses se->exec_start to detect a long-sleeping task. To take into account a possible divergence between the clock_task of two runqueues, increase the threshold to around 104 days.

Fixes: 829c165 ("sched/fair: sanitize vruntime of entity being placed")
Originally-by: Zhang Qiao <[email protected]>
Signed-off-by: Vincent Guittot <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Tested-by: Zhang Qiao <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent a398059 commit 30d0a53
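As a sanity check on the "~104 days" figure in the message above, here is a small user-space sketch (not part of the patch) that evaluates the new cutoff. It assumes scale_load_down(NICE_0_LOAD) evaluates to 1024, as on 64-bit builds.

/* Standalone sketch of the cutoff arithmetic referenced above. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Assumption: scale_load_down(NICE_0_LOAD) == 1024 on 64-bit kernels. */
	const uint64_t nice_0_load = 1024;
	uint64_t cutoff_ns = (1ULL << 63) / nice_0_load;

	printf("cutoff = %llu ns ~ %.1f days\n",
	       (unsigned long long)cutoff_ns,
	       (double)cutoff_ns / 1e9 / 60 / 60 / 24);
	return 0;
}

This prints roughly 9007199254740992 ns, i.e. about 104.2 days, matching the comment added in place_entity() below.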

File tree

2 files changed: 47 additions & 11 deletions

kernel/sched/core.c

Lines changed: 3 additions & 0 deletions
@@ -741,6 +741,9 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 
 void activate_task(struct rq *rq, struct task_struct *p, int flags)
 {
+	if (task_on_rq_migrating(p))
+		flags |= ENQUEUE_MIGRATED;
+
 	if (task_contributes_to_load(p))
 		rq->nr_uninterruptible--;
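The ENQUEUE_MIGRATED flag set here is consumed by the enqueue_entity() change in kernel/sched/fair.c below, which is what defers the exec_start reset until after placement. A minimal user-space model of that flow follows (illustration only; the flag values and the one-field sched_entity are simplified stand-ins for the kernel definitions):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins, not the kernel's real definitions. */
#define ENQUEUE_WAKEUP   0x01
#define ENQUEUE_MIGRATED 0x40

struct sched_entity {
	uint64_t exec_start;
};

static void place_entity(struct sched_entity *se)
{
	/* Still sees the old exec_start, so the long-sleeper check can work. */
	printf("place_entity: exec_start=%llu\n",
	       (unsigned long long)se->exec_start);
}

static void enqueue_entity(struct sched_entity *se, int flags)
{
	if (flags & ENQUEUE_WAKEUP)
		place_entity(se);
	/* Entity has migrated, no longer consider this task hot */
	if (flags & ENQUEUE_MIGRATED)
		se->exec_start = 0;
}

int main(void)
{
	struct sched_entity se = { .exec_start = 123456789 };
	bool migrating = true;
	int flags = ENQUEUE_WAKEUP;

	if (migrating)			/* activate_task(): flag the migration */
		flags |= ENQUEUE_MIGRATED;

	enqueue_entity(&se, flags);	/* exec_start used, then cleared */
	printf("after enqueue: exec_start=%llu\n",
	       (unsigned long long)se.exec_start);
	return 0;
}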

kernel/sched/fair.c

Lines changed: 44 additions & 11 deletions
@@ -3854,11 +3854,33 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
 #endif
 }
 
+static inline bool entity_is_long_sleeper(struct sched_entity *se)
+{
+	struct cfs_rq *cfs_rq;
+	u64 sleep_time;
+
+	if (se->exec_start == 0)
+		return false;
+
+	cfs_rq = cfs_rq_of(se);
+
+	sleep_time = rq_clock_task(rq_of(cfs_rq));
+
+	/* Happen while migrating because of clock task divergence */
+	if (sleep_time <= se->exec_start)
+		return false;
+
+	sleep_time -= se->exec_start;
+	if (sleep_time > ((1ULL << 63) / scale_load_down(NICE_0_LOAD)))
+		return true;
+
+	return false;
+}
+
 static void
 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 {
 	u64 vruntime = cfs_rq->min_vruntime;
-	u64 sleep_time;
 
 	/*
 	 * The 'current' period is already promised to the current tasks,
@@ -3885,13 +3907,24 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 
 	/*
 	 * Pull vruntime of the entity being placed to the base level of
-	 * cfs_rq, to prevent boosting it if placed backwards. If the entity
-	 * slept for a long time, don't even try to compare its vruntime with
-	 * the base as it may be too far off and the comparison may get
-	 * inversed due to s64 overflow.
-	 */
-	sleep_time = rq_clock_task(rq_of(cfs_rq)) - se->exec_start;
-	if ((s64)sleep_time > 60LL * NSEC_PER_SEC)
+	 * cfs_rq, to prevent boosting it if placed backwards.
+	 * However, min_vruntime can advance much faster than real time, with
+	 * the extreme being when an entity with the minimal weight always runs
+	 * on the cfs_rq. If the waking entity slept for a long time, its
+	 * vruntime difference from min_vruntime may overflow s64 and their
+	 * comparison may get inversed, so ignore the entity's original
+	 * vruntime in that case.
+	 * The maximal vruntime speedup is given by the ratio of normal to
+	 * minimal weight: scale_load_down(NICE_0_LOAD) / MIN_SHARES.
+	 * When placing a migrated waking entity, its exec_start has been set
+	 * from a different rq. In order to take into account a possible
+	 * divergence between new and prev rq's clocks task because of irq and
+	 * stolen time, we take an additional margin.
+	 * So, cutting off on the sleep time of
+	 *	2^63 / scale_load_down(NICE_0_LOAD) ~ 104 days
+	 * should be safe.
+	 */
+	if (entity_is_long_sleeper(se))
 		se->vruntime = vruntime;
 	else
 		se->vruntime = max_vruntime(se->vruntime, vruntime);
@@ -3989,6 +4022,9 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	if (flags & ENQUEUE_WAKEUP)
 		place_entity(cfs_rq, se, 0);
+	/* Entity has migrated, no longer consider this task hot */
+	if (flags & ENQUEUE_MIGRATED)
+		se->exec_start = 0;
 
 	check_schedstat_required();
 	update_stats_enqueue(cfs_rq, se, flags);
@@ -6555,9 +6591,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
 	/* Tell new CPU we are migrated */
 	p->se.avg.last_update_time = 0;
 
-	/* We have migrated, no longer consider this task hot */
-	p->se.exec_start = 0;
-
 	update_scan_period(p, new_cpu);
 }
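For context on why place_entity() must not blindly compare a very stale vruntime, here is a user-space sketch of the s64-overflow inversion described in the new comment. The max_vruntime() helper below mirrors the kernel helper's (s64)(vruntime - max_vruntime) comparison; the sample values are invented for illustration.

#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;
typedef int64_t s64;

/* Same comparison scheme as the kernel's max_vruntime() helper. */
static u64 max_vruntime(u64 max_vruntime, u64 vruntime)
{
	s64 delta = (s64)(vruntime - max_vruntime);
	if (delta > 0)
		max_vruntime = vruntime;
	return max_vruntime;
}

int main(void)
{
	u64 se_vruntime  = 1000;			/* entity that slept "forever" */
	u64 min_vruntime = 1000 + (1ULL << 63);		/* cfs_rq base ran far ahead */

	/*
	 * The true maximum is min_vruntime, but the gap exceeds 2^63 ns, so
	 * the s64 delta wraps negative and the stale vruntime "wins" -- the
	 * inversion the place_entity() comment warns about.
	 */
	u64 picked = max_vruntime(se_vruntime, min_vruntime);

	printf("picked %llu, expected %llu\n",
	       (unsigned long long)picked,
	       (unsigned long long)min_vruntime);
	return 0;
}

This is why the entity's original vruntime is ignored entirely once entity_is_long_sleeper() reports that the sleep exceeded the ~104 day cutoff.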
