@@ -3666,41 +3666,140 @@ static inline void
36663666dequeue_load_avg (struct cfs_rq * cfs_rq , struct sched_entity * se ) { }
36673667#endif
36683668
3669+ static void reweight_eevdf (struct cfs_rq * cfs_rq , struct sched_entity * se ,
3670+ unsigned long weight )
3671+ {
3672+ unsigned long old_weight = se -> load .weight ;
3673+ u64 avruntime = avg_vruntime (cfs_rq );
3674+ s64 vlag , vslice ;
3675+
3676+ /*
3677+ * VRUNTIME
3678+ * ========
3679+ *
3680+ * COROLLARY #1: The virtual runtime of the entity needs to be
3681+ * adjusted if re-weight at !0-lag point.
3682+ *
3683+ * Proof: For contradiction assume this is not true, so we can
3684+ * re-weight without changing vruntime at !0-lag point.
3685+ *
3686+ * Weight VRuntime Avg-VRuntime
3687+ * before w v V
3688+ * after w' v' V'
3689+ *
3690+ * Since lag needs to be preserved through re-weight:
3691+ *
3692+ * lag = (V - v)*w = (V'- v')*w', where v = v'
3693+ * ==> V' = (V - v)*w/w' + v (1)
3694+ *
3695+ * Let W be the total weight of the entities before reweight,
3696+ * since V' is the new weighted average of entities:
3697+ *
3698+ * V' = (WV + w'v - wv) / (W + w' - w) (2)
3699+ *
3700+ * by using (1) & (2) we obtain:
3701+ *
3702+ * (WV + w'v - wv) / (W + w' - w) = (V - v)*w/w' + v
3703+ * ==> (WV-Wv+Wv+w'v-wv)/(W+w'-w) = (V - v)*w/w' + v
3704+ * ==> (WV - Wv)/(W + w' - w) + v = (V - v)*w/w' + v
3705+ * ==> (V - v)*W/(W + w' - w) = (V - v)*w/w' (3)
3706+ *
3707+ * Since we are doing at !0-lag point which means V != v, we
3708+ * can simplify (3):
3709+ *
3710+ * ==> W / (W + w' - w) = w / w'
3711+ * ==> Ww' = Ww + ww' - ww
3712+ * ==> W * (w' - w) = w * (w' - w)
3713+ * ==> W = w (re-weight indicates w' != w)
3714+ *
3715+ * So the cfs_rq contains only one entity, hence vruntime of
3716+ * the entity @v should always equal to the cfs_rq's weighted
3717+ * average vruntime @V, which means we will always re-weight
3718+ * at 0-lag point, thus breach assumption. Proof completed.
3719+ *
3720+ *
3721+ * COROLLARY #2: Re-weight does NOT affect weighted average
3722+ * vruntime of all the entities.
3723+ *
3724+ * Proof: According to corollary #1, Eq. (1) should be:
3725+ *
3726+ * (V - v)*w = (V' - v')*w'
3727+ * ==> v' = V' - (V - v)*w/w' (4)
3728+ *
3729+ * According to the weighted average formula, we have:
3730+ *
3731+ * V' = (WV - wv + w'v') / (W - w + w')
3732+ * = (WV - wv + w'(V' - (V - v)w/w')) / (W - w + w')
3733+ * = (WV - wv + w'V' - Vw + wv) / (W - w + w')
3734+ * = (WV + w'V' - Vw) / (W - w + w')
3735+ *
3736+ * ==> V'*(W - w + w') = WV + w'V' - Vw
3737+ * ==> V' * (W - w) = (W - w) * V (5)
3738+ *
3739+ * If the entity is the only one in the cfs_rq, then reweight
3740+ * always occurs at 0-lag point, so V won't change. Or else
3741+ * there are other entities, hence W != w, then Eq. (5) turns
3742+ * into V' = V. So V won't change in either case, proof done.
3743+ *
3744+ *
3745+ * So according to corollary #1 & #2, the effect of re-weight
3746+ * on vruntime should be:
3747+ *
3748+ * v' = V' - (V - v) * w / w' (4)
3749+ * = V - (V - v) * w / w'
3750+ * = V - vl * w / w'
3751+ * = V - vl'
3752+ */
3753+ if (avruntime != se -> vruntime ) {
3754+ vlag = (s64 )(avruntime - se -> vruntime );
3755+ vlag = div_s64 (vlag * old_weight , weight );
3756+ se -> vruntime = avruntime - vlag ;
3757+ }
3758+
3759+ /*
3760+ * DEADLINE
3761+ * ========
3762+ *
3763+ * When the weight changes, the virtual time slope changes and
3764+ * we should adjust the relative virtual deadline accordingly.
3765+ *
3766+ * d' = v' + (d - v)*w/w'
3767+ * = V' - (V - v)*w/w' + (d - v)*w/w'
3768+ * = V - (V - v)*w/w' + (d - v)*w/w'
3769+ * = V + (d - V)*w/w'
3770+ */
3771+ vslice = (s64 )(se -> deadline - avruntime );
3772+ vslice = div_s64 (vslice * old_weight , weight );
3773+ se -> deadline = avruntime + vslice ;
3774+ }
3775+
36693776static void reweight_entity (struct cfs_rq * cfs_rq , struct sched_entity * se ,
36703777 unsigned long weight )
36713778{
3672- unsigned long old_weight = se -> load . weight ;
3779+ bool curr = cfs_rq -> curr == se ;
36733780
36743781 if (se -> on_rq ) {
36753782 /* commit outstanding execution time */
3676- if (cfs_rq -> curr == se )
3783+ if (curr )
36773784 update_curr (cfs_rq );
36783785 else
3679- avg_vruntime_sub (cfs_rq , se );
3786+ __dequeue_entity (cfs_rq , se );
36803787 update_load_sub (& cfs_rq -> load , se -> load .weight );
36813788 }
36823789 dequeue_load_avg (cfs_rq , se );
36833790
3684- update_load_set (& se -> load , weight );
3685-
36863791 if (!se -> on_rq ) {
36873792 /*
36883793 * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
36893794 * we need to scale se->vlag when w_i changes.
36903795 */
3691- se -> vlag = div_s64 (se -> vlag * old_weight , weight );
3796+ se -> vlag = div_s64 (se -> vlag * se -> load . weight , weight );
36923797 } else {
3693- s64 deadline = se -> deadline - se -> vruntime ;
3694- /*
3695- * When the weight changes, the virtual time slope changes and
3696- * we should adjust the relative virtual deadline accordingly.
3697- */
3698- deadline = div_s64 (deadline * old_weight , weight );
3699- se -> deadline = se -> vruntime + deadline ;
3700- if (se != cfs_rq -> curr )
3701- min_deadline_cb_propagate (& se -> run_node , NULL );
3798+ reweight_eevdf (cfs_rq , se , weight );
37023799 }
37033800
3801+ update_load_set (& se -> load , weight );
3802+
37043803#ifdef CONFIG_SMP
37053804 do {
37063805 u32 divider = get_pelt_divider (& se -> avg );
@@ -3712,8 +3811,17 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
37123811 enqueue_load_avg (cfs_rq , se );
37133812 if (se -> on_rq ) {
37143813 update_load_add (& cfs_rq -> load , se -> load .weight );
3715- if (cfs_rq -> curr != se )
3716- avg_vruntime_add (cfs_rq , se );
3814+ if (!curr ) {
3815+ /*
3816+ * The entity's vruntime has been adjusted, so let's check
3817+ * whether the rq-wide min_vruntime needs updated too. Since
3818+ * the calculations above require stable min_vruntime rather
3819+ * than up-to-date one, we do the update at the end of the
3820+ * reweight process.
3821+ */
3822+ __enqueue_entity (cfs_rq , se );
3823+ update_min_vruntime (cfs_rq );
3824+ }
37173825 }
37183826}
37193827
@@ -3857,14 +3965,11 @@ static void update_cfs_group(struct sched_entity *se)
38573965
38583966#ifndef CONFIG_SMP
38593967 shares = READ_ONCE (gcfs_rq -> tg -> shares );
3860-
3861- if (likely (se -> load .weight == shares ))
3862- return ;
38633968#else
3864- shares = calc_group_shares (gcfs_rq );
3969+ shares = calc_group_shares (gcfs_rq );
38653970#endif
3866-
3867- reweight_entity (cfs_rq_of (se ), se , shares );
3971+ if ( unlikely ( se -> load . weight != shares ))
3972+ reweight_entity (cfs_rq_of (se ), se , shares );
38683973}
38693974
38703975#else /* CONFIG_FAIR_GROUP_SCHED */
@@ -11079,12 +11184,16 @@ static int should_we_balance(struct lb_env *env)
1107911184 continue ;
1108011185 }
1108111186
11082- /* Are we the first idle CPU? */
11187+ /*
11188+ * Are we the first idle core in a non-SMT domain or higher,
11189+ * or the first idle CPU in a SMT domain?
11190+ */
1108311191 return cpu == env -> dst_cpu ;
1108411192 }
1108511193
11086- if (idle_smt == env -> dst_cpu )
11087- return true;
11194+ /* Are we the first idle CPU with busy siblings? */
11195+ if (idle_smt != -1 )
11196+ return idle_smt == env -> dst_cpu ;
1108811197
1108911198 /* Are we the first CPU of this group ? */
1109011199 return group_balance_cpu (sg ) == env -> dst_cpu ;
0 commit comments