Skip to content

Commit b0853ab

Browse files
tejun (Tejun Heo) authored and axboe (Jens Axboe) committed
blk-iocost: revamp in-period donation snapbacks
When the margin drops below the minimum on a donating iocg, donation is immediately canceled in full. There are a couple shortcomings with the current behavior.

* It's abrupt. A small temporary budget deficit can lead to a wide swing in weight allocation and a large surplus.

* It's open coded in the issue path but not implemented for the merge path. A series of merges at a low inuse can make the iocg incur debts and stall incorrectly.

This patch reimplements in-period donation snapbacks so that

* inuse adjustment and cost calculations are factored into adjust_inuse_and_calc_cost() which is called from both the issue and merge paths.

* Snapbacks are more gradual. It occurs in quarter steps.

* A snapback triggers if the margin goes below the low threshold and is lower than the budget at the time of the last adjustment.

* For the above, __propagate_weights() stores the margin in iocg->saved_margin. Move iocg->last_inuse storing together into __propagate_weights() for consistency.

* Full snapback is guaranteed when there are waiters.

* With precise donation and gradual snapbacks, inuse adjustments are now a lot more effective and the value of scaling inuse on weight changes isn't clear. Removed inuse scaling from weight_updated().

Signed-off-by: Tejun Heo <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
1 parent f1de243 commit b0853ab

File tree

1 file changed

+96
-37
lines changed

1 file changed

+96
-37
lines changed

block/blk-iocost.c

Lines changed: 96 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,8 @@ enum {
226226
MARGIN_TARGET_PCT = 50,
227227
MARGIN_MAX_PCT = 100,
228228

229+
INUSE_ADJ_STEP_PCT = 25,
230+
229231
/* Have some play in timer operations */
230232
TIMER_SLACK_PCT = 1,
231233

@@ -443,12 +445,17 @@ struct ioc_gq {
443445
*
444446
* `last_inuse` remembers `inuse` while an iocg is idle to persist
445447
* surplus adjustments.
448+
*
449+
* `inuse` may be adjusted dynamically during period. `saved_*` are used
450+
* to determine and track adjustments.
446451
*/
447452
u32 cfg_weight;
448453
u32 weight;
449454
u32 active;
450455
u32 inuse;
456+
451457
u32 last_inuse;
458+
s64 saved_margin;
452459

453460
sector_t cursor; /* to detect randio */
454461

@@ -934,9 +941,11 @@ static void ioc_start_period(struct ioc *ioc, struct ioc_now *now)
934941

935942
/*
936943
* Update @iocg's `active` and `inuse` to @active and @inuse, update level
937-
* weight sums and propagate upwards accordingly.
944+
* weight sums and propagate upwards accordingly. If @save, the current margin
945+
* is saved to be used as reference for later inuse in-period adjustments.
938946
*/
939-
static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse)
947+
static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse,
948+
bool save, struct ioc_now *now)
940949
{
941950
struct ioc *ioc = iocg->ioc;
942951
int lvl;
@@ -945,6 +954,10 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse)
945954

946955
inuse = clamp_t(u32, inuse, 1, active);
947956

957+
iocg->last_inuse = iocg->inuse;
958+
if (save)
959+
iocg->saved_margin = now->vnow - atomic64_read(&iocg->vtime);
960+
948961
if (active == iocg->active && inuse == iocg->inuse)
949962
return;
950963

@@ -996,9 +1009,10 @@ static void commit_weights(struct ioc *ioc)
9961009
}
9971010
}
9981011

999-
static void propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse)
1012+
static void propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse,
1013+
bool save, struct ioc_now *now)
10001014
{
1001-
__propagate_weights(iocg, active, inuse);
1015+
__propagate_weights(iocg, active, inuse, save, now);
10021016
commit_weights(iocg->ioc);
10031017
}
10041018

@@ -1082,7 +1096,7 @@ static u32 current_hweight_max(struct ioc_gq *iocg)
10821096
return max_t(u32, hwm, 1);
10831097
}
10841098

1085-
static void weight_updated(struct ioc_gq *iocg)
1099+
static void weight_updated(struct ioc_gq *iocg, struct ioc_now *now)
10861100
{
10871101
struct ioc *ioc = iocg->ioc;
10881102
struct blkcg_gq *blkg = iocg_to_blkg(iocg);
@@ -1093,9 +1107,7 @@ static void weight_updated(struct ioc_gq *iocg)
10931107

10941108
weight = iocg->cfg_weight ?: iocc->dfl_weight;
10951109
if (weight != iocg->weight && iocg->active)
1096-
propagate_weights(iocg, weight,
1097-
DIV64_U64_ROUND_UP((u64)iocg->inuse * weight,
1098-
iocg->weight));
1110+
propagate_weights(iocg, weight, iocg->inuse, true, now);
10991111
iocg->weight = weight;
11001112
}
11011113

@@ -1165,8 +1177,9 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now)
11651177
*/
11661178
iocg->hweight_gen = atomic_read(&ioc->hweight_gen) - 1;
11671179
list_add(&iocg->active_list, &ioc->active_iocgs);
1180+
11681181
propagate_weights(iocg, iocg->weight,
1169-
iocg->last_inuse ?: iocg->weight);
1182+
iocg->last_inuse ?: iocg->weight, true, now);
11701183

11711184
TRACE_IOCG_PATH(iocg_activate, iocg, now,
11721185
last_period, cur_period, vtime);
@@ -1789,7 +1802,7 @@ static void transfer_surpluses(struct list_head *surpluses, struct ioc_now *now)
17891802
inuse = DIV64_U64_ROUND_UP(
17901803
parent->child_adjusted_sum * iocg->hweight_after_donation,
17911804
parent->hweight_inuse);
1792-
__propagate_weights(iocg, iocg->active, inuse);
1805+
__propagate_weights(iocg, iocg->active, inuse, true, now);
17931806
}
17941807

17951808
/* walk list should be dissolved after use */
@@ -1844,8 +1857,7 @@ static void ioc_timer_fn(struct timer_list *timer)
18441857
iocg_kick_waitq(iocg, true, &now);
18451858
} else if (iocg_is_idle(iocg)) {
18461859
/* no waiter and idle, deactivate */
1847-
iocg->last_inuse = iocg->inuse;
1848-
__propagate_weights(iocg, 0, 0);
1860+
__propagate_weights(iocg, 0, 0, false, &now);
18491861
list_del_init(&iocg->active_list);
18501862
}
18511863

@@ -1925,7 +1937,7 @@ static void ioc_timer_fn(struct timer_list *timer)
19251937
list_add(&iocg->surplus_list, &surpluses);
19261938
} else {
19271939
__propagate_weights(iocg, iocg->active,
1928-
iocg->active);
1940+
iocg->active, true, &now);
19291941
nr_shortages++;
19301942
}
19311943
} else {
@@ -2055,6 +2067,50 @@ static void ioc_timer_fn(struct timer_list *timer)
20552067
spin_unlock_irq(&ioc->lock);
20562068
}
20572069

2070+
/*
 * Calculate the vtime cost of issuing @abs_cost worth of IO against @iocg,
 * bumping up iocg->inuse in INUSE_ADJ_STEP_PCT steps if the budget margin
 * has deteriorated past margins->low since the last adjustment (in-period
 * donation snapback).
 *
 * @iocg:     target io cgroup
 * @vtime:    iocg's current vtime (caller-sampled)
 * @abs_cost: absolute cost of the IO being issued/merged
 * @now:      current ioc_now snapshot
 *
 * Returns the cost scaled by the (possibly updated) hweight_inuse.
 */
static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime,
				      u64 abs_cost, struct ioc_now *now)
{
	struct ioc *ioc = iocg->ioc;
	struct ioc_margins *margins = &ioc->margins;
	u32 adj_step = DIV_ROUND_UP(iocg->active * INUSE_ADJ_STEP_PCT, 100);
	u32 hwi;
	s64 margin;
	u64 cost, new_inuse;
	unsigned long flags;

	current_hweight(iocg, NULL, &hwi);
	cost = abs_cost_to_cost(abs_cost, hwi);
	margin = now->vnow - vtime - cost;

	/*
	 * We only increase inuse during period and do so iff the margin has
	 * deteriorated since the previous adjustment.
	 */
	if (margin >= iocg->saved_margin || margin >= margins->low ||
	    iocg->inuse == iocg->active)
		return cost;

	/*
	 * This path can be reached from the merge path (ioc_rqos_merge())
	 * which may run with IRQs already disabled; spin_unlock_irq() would
	 * unconditionally re-enable them. Use the save/restore variants so
	 * the caller's IRQ state is preserved.
	 */
	spin_lock_irqsave(&ioc->lock, flags);

	/* we own inuse only when @iocg is in the normal active state */
	if (list_empty(&iocg->active_list)) {
		spin_unlock_irqrestore(&ioc->lock, flags);
		return cost;
	}

	/* bump up inuse till @abs_cost fits in the existing budget */
	new_inuse = iocg->inuse;
	do {
		new_inuse = new_inuse + adj_step;
		propagate_weights(iocg, iocg->active, new_inuse, true, now);
		current_hweight(iocg, NULL, &hwi);
		cost = abs_cost_to_cost(abs_cost, hwi);
	} while (time_after64(vtime + cost, now->vnow) &&
		 iocg->inuse != iocg->active);

	spin_unlock_irqrestore(&ioc->lock, flags);
	return cost;
}
2113+
20582114
static void calc_vtime_cost_builtin(struct bio *bio, struct ioc_gq *iocg,
20592115
bool is_merge, u64 *costp)
20602116
{
@@ -2136,7 +2192,6 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
21362192
struct ioc_gq *iocg = blkg_to_iocg(blkg);
21372193
struct ioc_now now;
21382194
struct iocg_wait wait;
2139-
u32 hw_active, hw_inuse;
21402195
u64 abs_cost, cost, vtime;
21412196
bool use_debt, ioc_locked;
21422197
unsigned long flags;
@@ -2154,21 +2209,8 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
21542209
return;
21552210

21562211
iocg->cursor = bio_end_sector(bio);
2157-
21582212
vtime = atomic64_read(&iocg->vtime);
2159-
current_hweight(iocg, &hw_active, &hw_inuse);
2160-
2161-
if (hw_inuse < hw_active &&
2162-
time_after_eq64(vtime + ioc->margins.min, now.vnow)) {
2163-
TRACE_IOCG_PATH(inuse_reset, iocg, &now,
2164-
iocg->inuse, iocg->weight, hw_inuse, hw_active);
2165-
spin_lock_irq(&ioc->lock);
2166-
propagate_weights(iocg, iocg->weight, iocg->weight);
2167-
spin_unlock_irq(&ioc->lock);
2168-
current_hweight(iocg, &hw_active, &hw_inuse);
2169-
}
2170-
2171-
cost = abs_cost_to_cost(abs_cost, hw_inuse);
2213+
cost = adjust_inuse_and_calc_cost(iocg, vtime, abs_cost, &now);
21722214

21732215
/*
21742216
* If no one's waiting and within budget, issue right away. The
@@ -2190,7 +2232,7 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
21902232
*/
21912233
use_debt = bio_issue_as_root_blkg(bio) || fatal_signal_pending(current);
21922234
ioc_locked = use_debt || READ_ONCE(iocg->abs_vdebt);
2193-
2235+
retry_lock:
21942236
iocg_lock(iocg, ioc_locked, &flags);
21952237

21962238
/*
@@ -2232,6 +2274,17 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
22322274
return;
22332275
}
22342276

2277+
/* guarantee that iocgs w/ waiters have maximum inuse */
2278+
if (iocg->inuse != iocg->active) {
2279+
if (!ioc_locked) {
2280+
iocg_unlock(iocg, false, &flags);
2281+
ioc_locked = true;
2282+
goto retry_lock;
2283+
}
2284+
propagate_weights(iocg, iocg->active, iocg->active, true,
2285+
&now);
2286+
}
2287+
22352288
/*
22362289
* Append self to the waitq and schedule the wakeup timer if we're
22372290
* the first waiter. The timer duration is calculated based on the
@@ -2274,8 +2327,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
22742327
struct ioc *ioc = iocg->ioc;
22752328
sector_t bio_end = bio_end_sector(bio);
22762329
struct ioc_now now;
2277-
u32 hw_inuse;
2278-
u64 abs_cost, cost;
2330+
u64 vtime, abs_cost, cost;
22792331
unsigned long flags;
22802332

22812333
/* bypass if disabled or for root cgroup */
@@ -2287,8 +2339,9 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
22872339
return;
22882340

22892341
ioc_now(ioc, &now);
2290-
current_hweight(iocg, NULL, &hw_inuse);
2291-
cost = abs_cost_to_cost(abs_cost, hw_inuse);
2342+
2343+
vtime = atomic64_read(&iocg->vtime);
2344+
cost = adjust_inuse_and_calc_cost(iocg, vtime, abs_cost, &now);
22922345

22932346
/* update cursor if backmerging into the request at the cursor */
22942347
if (blk_rq_pos(rq) < bio_end &&
@@ -2530,7 +2583,7 @@ static void ioc_pd_init(struct blkg_policy_data *pd)
25302583
}
25312584

25322585
spin_lock_irqsave(&ioc->lock, flags);
2533-
weight_updated(iocg);
2586+
weight_updated(iocg, &now);
25342587
spin_unlock_irqrestore(&ioc->lock, flags);
25352588
}
25362589

@@ -2544,7 +2597,10 @@ static void ioc_pd_free(struct blkg_policy_data *pd)
25442597
spin_lock_irqsave(&ioc->lock, flags);
25452598

25462599
if (!list_empty(&iocg->active_list)) {
2547-
propagate_weights(iocg, 0, 0);
2600+
struct ioc_now now;
2601+
2602+
ioc_now(ioc, &now);
2603+
propagate_weights(iocg, 0, 0, false, &now);
25482604
list_del_init(&iocg->active_list);
25492605
}
25502606

@@ -2612,6 +2668,7 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
26122668
struct blkcg *blkcg = css_to_blkcg(of_css(of));
26132669
struct ioc_cgrp *iocc = blkcg_to_iocc(blkcg);
26142670
struct blkg_conf_ctx ctx;
2671+
struct ioc_now now;
26152672
struct ioc_gq *iocg;
26162673
u32 v;
26172674
int ret;
@@ -2632,7 +2689,8 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
26322689

26332690
if (iocg) {
26342691
spin_lock_irq(&iocg->ioc->lock);
2635-
weight_updated(iocg);
2692+
ioc_now(iocg->ioc, &now);
2693+
weight_updated(iocg, &now);
26362694
spin_unlock_irq(&iocg->ioc->lock);
26372695
}
26382696
}
@@ -2658,7 +2716,8 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
26582716

26592717
spin_lock(&iocg->ioc->lock);
26602718
iocg->cfg_weight = v * WEIGHT_ONE;
2661-
weight_updated(iocg);
2719+
ioc_now(iocg->ioc, &now);
2720+
weight_updated(iocg, &now);
26622721
spin_unlock(&iocg->ioc->lock);
26632722

26642723
blkg_conf_finish(&ctx);

0 commit comments

Comments
 (0)