Commit 0d2602c

blk-mq: improve support for shared tags maps
This adds support for active queue tracking, meaning that the blk-mq
tagging maintains a count of active users of a tag set. This allows us
to maintain a notion of fairness between users, so that we can
distribute the tag depth evenly without starving some users while
allowing others to run unfairly deep queues.

If sharing of a tag set is detected, each hardware queue will track the
depth of its own queue. If that depth exceeds the total depth divided
by the number of active queues, the user is actively throttled down.

The active queue count is done lazily to avoid bouncing that data
between submitter and completer. Each hardware queue gets marked active
when it allocates its first tag, and gets marked inactive when 1) the
last tag is cleared, and 2) the queue timeout grace period has passed.

Signed-off-by: Jens Axboe <[email protected]>
1 parent: 1f236ab
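
As an aside, the fair-share arithmetic that hctx_may_queue() applies in the
diff below can be sketched in isolation: each active queue may hold at most
the ceiling of the total depth over the number of active users, floored at 4
tags. A minimal userspace sketch (fair_share_depth() is a hypothetical
helper, not kernel code):

#include <stdio.h>

/* Mirrors the throttle computation this patch adds in hctx_may_queue(). */
static unsigned int fair_share_depth(unsigned int total_depth,
                                     unsigned int active_queues)
{
        /* ceiling division, as in the patch: (depth + users - 1) / users */
        unsigned int depth = (total_depth + active_queues - 1) / active_queues;

        /* "Allow at least some tags": never throttle below 4 */
        return depth > 4 ? depth : 4;
}

int main(void)
{
        printf("%u\n", fair_share_depth(64, 3)); /* 3 queues share 64 tags: 22 each */
        printf("%u\n", fair_share_depth(8, 8));  /* the floor kicks in: 4 */
        return 0;
}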

File tree: 9 files changed, +236 -27 lines


block/blk-mq-sysfs.c

Lines changed: 10 additions & 0 deletions
@@ -208,6 +208,11 @@ static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page)
 	return blk_mq_tag_sysfs_show(hctx->tags, page);
 }
 
+static ssize_t blk_mq_hw_sysfs_active_show(struct blk_mq_hw_ctx *hctx, char *page)
+{
+	return sprintf(page, "%u\n", atomic_read(&hctx->nr_active));
+}
+
 static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
 {
 	unsigned int i, first = 1;
@@ -267,6 +272,10 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_dispatched = {
 	.attr = {.name = "dispatched", .mode = S_IRUGO },
 	.show = blk_mq_hw_sysfs_dispatched_show,
 };
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_active = {
+	.attr = {.name = "active", .mode = S_IRUGO },
+	.show = blk_mq_hw_sysfs_active_show,
+};
 static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = {
 	.attr = {.name = "pending", .mode = S_IRUGO },
 	.show = blk_mq_hw_sysfs_rq_list_show,
@@ -287,6 +296,7 @@ static struct attribute *default_hw_ctx_attrs[] = {
 	&blk_mq_hw_sysfs_pending.attr,
 	&blk_mq_hw_sysfs_tags.attr,
 	&blk_mq_hw_sysfs_cpus.attr,
+	&blk_mq_hw_sysfs_active.attr,
 	NULL,
 };
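
The new "active" attribute exports hctx->nr_active for each hardware queue.
As a hypothetical usage sketch, it could be read from blk-mq's per-hctx sysfs
directory; the disk name and hctx number in the path below are assumed
placeholders, not taken from the patch:

#include <stdio.h>

int main(void)
{
        /* assumed example path: hardware queue 0 of disk sda */
        FILE *f = fopen("/sys/block/sda/mq/0/active", "r");
        unsigned int nr_active;

        if (!f) {
                perror("fopen");
                return 1;
        }
        if (fscanf(f, "%u", &nr_active) == 1)
                printf("hctx 0: nr_active=%u\n", nr_active);
        fclose(f);
        return 0;
}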

block/blk-mq-tag.c

Lines changed: 95 additions & 17 deletions
@@ -7,13 +7,12 @@
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
 
-void blk_mq_wait_for_tags(struct blk_mq_tags *tags, struct blk_mq_hw_ctx *hctx,
-			  bool reserved)
+void blk_mq_wait_for_tags(struct blk_mq_hw_ctx *hctx, bool reserved)
 {
 	int tag, zero = 0;
 
-	tag = blk_mq_get_tag(tags, hctx, &zero, __GFP_WAIT, reserved);
-	blk_mq_put_tag(tags, tag, &zero);
+	tag = blk_mq_get_tag(hctx, &zero, __GFP_WAIT, reserved);
+	blk_mq_put_tag(hctx, tag, &zero);
 }
 
 static bool bt_has_free_tags(struct blk_mq_bitmap_tags *bt)
@@ -40,6 +39,84 @@ bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
 	return bt_has_free_tags(&tags->bitmap_tags);
 }
 
+static inline void bt_index_inc(unsigned int *index)
+{
+	*index = (*index + 1) & (BT_WAIT_QUEUES - 1);
+}
+
+/*
+ * If a previously inactive queue goes active, bump the active user count.
+ */
+bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
+{
+	if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
+	    !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+		atomic_inc(&hctx->tags->active_queues);
+
+	return true;
+}
+
+/*
+ * If a previously busy queue goes inactive, potential waiters could now
+ * be allowed to queue. Wake them up and check.
+ */
+void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
+{
+	struct blk_mq_tags *tags = hctx->tags;
+	struct blk_mq_bitmap_tags *bt;
+	int i, wake_index;
+
+	if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+		return;
+
+	atomic_dec(&tags->active_queues);
+
+	/*
+	 * Will only throttle depth on non-reserved tags
+	 */
+	bt = &tags->bitmap_tags;
+	wake_index = bt->wake_index;
+	for (i = 0; i < BT_WAIT_QUEUES; i++) {
+		struct bt_wait_state *bs = &bt->bs[wake_index];
+
+		if (waitqueue_active(&bs->wait))
+			wake_up(&bs->wait);
+
+		bt_index_inc(&wake_index);
+	}
+}
+
+/*
+ * For shared tag users, we track the number of currently active users
+ * and attempt to provide a fair share of the tag depth for each of them.
+ */
+static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
+				  struct blk_mq_bitmap_tags *bt)
+{
+	unsigned int depth, users;
+
+	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED))
+		return true;
+	if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+		return true;
+
+	/*
+	 * Don't try dividing an ant
+	 */
+	if (bt->depth == 1)
+		return true;
+
+	users = atomic_read(&hctx->tags->active_queues);
+	if (!users)
+		return true;
+
+	/*
+	 * Allow at least some tags
+	 */
+	depth = max((bt->depth + users - 1) / users, 4U);
+	return atomic_read(&hctx->nr_active) < depth;
+}
+
 static int __bt_get_word(struct blk_mq_bitmap *bm, unsigned int last_tag)
 {
 	int tag, org_last_tag, end;
@@ -78,11 +155,15 @@ static int __bt_get_word(struct blk_mq_bitmap *bm, unsigned int last_tag)
  * multiple users will tend to stick to different cachelines, at least
  * until the map is exhausted.
  */
-static int __bt_get(struct blk_mq_bitmap_tags *bt, unsigned int *tag_cache)
+static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
+		    unsigned int *tag_cache)
 {
 	unsigned int last_tag, org_last_tag;
 	int index, i, tag;
 
+	if (!hctx_may_queue(hctx, bt))
+		return -1;
+
 	last_tag = org_last_tag = *tag_cache;
 	index = TAG_TO_INDEX(bt, last_tag);
 
@@ -117,11 +198,6 @@ static int __bt_get(struct blk_mq_bitmap_tags *bt, unsigned int *tag_cache)
 	return tag;
 }
 
-static inline void bt_index_inc(unsigned int *index)
-{
-	*index = (*index + 1) & (BT_WAIT_QUEUES - 1);
-}
-
 static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt,
 					 struct blk_mq_hw_ctx *hctx)
 {
@@ -142,7 +218,7 @@ static int bt_get(struct blk_mq_bitmap_tags *bt, struct blk_mq_hw_ctx *hctx,
 	DEFINE_WAIT(wait);
 	int tag;
 
-	tag = __bt_get(bt, last_tag);
+	tag = __bt_get(hctx, bt, last_tag);
 	if (tag != -1)
 		return tag;
 
@@ -156,7 +232,7 @@ static int bt_get(struct blk_mq_bitmap_tags *bt, struct blk_mq_hw_ctx *hctx,
 		was_empty = list_empty(&wait.task_list);
 		prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);
 
-		tag = __bt_get(bt, last_tag);
+		tag = __bt_get(hctx, bt, last_tag);
 		if (tag != -1)
 			break;
 
@@ -200,14 +276,13 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_tags *tags,
 	return tag;
 }
 
-unsigned int blk_mq_get_tag(struct blk_mq_tags *tags,
-			    struct blk_mq_hw_ctx *hctx, unsigned int *last_tag,
+unsigned int blk_mq_get_tag(struct blk_mq_hw_ctx *hctx, unsigned int *last_tag,
 			    gfp_t gfp, bool reserved)
 {
 	if (!reserved)
-		return __blk_mq_get_tag(tags, hctx, last_tag, gfp);
+		return __blk_mq_get_tag(hctx->tags, hctx, last_tag, gfp);
 
-	return __blk_mq_get_reserved_tag(tags, gfp);
+	return __blk_mq_get_reserved_tag(hctx->tags, gfp);
 }
 
 static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt)
@@ -265,9 +340,11 @@ static void __blk_mq_put_reserved_tag(struct blk_mq_tags *tags,
 	bt_clear_tag(&tags->breserved_tags, tag);
 }
 
-void blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag,
+void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
 		    unsigned int *last_tag)
 {
+	struct blk_mq_tags *tags = hctx->tags;
+
 	if (tag >= tags->nr_reserved_tags) {
 		const int real_tag = tag - tags->nr_reserved_tags;
 
@@ -465,6 +542,7 @@ ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
 	res = bt_unused_tags(&tags->breserved_tags);
 
 	page += sprintf(page, "nr_free=%u, nr_reserved=%u\n", free, res);
+	page += sprintf(page, "active_queues=%u\n", atomic_read(&tags->active_queues));
 
 	return page - orig_page;
 }

block/blk-mq-tag.h

Lines changed: 24 additions & 3 deletions
@@ -38,6 +38,8 @@ struct blk_mq_tags {
 	unsigned int nr_tags;
 	unsigned int nr_reserved_tags;
 
+	atomic_t active_queues;
+
 	struct blk_mq_bitmap_tags bitmap_tags;
 	struct blk_mq_bitmap_tags breserved_tags;
 
@@ -49,9 +51,9 @@ struct blk_mq_tags {
 extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node);
 extern void blk_mq_free_tags(struct blk_mq_tags *tags);
 
-extern unsigned int blk_mq_get_tag(struct blk_mq_tags *tags, struct blk_mq_hw_ctx *hctx, unsigned int *last_tag, gfp_t gfp, bool reserved);
-extern void blk_mq_wait_for_tags(struct blk_mq_tags *tags, struct blk_mq_hw_ctx *hctx, bool reserved);
-extern void blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag, unsigned int *last_tag);
+extern unsigned int blk_mq_get_tag(struct blk_mq_hw_ctx *hctx, unsigned int *last_tag, gfp_t gfp, bool reserved);
+extern void blk_mq_wait_for_tags(struct blk_mq_hw_ctx *hctx, bool reserved);
+extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag, unsigned int *last_tag);
 extern void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data, unsigned long *), void *data);
 extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
 extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
@@ -68,4 +70,23 @@ enum {
 	BLK_MQ_TAG_MAX	= BLK_MQ_TAG_FAIL - 1,
 };
 
+extern bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *);
+extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *);
+
+static inline bool blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
+{
+	if (!(hctx->flags & BLK_MQ_F_TAG_SHARED))
+		return false;
+
+	return __blk_mq_tag_busy(hctx);
+}
+
+static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
+{
+	if (!(hctx->flags & BLK_MQ_F_TAG_SHARED))
+		return;
+
+	__blk_mq_tag_idle(hctx);
+}
+
 #endif
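
A small detail from the blk-mq-tag.c hunks above: bt_index_inc() wraps the
wake-queue index with a mask instead of a modulo, which is correct because
BT_WAIT_QUEUES is a power of two. A standalone sketch (the value 8 here is
illustrative):

#include <stdio.h>

#define BT_WAIT_QUEUES 8    /* any power of two behaves the same */

static void bt_index_inc(unsigned int *index)
{
        /* (i + 1) & (N - 1) equals (i + 1) % N when N is a power of two */
        *index = (*index + 1) & (BT_WAIT_QUEUES - 1);
}

int main(void)
{
        unsigned int i = BT_WAIT_QUEUES - 1;

        bt_index_inc(&i);   /* wraps 7 -> 0 instead of reaching 8 */
        printf("%u\n", i);  /* prints 0 */
        return 0;
}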
