Skip to content

Commit cf43e6b

Browse files
committed
block: add scalable completion tracking of requests
For legacy block, we simply track them in the request queue. For blk-mq, we track them on a per-software-queue basis; these can then be summed up through the hardware queues and finally into a per-device state. The stats are tracked in windows of roughly 0.1s. Add sysfs files to display the stats. The feature is off by default, to avoid any extra overhead. In-kernel users can turn it on by setting QUEUE_FLAG_STATS in the queue flags. We currently don't turn it on if someone just reads any of the stats files; that is something we could add as well. Signed-off-by: Jens Axboe <[email protected]>
1 parent ebc4ff6 commit cf43e6b

File tree

10 files changed

+427
-3
lines changed

10 files changed

+427
-3
lines changed

block/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
66
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
77
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
8-
blk-lib.o blk-mq.o blk-mq-tag.o \
8+
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
99
blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \
1010
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
1111
badblocks.o partitions/

block/blk-core.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2464,6 +2464,11 @@ void blk_start_request(struct request *req)
24642464
{
24652465
blk_dequeue_request(req);
24662466

2467+
if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
2468+
blk_stat_set_issue_time(&req->issue_stat);
2469+
req->rq_flags |= RQF_STATS;
2470+
}
2471+
24672472
/*
24682473
* We are now handing the request to the hardware, initialize
24692474
* resid_len to full count and add the timeout handler.
@@ -2683,8 +2688,13 @@ EXPORT_SYMBOL_GPL(blk_unprep_request);
26832688
*/
26842689
void blk_finish_request(struct request *req, int error)
26852690
{
2691+
struct request_queue *q = req->q;
2692+
2693+
if (req->rq_flags & RQF_STATS)
2694+
blk_stat_add(&q->rq_stats[rq_data_dir(req)], req);
2695+
26862696
if (req->rq_flags & RQF_QUEUED)
2687-
blk_queue_end_tag(req->q, req);
2697+
blk_queue_end_tag(q, req);
26882698

26892699
BUG_ON(blk_queued_rq(req));
26902700

@@ -2704,7 +2714,7 @@ void blk_finish_request(struct request *req, int error)
27042714
if (blk_bidi_rq(req))
27052715
__blk_put_request(req->next_rq->q, req->next_rq);
27062716

2707-
__blk_put_request(req->q, req);
2717+
__blk_put_request(q, req);
27082718
}
27092719
}
27102720
EXPORT_SYMBOL(blk_finish_request);

block/blk-mq-sysfs.c

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,47 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
259259
return ret;
260260
}
261261

262+
static void blk_mq_stat_clear(struct blk_mq_hw_ctx *hctx)
263+
{
264+
struct blk_mq_ctx *ctx;
265+
unsigned int i;
266+
267+
hctx_for_each_ctx(hctx, ctx, i) {
268+
blk_stat_init(&ctx->stat[BLK_STAT_READ]);
269+
blk_stat_init(&ctx->stat[BLK_STAT_WRITE]);
270+
}
271+
}
272+
273+
/*
 * sysfs store handler for the per-hctx "stats" attribute.
 *
 * The content written by the user (@page) is not inspected: any write
 * clears the statistics via blk_mq_stat_clear().  Returns @count so the
 * whole write is reported as consumed.
 */
static ssize_t blk_mq_hw_sysfs_stat_store(struct blk_mq_hw_ctx *hctx,
					  const char *page, size_t count)
{
	const ssize_t consumed = count;

	blk_mq_stat_clear(hctx);

	return consumed;
}
279+
280+
static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre)
281+
{
282+
return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n",
283+
pre, (long long) stat->nr_samples,
284+
(long long) stat->mean, (long long) stat->min,
285+
(long long) stat->max);
286+
}
287+
288+
static ssize_t blk_mq_hw_sysfs_stat_show(struct blk_mq_hw_ctx *hctx, char *page)
289+
{
290+
struct blk_rq_stat stat[2];
291+
ssize_t ret;
292+
293+
blk_stat_init(&stat[BLK_STAT_READ]);
294+
blk_stat_init(&stat[BLK_STAT_WRITE]);
295+
296+
blk_hctx_stat_get(hctx, stat);
297+
298+
ret = print_stat(page, &stat[BLK_STAT_READ], "read :");
299+
ret += print_stat(page + ret, &stat[BLK_STAT_WRITE], "write:");
300+
return ret;
301+
}
302+
262303
static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = {
263304
.attr = {.name = "dispatched", .mode = S_IRUGO },
264305
.show = blk_mq_sysfs_dispatched_show,
@@ -317,6 +358,11 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = {
317358
.show = blk_mq_hw_sysfs_poll_show,
318359
.store = blk_mq_hw_sysfs_poll_store,
319360
};
361+
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_stat = {
362+
.attr = {.name = "stats", .mode = S_IRUGO | S_IWUSR },
363+
.show = blk_mq_hw_sysfs_stat_show,
364+
.store = blk_mq_hw_sysfs_stat_store,
365+
};
320366

321367
static struct attribute *default_hw_ctx_attrs[] = {
322368
&blk_mq_hw_sysfs_queued.attr,
@@ -327,6 +373,7 @@ static struct attribute *default_hw_ctx_attrs[] = {
327373
&blk_mq_hw_sysfs_cpus.attr,
328374
&blk_mq_hw_sysfs_active.attr,
329375
&blk_mq_hw_sysfs_poll.attr,
376+
&blk_mq_hw_sysfs_stat.attr,
330377
NULL,
331378
};
332379

block/blk-mq.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "blk.h"
3131
#include "blk-mq.h"
3232
#include "blk-mq-tag.h"
33+
#include "blk-stat.h"
3334

3435
static DEFINE_MUTEX(all_q_mutex);
3536
static LIST_HEAD(all_q_list);
@@ -403,10 +404,27 @@ static void blk_mq_ipi_complete_request(struct request *rq)
403404
put_cpu();
404405
}
405406

407+
static void blk_mq_stat_add(struct request *rq)
408+
{
409+
if (rq->rq_flags & RQF_STATS) {
410+
/*
411+
* We could rq->mq_ctx here, but there's less of a risk
412+
* of races if we have the completion event add the stats
413+
* to the local software queue.
414+
*/
415+
struct blk_mq_ctx *ctx;
416+
417+
ctx = __blk_mq_get_ctx(rq->q, raw_smp_processor_id());
418+
blk_stat_add(&ctx->stat[rq_data_dir(rq)], rq);
419+
}
420+
}
421+
406422
static void __blk_mq_complete_request(struct request *rq)
407423
{
408424
struct request_queue *q = rq->q;
409425

426+
blk_mq_stat_add(rq);
427+
410428
if (!q->softirq_done_fn)
411429
blk_mq_end_request(rq, rq->errors);
412430
else
@@ -450,6 +468,11 @@ void blk_mq_start_request(struct request *rq)
450468
if (unlikely(blk_bidi_rq(rq)))
451469
rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq);
452470

471+
if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
472+
blk_stat_set_issue_time(&rq->issue_stat);
473+
rq->rq_flags |= RQF_STATS;
474+
}
475+
453476
blk_add_timer(rq);
454477

455478
/*
@@ -1784,6 +1807,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
17841807
spin_lock_init(&__ctx->lock);
17851808
INIT_LIST_HEAD(&__ctx->rq_list);
17861809
__ctx->queue = q;
1810+
blk_stat_init(&__ctx->stat[BLK_STAT_READ]);
1811+
blk_stat_init(&__ctx->stat[BLK_STAT_WRITE]);
17871812

17881813
/* If the cpu isn't online, the cpu is mapped to first hctx */
17891814
if (!cpu_online(i))

block/blk-mq.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#ifndef INT_BLK_MQ_H
22
#define INT_BLK_MQ_H
33

4+
#include "blk-stat.h"
5+
46
struct blk_mq_tag_set;
57

68
struct blk_mq_ctx {
@@ -18,6 +20,7 @@ struct blk_mq_ctx {
1820

1921
/* incremented at completion time */
2022
unsigned long ____cacheline_aligned_in_smp rq_completed[2];
23+
struct blk_rq_stat stat[2];
2124

2225
struct request_queue *queue;
2326
struct kobject kobj;

0 commit comments

Comments
 (0)