Skip to content

Commit 8cbff9d

Browse files
author
Ming Lei
committed
block: unifying elevator change
JIRA: https://issues.redhat.com/browse/RHEL-112997 commit 1e44bed Author: Ming Lei <[email protected]> Date: Mon May 5 22:17:54 2025 +0800 block: unifying elevator change Elevator change is one well-defined behavior: - tear down current elevator if it exists - setup new elevator It is supposed to cover any case for changing elevator by single internal API, typically the following cases: - setup default elevator in add_disk() - switch to none in del_disk() - reset elevator in blk_mq_update_nr_hw_queues() - switch elevator in sysfs `store` elevator attribute This patch uses elevator_change() to cover all above cases: - every elevator switch is serialized with each other: add_disk/del_disk/ store elevator is serialized already, blk_mq_update_nr_hw_queues() uses srcu for syncing with the other three cases - for both add_disk()/del_disk(), queue freeze works in atomic mode or the queue has already been frozen, so the freeze in elevator_change() won't add extra delay - `struct elv_change_ctx` instance holds any info for changing elevator Reviewed-by: Christoph Hellwig <[email protected]> Reviewed-by: Nilay Shroff <[email protected]> Reviewed-by: Hannes Reinecke <[email protected]> Signed-off-by: Ming Lei <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jens Axboe <[email protected]> Signed-off-by: Ming Lei <[email protected]>
1 parent 6d8d503 commit 8cbff9d

File tree

4 files changed

+67
-101
lines changed

4 files changed

+67
-101
lines changed

block/blk-sysfs.c

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -876,14 +876,9 @@ int blk_register_queue(struct gendisk *disk)
876876
if (ret)
877877
goto out_unregister_ia_ranges;
878878

879+
if (queue_is_mq(q))
880+
elevator_set_default(q);
879881
mutex_lock(&q->elevator_lock);
880-
if (q->elevator) {
881-
ret = elv_register_queue(q, false);
882-
if (ret) {
883-
mutex_unlock(&q->elevator_lock);
884-
goto out_crypto_sysfs_unregister;
885-
}
886-
}
887882
wbt_enable_default(disk);
888883
mutex_unlock(&q->elevator_lock);
889884

@@ -909,8 +904,6 @@ int blk_register_queue(struct gendisk *disk)
909904

910905
return ret;
911906

912-
out_crypto_sysfs_unregister:
913-
blk_crypto_sysfs_unregister(disk);
914907
out_unregister_ia_ranges:
915908
disk_unregister_independent_access_ranges(disk);
916909
out_debugfs_remove:
@@ -958,9 +951,11 @@ void blk_unregister_queue(struct gendisk *disk)
958951
blk_mq_sysfs_unregister(disk);
959952
blk_crypto_sysfs_unregister(disk);
960953

961-
mutex_lock(&q->elevator_lock);
962-
elv_unregister_queue(q);
963-
mutex_unlock(&q->elevator_lock);
954+
if (queue_is_mq(q)) {
955+
blk_mq_quiesce_queue(q);
956+
elevator_set_none(q);
957+
blk_mq_unquiesce_queue(q);
958+
}
964959

965960
mutex_lock(&q->sysfs_lock);
966961
disk_unregister_independent_access_ranges(disk);

block/blk.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -323,9 +323,8 @@ bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
323323
bool blk_insert_flush(struct request *rq);
324324

325325
void elv_update_nr_hw_queues(struct request_queue *q);
326-
void elevator_exit(struct request_queue *q);
327-
int elv_register_queue(struct request_queue *q, bool uevent);
328-
void elv_unregister_queue(struct request_queue *q);
326+
void elevator_set_default(struct request_queue *q);
327+
void elevator_set_none(struct request_queue *q);
329328

330329
ssize_t part_size_show(struct device *dev, struct device_attribute *attr,
331330
char *buf);

block/elevator.c

Lines changed: 53 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ static void elevator_release(struct kobject *kobj)
154154
kfree(e);
155155
}
156156

157-
void elevator_exit(struct request_queue *q)
157+
static void elevator_exit(struct request_queue *q)
158158
{
159159
struct elevator_queue *e = q->elevator;
160160

@@ -458,7 +458,7 @@ static const struct kobj_type elv_ktype = {
458458
.release = elevator_release,
459459
};
460460

461-
int elv_register_queue(struct request_queue *q, bool uevent)
461+
static int elv_register_queue(struct request_queue *q, bool uevent)
462462
{
463463
struct elevator_queue *e = q->elevator;
464464
int error;
@@ -488,7 +488,7 @@ int elv_register_queue(struct request_queue *q, bool uevent)
488488
return error;
489489
}
490490

491-
void elv_unregister_queue(struct request_queue *q)
491+
static void elv_unregister_queue(struct request_queue *q)
492492
{
493493
struct elevator_queue *e = q->elevator;
494494

@@ -561,66 +561,6 @@ void elv_unregister(struct elevator_type *e)
561561
}
562562
EXPORT_SYMBOL_GPL(elv_unregister);
563563

564-
/*
565-
* For single queue devices, default to using mq-deadline. If we have multiple
566-
* queues or mq-deadline is not available, default to "none".
567-
*/
568-
static struct elevator_type *elevator_get_default(struct request_queue *q)
569-
{
570-
if (q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
571-
return NULL;
572-
573-
if (q->nr_hw_queues != 1 &&
574-
!blk_mq_is_shared_tags(q->tag_set->flags))
575-
return NULL;
576-
577-
return elevator_find_get("mq-deadline");
578-
}
579-
580-
/*
581-
* Use the default elevator settings. If the chosen elevator initialization
582-
* fails, fall back to the "none" elevator (no elevator).
583-
*/
584-
void elevator_init_mq(struct request_queue *q)
585-
{
586-
struct elevator_type *e;
587-
unsigned int memflags;
588-
int err;
589-
590-
WARN_ON_ONCE(blk_queue_registered(q));
591-
592-
if (unlikely(q->elevator))
593-
return;
594-
595-
e = elevator_get_default(q);
596-
if (!e)
597-
return;
598-
599-
/*
600-
* We are called before adding disk, when there isn't any FS I/O,
601-
* so freezing queue plus canceling dispatch work is enough to
602-
* drain any dispatch activities originated from passthrough
603-
* requests, then no need to quiesce queue which may add long boot
604-
* latency, especially when lots of disks are involved.
605-
*
606-
* Disk isn't added yet, so verifying queue lock only manually.
607-
*/
608-
memflags = blk_mq_freeze_queue(q);
609-
610-
blk_mq_cancel_work_sync(q);
611-
612-
err = blk_mq_init_sched(q, e);
613-
614-
blk_mq_unfreeze_queue(q, memflags);
615-
616-
if (err) {
617-
pr_warn("\"%s\" elevator initialization failed, "
618-
"falling back to \"none\"\n", e->elevator_name);
619-
}
620-
621-
elevator_put(e);
622-
}
623-
624564
/*
625565
* Switch to new_e io scheduler.
626566
*
@@ -688,6 +628,16 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
688628
lockdep_assert_held(&q->tag_set->update_nr_hwq_lock);
689629

690630
memflags = blk_mq_freeze_queue(q);
631+
/*
632+
* May be called before adding disk, when there isn't any FS I/O,
633+
* so freezing queue plus canceling dispatch work is enough to
634+
* drain any dispatch activities originated from passthrough
635+
* requests, then no need to quiesce queue which may add long boot
636+
* latency, especially when lots of disks are involved.
637+
*
638+
* Disk isn't added yet, so verifying queue lock only manually.
639+
*/
640+
blk_mq_cancel_work_sync(q);
691641
mutex_lock(&q->elevator_lock);
692642
if (!(q->elevator && elevator_match(q->elevator->type, ctx->name)))
693643
ret = elevator_switch(q, ctx);
@@ -716,6 +666,46 @@ void elv_update_nr_hw_queues(struct request_queue *q)
716666
mutex_unlock(&q->elevator_lock);
717667
}
718668

669+
/*
670+
* Use the default elevator settings. If the chosen elevator initialization
671+
* fails, fall back to the "none" elevator (no elevator).
672+
*/
673+
void elevator_set_default(struct request_queue *q)
674+
{
675+
struct elv_change_ctx ctx = {
676+
.name = "mq-deadline",
677+
.no_uevent = true,
678+
};
679+
int err = 0;
680+
681+
if (q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
682+
return;
683+
684+
/*
685+
* For single queue devices, default to using mq-deadline. If we
686+
* have multiple queues or mq-deadline is not available, default
687+
* to "none".
688+
*/
689+
if (elevator_find_get(ctx.name) && (q->nr_hw_queues == 1 ||
690+
blk_mq_is_shared_tags(q->tag_set->flags)))
691+
err = elevator_change(q, &ctx);
692+
if (err < 0)
693+
pr_warn("\"%s\" elevator initialization, failed %d, "
694+
"falling back to \"none\"\n", ctx.name, err);
695+
}
696+
697+
void elevator_set_none(struct request_queue *q)
698+
{
699+
struct elv_change_ctx ctx = {
700+
.name = "none",
701+
};
702+
int err;
703+
704+
err = elevator_change(q, &ctx);
705+
if (err < 0)
706+
pr_warn("%s: set none elevator failed %d\n", __func__, err);
707+
}
708+
719709
static void elv_iosched_load_module(const char *elevator_name)
720710
{
721711
struct elevator_type *found;

block/genhd.c

Lines changed: 5 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -432,12 +432,6 @@ static int __add_disk(struct device *parent, struct gendisk *disk,
432432
*/
433433
if (disk->fops->submit_bio || disk->fops->poll_bio)
434434
return -EINVAL;
435-
436-
/*
437-
* Initialize the I/O scheduler code and pick a default one if
438-
* needed.
439-
*/
440-
elevator_init_mq(disk->queue);
441435
} else {
442436
if (!disk->fops->submit_bio)
443437
return -EINVAL;
@@ -454,7 +448,7 @@ static int __add_disk(struct device *parent, struct gendisk *disk,
454448
ret = -EINVAL;
455449
if (disk->major) {
456450
if (WARN_ON(!disk->minors))
457-
goto out_exit_elevator;
451+
goto out;
458452

459453
if (disk->minors > DISK_MAX_PARTS) {
460454
pr_err("block: can't allocate more than %d partitions\n",
@@ -464,14 +458,14 @@ static int __add_disk(struct device *parent, struct gendisk *disk,
464458
if (disk->first_minor > MINORMASK ||
465459
disk->minors > MINORMASK + 1 ||
466460
disk->first_minor + disk->minors > MINORMASK + 1)
467-
goto out_exit_elevator;
461+
goto out;
468462
} else {
469463
if (WARN_ON(disk->minors))
470-
goto out_exit_elevator;
464+
goto out;
471465

472466
ret = blk_alloc_ext_minor();
473467
if (ret < 0)
474-
goto out_exit_elevator;
468+
goto out;
475469
disk->major = BLOCK_EXT_MAJOR;
476470
disk->first_minor = ret;
477471
}
@@ -561,12 +555,7 @@ static int __add_disk(struct device *parent, struct gendisk *disk,
561555
out_free_ext_minor:
562556
if (disk->major == BLOCK_EXT_MAJOR)
563557
blk_free_ext_minor(disk->first_minor);
564-
out_exit_elevator:
565-
if (disk->queue->elevator) {
566-
mutex_lock(&disk->queue->elevator_lock);
567-
elevator_exit(disk->queue);
568-
mutex_unlock(&disk->queue->elevator_lock);
569-
}
558+
out:
570559
return ret;
571560
}
572561

@@ -753,14 +742,7 @@ static void __del_gendisk(struct gendisk *disk)
753742
if (queue_is_mq(q))
754743
blk_mq_cancel_work_sync(q);
755744

756-
blk_mq_quiesce_queue(q);
757-
if (q->elevator) {
758-
mutex_lock(&q->elevator_lock);
759-
elevator_exit(q);
760-
mutex_unlock(&q->elevator_lock);
761-
}
762745
rq_qos_exit(q);
763-
blk_mq_unquiesce_queue(q);
764746

765747
/*
766748
* If the disk does not own the queue, allow using passthrough requests

0 commit comments

Comments
 (0)