Skip to content

Commit b89f625

Browse files
Ming Leiaxboe
authored andcommitted
block: don't release queue's sysfs lock during switching elevator
cecf5d8 ("block: split .sysfs_lock into two locks") starts to release & acquire sysfs_lock before registering/un-registering elevator queue during switching elevator for avoiding potential deadlock from showing & storing 'queue/iosched' attributes and removing elevator's kobject. Turns out there isn't such deadlock because 'q->sysfs_lock' isn't required in .show & .store of queue/iosched's attributes, and just elevator's sysfs lock is acquired in elv_iosched_store() and elv_iosched_show(). So it is safe to hold queue's sysfs lock when registering/un-registering elevator queue. The biggest issue is that commit cecf5d8 assumes that concurrent write on 'queue/scheduler' can't happen. However, this assumption isn't true, because kernfs_fop_write() only guarantees that concurrent write aren't called on the same open file, but the write could be from different open on the file. So we can't release & re-acquire queue's sysfs lock during switching elevator, otherwise use-after-free on elevator could be triggered. Fixes the issue by not releasing queue's sysfs lock during switching elevator. Fixes: cecf5d8 ("block: split .sysfs_lock into two locks") Cc: Christoph Hellwig <[email protected]> Cc: Hannes Reinecke <[email protected]> Cc: Greg KH <[email protected]> Cc: Mike Snitzer <[email protected]> Reviewed-by: Bart Van Assche <[email protected]> Signed-off-by: Ming Lei <[email protected]> Signed-off-by: Jens Axboe <[email protected]>
1 parent 284b94b commit b89f625

File tree

2 files changed

+5
-39
lines changed

2 files changed

+5
-39
lines changed

block/blk-sysfs.c

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -989,13 +989,11 @@ int blk_register_queue(struct gendisk *disk)
989989
blk_mq_debugfs_register(q);
990990
}
991991

992-
/*
993-
* The flag of QUEUE_FLAG_REGISTERED isn't set yet, so elevator
994-
* switch won't happen at all.
995-
*/
992+
mutex_lock(&q->sysfs_lock);
996993
if (q->elevator) {
997994
ret = elv_register_queue(q, false);
998995
if (ret) {
996+
mutex_unlock(&q->sysfs_lock);
999997
mutex_unlock(&q->sysfs_dir_lock);
1000998
kobject_del(&q->kobj);
1001999
blk_trace_remove_sysfs(dev);
@@ -1005,7 +1003,6 @@ int blk_register_queue(struct gendisk *disk)
10051003
has_elevator = true;
10061004
}
10071005

1008-
mutex_lock(&q->sysfs_lock);
10091006
blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
10101007
wbt_enable_default(q);
10111008
blk_throtl_register_queue(q);
@@ -1062,12 +1059,10 @@ void blk_unregister_queue(struct gendisk *disk)
10621059
kobject_del(&q->kobj);
10631060
blk_trace_remove_sysfs(disk_to_dev(disk));
10641061

1065-
/*
1066-
* q->kobj has been removed, so it is safe to check if elevator
1067-
* exists without holding q->sysfs_lock.
1068-
*/
1062+
mutex_lock(&q->sysfs_lock);
10691063
if (q->elevator)
10701064
elv_unregister_queue(q);
1065+
mutex_unlock(&q->sysfs_lock);
10711066
mutex_unlock(&q->sysfs_dir_lock);
10721067

10731068
kobject_put(&disk_to_dev(disk)->kobj);

block/elevator.c

Lines changed: 1 addition & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -503,9 +503,7 @@ int elv_register_queue(struct request_queue *q, bool uevent)
503503
if (uevent)
504504
kobject_uevent(&e->kobj, KOBJ_ADD);
505505

506-
mutex_lock(&q->sysfs_lock);
507506
e->registered = 1;
508-
mutex_unlock(&q->sysfs_lock);
509507
}
510508
return error;
511509
}
@@ -523,11 +521,9 @@ void elv_unregister_queue(struct request_queue *q)
523521
kobject_uevent(&e->kobj, KOBJ_REMOVE);
524522
kobject_del(&e->kobj);
525523

526-
mutex_lock(&q->sysfs_lock);
527524
e->registered = 0;
528525
/* Re-enable throttling in case elevator disabled it */
529526
wbt_enable_default(q);
530-
mutex_unlock(&q->sysfs_lock);
531527
}
532528
}
533529

@@ -590,44 +586,19 @@ int elevator_switch_mq(struct request_queue *q,
590586
lockdep_assert_held(&q->sysfs_lock);
591587

592588
if (q->elevator) {
593-
if (q->elevator->registered) {
594-
mutex_unlock(&q->sysfs_lock);
595-
596-
/*
597-
* Concurrent elevator switch can't happen becasue
598-
* sysfs write is always exclusively on same file.
599-
*
600-
* Also the elevator queue won't be freed after
601-
* sysfs_lock is released becasue kobject_del() in
602-
* blk_unregister_queue() waits for completion of
603-
* .store & .show on its attributes.
604-
*/
589+
if (q->elevator->registered)
605590
elv_unregister_queue(q);
606591

607-
mutex_lock(&q->sysfs_lock);
608-
}
609592
ioc_clear_queue(q);
610593
elevator_exit(q, q->elevator);
611-
612-
/*
613-
* sysfs_lock may be dropped, so re-check if queue is
614-
* unregistered. If yes, don't switch to new elevator
615-
* any more
616-
*/
617-
if (!blk_queue_registered(q))
618-
return 0;
619594
}
620595

621596
ret = blk_mq_init_sched(q, new_e);
622597
if (ret)
623598
goto out;
624599

625600
if (new_e) {
626-
mutex_unlock(&q->sysfs_lock);
627-
628601
ret = elv_register_queue(q, true);
629-
630-
mutex_lock(&q->sysfs_lock);
631602
if (ret) {
632603
elevator_exit(q, q->elevator);
633604
goto out;

0 commit comments

Comments
 (0)