Skip to content

Commit c8c00a6

Browse files
committed
Remove deadlock potential in md_open
A recent commit: commit 449aad3 introduced the possibility of an A-B/B-A deadlock between bd_mutex and reconfig_mutex. __blkdev_get holds bd_mutex while calling md_open which takes reconfig_mutex, do_md_run is always called with reconfig_mutex held, and it now takes bd_mutex in the call the revalidate_disk. This potential deadlock was not caught by lockdep due to the use of mutex_lock_interruptible_nexted which was introduced by commit d63a5a7 do avoid a warning of an impossible deadlock. It is quite possible to split reconfig_mutex in to two locks. One protects the array data structures while it is being reconfigured, the other ensures that an array is never even partially open while it is being deactivated. In particular, the second lock prevents an open from completing between the time when do_md_stop checks if there are any active opens, and the time when the array is either set read-only, or when ->pers is set to NULL. So we can be certain that no IO is in flight as the array is being destroyed. So create a new lock, open_mutex, just to ensure exclusion between 'open' and 'stop'. This avoids the deadlock and also avoids the lockdep warning mentioned in commit d63a5a7 Reported-by: "Mike Snitzer" <[email protected]> Reported-by: "H. Peter Anvin" <[email protected]> Signed-off-by: NeilBrown <[email protected]>
1 parent 7b2aa03 commit c8c00a6

File tree

2 files changed

+20
-8
lines changed

2 files changed

+20
-8
lines changed

drivers/md/md.c

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit)
359359
else
360360
new->md_minor = MINOR(unit) >> MdpMinorShift;
361361

362+
mutex_init(&new->open_mutex);
362363
mutex_init(&new->reconfig_mutex);
363364
INIT_LIST_HEAD(&new->disks);
364365
INIT_LIST_HEAD(&new->all_mddevs);
@@ -4304,12 +4305,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
43044305
struct gendisk *disk = mddev->gendisk;
43054306
mdk_rdev_t *rdev;
43064307

4308+
mutex_lock(&mddev->open_mutex);
43074309
if (atomic_read(&mddev->openers) > is_open) {
43084310
printk("md: %s still in use.\n",mdname(mddev));
4309-
return -EBUSY;
4310-
}
4311-
4312-
if (mddev->pers) {
4311+
err = -EBUSY;
4312+
} else if (mddev->pers) {
43134313

43144314
if (mddev->sync_thread) {
43154315
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -4367,7 +4367,10 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
43674367
set_disk_ro(disk, 1);
43684368
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
43694369
}
4370-
4370+
out:
4371+
mutex_unlock(&mddev->open_mutex);
4372+
if (err)
4373+
return err;
43714374
/*
43724375
* Free resources if final stop
43734376
*/
@@ -4433,7 +4436,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
44334436
blk_integrity_unregister(disk);
44344437
md_new_event(mddev);
44354438
sysfs_notify_dirent(mddev->sysfs_state);
4436-
out:
44374439
return err;
44384440
}
44394441

@@ -5518,12 +5520,12 @@ static int md_open(struct block_device *bdev, fmode_t mode)
55185520
}
55195521
BUG_ON(mddev != bdev->bd_disk->private_data);
55205522

5521-
if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
5523+
if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
55225524
goto out;
55235525

55245526
err = 0;
55255527
atomic_inc(&mddev->openers);
5526-
mddev_unlock(mddev);
5528+
mutex_unlock(&mddev->open_mutex);
55275529

55285530
check_disk_change(bdev);
55295531
out:

drivers/md/md.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,16 @@ struct mddev_s
223223
* so we don't loop trying */
224224

225225
int in_sync; /* know to not need resync */
226+
/* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
227+
* that we are never stopping an array while it is open.
228+
* 'reconfig_mutex' protects all other reconfiguration.
229+
* These locks are separate due to conflicting interactions
230+
* with bdev->bd_mutex.
231+
* Lock ordering is:
232+
* reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk
233+
* bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open
234+
*/
235+
struct mutex open_mutex;
226236
struct mutex reconfig_mutex;
227237
atomic_t active; /* general refcount */
228238
atomic_t openers; /* number of active opens */

0 commit comments

Comments
 (0)