Skip to content

Commit cd02dca

Browse files
committed
Btrfs: account for missing devices in RAID allocation profiles
When we mount in RAID degraded mode without adding a new device to replace the failed one, we can end up using the wrong RAID flags for allocations.

This results in strange combinations of block groups (raid1 in a raid10 filesystem) and corruptions when we try to allocate blocks from single spindle chunks on drives that are actually missing.

The first device has two small 4MB chunks in it that mkfs creates and these are usually unused in a raid1 or raid10 setup. But, in -o degraded, the allocator will fall back to these because the mask of desired raid groups isn't correct.

The fix here is to count the missing devices as we build up the list of devices in the system. This count is used when picking the raid level to make sure we continue using the same levels that were in place before we lost a drive.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
1 parent 68433b7 commit cd02dca

File tree

3 files changed

+36
-3
lines changed

3 files changed

+36
-3
lines changed

fs/btrfs/extent-tree.c

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3044,7 +3044,13 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
30443044

30453045
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
30463046
{
3047-
u64 num_devices = root->fs_info->fs_devices->rw_devices;
3047+
/*
3048+
* we add in the count of missing devices because we want
3049+
* to make sure that any RAID levels on a degraded FS
3050+
* continue to be honored.
3051+
*/
3052+
u64 num_devices = root->fs_info->fs_devices->rw_devices +
3053+
root->fs_info->fs_devices->missing_devices;
30483054

30493055
if (num_devices == 1)
30503056
flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
@@ -7891,7 +7897,14 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
78917897
u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
78927898
BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
78937899

7894-
num_devices = root->fs_info->fs_devices->rw_devices;
7900+
/*
7901+
* we add in the count of missing devices because we want
7902+
* to make sure that any RAID levels on a degraded FS
7903+
* continue to be honored.
7904+
*/
7905+
num_devices = root->fs_info->fs_devices->rw_devices +
7906+
root->fs_info->fs_devices->missing_devices;
7907+
78957908
if (num_devices == 1) {
78967909
stripped |= BTRFS_BLOCK_GROUP_DUP;
78977910
stripped = flags & ~stripped;

fs/btrfs/volumes.c

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,12 +413,16 @@ static noinline int device_list_add(const char *path,
413413

414414
device->fs_devices = fs_devices;
415415
fs_devices->num_devices++;
416-
} else if (strcmp(device->name, path)) {
416+
} else if (!device->name || strcmp(device->name, path)) {
417417
name = kstrdup(path, GFP_NOFS);
418418
if (!name)
419419
return -ENOMEM;
420420
kfree(device->name);
421421
device->name = name;
422+
if (device->missing) {
423+
fs_devices->missing_devices--;
424+
device->missing = 0;
425+
}
422426
}
423427

424428
if (found_transid > fs_devices->latest_trans) {
@@ -1238,6 +1242,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
12381242

12391243
device->fs_devices->num_devices--;
12401244

1245+
if (device->missing)
1246+
root->fs_info->fs_devices->missing_devices--;
1247+
12411248
next_device = list_entry(root->fs_info->fs_devices->devices.next,
12421249
struct btrfs_device, dev_list);
12431250
if (device->bdev == root->fs_info->sb->s_bdev)
@@ -3084,7 +3091,9 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
30843091
device->devid = devid;
30853092
device->work.func = pending_bios_fn;
30863093
device->fs_devices = fs_devices;
3094+
device->missing = 1;
30873095
fs_devices->num_devices++;
3096+
fs_devices->missing_devices++;
30883097
spin_lock_init(&device->io_lock);
30893098
INIT_LIST_HEAD(&device->dev_alloc_list);
30903099
memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
@@ -3282,6 +3291,15 @@ static int read_one_dev(struct btrfs_root *root,
32823291
device = add_missing_dev(root, devid, dev_uuid);
32833292
if (!device)
32843293
return -ENOMEM;
3294+
} else if (!device->missing) {
3295+
/*
3296+
* this happens when a device that was properly setup
3297+
* in the device info lists suddenly goes bad.
3298+
* device->bdev is NULL, and so we have to set
3299+
* device->missing to one here
3300+
*/
3301+
root->fs_info->fs_devices->missing_devices++;
3302+
device->missing = 1;
32853303
}
32863304
}
32873305

fs/btrfs/volumes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ struct btrfs_device {
4545
int barriers;
4646
int writeable;
4747
int in_fs_metadata;
48+
int missing;
4849

4950
spinlock_t io_lock;
5051

@@ -94,6 +95,7 @@ struct btrfs_fs_devices {
9495
u64 num_devices;
9596
u64 open_devices;
9697
u64 rw_devices;
98+
u64 missing_devices;
9799
u64 total_rw_bytes;
98100
struct block_device *latest_bdev;
99101

0 commit comments

Comments
 (0)