Skip to content

Commit 78ce9fc

Browse files
naota authored and kdave committed
btrfs: zoned: mark block groups to copy for device-replace
This is the 1/4 patch to support device-replace on zoned filesystems. We have two types of IOs during the device replace process. One is an IO to "copy" (by the scrub functions) all the device extents from the source device to the destination device. The other one is an IO to "clone" (by handle_ops_on_dev_replace()) new incoming write IOs from users to the source device into the target device. Cloning incoming IOs can break the sequential write rule on the target device. When a write is mapped in the middle of a block group, the IO is directed to the middle of a target device zone, which breaks the sequential write requirement. However, the cloning function cannot be disabled since incoming IOs targeting already copied device extents must be cloned so that the IO is executed on the target device. We cannot use dev_replace->cursor_{left,right} to determine whether a bio is going to a not yet copied region. Since we have a time gap between finishing btrfs_scrub_dev() and rewriting the mapping tree in btrfs_dev_replace_finishing(), we can have a newly allocated device extent which is never cloned nor copied. So the point is to copy only already existing device extents. This patch introduces mark_block_group_to_copy() to mark existing block groups as a target of copying. Then, handle_ops_on_dev_replace() and dev-replace can check the flag to do their job. Also, btrfs_finish_block_group_to_copy() will check if the copied stripe is the last stripe in the block group. With the last stripe copied, the to_copy flag is finally disabled. Afterwards we can safely clone incoming IOs on this block group. Reviewed-by: Josef Bacik <[email protected]> Signed-off-by: Naohiro Aota <[email protected]> Signed-off-by: David Sterba <[email protected]>
1 parent 4eef29e commit 78ce9fc

File tree

4 files changed

+204
-0
lines changed

4 files changed

+204
-0
lines changed

fs/btrfs/block-group.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ struct btrfs_block_group {
9595
unsigned int iref:1;
9696
unsigned int has_caching_ctl:1;
9797
unsigned int removed:1;
98+
unsigned int to_copy:1;
9899

99100
int disk_cache_state;
100101

fs/btrfs/dev-replace.c

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "dev-replace.h"
2323
#include "sysfs.h"
2424
#include "zoned.h"
25+
#include "block-group.h"
2526

2627
/*
2728
* Device replace overview
@@ -459,6 +460,185 @@ static char* btrfs_dev_name(struct btrfs_device *device)
459460
return rcu_str_deref(device->name);
460461
}
461462

463+
static int mark_block_group_to_copy(struct btrfs_fs_info *fs_info,
464+
struct btrfs_device *src_dev)
465+
{
466+
struct btrfs_path *path;
467+
struct btrfs_key key;
468+
struct btrfs_key found_key;
469+
struct btrfs_root *root = fs_info->dev_root;
470+
struct btrfs_dev_extent *dev_extent = NULL;
471+
struct btrfs_block_group *cache;
472+
struct btrfs_trans_handle *trans;
473+
int ret = 0;
474+
u64 chunk_offset;
475+
476+
/* Do not use "to_copy" on non zoned filesystem for now */
477+
if (!btrfs_is_zoned(fs_info))
478+
return 0;
479+
480+
mutex_lock(&fs_info->chunk_mutex);
481+
482+
/* Ensure we don't have pending new block group */
483+
spin_lock(&fs_info->trans_lock);
484+
while (fs_info->running_transaction &&
485+
!list_empty(&fs_info->running_transaction->dev_update_list)) {
486+
spin_unlock(&fs_info->trans_lock);
487+
mutex_unlock(&fs_info->chunk_mutex);
488+
trans = btrfs_attach_transaction(root);
489+
if (IS_ERR(trans)) {
490+
ret = PTR_ERR(trans);
491+
mutex_lock(&fs_info->chunk_mutex);
492+
if (ret == -ENOENT) {
493+
spin_lock(&fs_info->trans_lock);
494+
continue;
495+
} else {
496+
goto unlock;
497+
}
498+
}
499+
500+
ret = btrfs_commit_transaction(trans);
501+
mutex_lock(&fs_info->chunk_mutex);
502+
if (ret)
503+
goto unlock;
504+
505+
spin_lock(&fs_info->trans_lock);
506+
}
507+
spin_unlock(&fs_info->trans_lock);
508+
509+
path = btrfs_alloc_path();
510+
if (!path) {
511+
ret = -ENOMEM;
512+
goto unlock;
513+
}
514+
515+
path->reada = READA_FORWARD;
516+
path->search_commit_root = 1;
517+
path->skip_locking = 1;
518+
519+
key.objectid = src_dev->devid;
520+
key.type = BTRFS_DEV_EXTENT_KEY;
521+
key.offset = 0;
522+
523+
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
524+
if (ret < 0)
525+
goto free_path;
526+
if (ret > 0) {
527+
if (path->slots[0] >=
528+
btrfs_header_nritems(path->nodes[0])) {
529+
ret = btrfs_next_leaf(root, path);
530+
if (ret < 0)
531+
goto free_path;
532+
if (ret > 0) {
533+
ret = 0;
534+
goto free_path;
535+
}
536+
} else {
537+
ret = 0;
538+
}
539+
}
540+
541+
while (1) {
542+
struct extent_buffer *leaf = path->nodes[0];
543+
int slot = path->slots[0];
544+
545+
btrfs_item_key_to_cpu(leaf, &found_key, slot);
546+
547+
if (found_key.objectid != src_dev->devid)
548+
break;
549+
550+
if (found_key.type != BTRFS_DEV_EXTENT_KEY)
551+
break;
552+
553+
if (found_key.offset < key.offset)
554+
break;
555+
556+
dev_extent = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
557+
558+
chunk_offset = btrfs_dev_extent_chunk_offset(leaf, dev_extent);
559+
560+
cache = btrfs_lookup_block_group(fs_info, chunk_offset);
561+
if (!cache)
562+
goto skip;
563+
564+
spin_lock(&cache->lock);
565+
cache->to_copy = 1;
566+
spin_unlock(&cache->lock);
567+
568+
btrfs_put_block_group(cache);
569+
570+
skip:
571+
ret = btrfs_next_item(root, path);
572+
if (ret != 0) {
573+
if (ret > 0)
574+
ret = 0;
575+
break;
576+
}
577+
}
578+
579+
free_path:
580+
btrfs_free_path(path);
581+
unlock:
582+
mutex_unlock(&fs_info->chunk_mutex);
583+
584+
return ret;
585+
}
586+
587+
bool btrfs_finish_block_group_to_copy(struct btrfs_device *srcdev,
588+
struct btrfs_block_group *cache,
589+
u64 physical)
590+
{
591+
struct btrfs_fs_info *fs_info = cache->fs_info;
592+
struct extent_map *em;
593+
struct map_lookup *map;
594+
u64 chunk_offset = cache->start;
595+
int num_extents, cur_extent;
596+
int i;
597+
598+
/* Do not use "to_copy" on non zoned filesystem for now */
599+
if (!btrfs_is_zoned(fs_info))
600+
return true;
601+
602+
spin_lock(&cache->lock);
603+
if (cache->removed) {
604+
spin_unlock(&cache->lock);
605+
return true;
606+
}
607+
spin_unlock(&cache->lock);
608+
609+
em = btrfs_get_chunk_map(fs_info, chunk_offset, 1);
610+
ASSERT(!IS_ERR(em));
611+
map = em->map_lookup;
612+
613+
num_extents = cur_extent = 0;
614+
for (i = 0; i < map->num_stripes; i++) {
615+
/* We have more device extent to copy */
616+
if (srcdev != map->stripes[i].dev)
617+
continue;
618+
619+
num_extents++;
620+
if (physical == map->stripes[i].physical)
621+
cur_extent = i;
622+
}
623+
624+
free_extent_map(em);
625+
626+
if (num_extents > 1 && cur_extent < num_extents - 1) {
627+
/*
628+
* Has more stripes on this device. Keep this block group
629+
* readonly until we finish all the stripes.
630+
*/
631+
return false;
632+
}
633+
634+
/* Last stripe on this device */
635+
spin_lock(&cache->lock);
636+
cache->to_copy = 0;
637+
spin_unlock(&cache->lock);
638+
639+
return true;
640+
}
641+
462642
static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
463643
const char *tgtdev_name, u64 srcdevid, const char *srcdev_name,
464644
int read_src)
@@ -500,6 +680,10 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
500680
if (ret)
501681
return ret;
502682

683+
ret = mark_block_group_to_copy(fs_info, src_device);
684+
if (ret)
685+
return ret;
686+
503687
down_write(&dev_replace->rwsem);
504688
switch (dev_replace->replace_state) {
505689
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:

fs/btrfs/dev-replace.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,8 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info);
1818
void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info);
1919
int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info);
2020
int __pure btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace);
21+
bool btrfs_finish_block_group_to_copy(struct btrfs_device *srcdev,
22+
struct btrfs_block_group *cache,
23+
u64 physical);
2124

2225
#endif

fs/btrfs/scrub.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3561,6 +3561,16 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
35613561
if (!cache)
35623562
goto skip;
35633563

3564+
if (sctx->is_dev_replace && btrfs_is_zoned(fs_info)) {
3565+
spin_lock(&cache->lock);
3566+
if (!cache->to_copy) {
3567+
spin_unlock(&cache->lock);
3568+
ro_set = 0;
3569+
goto done;
3570+
}
3571+
spin_unlock(&cache->lock);
3572+
}
3573+
35643574
/*
35653575
* Make sure that while we are scrubbing the corresponding block
35663576
* group doesn't get its logical address and its device extents
@@ -3692,6 +3702,12 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
36923702

36933703
scrub_pause_off(fs_info);
36943704

3705+
if (sctx->is_dev_replace &&
3706+
!btrfs_finish_block_group_to_copy(dev_replace->srcdev,
3707+
cache, found_key.offset))
3708+
ro_set = 0;
3709+
3710+
done:
36953711
down_write(&dev_replace->rwsem);
36963712
dev_replace->cursor_left = dev_replace->cursor_right;
36973713
dev_replace->item_needs_writeback = 1;

0 commit comments

Comments
 (0)