Commit 086dcbf

fdmanana authored and kdave committed
btrfs: insert items in batches when logging a directory when possible
When logging a directory, we scan its directory items from the subvolume
tree and then copy them one by one into the log tree. This is not efficient
since we are generally able to insert several items in a batch, using a
single btree operation for adding several items at once. The reason we copy
items one by one is that we must check if each item was previously logged in
the current transaction, and if it was we either overwrite it or skip it in
case its content did not change in the subvolume tree (this can happen only
for dir item keys, but not for dir index keys), and doing such a check makes
it a bit cumbersome to attempt batch insertions.

However, the chances for doing batch insertions are very frequent and always
happen when:

1) Logging the directory for the first time in the current transaction, as
   none of the items exist in the log tree yet;

2) Logging new dir index keys, because the offset for new dir index keys
   comes from a monotonically increasing counter. This means that if we keep
   adding dentries to a directory, through creation of new files and
   sub-directories or by adding new links or renaming from some other
   directory into the one we are logging, all the new dir index keys have a
   new offset that is greater than the offset of any previously logged index
   keys, so we can insert them in batches into the log tree.

For dir item keys, since their offset depends on the result of a hash
function against the dentry's name, unless the directory is being logged for
the first time in the current transaction, the chances of being able to
insert the items in the log using batches are pretty much random and not
predictable, as they depend on the names of the dentries, but it still
happens often enough.

So change directory logging to keep track of consecutive directory items
that don't exist yet in the log and batch insert them.

This patch is part of a patchset comprised of the following 5 patches:

  btrfs: remove root argument from btrfs_log_inode() and its callees
  btrfs: remove redundant log root assignment from log_dir_items()
  btrfs: factor out the copying loop of dir items from log_dir_items()
  btrfs: insert items in batches when logging a directory when possible
  btrfs: keep track of the last logged keys when logging a directory

This is patch 4/5. The change log of the last patch (5/5) has performance
results.

Signed-off-by: Filipe Manana <[email protected]>
Signed-off-by: David Sterba <[email protected]>
1 parent eb10d85 commit 086dcbf
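
To make the change easier to follow, below is a minimal, self-contained C
sketch of the batching pattern the patch introduces in
process_dir_items_leaf(). It does not use the real btrfs APIs: struct item,
already_in_log, overwrite_one() and flush_batch() are hypothetical stand-ins
for illustration only, and the printf calls just show which items would be
grouped into a single insertion.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for illustration only; not btrfs APIs. */
struct item {
        long key;             /* item key, scanned in sorted order */
        bool already_in_log;  /* key was logged in the current transaction */
};

/* Overwrite a single item that already exists in the log. */
static void overwrite_one(const struct item *it)
{
        printf("overwrite key %ld individually\n", it->key);
}

/* Insert a run of consecutive new items with one tree operation. */
static void flush_batch(const struct item *items, int start, int count)
{
        printf("batch insert %d item(s), keys %ld..%ld\n",
               count, items[start].key, items[start + count - 1].key);
}

/*
 * Scan items in key order. Consecutive items that are not in the log yet
 * are accumulated into a batch; hitting an already logged item overwrites
 * it on its own and then flushes whatever batch was pending (the kernel
 * code flushes after the overwrite to avoid deadlocking on the held path).
 */
static void log_items(const struct item *items, int nritems)
{
        int batch_start = 0;
        int batch_size = 0;

        for (int i = 0; i < nritems; i++) {
                if (items[i].already_in_log) {
                        overwrite_one(&items[i]);
                        if (batch_size > 0) {
                                flush_batch(items, batch_start, batch_size);
                                batch_size = 0;
                        }
                        continue;
                }
                /* New item: start a batch or extend the current one. */
                if (batch_size == 0)
                        batch_start = i;
                batch_size++;
        }
        if (batch_size > 0)
                flush_batch(items, batch_start, batch_size);
}

int main(void)
{
        const struct item items[] = {
                { 10, false }, { 11, false }, { 12, true },
                { 13, false }, { 14, false },
        };

        log_items(items, (int)(sizeof(items) / sizeof(items[0])));
        return 0;
}

With this sample input, keys 10 and 11 form one batch, key 12 (already
logged) is overwritten individually, and keys 13 and 14 form a second
batch, which mirrors how the real code accumulates consecutive keys that
are new to the log and inserts them with a single call to
btrfs_insert_empty_items().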

File tree

1 file changed: +180 -37 lines changed


fs/btrfs/tree-log.c

Lines changed: 180 additions & 37 deletions
@@ -368,25 +368,11 @@ static int process_one_buffer(struct btrfs_root *log,
 	return ret;
 }
 
-/*
- * Item overwrite used by replay and tree logging. eb, slot and key all refer
- * to the src data we are copying out.
- *
- * root is the tree we are copying into, and path is a scratch
- * path for use in this function (it should be released on entry and
- * will be released on exit).
- *
- * If the key is already in the destination tree the existing item is
- * overwritten. If the existing item isn't big enough, it is extended.
- * If it is too large, it is truncated.
- *
- * If the key isn't in the destination yet, a new item is inserted.
- */
-static noinline int overwrite_item(struct btrfs_trans_handle *trans,
-				   struct btrfs_root *root,
-				   struct btrfs_path *path,
-				   struct extent_buffer *eb, int slot,
-				   struct btrfs_key *key)
+static int do_overwrite_item(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root,
+			     struct btrfs_path *path,
+			     struct extent_buffer *eb, int slot,
+			     struct btrfs_key *key)
 {
 	int ret;
 	u32 item_size;
@@ -403,10 +389,22 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
 	item_size = btrfs_item_size_nr(eb, slot);
 	src_ptr = btrfs_item_ptr_offset(eb, slot);
 
-	/* look for the key in the destination tree */
-	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
-	if (ret < 0)
-		return ret;
+	/* Our caller must have done a search for the key for us. */
+	ASSERT(path->nodes[0] != NULL);
+
+	/*
+	 * And the slot must point to the exact key or the slot where the key
+	 * should be at (the first item with a key greater than 'key')
+	 */
+	if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
+		struct btrfs_key found_key;
+
+		btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
+		ret = btrfs_comp_cpu_keys(&found_key, key);
+		ASSERT(ret >= 0);
+	} else {
+		ret = 1;
+	}
 
 	if (ret == 0) {
 		char *src_copy;
@@ -584,6 +582,36 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
+/*
+ * Item overwrite used by replay and tree logging. eb, slot and key all refer
+ * to the src data we are copying out.
+ *
+ * root is the tree we are copying into, and path is a scratch
+ * path for use in this function (it should be released on entry and
+ * will be released on exit).
+ *
+ * If the key is already in the destination tree the existing item is
+ * overwritten. If the existing item isn't big enough, it is extended.
+ * If it is too large, it is truncated.
+ *
+ * If the key isn't in the destination yet, a new item is inserted.
+ */
+static int overwrite_item(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root,
+			  struct btrfs_path *path,
+			  struct extent_buffer *eb, int slot,
+			  struct btrfs_key *key)
+{
+	int ret;
+
+	/* Look for the key in the destination tree. */
+	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+	if (ret < 0)
+		return ret;
+
+	return do_overwrite_item(trans, root, path, eb, slot, key);
+}
+
 /*
  * simple helper to read an inode off the disk from a given root
  * This can only be called for subvolume roots and not for the log
@@ -3632,6 +3660,68 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
+static int flush_dir_items_batch(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *log,
+				 struct extent_buffer *src,
+				 struct btrfs_path *dst_path,
+				 int start_slot,
+				 int count)
+{
+	char *ins_data = NULL;
+	struct btrfs_key *ins_keys;
+	u32 *ins_sizes;
+	struct extent_buffer *dst;
+	struct btrfs_key key;
+	u32 item_size;
+	int ret;
+	int i;
+
+	ASSERT(count > 0);
+
+	if (count == 1) {
+		btrfs_item_key_to_cpu(src, &key, start_slot);
+		item_size = btrfs_item_size_nr(src, start_slot);
+		ins_keys = &key;
+		ins_sizes = &item_size;
+	} else {
+		ins_data = kmalloc(count * sizeof(u32) +
+				   count * sizeof(struct btrfs_key), GFP_NOFS);
+		if (!ins_data)
+			return -ENOMEM;
+
+		ins_sizes = (u32 *)ins_data;
+		ins_keys = (struct btrfs_key *)(ins_data + count * sizeof(u32));
+
+		for (i = 0; i < count; i++) {
+			const int slot = start_slot + i;
+
+			btrfs_item_key_to_cpu(src, &ins_keys[i], slot);
+			ins_sizes[i] = btrfs_item_size_nr(src, slot);
+		}
+	}
+
+	ret = btrfs_insert_empty_items(trans, log, dst_path, ins_keys, ins_sizes,
+				       count);
+	if (ret)
+		goto out;
+
+	dst = dst_path->nodes[0];
+	for (i = 0; i < count; i++) {
+		unsigned long src_offset;
+		unsigned long dst_offset;
+
+		dst_offset = btrfs_item_ptr_offset(dst, dst_path->slots[0]);
+		src_offset = btrfs_item_ptr_offset(src, start_slot + i);
+		copy_extent_buffer(dst, src, dst_offset, src_offset, ins_sizes[i]);
+		dst_path->slots[0]++;
+	}
+	btrfs_release_path(dst_path);
+out:
+	kfree(ins_data);
+
+	return ret;
+}
+
 static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
 				  struct btrfs_inode *inode,
 				  struct btrfs_path *path,
@@ -3643,21 +3733,22 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
 	struct extent_buffer *src = path->nodes[0];
 	const int nritems = btrfs_header_nritems(src);
 	const u64 ino = btrfs_ino(inode);
+	const bool inode_logged_before = inode_logged(trans, inode);
+	bool last_found = false;
+	int batch_start = 0;
+	int batch_size = 0;
 	int i;
 
 	for (i = path->slots[0]; i < nritems; i++) {
 		struct btrfs_key key;
-		struct btrfs_dir_item *di;
 		int ret;
 
 		btrfs_item_key_to_cpu(src, &key, i);
 
-		if (key.objectid != ino || key.type != key_type)
-			return 1;
-
-		ret = overwrite_item(trans, log, dst_path, src, i, &key);
-		if (ret < 0)
-			return ret;
+		if (key.objectid != ino || key.type != key_type) {
+			last_found = true;
+			break;
+		}
 
 		/*
 		 * We must make sure that when we log a directory entry, the
@@ -3681,15 +3772,67 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
 		 * never be decremented to the value BTRFS_EMPTY_DIR_SIZE,
 		 * resulting in -ENOTEMPTY errors.
 		 */
-		di = btrfs_item_ptr(src, i, struct btrfs_dir_item);
-		btrfs_dir_item_key_to_cpu(src, di, &key);
-		if ((btrfs_dir_transid(src, di) == trans->transid ||
-		     btrfs_dir_type(src, di) == BTRFS_FT_DIR) &&
-		    key.type != BTRFS_ROOT_ITEM_KEY)
-			ctx->log_new_dentries = true;
+		if (!ctx->log_new_dentries) {
+			struct btrfs_dir_item *di;
+			struct btrfs_key di_key;
+
+			di = btrfs_item_ptr(src, i, struct btrfs_dir_item);
+			btrfs_dir_item_key_to_cpu(src, di, &di_key);
+			if ((btrfs_dir_transid(src, di) == trans->transid ||
+			     btrfs_dir_type(src, di) == BTRFS_FT_DIR) &&
+			    di_key.type != BTRFS_ROOT_ITEM_KEY)
+				ctx->log_new_dentries = true;
+		}
+
+		if (!inode_logged_before)
+			goto add_to_batch;
+		/*
+		 * Check if the key was already logged before. If not we can add
+		 * it to a batch for bulk insertion.
+		 */
+		ret = btrfs_search_slot(NULL, log, &key, dst_path, 0, 0);
+		if (ret < 0) {
+			return ret;
+		} else if (ret > 0) {
+			btrfs_release_path(dst_path);
+			goto add_to_batch;
+		}
+
+		/*
+		 * Item exists in the log. Overwrite the item in the log if it
+		 * has different content or do nothing if it has exactly the same
+		 * content. And then flush the current batch if any - do it after
+		 * overwriting the current item, or we would deadlock otherwise,
+		 * since we are holding a path for the existing item.
+		 */
+		ret = do_overwrite_item(trans, log, dst_path, src, i, &key);
+		if (ret < 0)
+			return ret;
+
+		if (batch_size > 0) {
+			ret = flush_dir_items_batch(trans, log, src, dst_path,
+						    batch_start, batch_size);
+			if (ret < 0)
+				return ret;
+			batch_size = 0;
+		}
+		continue;
+add_to_batch:
+		if (batch_size == 0)
+			batch_start = i;
+		batch_size++;
 	}
 
-	return 0;
+	if (batch_size > 0) {
+		int ret;
+
+		ret = flush_dir_items_batch(trans, log, src, dst_path,
+					    batch_start, batch_size);
+		if (ret < 0)
+			return ret;
+	}
+
+	return last_found ? 1 : 0;
 }
 
 /*
