Skip to content

Commit b3998b3

Browse files
riteshharjanitytso
authored andcommitted
ext4: improve fast_commit performance and scalability
Currently ext4_fc_commit_dentry_updates() is of quadratic time complexity,
which is causing performance bottlenecks with high threads/file/dir count
with fs_mark.

This patch makes the commit dentry updates path (and hence ext4_fc_commit())
linear in time complexity. Hence it improves the performance of workloads
which do fsync on multiple threads/open files one-by-one.

Absolute numbers in avg file creates per sec (from fs_mark in 1K order)
=======================================================================
no.  Order  without-patch(K)  with-patch(K)  Diff(%)
1    1      16.90             17.51          +3.60
2    2,2    32.08             31.80          -0.87
3    3,3    53.97             55.01          +1.92
4    4,4    78.94             76.90          -2.58
5    5,5    95.82             95.37          -0.46
6    6,6    87.92             103.38         +17.58
7    6,10   0.73              126.13         +17178.08
8    6,14   2.33              143.19         +6045.49

workload type
==============
For e.g. 7th row order of 6,10 (2^6 == 64 && 2^10 == 1024)
echo /run/riteshh/mnt/{1..64} |sed -E 's/[[:space:]]+/ -d /g' \
  | xargs -I {} bash -c "sudo fs_mark -L 100 -D 1024 -n 1024 -s0 -S5 -d {}"

Perf profile (w/o patches)
=============================
87.15%  [kernel]  [k] ext4_fc_commit  --> Heavy contention/bottleneck
 1.98%  [kernel]  [k] perf_event_interrupt
 0.96%  [kernel]  [k] power_pmu_enable
 0.91%  [kernel]  [k] update_sd_lb_stats.constprop.0
 0.67%  [kernel]  [k] ktime_get

Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Harshad Shirwadkar <[email protected]>
Link: https://lore.kernel.org/r/930f35d4fd5f83e2673c868781d9ebf15e91bf4e.1645426817.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <[email protected]>
1 parent 8c91c57 commit b3998b3

File tree

3 files changed

+59
-18
lines changed

3 files changed

+59
-18
lines changed

fs/ext4/ext4.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,6 +1046,8 @@ struct ext4_inode_info {
10461046

10471047
/* Fast commit related info */
10481048

1049+
/* For tracking dentry create updates */
1050+
struct list_head i_fc_dilist;
10491051
struct list_head i_fc_list; /*
10501052
* inodes that need fast commit
10511053
* protected by sbi->s_fc_lock.

fs/ext4/fast_commit.c

Lines changed: 56 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ void ext4_fc_init_inode(struct inode *inode)
199199
ext4_fc_reset_inode(inode);
200200
ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
201201
INIT_LIST_HEAD(&ei->i_fc_list);
202+
INIT_LIST_HEAD(&ei->i_fc_dilist);
202203
init_waitqueue_head(&ei->i_fc_wait);
203204
atomic_set(&ei->i_fc_updates, 0);
204205
}
@@ -279,14 +280,16 @@ void ext4_fc_stop_update(struct inode *inode)
279280
void ext4_fc_del(struct inode *inode)
280281
{
281282
struct ext4_inode_info *ei = EXT4_I(inode);
283+
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
284+
struct ext4_fc_dentry_update *fc_dentry;
282285

283286
if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
284287
(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
285288
return;
286289

287290
restart:
288291
spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
289-
if (list_empty(&ei->i_fc_list)) {
292+
if (list_empty(&ei->i_fc_list) && list_empty(&ei->i_fc_dilist)) {
290293
spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
291294
return;
292295
}
@@ -295,8 +298,33 @@ void ext4_fc_del(struct inode *inode)
295298
ext4_fc_wait_committing_inode(inode);
296299
goto restart;
297300
}
298-
list_del_init(&ei->i_fc_list);
299-
spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
301+
302+
if (!list_empty(&ei->i_fc_list))
303+
list_del_init(&ei->i_fc_list);
304+
305+
/*
306+
* Since this inode is getting removed, let's also remove all FC
307+
* dentry create references, since they no longer need to be logged.
308+
*/
309+
if (list_empty(&ei->i_fc_dilist)) {
310+
spin_unlock(&sbi->s_fc_lock);
311+
return;
312+
}
313+
314+
fc_dentry = list_first_entry(&ei->i_fc_dilist, struct ext4_fc_dentry_update, fcd_dilist);
315+
WARN_ON(fc_dentry->fcd_op != EXT4_FC_TAG_CREAT);
316+
list_del_init(&fc_dentry->fcd_list);
317+
list_del_init(&fc_dentry->fcd_dilist);
318+
319+
WARN_ON(!list_empty(&ei->i_fc_dilist));
320+
spin_unlock(&sbi->s_fc_lock);
321+
322+
if (fc_dentry->fcd_name.name &&
323+
fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
324+
kfree(fc_dentry->fcd_name.name);
325+
kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
326+
327+
return;
300328
}
301329

302330
/*
@@ -427,14 +455,28 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
427455
node->fcd_name.name = node->fcd_iname;
428456
}
429457
node->fcd_name.len = dentry->d_name.len;
430-
458+
INIT_LIST_HEAD(&node->fcd_dilist);
431459
spin_lock(&sbi->s_fc_lock);
432460
if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
433461
sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)
434462
list_add_tail(&node->fcd_list,
435463
&sbi->s_fc_dentry_q[FC_Q_STAGING]);
436464
else
437465
list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]);
466+
467+
/*
468+
* This helps us keep track of all fc_dentry updates which are part of
469+
* this ext4 inode. So in case the inode is getting unlinked before
470+
* we even get a chance to fsync, we can remove all fc_dentry
471+
* references while evicting the inode in ext4_fc_del().
472+
* Also with this, we don't need to loop over all the inodes in
473+
* sbi->s_fc_q to get the corresponding inode in
474+
* ext4_fc_commit_dentry_updates().
475+
*/
476+
if (dentry_update->op == EXT4_FC_TAG_CREAT) {
477+
WARN_ON(!list_empty(&ei->i_fc_dilist));
478+
list_add_tail(&node->fcd_dilist, &ei->i_fc_dilist);
479+
}
438480
spin_unlock(&sbi->s_fc_lock);
439481
mutex_lock(&ei->i_fc_lock);
440482

@@ -954,7 +996,7 @@ __releases(&sbi->s_fc_lock)
954996
struct ext4_sb_info *sbi = EXT4_SB(sb);
955997
struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n;
956998
struct inode *inode;
957-
struct ext4_inode_info *ei, *ei_n;
999+
struct ext4_inode_info *ei;
9581000
int ret;
9591001

9601002
if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
@@ -970,21 +1012,16 @@ __releases(&sbi->s_fc_lock)
9701012
spin_lock(&sbi->s_fc_lock);
9711013
continue;
9721014
}
973-
974-
inode = NULL;
975-
list_for_each_entry_safe(ei, ei_n, &sbi->s_fc_q[FC_Q_MAIN],
976-
i_fc_list) {
977-
if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) {
978-
inode = &ei->vfs_inode;
979-
break;
980-
}
981-
}
9821015
/*
983-
* If we don't find inode in our list, then it was deleted,
984-
* in which case, we don't need to record it's create tag.
1016+
* With fcd_dilist we need not loop in sbi->s_fc_q to get the
1017+
* corresponding inode pointer
9851018
*/
986-
if (!inode)
987-
continue;
1019+
WARN_ON(list_empty(&fc_dentry->fcd_dilist));
1020+
ei = list_first_entry(&fc_dentry->fcd_dilist,
1021+
struct ext4_inode_info, i_fc_dilist);
1022+
inode = &ei->vfs_inode;
1023+
WARN_ON(inode->i_ino != fc_dentry->fcd_ino);
1024+
9881025
spin_unlock(&sbi->s_fc_lock);
9891026

9901027
/*
@@ -1228,6 +1265,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
12281265
struct ext4_fc_dentry_update,
12291266
fcd_list);
12301267
list_del_init(&fc_dentry->fcd_list);
1268+
list_del_init(&fc_dentry->fcd_dilist);
12311269
spin_unlock(&sbi->s_fc_lock);
12321270

12331271
if (fc_dentry->fcd_name.name &&

fs/ext4/fast_commit.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ struct ext4_fc_dentry_update {
109109
struct qstr fcd_name; /* Dirent name */
110110
unsigned char fcd_iname[DNAME_INLINE_LEN]; /* Dirent name string */
111111
struct list_head fcd_list;
112+
struct list_head fcd_dilist;
112113
};
113114

114115
struct ext4_fc_stats {

0 commit comments

Comments
 (0)