Skip to content

Commit cc88323

Browse files
zhangyi089 authored and tytso committed
ext4: drop unnecessary journal handle in delalloc write
After we factored out the inline data write procedure from ext4_da_write_end(), we no longer need to start a journal handle for either the buffer overwrite case or the append-write case. If we need to update i_disksize, mark_inode_dirty() starts a handle and updates the inode buffer. So we can simply remove all of the journal handle code in the delalloc write procedure.

After this patch, we get a significant performance improvement. Below is the Unixbench comparison data from a test on my machine with an 'Intel Xeon Gold 5120' CPU and an NVMe SSD backend.

Test cmd:

  ./Run -c 56 -i 3 fstime fsbuffer fsdisk

Before this patch:

  System Benchmarks Partial Index           BASELINE     RESULT    INDEX
  File Copy 1024 bufsize 2000 maxblocks       3960.0   422965.0   1068.1
  File Copy 256 bufsize 500 maxblocks         1655.0   105077.0    634.9
  File Copy 4096 bufsize 8000 maxblocks       5800.0  1429092.0   2464.0
                                                                  ======
  System Benchmarks Index Score (Partial Only)                    1186.6

After this patch:

  System Benchmarks Partial Index           BASELINE     RESULT    INDEX
  File Copy 1024 bufsize 2000 maxblocks       3960.0   732716.0   1850.3
  File Copy 256 bufsize 500 maxblocks         1655.0   184940.0   1117.5
  File Copy 4096 bufsize 8000 maxblocks       5800.0  2427152.0   4184.7
                                                                  ======
  System Benchmarks Index Score (Partial Only)                    2053.0

Signed-off-by: Zhang Yi <[email protected]>
Reviewed-by: Jan Kara <[email protected]>
Signed-off-by: Theodore Ts'o <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent 6984aef commit cc88323

File tree

1 file changed

+5
-55
lines changed

1 file changed

+5
-55
lines changed

fs/ext4/inode.c

Lines changed: 5 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -2910,19 +2910,6 @@ static int ext4_nonda_switch(struct super_block *sb)
29102910
return 0;
29112911
}
29122912

2913-
/* We always reserve for an inode update; the superblock could be there too */
2914-
static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len)
2915-
{
2916-
if (likely(ext4_has_feature_large_file(inode->i_sb)))
2917-
return 1;
2918-
2919-
if (pos + len <= 0x7fffffffULL)
2920-
return 1;
2921-
2922-
/* We might need to update the superblock to set LARGE_FILE */
2923-
return 2;
2924-
}
2925-
29262913
static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
29272914
loff_t pos, unsigned len, unsigned flags,
29282915
struct page **pagep, void **fsdata)
@@ -2931,7 +2918,6 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
29312918
struct page *page;
29322919
pgoff_t index;
29332920
struct inode *inode = mapping->host;
2934-
handle_t *handle;
29352921

29362922
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
29372923
return -EIO;
@@ -2957,41 +2943,11 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
29572943
return 0;
29582944
}
29592945

2960-
/*
2961-
* grab_cache_page_write_begin() can take a long time if the
2962-
* system is thrashing due to memory pressure, or if the page
2963-
* is being written back. So grab it first before we start
2964-
* the transaction handle. This also allows us to allocate
2965-
* the page (if needed) without using GFP_NOFS.
2966-
*/
2967-
retry_grab:
2946+
retry:
29682947
page = grab_cache_page_write_begin(mapping, index, flags);
29692948
if (!page)
29702949
return -ENOMEM;
2971-
unlock_page(page);
2972-
2973-
/*
2974-
* With delayed allocation, we don't log the i_disksize update
2975-
* if there is delayed block allocation. But we still need
2976-
* to journalling the i_disksize update if writes to the end
2977-
* of file which has an already mapped buffer.
2978-
*/
2979-
retry_journal:
2980-
handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
2981-
ext4_da_write_credits(inode, pos, len));
2982-
if (IS_ERR(handle)) {
2983-
put_page(page);
2984-
return PTR_ERR(handle);
2985-
}
29862950

2987-
lock_page(page);
2988-
if (page->mapping != mapping) {
2989-
/* The page got truncated from under us */
2990-
unlock_page(page);
2991-
put_page(page);
2992-
ext4_journal_stop(handle);
2993-
goto retry_grab;
2994-
}
29952951
/* In case writeback began while the page was unlocked */
29962952
wait_for_stable_page(page);
29972953

@@ -3003,20 +2959,18 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
30032959
#endif
30042960
if (ret < 0) {
30052961
unlock_page(page);
3006-
ext4_journal_stop(handle);
2962+
put_page(page);
30072963
/*
30082964
* block_write_begin may have instantiated a few blocks
30092965
* outside i_size. Trim these off again. Don't need
3010-
* i_size_read because we hold i_mutex.
2966+
* i_size_read because we hold inode lock.
30112967
*/
30122968
if (pos + len > inode->i_size)
30132969
ext4_truncate_failed_write(inode);
30142970

30152971
if (ret == -ENOSPC &&
30162972
ext4_should_retry_alloc(inode->i_sb, &retries))
3017-
goto retry_journal;
3018-
3019-
put_page(page);
2973+
goto retry;
30202974
return ret;
30212975
}
30222976

@@ -3053,8 +3007,6 @@ static int ext4_da_write_end(struct file *file,
30533007
struct page *page, void *fsdata)
30543008
{
30553009
struct inode *inode = mapping->host;
3056-
int ret;
3057-
handle_t *handle = ext4_journal_current_handle();
30583010
loff_t new_i_size;
30593011
unsigned long start, end;
30603012
int write_mode = (int)(unsigned long)fsdata;
@@ -3093,9 +3045,7 @@ static int ext4_da_write_end(struct file *file,
30933045
ext4_da_should_update_i_disksize(page, end))
30943046
ext4_update_i_disksize(inode, new_i_size);
30953047

3096-
copied = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
3097-
ret = ext4_journal_stop(handle);
3098-
return ret ? ret : copied;
3048+
return generic_write_end(file, mapping, pos, len, copied, page, fsdata);
30993049
}
31003050

31013051
/*

0 commit comments

Comments
 (0)