Skip to content

Commit 53e8726

Browse files
jankara authored and tytso committed
ext4: fix deadlock in journal_unmap_buffer()
We cannot wait for transaction commit in journal_unmap_buffer() because we hold the page lock, which ranks below transaction start. We solve the issue by bailing out of journal_unmap_buffer() and jbd2_journal_invalidatepage() with -EBUSY. The caller is then responsible for waiting for the transaction commit to finish and trying the invalidation again.

Since the issue can happen only for a page straddling i_size, it is simple enough to manually call jbd2_journal_invalidatepage() for such a page from ext4_setattr(), check the return value, and wait if necessary.

Signed-off-by: Jan Kara <[email protected]>
Signed-off-by: "Theodore Ts'o" <[email protected]>
1 parent 4520fb3 commit 53e8726

File tree

3 files changed

+86
-25
lines changed

3 files changed

+86
-25
lines changed

fs/ext4/inode.c

Lines changed: 71 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2894,8 +2894,8 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
28942894
block_invalidatepage(page, offset);
28952895
}
28962896

2897-
static void ext4_journalled_invalidatepage(struct page *page,
2898-
unsigned long offset)
2897+
static int __ext4_journalled_invalidatepage(struct page *page,
2898+
unsigned long offset)
28992899
{
29002900
journal_t *journal = EXT4_JOURNAL(page->mapping->host);
29012901

@@ -2907,7 +2907,14 @@ static void ext4_journalled_invalidatepage(struct page *page,
29072907
if (offset == 0)
29082908
ClearPageChecked(page);
29092909

2910-
jbd2_journal_invalidatepage(journal, page, offset);
2910+
return jbd2_journal_invalidatepage(journal, page, offset);
2911+
}
2912+
2913+
/* Wrapper for aops... */
2914+
static void ext4_journalled_invalidatepage(struct page *page,
2915+
unsigned long offset)
2916+
{
2917+
WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0);
29112918
}
29122919

29132920
static int ext4_releasepage(struct page *page, gfp_t wait)
@@ -4313,6 +4320,47 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
43134320
return err;
43144321
}
43154322

4323+
/*
4324+
* In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate
4325+
* buffers that are attached to a page straddling i_size and are undergoing
4326+
* commit. In that case we have to wait for commit to finish and try again.
4327+
*/
4328+
static void ext4_wait_for_tail_page_commit(struct inode *inode)
4329+
{
4330+
struct page *page;
4331+
unsigned offset;
4332+
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
4333+
tid_t commit_tid = 0;
4334+
int ret;
4335+
4336+
offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
4337+
/*
4338+
* All buffers in the last page remain valid? Then there's nothing to
4339+
* do. We do the check mainly to optimize the common PAGE_CACHE_SIZE ==
4340+
* blocksize case
4341+
*/
4342+
if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits))
4343+
return;
4344+
while (1) {
4345+
page = find_lock_page(inode->i_mapping,
4346+
inode->i_size >> PAGE_CACHE_SHIFT);
4347+
if (!page)
4348+
return;
4349+
ret = __ext4_journalled_invalidatepage(page, offset);
4350+
unlock_page(page);
4351+
page_cache_release(page);
4352+
if (ret != -EBUSY)
4353+
return;
4354+
commit_tid = 0;
4355+
read_lock(&journal->j_state_lock);
4356+
if (journal->j_committing_transaction)
4357+
commit_tid = journal->j_committing_transaction->t_tid;
4358+
read_unlock(&journal->j_state_lock);
4359+
if (commit_tid)
4360+
jbd2_log_wait_commit(journal, commit_tid);
4361+
}
4362+
}
4363+
43164364
/*
43174365
* ext4_setattr()
43184366
*
@@ -4426,16 +4474,28 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
44264474
}
44274475

44284476
if (attr->ia_valid & ATTR_SIZE) {
4429-
if (attr->ia_size != i_size_read(inode)) {
4430-
truncate_setsize(inode, attr->ia_size);
4431-
/* Inode size will be reduced, wait for dio in flight.
4432-
* Temporarily disable dioread_nolock to prevent
4433-
* livelock. */
4477+
if (attr->ia_size != inode->i_size) {
4478+
loff_t oldsize = inode->i_size;
4479+
4480+
i_size_write(inode, attr->ia_size);
4481+
/*
4482+
* Blocks are going to be removed from the inode. Wait
4483+
* for dio in flight. Temporarily disable
4484+
* dioread_nolock to prevent livelock.
4485+
*/
44344486
if (orphan) {
4435-
ext4_inode_block_unlocked_dio(inode);
4436-
inode_dio_wait(inode);
4437-
ext4_inode_resume_unlocked_dio(inode);
4487+
if (!ext4_should_journal_data(inode)) {
4488+
ext4_inode_block_unlocked_dio(inode);
4489+
inode_dio_wait(inode);
4490+
ext4_inode_resume_unlocked_dio(inode);
4491+
} else
4492+
ext4_wait_for_tail_page_commit(inode);
44384493
}
4494+
/*
4495+
* Truncate pagecache after we've waited for commit
4496+
* in data=journal mode to make pages freeable.
4497+
*/
4498+
truncate_pagecache(inode, oldsize, inode->i_size);
44394499
}
44404500
ext4_truncate(inode);
44414501
}

fs/jbd2/transaction.c

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1840,7 +1840,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
18401840

18411841
BUFFER_TRACE(bh, "entry");
18421842

1843-
retry:
18441843
/*
18451844
* It is safe to proceed here without the j_list_lock because the
18461845
* buffers cannot be stolen by try_to_free_buffers as long as we are
@@ -1935,14 +1934,11 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
19351934
* for commit and try again.
19361935
*/
19371936
if (partial_page) {
1938-
tid_t tid = journal->j_committing_transaction->t_tid;
1939-
19401937
jbd2_journal_put_journal_head(jh);
19411938
spin_unlock(&journal->j_list_lock);
19421939
jbd_unlock_bh_state(bh);
19431940
write_unlock(&journal->j_state_lock);
1944-
jbd2_log_wait_commit(journal, tid);
1945-
goto retry;
1941+
return -EBUSY;
19461942
}
19471943
/*
19481944
* OK, buffer won't be reachable after truncate. We just set
@@ -2003,21 +1999,23 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
20031999
* @page: page to flush
20042000
* @offset: length of page to invalidate.
20052001
*
2006-
* Reap page buffers containing data after offset in page.
2007-
*
2002+
* Reap page buffers containing data after offset in page. Can return -EBUSY
2003+
* if buffers are part of the committing transaction and the page is straddling
2004+
* i_size. Caller then has to wait for current commit and try again.
20082005
*/
2009-
void jbd2_journal_invalidatepage(journal_t *journal,
2010-
struct page *page,
2011-
unsigned long offset)
2006+
int jbd2_journal_invalidatepage(journal_t *journal,
2007+
struct page *page,
2008+
unsigned long offset)
20122009
{
20132010
struct buffer_head *head, *bh, *next;
20142011
unsigned int curr_off = 0;
20152012
int may_free = 1;
2013+
int ret = 0;
20162014

20172015
if (!PageLocked(page))
20182016
BUG();
20192017
if (!page_has_buffers(page))
2020-
return;
2018+
return 0;
20212019

20222020
/* We will potentially be playing with lists other than just the
20232021
* data lists (especially for journaled data mode), so be
@@ -2031,9 +2029,11 @@ void jbd2_journal_invalidatepage(journal_t *journal,
20312029
if (offset <= curr_off) {
20322030
/* This block is wholly outside the truncation point */
20332031
lock_buffer(bh);
2034-
may_free &= journal_unmap_buffer(journal, bh,
2035-
offset > 0);
2032+
ret = journal_unmap_buffer(journal, bh, offset > 0);
20362033
unlock_buffer(bh);
2034+
if (ret < 0)
2035+
return ret;
2036+
may_free &= ret;
20372037
}
20382038
curr_off = next_off;
20392039
bh = next;
@@ -2044,6 +2044,7 @@ void jbd2_journal_invalidatepage(journal_t *journal,
20442044
if (may_free && try_to_free_buffers(page))
20452045
J_ASSERT(!page_has_buffers(page));
20462046
}
2047+
return 0;
20472048
}
20482049

20492050
/*

include/linux/jbd2.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1098,7 +1098,7 @@ void jbd2_journal_set_triggers(struct buffer_head *,
10981098
extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
10991099
extern int jbd2_journal_forget (handle_t *, struct buffer_head *);
11001100
extern void journal_sync_buffer (struct buffer_head *);
1101-
extern void jbd2_journal_invalidatepage(journal_t *,
1101+
extern int jbd2_journal_invalidatepage(journal_t *,
11021102
struct page *, unsigned long);
11031103
extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
11041104
extern int jbd2_journal_stop(handle_t *);

0 commit comments

Comments
 (0)