Skip to content

Commit b631642

Browse files
Josef Bacik authored and chrismason-xx committed
Btrfs: force a page fault if we have a short copy on a page boundary
A user reported a problem where ceph was getting into 100% CPU usage while doing some writing. It turns out it's because we were doing a short write on a page that was not uptodate, which means we'd fall back to one page at a time and fault the page in. The problem is our position is on the page boundary, so our fault-in logic wasn't actually reading the page, so we'd just spin forever or until the page got read in by somebody else. This will force a readpage if we end up doing a short copy. Alexandre could reproduce this easily with ceph and reports it fixes his problem. I also wrote a reproducer that no longer hangs my box with this patch. Thanks,

Reported-and-tested-by: Alexandre Oliva <[email protected]>
Signed-off-by: Josef Bacik <[email protected]>
Signed-off-by: Chris Mason <[email protected]>
1 parent b6f3409 commit b631642

File tree

1 file changed

+16
-8
lines changed

1 file changed

+16
-8
lines changed

fs/btrfs/file.c

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,11 +1036,13 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
10361036
* on error we return an unlocked page and the error value
10371037
* on success we return a locked page and 0
10381038
*/
1039-
static int prepare_uptodate_page(struct page *page, u64 pos)
1039+
static int prepare_uptodate_page(struct page *page, u64 pos,
1040+
bool force_uptodate)
10401041
{
10411042
int ret = 0;
10421043

1043-
if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) {
1044+
if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) &&
1045+
!PageUptodate(page)) {
10441046
ret = btrfs_readpage(NULL, page);
10451047
if (ret)
10461048
return ret;
@@ -1061,7 +1063,7 @@ static int prepare_uptodate_page(struct page *page, u64 pos)
10611063
static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
10621064
struct page **pages, size_t num_pages,
10631065
loff_t pos, unsigned long first_index,
1064-
size_t write_bytes)
1066+
size_t write_bytes, bool force_uptodate)
10651067
{
10661068
struct extent_state *cached_state = NULL;
10671069
int i;
@@ -1086,10 +1088,11 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
10861088
}
10871089

10881090
if (i == 0)
1089-
err = prepare_uptodate_page(pages[i], pos);
1091+
err = prepare_uptodate_page(pages[i], pos,
1092+
force_uptodate);
10901093
if (i == num_pages - 1)
10911094
err = prepare_uptodate_page(pages[i],
1092-
pos + write_bytes);
1095+
pos + write_bytes, false);
10931096
if (err) {
10941097
page_cache_release(pages[i]);
10951098
faili = i - 1;
@@ -1158,6 +1161,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
11581161
size_t num_written = 0;
11591162
int nrptrs;
11601163
int ret = 0;
1164+
bool force_page_uptodate = false;
11611165

11621166
nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
11631167
PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
@@ -1200,7 +1204,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
12001204
* contents of pages from loop to loop
12011205
*/
12021206
ret = prepare_pages(root, file, pages, num_pages,
1203-
pos, first_index, write_bytes);
1207+
pos, first_index, write_bytes,
1208+
force_page_uptodate);
12041209
if (ret) {
12051210
btrfs_delalloc_release_space(inode,
12061211
num_pages << PAGE_CACHE_SHIFT);
@@ -1217,12 +1222,15 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
12171222
if (copied < write_bytes)
12181223
nrptrs = 1;
12191224

1220-
if (copied == 0)
1225+
if (copied == 0) {
1226+
force_page_uptodate = true;
12211227
dirty_pages = 0;
1222-
else
1228+
} else {
1229+
force_page_uptodate = false;
12231230
dirty_pages = (copied + offset +
12241231
PAGE_CACHE_SIZE - 1) >>
12251232
PAGE_CACHE_SHIFT;
1233+
}
12261234

12271235
/*
12281236
* If we had a short copy we need to release the excess delaloc

0 commit comments

Comments
 (0)