Commit e13cf63

Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: prevent RAID level downgrades when space is low
  Btrfs: account for missing devices in RAID allocation profiles
  Btrfs: EIO when we fail to read tree roots
  Btrfs: fix compiler warnings
  Btrfs: Make async snapshot ioctl more generic
  Btrfs: pwrite blocked when writing from the mmaped buffer of the same page
  Btrfs: Fix a crash when mounting a subvolume
  Btrfs: fix sync subvol/snapshot creation
  Btrfs: Fix page leak in compressed writeback path
  Btrfs: do not BUG if we fail to remove the orphan item for dead snapshots
  Btrfs: fixup return code for btrfs_del_orphan_item
  Btrfs: do not do fast caching if we are allocating blocks for tree_root
  Btrfs: deal with space cache errors better
  Btrfs: fix use after free in O_DIRECT
2 parents 073f21a + 83a50de

File tree

11 files changed: +207 -94 lines

fs/btrfs/disk-io.c

Lines changed: 6 additions & 5 deletions
@@ -696,6 +696,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
                                     __btree_submit_bio_done);
 }
 
+#ifdef CONFIG_MIGRATION
 static int btree_migratepage(struct address_space *mapping,
                        struct page *newpage, struct page *page)
 {
@@ -712,12 +713,9 @@ static int btree_migratepage(struct address_space *mapping,
        if (page_has_private(page) &&
            !try_to_release_page(page, GFP_KERNEL))
                return -EAGAIN;
-#ifdef CONFIG_MIGRATION
        return migrate_page(mapping, newpage, page);
-#else
-       return -ENOSYS;
-#endif
 }
+#endif
 
 static int btree_writepage(struct page *page, struct writeback_control *wbc)
 {
@@ -1009,7 +1007,10 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
        blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
        root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
                                     blocksize, generation);
-       BUG_ON(!root->node);
+       if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) {
+               free_extent_buffer(root->node);
+               return -EIO;
+       }
        root->commit_root = btrfs_root_node(root);
        return 0;
 }
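
The find_and_setup_root() hunk above replaces a BUG_ON() with a clean -EIO return, the "EIO when we fail to read tree roots" fix from the merge list: free whatever was partially read and let mount fail gracefully instead of crashing. A minimal user-space sketch of that pattern, using hypothetical stand-ins (read_block, block_uptodate, free_block) rather than the kernel API:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct block { int valid; };

/* Stand-in for read_tree_block(): may fail and return NULL. */
static struct block *read_block(int simulate_io_error)
{
        struct block *b;

        if (simulate_io_error)
                return NULL;
        b = malloc(sizeof(*b));
        if (b)
                b->valid = 1;
        return b;
}

static int block_uptodate(const struct block *b) { return b && b->valid; }

/* NULL-safe, like free_extent_buffer(). */
static void free_block(struct block *b) { free(b); }

static int setup_root(int simulate_io_error)
{
        struct block *node = read_block(simulate_io_error);

        /* The fixed pattern: validate, clean up, propagate -EIO. */
        if (!node || !block_uptodate(node)) {
                free_block(node);
                return -EIO;
        }
        free_block(node);
        return 0;
}

int main(void)
{
        printf("good read: %d, failed read: %d\n", setup_root(0), setup_root(1));
        return 0;
}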

fs/btrfs/extent-tree.c

Lines changed: 59 additions & 16 deletions
@@ -429,6 +429,7 @@ static int caching_kthread(void *data)
 
 static int cache_block_group(struct btrfs_block_group_cache *cache,
                             struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root,
                             int load_cache_only)
 {
        struct btrfs_fs_info *fs_info = cache->fs_info;
@@ -442,9 +443,12 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 
        /*
         * We can't do the read from on-disk cache during a commit since we need
-        * to have the normal tree locking.
+        * to have the normal tree locking.  Also if we are currently trying to
+        * allocate blocks for the tree root we can't do the fast caching since
+        * we likely hold important locks.
         */
-       if (!trans->transaction->in_commit) {
+       if (!trans->transaction->in_commit &&
+           (root && root != root->fs_info->tree_root)) {
                spin_lock(&cache->lock);
                if (cache->cached != BTRFS_CACHE_NO) {
                        spin_unlock(&cache->lock);
@@ -2741,6 +2745,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
        struct btrfs_root *root = block_group->fs_info->tree_root;
        struct inode *inode = NULL;
        u64 alloc_hint = 0;
+       int dcs = BTRFS_DC_ERROR;
        int num_pages = 0;
        int retries = 0;
        int ret = 0;
@@ -2795,6 +2800,8 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
 
        spin_lock(&block_group->lock);
        if (block_group->cached != BTRFS_CACHE_FINISHED) {
+               /* We're not cached, don't bother trying to write stuff out */
+               dcs = BTRFS_DC_WRITTEN;
                spin_unlock(&block_group->lock);
                goto out_put;
        }
@@ -2821,17 +2828,16 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
        ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
                                              num_pages, num_pages,
                                              &alloc_hint);
+       if (!ret)
+               dcs = BTRFS_DC_SETUP;
        btrfs_free_reserved_data_space(inode, num_pages);
 out_put:
        iput(inode);
 out_free:
        btrfs_release_path(root, path);
 out:
        spin_lock(&block_group->lock);
-       if (ret)
-               block_group->disk_cache_state = BTRFS_DC_ERROR;
-       else
-               block_group->disk_cache_state = BTRFS_DC_SETUP;
+       block_group->disk_cache_state = dcs;
        spin_unlock(&block_group->lock);
 
        return ret;
@@ -3037,7 +3043,13 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 
 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
 {
-       u64 num_devices = root->fs_info->fs_devices->rw_devices;
+       /*
+        * we add in the count of missing devices because we want
+        * to make sure that any RAID levels on a degraded FS
+        * continue to be honored.
+        */
+       u64 num_devices = root->fs_info->fs_devices->rw_devices +
+               root->fs_info->fs_devices->missing_devices;
 
        if (num_devices == 1)
                flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
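
The comment in this hunk carries the reasoning: on a degraded filesystem the missing devices still count toward the device total, so existing RAID levels keep being honored instead of new allocations silently downgrading. A standalone sketch of that reduction logic; the flag values and the RAID10 threshold are illustrative assumptions, not copied from the kernel:

#include <stdint.h>
#include <stdio.h>

#define BG_RAID0  (1ULL << 0)
#define BG_RAID1  (1ULL << 1)
#define BG_RAID10 (1ULL << 2)

static uint64_t reduce_alloc_profile(uint64_t flags,
                                     uint64_t rw_devices,
                                     uint64_t missing_devices)
{
        /* Missing devices still count toward what the FS was built with. */
        uint64_t num_devices = rw_devices + missing_devices;

        if (num_devices == 1)
                flags &= ~(BG_RAID1 | BG_RAID0);
        if (num_devices < 4)            /* assumed RAID10 minimum */
                flags &= ~BG_RAID10;
        return flags;
}

int main(void)
{
        /* Degraded two-device RAID1: one device present, one missing. */
        uint64_t flags = reduce_alloc_profile(BG_RAID1, 1, 1);

        printf("RAID1 kept on degraded mount: %s\n",
               (flags & BG_RAID1) ? "yes" : "no");
        return 0;
}
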
@@ -4080,7 +4092,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
         * space back to the block group, otherwise we will leak space.
         */
        if (!alloc && cache->cached == BTRFS_CACHE_NO)
-               cache_block_group(cache, trans, 1);
+               cache_block_group(cache, trans, NULL, 1);
 
        byte_in_group = bytenr - cache->key.objectid;
        WARN_ON(byte_in_group > cache->key.offset);
@@ -4930,11 +4942,31 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                btrfs_get_block_group(block_group);
                search_start = block_group->key.objectid;
 
+               /*
+                * this can happen if we end up cycling through all the
+                * raid types, but we want to make sure we only allocate
+                * for the proper type.
+                */
+               if (!block_group_bits(block_group, data)) {
+                       u64 extra = BTRFS_BLOCK_GROUP_DUP |
+                               BTRFS_BLOCK_GROUP_RAID1 |
+                               BTRFS_BLOCK_GROUP_RAID10;
+
+                       /*
+                        * if they asked for extra copies and this block group
+                        * doesn't provide them, bail.  This does allow us to
+                        * fill raid0 from raid1.
+                        */
+                       if ((data & extra) && !(block_group->flags & extra))
+                               goto loop;
+               }
+
 have_block_group:
                if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
                        u64 free_percent;
 
-                       ret = cache_block_group(block_group, trans, 1);
+                       ret = cache_block_group(block_group, trans,
+                                               orig_root, 1);
                        if (block_group->cached == BTRFS_CACHE_FINISHED)
                                goto have_block_group;
 
@@ -4958,7 +4990,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                        if (loop > LOOP_CACHING_NOWAIT ||
                            (loop > LOOP_FIND_IDEAL &&
                             atomic_read(&space_info->caching_threads) < 2)) {
-                               ret = cache_block_group(block_group, trans, 0);
+                               ret = cache_block_group(block_group, trans,
+                                                       orig_root, 0);
                                BUG_ON(ret);
                        }
                        found_uncached_bg = true;
@@ -5515,7 +5548,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
        u64 num_bytes = ins->offset;
 
        block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
-       cache_block_group(block_group, trans, 0);
+       cache_block_group(block_group, trans, NULL, 0);
        caching_ctl = get_caching_control(block_group);
 
        if (!caching_ctl) {
@@ -6300,9 +6333,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                                           NULL, NULL);
                BUG_ON(ret < 0);
                if (ret > 0) {
-                       ret = btrfs_del_orphan_item(trans, tree_root,
-                                                   root->root_key.objectid);
-                       BUG_ON(ret);
+                       /* if we fail to delete the orphan item this time
+                        * around, it'll get picked up the next time.
+                        *
+                        * The most common failure here is just -ENOENT.
+                        */
+                       btrfs_del_orphan_item(trans, tree_root,
+                                             root->root_key.objectid);
                }
        }
 
@@ -7878,7 +7915,14 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
        u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
                BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
 
-       num_devices = root->fs_info->fs_devices->rw_devices;
+       /*
+        * we add in the count of missing devices because we want
+        * to make sure that any RAID levels on a degraded FS
+        * continue to be honored.
+        */
+       num_devices = root->fs_info->fs_devices->rw_devices +
+               root->fs_info->fs_devices->missing_devices;
+
        if (num_devices == 1) {
                stripped |= BTRFS_BLOCK_GROUP_DUP;
                stripped = flags & ~stripped;
@@ -8247,7 +8291,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                        break;
                if (ret != 0)
                        goto error;
-
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
                cache = kzalloc(sizeof(*cache), GFP_NOFS);
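
The find_free_extent() guard added above is the "prevent RAID level downgrades when space is low" fix: while the allocator cycles through block groups of other RAID types, it refuses to place data that asked for extra copies into a group that provides none, though filling raid0 from raid1 stays allowed. A user-space sketch of just that predicate, with illustrative flag values:

#include <stdint.h>
#include <stdio.h>

#define BG_RAID0  (1ULL << 0)
#define BG_RAID1  (1ULL << 1)
#define BG_DUP    (1ULL << 2)
#define BG_RAID10 (1ULL << 3)

/* Return 1 if this block group may satisfy the allocation, 0 to skip it. */
static int block_group_acceptable(uint64_t bg_flags, uint64_t wanted)
{
        uint64_t extra = BG_DUP | BG_RAID1 | BG_RAID10;

        /*
         * The caller asked for extra copies but this block group
         * provides none of them: bail rather than downgrade.
         */
        if ((wanted & extra) && !(bg_flags & extra))
                return 0;
        return 1;
}

int main(void)
{
        printf("RAID1 data into a RAID0 group: %d (skip)\n",
               block_group_acceptable(BG_RAID0, BG_RAID1));
        printf("RAID0 data into a RAID1 group: %d (allowed)\n",
               block_group_acceptable(BG_RAID1, BG_RAID0));
        return 0;
}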

fs/btrfs/file.c

Lines changed: 60 additions & 32 deletions
@@ -48,30 +48,34 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
                                         struct page **prepared_pages,
                                         struct iov_iter *i)
 {
-       size_t copied;
+       size_t copied = 0;
        int pg = 0;
        int offset = pos & (PAGE_CACHE_SIZE - 1);
+       int total_copied = 0;
 
        while (write_bytes > 0) {
                size_t count = min_t(size_t,
                                     PAGE_CACHE_SIZE - offset, write_bytes);
                struct page *page = prepared_pages[pg];
-again:
-               if (unlikely(iov_iter_fault_in_readable(i, count)))
-                       return -EFAULT;
-
-               /* Copy data from userspace to the current page */
-               copied = iov_iter_copy_from_user(page, i, offset, count);
+               /*
+                * Copy data from userspace to the current page
+                *
+                * Disable pagefault to avoid recursive lock since
+                * the pages are already locked
+                */
+               pagefault_disable();
+               copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
+               pagefault_enable();
 
                /* Flush processor's dcache for this page */
                flush_dcache_page(page);
                iov_iter_advance(i, copied);
                write_bytes -= copied;
+               total_copied += copied;
 
+               /* Return to btrfs_file_aio_write to fault page */
                if (unlikely(copied == 0)) {
-                       count = min_t(size_t, PAGE_CACHE_SIZE - offset,
-                                     iov_iter_single_seg_count(i));
-                       goto again;
+                       break;
                }
 
                if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
@@ -81,7 +85,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
                        offset = 0;
                }
        }
-       return 0;
+       return total_copied;
 }
 
 /*
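
The rewritten btrfs_copy_from_user() breaks the pwrite-from-mmap deadlock by copying with page faults disabled and bailing out on a zero-byte copy, so the caller can fault the source pages in while no destination pages are locked. A rough user-space analogue of that loop shape, where copy_atomic() is a hypothetical stand-in for iov_iter_copy_from_user_atomic():

#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096

/* Stand-in for the atomic copy: may copy less than asked, 0 on "fault". */
static size_t copy_atomic(char *dst, const char **src, size_t count,
                          size_t resident_bytes)
{
        size_t n = count < resident_bytes ? count : resident_bytes;

        memcpy(dst, *src, n);
        *src += n;
        return n;
}

static size_t copy_from_user_sketch(char *pages, const char *src,
                                    size_t write_bytes, size_t resident_bytes)
{
        size_t total_copied = 0;

        while (write_bytes > 0) {
                size_t count = write_bytes < PAGE_SIZE ? write_bytes : PAGE_SIZE;
                size_t copied = copy_atomic(pages + total_copied, &src,
                                            count, resident_bytes);

                write_bytes -= copied;
                total_copied += copied;

                /* Short copy: return so the caller can fault pages in. */
                if (copied == 0)
                        break;
        }
        return total_copied;
}

int main(void)
{
        char dst[8192];
        const char src[8192] = "payload";

        /* resident_bytes == 0 simulates a non-resident source page. */
        printf("copied %zu then %zu bytes\n",
               copy_from_user_sketch(dst, src, sizeof(src), sizeof(src)),
               copy_from_user_sketch(dst, src, sizeof(src), 0));
        return 0;
}
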
@@ -854,6 +858,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
        unsigned long last_index;
        int will_write;
        int buffered = 0;
+       int copied = 0;
+       int dirty_pages = 0;
 
        will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
                      (file->f_flags & O_DIRECT));
@@ -970,45 +976,67 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                WARN_ON(num_pages > nrptrs);
                memset(pages, 0, sizeof(struct page *) * nrptrs);
 
-               ret = btrfs_delalloc_reserve_space(inode, write_bytes);
+               /*
+                * Fault pages before locking them in prepare_pages
+                * to avoid recursive lock
+                */
+               if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) {
+                       ret = -EFAULT;
+                       goto out;
+               }
+
+               ret = btrfs_delalloc_reserve_space(inode,
+                                       num_pages << PAGE_CACHE_SHIFT);
                if (ret)
                        goto out;
 
                ret = prepare_pages(root, file, pages, num_pages,
                                    pos, first_index, last_index,
                                    write_bytes);
                if (ret) {
-                       btrfs_delalloc_release_space(inode, write_bytes);
+                       btrfs_delalloc_release_space(inode,
+                                       num_pages << PAGE_CACHE_SHIFT);
                        goto out;
                }
 
-               ret = btrfs_copy_from_user(pos, num_pages,
+               copied = btrfs_copy_from_user(pos, num_pages,
                                           write_bytes, pages, &i);
-               if (ret == 0) {
+               dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >>
+                               PAGE_CACHE_SHIFT;
+
+               if (num_pages > dirty_pages) {
+                       if (copied > 0)
+                               atomic_inc(
+                                       &BTRFS_I(inode)->outstanding_extents);
+                       btrfs_delalloc_release_space(inode,
+                                       (num_pages - dirty_pages) <<
+                                       PAGE_CACHE_SHIFT);
+               }
+
+               if (copied > 0) {
                        dirty_and_release_pages(NULL, root, file, pages,
-                                               num_pages, pos, write_bytes);
+                                               dirty_pages, pos, copied);
                }
 
                btrfs_drop_pages(pages, num_pages);
-               if (ret) {
-                       btrfs_delalloc_release_space(inode, write_bytes);
-                       goto out;
-               }
 
-               if (will_write) {
-                       filemap_fdatawrite_range(inode->i_mapping, pos,
-                                                pos + write_bytes - 1);
-               } else {
-                       balance_dirty_pages_ratelimited_nr(inode->i_mapping,
-                                                          num_pages);
-                       if (num_pages <
-                           (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
-                               btrfs_btree_balance_dirty(root, 1);
-                       btrfs_throttle(root);
+               if (copied > 0) {
+                       if (will_write) {
+                               filemap_fdatawrite_range(inode->i_mapping, pos,
+                                                        pos + copied - 1);
+                       } else {
+                               balance_dirty_pages_ratelimited_nr(
+                                                       inode->i_mapping,
+                                                       dirty_pages);
+                               if (dirty_pages <
+                                   (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
+                                       btrfs_btree_balance_dirty(root, 1);
+                               btrfs_throttle(root);
+                       }
                }
 
-               pos += write_bytes;
-               num_written += write_bytes;
+               pos += copied;
+               num_written += copied;
 
                cond_resched();
        }
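
The btrfs_file_aio_write() side of the same fix reserves delalloc space for whole pages up front and, after a possibly short copy, gives back the reservation for pages that never became dirty. A sketch of that bookkeeping with a toy reservation counter; the helper names and PAGE_SHIFT value are assumptions, not the kernel interface:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

static unsigned long reserved;                  /* bytes currently reserved */

static void reserve_space(unsigned long bytes) { reserved += bytes; }
static void release_space(unsigned long bytes) { reserved -= bytes; }

static void write_chunk(unsigned long num_pages, unsigned long copied)
{
        unsigned long dirty_pages;

        reserve_space(num_pages << PAGE_SHIFT);

        /* Round the bytes actually copied up to whole dirty pages. */
        dirty_pages = (copied + PAGE_SIZE - 1) >> PAGE_SHIFT;

        /* Release the reservation for pages we never dirtied; the
         * remainder would be consumed later by writeback. */
        if (num_pages > dirty_pages)
                release_space((num_pages - dirty_pages) << PAGE_SHIFT);

        printf("copied=%lu dirty_pages=%lu reserved=%lu\n",
               copied, dirty_pages, reserved);
}

int main(void)
{
        write_chunk(4, 3 * PAGE_SIZE + 10); /* short copy still dirties 4 pages */
        write_chunk(4, PAGE_SIZE);          /* shorter copy releases 3 pages */
        return 0;
}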

fs/btrfs/free-space-cache.c

Lines changed: 7 additions & 5 deletions
@@ -290,7 +290,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
                        (unsigned long long)BTRFS_I(inode)->generation,
                        (unsigned long long)generation,
                        (unsigned long long)block_group->key.objectid);
-               goto out;
+               goto free_cache;
        }
 
        if (!num_entries)
@@ -524,6 +524,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
                return 0;
        }
 
+       node = rb_first(&block_group->free_space_offset);
+       if (!node) {
+               iput(inode);
+               return 0;
+       }
+
        last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
        filemap_write_and_wait(inode->i_mapping);
        btrfs_wait_ordered_range(inode, inode->i_size &
@@ -543,10 +549,6 @@ int btrfs_write_out_cache(struct btrfs_root *root,
         */
        first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
 
-       node = rb_first(&block_group->free_space_offset);
-       if (!node)
-               goto out_free;
-
        /*
         * Lock all pages first so we can lock the extent safely.
         *
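
The btrfs_write_out_cache() hunks hoist the empty-tree check ahead of the writeback and page-locking work, so an empty free-space cache bails out (dropping its inode reference) before any side effects instead of unwinding from the middle. A tiny sketch of that early-return shape, with hypothetical helpers (tree_first, put_inode, start_writeout):

#include <stdio.h>

struct rbnode { int dummy; };

static struct rbnode *tree_first(struct rbnode *root) { return root; }
static void put_inode(void *inode) { (void)inode; }     /* like iput() */
static void start_writeout(void) { printf("writeout started\n"); }

/* root == NULL models an empty free-space tree. */
static int write_out_cache(void *inode, struct rbnode *root)
{
        /* Hoisted check: nothing cached means nothing to write. */
        if (!tree_first(root)) {
                put_inode(inode);
                return 0;
        }

        start_writeout();               /* page locking + writeback */
        put_inode(inode);
        return 0;
}

int main(void)
{
        struct rbnode n;

        write_out_cache(NULL, NULL);    /* empty cache: quiet early return */
        write_out_cache(NULL, &n);      /* populated cache: proceeds */
        return 0;
}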
