Skip to content

Commit f0bfa76

Browse files
fdmananakdave
authored andcommitted
btrfs: fix ENOSPC failure when attempting direct IO write into NOCOW range
When doing a direct IO write against a file range that either has preallocated extents in that range or has regular extents and the file has the NOCOW attribute set, the write fails with -ENOSPC when all of the following conditions are met: 1) There are no data blocks groups with enough free space matching the size of the write; 2) There's not enough unallocated space for allocating a new data block group; 3) The extents in the target file range are not shared, neither through snapshots nor through reflinks. This is wrong because a NOCOW write can be done in such case, and in fact it's possible to do it using a buffered IO write, since when failing to allocate data space, the buffered IO path checks if a NOCOW write is possible. The failure in direct IO write path comes from the fact that early on, at btrfs_dio_iomap_begin(), we try to allocate data space for the write and if it that fails we return the error and stop - we never check if we can do NOCOW. But later, at btrfs_get_blocks_direct_write(), we check if we can do a NOCOW write into the range, or a subset of the range, and then release the previously reserved data space. Fix this by doing the data reservation only if needed, when we must COW, at btrfs_get_blocks_direct_write() instead of doing it at btrfs_dio_iomap_begin(). This also simplifies a bit the logic and removes the inneficiency of doing unnecessary data reservations. The following example test script reproduces the problem: $ cat dio-nocow-enospc.sh #!/bin/bash DEV=/dev/sdj MNT=/mnt/sdj # Use a small fixed size (1G) filesystem so that it's quick to fill # it up. # Make sure the mixed block groups feature is not enabled because we # later want to not have more space available for allocating data # extents but still have enough metadata space free for the file writes. mkfs.btrfs -f -b $((1024 * 1024 * 1024)) -O ^mixed-bg $DEV mount $DEV $MNT # Create our test file with the NOCOW attribute set. touch $MNT/foobar chattr +C $MNT/foobar # Now fill in all unallocated space with data for our test file. # This will allocate a data block group that will be full and leave # no (or a very small amount of) unallocated space in the device, so # that it will not be possible to allocate a new block group later. echo echo "Creating test file with initial data..." xfs_io -c "pwrite -S 0xab -b 1M 0 900M" $MNT/foobar # Now try a direct IO write against file range [0, 10M[. # This should succeed since this is a NOCOW file and an extent for the # range was previously allocated. echo echo "Trying direct IO write over allocated space..." xfs_io -d -c "pwrite -S 0xcd -b 10M 0 10M" $MNT/foobar umount $MNT When running the test: $ ./dio-nocow-enospc.sh (...) Creating test file with initial data... wrote 943718400/943718400 bytes at offset 0 900 MiB, 900 ops; 0:00:01.43 (625.526 MiB/sec and 625.5265 ops/sec) Trying direct IO write over allocated space... pwrite: No space left on device A test case for fstests will follow, testing both this direct IO write scenario as well as the buffered IO write scenario to make it less likely to get future regressions on the buffered IO case. Reviewed-by: Josef Bacik <[email protected]> Signed-off-by: Filipe Manana <[email protected]> Signed-off-by: David Sterba <[email protected]>
1 parent c9e6606 commit f0bfa76

File tree

1 file changed

+78
-64
lines changed

1 file changed

+78
-64
lines changed

fs/btrfs/inode.c

Lines changed: 78 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,6 @@ struct btrfs_iget_args {
6161
};
6262

6363
struct btrfs_dio_data {
64-
u64 reserve;
65-
loff_t length;
6664
ssize_t submitted;
6765
struct extent_changeset *data_reserved;
6866
};
@@ -7773,6 +7771,10 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
77737771
{
77747772
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
77757773
struct extent_map *em = *map;
7774+
int type;
7775+
u64 block_start, orig_start, orig_block_len, ram_bytes;
7776+
bool can_nocow = false;
7777+
bool space_reserved = false;
77767778
int ret = 0;
77777779

77787780
/*
@@ -7787,9 +7789,6 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
77877789
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
77887790
((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
77897791
em->block_start != EXTENT_MAP_HOLE)) {
7790-
int type;
7791-
u64 block_start, orig_start, orig_block_len, ram_bytes;
7792-
77937792
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
77947793
type = BTRFS_ORDERED_PREALLOC;
77957794
else
@@ -7799,53 +7798,92 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
77997798

78007799
if (can_nocow_extent(inode, start, &len, &orig_start,
78017800
&orig_block_len, &ram_bytes, false) == 1 &&
7802-
btrfs_inc_nocow_writers(fs_info, block_start)) {
7803-
struct extent_map *em2;
7801+
btrfs_inc_nocow_writers(fs_info, block_start))
7802+
can_nocow = true;
7803+
}
78047804

7805-
em2 = btrfs_create_dio_extent(BTRFS_I(inode), start, len,
7806-
orig_start, block_start,
7807-
len, orig_block_len,
7808-
ram_bytes, type);
7805+
if (can_nocow) {
7806+
struct extent_map *em2;
7807+
7808+
/* We can NOCOW, so only need to reserve metadata space. */
7809+
ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len);
7810+
if (ret < 0) {
7811+
/* Our caller expects us to free the input extent map. */
7812+
free_extent_map(em);
7813+
*map = NULL;
78097814
btrfs_dec_nocow_writers(fs_info, block_start);
7810-
if (type == BTRFS_ORDERED_PREALLOC) {
7811-
free_extent_map(em);
7812-
*map = em = em2;
7813-
}
7815+
goto out;
7816+
}
7817+
space_reserved = true;
78147818

7815-
if (em2 && IS_ERR(em2)) {
7816-
ret = PTR_ERR(em2);
7817-
goto out;
7818-
}
7819-
/*
7820-
* For inode marked NODATACOW or extent marked PREALLOC,
7821-
* use the existing or preallocated extent, so does not
7822-
* need to adjust btrfs_space_info's bytes_may_use.
7823-
*/
7824-
btrfs_free_reserved_data_space_noquota(fs_info, len);
7825-
goto skip_cow;
7819+
em2 = btrfs_create_dio_extent(BTRFS_I(inode), start, len,
7820+
orig_start, block_start,
7821+
len, orig_block_len,
7822+
ram_bytes, type);
7823+
btrfs_dec_nocow_writers(fs_info, block_start);
7824+
if (type == BTRFS_ORDERED_PREALLOC) {
7825+
free_extent_map(em);
7826+
*map = em = em2;
78267827
}
7827-
}
78287828

7829-
/* this will cow the extent */
7830-
free_extent_map(em);
7831-
*map = em = btrfs_new_extent_direct(BTRFS_I(inode), start, len);
7832-
if (IS_ERR(em)) {
7833-
ret = PTR_ERR(em);
7834-
goto out;
7829+
if (IS_ERR(em2)) {
7830+
ret = PTR_ERR(em2);
7831+
goto out;
7832+
}
7833+
} else {
7834+
const u64 prev_len = len;
7835+
7836+
/* Our caller expects us to free the input extent map. */
7837+
free_extent_map(em);
7838+
*map = NULL;
7839+
7840+
/* We have to COW, so need to reserve metadata and data space. */
7841+
ret = btrfs_delalloc_reserve_space(BTRFS_I(inode),
7842+
&dio_data->data_reserved,
7843+
start, len);
7844+
if (ret < 0)
7845+
goto out;
7846+
space_reserved = true;
7847+
7848+
em = btrfs_new_extent_direct(BTRFS_I(inode), start, len);
7849+
if (IS_ERR(em)) {
7850+
ret = PTR_ERR(em);
7851+
goto out;
7852+
}
7853+
*map = em;
7854+
len = min(len, em->len - (start - em->start));
7855+
if (len < prev_len)
7856+
btrfs_delalloc_release_space(BTRFS_I(inode),
7857+
dio_data->data_reserved,
7858+
start + len, prev_len - len,
7859+
true);
78357860
}
78367861

7837-
len = min(len, em->len - (start - em->start));
7862+
/*
7863+
* We have created our ordered extent, so we can now release our reservation
7864+
* for an outstanding extent.
7865+
*/
7866+
btrfs_delalloc_release_extents(BTRFS_I(inode), len);
78387867

7839-
skip_cow:
78407868
/*
78417869
* Need to update the i_size under the extent lock so buffered
78427870
* readers will get the updated i_size when we unlock.
78437871
*/
78447872
if (start + len > i_size_read(inode))
78457873
i_size_write(inode, start + len);
7846-
7847-
dio_data->reserve -= len;
78487874
out:
7875+
if (ret && space_reserved) {
7876+
btrfs_delalloc_release_extents(BTRFS_I(inode), len);
7877+
if (can_nocow) {
7878+
btrfs_delalloc_release_metadata(BTRFS_I(inode), len, true);
7879+
} else {
7880+
btrfs_delalloc_release_space(BTRFS_I(inode),
7881+
dio_data->data_reserved,
7882+
start, len, true);
7883+
extent_changeset_free(dio_data->data_reserved);
7884+
dio_data->data_reserved = NULL;
7885+
}
7886+
}
78497887
return ret;
78507888
}
78517889

@@ -7887,18 +7925,6 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
78877925
if (!dio_data)
78887926
return -ENOMEM;
78897927

7890-
dio_data->length = length;
7891-
if (write) {
7892-
dio_data->reserve = round_up(length, fs_info->sectorsize);
7893-
ret = btrfs_delalloc_reserve_space(BTRFS_I(inode),
7894-
&dio_data->data_reserved,
7895-
start, dio_data->reserve);
7896-
if (ret) {
7897-
extent_changeset_free(dio_data->data_reserved);
7898-
kfree(dio_data);
7899-
return ret;
7900-
}
7901-
}
79027928
iomap->private = dio_data;
79037929

79047930

@@ -7991,14 +8017,8 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
79918017
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
79928018
&cached_state);
79938019
err:
7994-
if (dio_data) {
7995-
btrfs_delalloc_release_space(BTRFS_I(inode),
7996-
dio_data->data_reserved, start,
7997-
dio_data->reserve, true);
7998-
btrfs_delalloc_release_extents(BTRFS_I(inode), dio_data->reserve);
7999-
extent_changeset_free(dio_data->data_reserved);
8000-
kfree(dio_data);
8001-
}
8020+
kfree(dio_data);
8021+
80028022
return ret;
80038023
}
80048024

@@ -8028,14 +8048,8 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
80288048
ret = -ENOTBLK;
80298049
}
80308050

8031-
if (write) {
8032-
if (dio_data->reserve)
8033-
btrfs_delalloc_release_space(BTRFS_I(inode),
8034-
dio_data->data_reserved, pos,
8035-
dio_data->reserve, true);
8036-
btrfs_delalloc_release_extents(BTRFS_I(inode), dio_data->length);
8051+
if (write)
80378052
extent_changeset_free(dio_data->data_reserved);
8038-
}
80398053
out:
80408054
kfree(dio_data);
80418055
iomap->private = NULL;

0 commit comments

Comments
 (0)