Skip to content

Commit 867f856

Browse files
committed
Merge patch series "iomap: fix zero padding data issue in concurrent append writes"
Long Li <[email protected]> says:

This patch series fixes zero padding data issues in concurrent append write scenarios. A detailed problem description and solution can be found in patch 2. Patch 1 is introduced as preparation for the fix in patch 2, eliminating the need to resample inode size for io_size trimming and avoiding issues caused by inode size changes during concurrent writeback and truncate operations.

* patches from https://lore.kernel.org/r/[email protected]:
  iomap: fix zero padding data issue in concurrent append writes
  iomap: pass byte granular end position to iomap_add_to_ioend

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Christian Brauner <[email protected]>
2 parents 930e7c2 + 51d20d1 commit 867f856

File tree

2 files changed

+58
-10
lines changed

2 files changed

+58
-10
lines changed

fs/iomap/buffered-io.c

Lines changed: 57 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1774,7 +1774,8 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos)
17741774
*/
17751775
static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
17761776
struct writeback_control *wbc, struct folio *folio,
1777-
struct inode *inode, loff_t pos, unsigned len)
1777+
struct inode *inode, loff_t pos, loff_t end_pos,
1778+
unsigned len)
17781779
{
17791780
struct iomap_folio_state *ifs = folio->private;
17801781
size_t poff = offset_in_folio(folio, pos);
@@ -1793,15 +1794,60 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
17931794

17941795
if (ifs)
17951796
atomic_add(len, &ifs->write_bytes_pending);
1797+
1798+
/*
1799+
* Clamp io_offset and io_size to the incore EOF so that ondisk
1800+
* file size updates in the ioend completion are byte-accurate.
1801+
* This avoids recovering files with zeroed tail regions when
1802+
* writeback races with appending writes:
1803+
*
1804+
* Thread 1:                  Thread 2:
1805+
* ------------               -----------
1806+
* write [A, A+B]
1807+
* update inode size to A+B
1808+
* submit I/O [A, A+BS]
1809+
*                            write [A+B, A+B+C]
1810+
*                            update inode size to A+B+C
1811+
* <I/O completes, updates disk size to min(A+B+C, A+BS)>
1812+
* <power failure>
1813+
*
1814+
* After reboot:
1815+
* 1) with A+B+C < A+BS, the file has zero padding in range
1816+
* [A+B, A+B+C]
1817+
*
1818+
* |<     Block Size (BS)   >|
1819+
* |DDDDDDDDDDDD0000000000000|
1820+
* ^           ^        ^
1821+
* A          A+B     A+B+C
1822+
*                    (EOF)
1823+
*
1824+
* 2) with A+B+C > A+BS, the file has zero padding in range
1825+
* [A+B, A+BS]
1826+
*
1827+
* |<     Block Size (BS)   >|<     Block Size (BS)    >|
1828+
* |DDDDDDDDDDDD0000000000000|00000000000000000000000000|
1829+
* ^           ^             ^           ^
1830+
* A          A+B           A+BS       A+B+C
1831+
*                          (EOF)
1832+
*
1833+
* D = Valid Data
1834+
* 0 = Zero Padding
1835+
*
1836+
* Note that this defeats the ability to chain the ioends of
1837+
* appending writes.
1838+
*/
17961839
wpc->ioend->io_size += len;
1840+
if (wpc->ioend->io_offset + wpc->ioend->io_size > end_pos)
1841+
wpc->ioend->io_size = end_pos - wpc->ioend->io_offset;
1842+
17971843
wbc_account_cgroup_owner(wbc, folio, len);
17981844
return 0;
17991845
}
18001846

18011847
static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
18021848
struct writeback_control *wbc, struct folio *folio,
1803-
struct inode *inode, u64 pos, unsigned dirty_len,
1804-
unsigned *count)
1849+
struct inode *inode, u64 pos, u64 end_pos,
1850+
unsigned dirty_len, unsigned *count)
18051851
{
18061852
int error;
18071853

@@ -1826,7 +1872,7 @@ static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
18261872
break;
18271873
default:
18281874
error = iomap_add_to_ioend(wpc, wbc, folio, inode, pos,
1829-
map_len);
1875+
end_pos, map_len);
18301876
if (!error)
18311877
(*count)++;
18321878
break;
@@ -1897,11 +1943,11 @@ static bool iomap_writepage_handle_eof(struct folio *folio, struct inode *inode,
18971943
* remaining memory is zeroed when mapped, and writes to that
18981944
* region are not written out to the file.
18991945
*
1900-
* Also adjust the writeback range to skip all blocks entirely
1901-
* beyond i_size.
1946+
* Also adjust the end_pos to the end of file and skip writeback
1947+
* for all blocks entirely beyond i_size.
19021948
*/
19031949
folio_zero_segment(folio, poff, folio_size(folio));
1904-
*end_pos = round_up(isize, i_blocksize(inode));
1950+
*end_pos = isize;
19051951
}
19061952

19071953
return true;
@@ -1914,6 +1960,7 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
19141960
struct inode *inode = folio->mapping->host;
19151961
u64 pos = folio_pos(folio);
19161962
u64 end_pos = pos + folio_size(folio);
1963+
u64 end_aligned = 0;
19171964
unsigned count = 0;
19181965
int error = 0;
19191966
u32 rlen;
@@ -1955,9 +2002,10 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
19552002
/*
19562003
* Walk through the folio to find dirty areas to write back.
19572004
*/
1958-
while ((rlen = iomap_find_dirty_range(folio, &pos, end_pos))) {
2005+
end_aligned = round_up(end_pos, i_blocksize(inode));
2006+
while ((rlen = iomap_find_dirty_range(folio, &pos, end_aligned))) {
19592007
error = iomap_writepage_map_blocks(wpc, wbc, folio, inode,
1960-
pos, rlen, &count);
2008+
pos, end_pos, rlen, &count);
19612009
if (error)
19622010
break;
19632011
pos += rlen;

include/linux/iomap.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ struct iomap_ioend {
335335
u16 io_type;
336336
u16 io_flags; /* IOMAP_F_* */
337337
struct inode *io_inode; /* file being written to */
338-
size_t io_size; /* size of the extent */
338+
size_t io_size; /* size of data within eof */
339339
loff_t io_offset; /* offset in the file */
340340
sector_t io_sector; /* start sector of ioend */
341341
struct bio io_bio; /* MUST BE LAST! */

0 commit comments

Comments
 (0)