@@ -1774,7 +1774,8 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos)
  */
 static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
 		struct writeback_control *wbc, struct folio *folio,
-		struct inode *inode, loff_t pos, unsigned len)
+		struct inode *inode, loff_t pos, loff_t end_pos,
+		unsigned len)
 {
 	struct iomap_folio_state *ifs = folio->private;
 	size_t poff = offset_in_folio(folio, pos);
@@ -1793,15 +1794,60 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
 
 	if (ifs)
 		atomic_add(len, &ifs->write_bytes_pending);
+
+	/*
+	 * Clamp io_offset and io_size to the incore EOF so that ondisk
+	 * file size updates in the ioend completion are byte-accurate.
+	 * This avoids recovering files with zeroed tail regions when
+	 * writeback races with appending writes:
+	 *
+	 *            Thread 1:                  Thread 2:
+	 *            ------------               -----------
+	 * write [A, A+B]
+	 * update inode size to A+B
+	 * submit I/O [A, A+BS]
+	 *                                       write [A+B, A+B+C]
+	 *                                       update inode size to A+B+C
+	 * <I/O completes, updates disk size to min(A+B+C, A+BS)>
+	 * <power failure>
+	 *
+	 * After reboot:
+	 *   1) with A+B+C < A+BS, the file has zero padding in range
+	 *      [A+B, A+B+C]
+	 *
+	 *    |<    Block Size (BS)    >|
+	 *    |DDDDDDDDDDDD0000000000000|
+	 *    ^           ^        ^
+	 *    A          A+B     A+B+C
+	 *                       (EOF)
+	 *
+	 *   2) with A+B+C > A+BS, the file has zero padding in range
+	 *      [A+B, A+BS]
+	 *
+	 *    |<    Block Size (BS)    >|<    Block Size (BS)    >|
+	 *    |DDDDDDDDDDDD0000000000000|0000000000000000000000000|
+	 *    ^           ^             ^                    ^
+	 *    A          A+B           A+BS                A+B+C
+	 *                             (EOF)
+	 *
+	 *    D = Valid Data
+	 *    0 = Zero Padding
+	 *
+	 * Note that this defeats the ability to chain the ioends of
+	 * appending writes.
+	 */
 	wpc->ioend->io_size += len;
+	if (wpc->ioend->io_offset + wpc->ioend->io_size > end_pos)
+		wpc->ioend->io_size = end_pos - wpc->ioend->io_offset;
+
 	wbc_account_cgroup_owner(wbc, folio, len);
 	return 0;
 }
 
 static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
 		struct writeback_control *wbc, struct folio *folio,
-		struct inode *inode, u64 pos, unsigned dirty_len,
-		unsigned *count)
+		struct inode *inode, u64 pos, u64 end_pos,
+		unsigned dirty_len, unsigned *count)
 {
 	int error;
@@ -1826,7 +1872,7 @@ static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
 		break;
 	default:
 		error = iomap_add_to_ioend(wpc, wbc, folio, inode, pos,
-				map_len);
+				end_pos, map_len);
 		if (!error)
 			(*count)++;
 		break;
@@ -1897,11 +1943,11 @@ static bool iomap_writepage_handle_eof(struct folio *folio, struct inode *inode,
 		 * remaining memory is zeroed when mapped, and writes to that
 		 * region are not written out to the file.
 		 *
-		 * Also adjust the writeback range to skip all blocks entirely
-		 * beyond i_size.
+		 * Also adjust the end_pos to the end of file and skip writeback
+		 * for all blocks entirely beyond i_size.
 		 */
 		folio_zero_segment(folio, poff, folio_size(folio));
-		*end_pos = round_up(isize, i_blocksize(inode));
+		*end_pos = isize;
 	}
 
 	return true;
@@ -1914,6 +1960,7 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	struct inode *inode = folio->mapping->host;
 	u64 pos = folio_pos(folio);
 	u64 end_pos = pos + folio_size(folio);
+	u64 end_aligned = 0;
 	unsigned count = 0;
 	int error = 0;
 	u32 rlen;
@@ -1955,9 +2002,10 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	/*
 	 * Walk through the folio to find dirty areas to write back.
 	 */
-	while ((rlen = iomap_find_dirty_range(folio, &pos, end_pos))) {
+	end_aligned = round_up(end_pos, i_blocksize(inode));
+	while ((rlen = iomap_find_dirty_range(folio, &pos, end_aligned))) {
 		error = iomap_writepage_map_blocks(wpc, wbc, folio, inode,
-				pos, rlen, &count);
+				pos, end_pos, rlen, &count);
 		if (error)
 			break;
 		pos += rlen;