@@ -1958,6 +1958,104 @@ xlog_recover_do_inode_buffer(
19581958 return 0 ;
19591959}
19601960
1961+ /*
1962+ * V5 filesystems know the age of the buffer on disk being recovered. We can
1963+ * have newer objects on disk than we are replaying, and so for these cases we
1964+ * don't want to replay the current change as that will make the buffer contents
1965+ * temporarily invalid on disk.
1966+ *
1967+ * The magic number might not match the buffer type we are going to recover
1968+ * (e.g. reallocated blocks), so we ignore the xfs_buf_log_format flags. Hence
1969+ * extract the LSN of the existing object in the buffer based on its current
1970+ * magic number. If we don't recognise the magic number in the buffer, then
1971+ * return a LSN of -1 so that the caller knows it was an unrecognised block and
1972+ * so can recover the buffer.
1973+ */
1974+ static xfs_lsn_t
1975+ xlog_recover_get_buf_lsn (
1976+ struct xfs_mount * mp ,
1977+ struct xfs_buf * bp )
1978+ {
1979+ __uint32_t magic32 ;
1980+ __uint16_t magic16 ;
1981+ __uint16_t magicda ;
1982+ void * blk = bp -> b_addr ;
1983+
1984+ /* v4 filesystems always recover immediately */
1985+ if (!xfs_sb_version_hascrc (& mp -> m_sb ))
1986+ goto recover_immediately ;
1987+
1988+ magic32 = be32_to_cpu (* (__be32 * )blk );
1989+ switch (magic32 ) {
1990+ case XFS_ABTB_CRC_MAGIC :
1991+ case XFS_ABTC_CRC_MAGIC :
1992+ case XFS_ABTB_MAGIC :
1993+ case XFS_ABTC_MAGIC :
1994+ case XFS_IBT_CRC_MAGIC :
1995+ case XFS_IBT_MAGIC :
1996+ return be64_to_cpu (
1997+ ((struct xfs_btree_block * )blk )-> bb_u .s .bb_lsn );
1998+ case XFS_BMAP_CRC_MAGIC :
1999+ case XFS_BMAP_MAGIC :
2000+ return be64_to_cpu (
2001+ ((struct xfs_btree_block * )blk )-> bb_u .l .bb_lsn );
2002+ case XFS_AGF_MAGIC :
2003+ return be64_to_cpu (((struct xfs_agf * )blk )-> agf_lsn );
2004+ case XFS_AGFL_MAGIC :
2005+ return be64_to_cpu (((struct xfs_agfl * )blk )-> agfl_lsn );
2006+ case XFS_AGI_MAGIC :
2007+ return be64_to_cpu (((struct xfs_agi * )blk )-> agi_lsn );
2008+ case XFS_SYMLINK_MAGIC :
2009+ return be64_to_cpu (((struct xfs_dsymlink_hdr * )blk )-> sl_lsn );
2010+ case XFS_DIR3_BLOCK_MAGIC :
2011+ case XFS_DIR3_DATA_MAGIC :
2012+ case XFS_DIR3_FREE_MAGIC :
2013+ return be64_to_cpu (((struct xfs_dir3_blk_hdr * )blk )-> lsn );
2014+ case XFS_ATTR3_RMT_MAGIC :
2015+ return be64_to_cpu (((struct xfs_attr3_rmt_hdr * )blk )-> rm_lsn );
2016+ case XFS_SB_MAGIC :
2017+ return be64_to_cpu (((struct xfs_sb * )blk )-> sb_lsn );
2018+ default :
2019+ break ;
2020+ }
2021+
2022+ magicda = be16_to_cpu (((struct xfs_da_blkinfo * )blk )-> magic );
2023+ switch (magicda ) {
2024+ case XFS_DIR3_LEAF1_MAGIC :
2025+ case XFS_DIR3_LEAFN_MAGIC :
2026+ case XFS_DA3_NODE_MAGIC :
2027+ return be64_to_cpu (((struct xfs_da3_blkinfo * )blk )-> lsn );
2028+ default :
2029+ break ;
2030+ }
2031+
2032+ /*
2033+ * We do individual object checks on dquot and inode buffers as they
2034+ * have their own individual LSN records. Also, we could have a stale
2035+ * buffer here, so we have to at least recognise these buffer types.
2036+ *
2037+ * A notd complexity here is inode unlinked list processing - it logs
2038+ * the inode directly in the buffer, but we don't know which inodes have
2039+ * been modified, and there is no global buffer LSN. Hence we need to
2040+ * recover all inode buffer types immediately. This problem will be
2041+ * fixed by logical logging of the unlinked list modifications.
2042+ */
2043+ magic16 = be16_to_cpu (* (__be16 * )blk );
2044+ switch (magic16 ) {
2045+ case XFS_DQUOT_MAGIC :
2046+ case XFS_DINODE_MAGIC :
2047+ goto recover_immediately ;
2048+ default :
2049+ break ;
2050+ }
2051+
2052+ /* unknown buffer contents, recover immediately */
2053+
2054+ recover_immediately :
2055+ return (xfs_lsn_t )- 1 ;
2056+
2057+ }
2058+
19612059/*
19622060 * Validate the recovered buffer is of the correct type and attach the
19632061 * appropriate buffer operations to them for writeback. Magic numbers are in a
@@ -1967,7 +2065,7 @@ xlog_recover_do_inode_buffer(
19672065 * inside a struct xfs_da_blkinfo at the start of the buffer.
19682066 */
19692067static void
1970- xlog_recovery_validate_buf_type (
2068+ xlog_recover_validate_buf_type (
19712069 struct xfs_mount * mp ,
19722070 struct xfs_buf * bp ,
19732071 xfs_buf_log_format_t * buf_f )
@@ -2246,7 +2344,7 @@ xlog_recover_do_reg_buffer(
22462344 * just avoid the verification stage for non-crc filesystems
22472345 */
22482346 if (xfs_sb_version_hascrc (& mp -> m_sb ))
2249- xlog_recovery_validate_buf_type (mp , bp , buf_f );
2347+ xlog_recover_validate_buf_type (mp , bp , buf_f );
22502348}
22512349
22522350/*
@@ -2444,13 +2542,15 @@ STATIC int
24442542xlog_recover_buffer_pass2 (
24452543 struct xlog * log ,
24462544 struct list_head * buffer_list ,
2447- struct xlog_recover_item * item )
2545+ struct xlog_recover_item * item ,
2546+ xfs_lsn_t current_lsn )
24482547{
24492548 xfs_buf_log_format_t * buf_f = item -> ri_buf [0 ].i_addr ;
24502549 xfs_mount_t * mp = log -> l_mp ;
24512550 xfs_buf_t * bp ;
24522551 int error ;
24532552 uint buf_flags ;
2553+ xfs_lsn_t lsn ;
24542554
24552555 /*
24562556 * In this pass we only want to recover all the buffers which have
@@ -2475,10 +2575,17 @@ xlog_recover_buffer_pass2(
24752575 error = bp -> b_error ;
24762576 if (error ) {
24772577 xfs_buf_ioerror_alert (bp , "xlog_recover_do..(read#1)" );
2478- xfs_buf_relse (bp );
2479- return error ;
2578+ goto out_release ;
24802579 }
24812580
2581+ /*
2582+ * recover the buffer only if we get an LSN from it and it's less than
2583+ * the lsn of the transaction we are replaying.
2584+ */
2585+ lsn = xlog_recover_get_buf_lsn (mp , bp );
2586+ if (lsn && lsn != -1 && XFS_LSN_CMP (lsn , current_lsn ) >= 0 )
2587+ goto out_release ;
2588+
24822589 if (buf_f -> blf_flags & XFS_BLF_INODE_BUF ) {
24832590 error = xlog_recover_do_inode_buffer (mp , item , bp , buf_f );
24842591 } else if (buf_f -> blf_flags &
@@ -2488,7 +2595,7 @@ xlog_recover_buffer_pass2(
24882595 xlog_recover_do_reg_buffer (mp , item , bp , buf_f );
24892596 }
24902597 if (error )
2491- return XFS_ERROR ( error ) ;
2598+ goto out_release ;
24922599
24932600 /*
24942601 * Perform delayed write on the buffer. Asynchronous writes will be
@@ -2517,6 +2624,7 @@ xlog_recover_buffer_pass2(
25172624 xfs_buf_delwri_queue (bp , buffer_list );
25182625 }
25192626
2627+ out_release :
25202628 xfs_buf_relse (bp );
25212629 return error ;
25222630}
@@ -2525,7 +2633,8 @@ STATIC int
25252633xlog_recover_inode_pass2 (
25262634 struct xlog * log ,
25272635 struct list_head * buffer_list ,
2528- struct xlog_recover_item * item )
2636+ struct xlog_recover_item * item ,
2637+ xfs_lsn_t current_lsn )
25292638{
25302639 xfs_inode_log_format_t * in_f ;
25312640 xfs_mount_t * mp = log -> l_mp ;
@@ -2604,6 +2713,20 @@ xlog_recover_inode_pass2(
26042713 goto error ;
26052714 }
26062715
2716+ /*
2717+ * If the inode has an LSN in it, recover the inode only if it's less
2718+ * than the lsn of the transaction we are replaying.
2719+ */
2720+ if (dip -> di_version >= 3 ) {
2721+ xfs_lsn_t lsn = be64_to_cpu (dip -> di_lsn );
2722+
2723+ if (lsn && lsn != -1 && XFS_LSN_CMP (lsn , current_lsn ) >= 0 ) {
2724+ trace_xfs_log_recover_inode_skip (log , in_f );
2725+ error = 0 ;
2726+ goto out_release ;
2727+ }
2728+ }
2729+
26072730 /*
26082731 * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
26092732 * are transactional and if ordering is necessary we can determine that
@@ -2793,6 +2916,8 @@ xlog_recover_inode_pass2(
27932916 ASSERT (bp -> b_target -> bt_mount == mp );
27942917 bp -> b_iodone = xlog_recover_iodone ;
27952918 xfs_buf_delwri_queue (bp , buffer_list );
2919+
2920+ out_release :
27962921 xfs_buf_relse (bp );
27972922error :
27982923 if (need_free )
@@ -2834,7 +2959,8 @@ STATIC int
28342959xlog_recover_dquot_pass2 (
28352960 struct xlog * log ,
28362961 struct list_head * buffer_list ,
2837- struct xlog_recover_item * item )
2962+ struct xlog_recover_item * item ,
2963+ xfs_lsn_t current_lsn )
28382964{
28392965 xfs_mount_t * mp = log -> l_mp ;
28402966 xfs_buf_t * bp ;
@@ -2908,6 +3034,19 @@ xlog_recover_dquot_pass2(
29083034 return XFS_ERROR (EIO );
29093035 }
29103036
3037+ /*
3038+ * If the dquot has an LSN in it, recover the dquot only if it's less
3039+ * than the lsn of the transaction we are replaying.
3040+ */
3041+ if (xfs_sb_version_hascrc (& mp -> m_sb )) {
3042+ struct xfs_dqblk * dqb = (struct xfs_dqblk * )ddq ;
3043+ xfs_lsn_t lsn = be64_to_cpu (dqb -> dd_lsn );
3044+
3045+ if (lsn && lsn != -1 && XFS_LSN_CMP (lsn , current_lsn ) >= 0 ) {
3046+ goto out_release ;
3047+ }
3048+ }
3049+
29113050 memcpy (ddq , recddq , item -> ri_buf [1 ].i_len );
29123051 if (xfs_sb_version_hascrc (& mp -> m_sb )) {
29133052 xfs_update_cksum ((char * )ddq , sizeof (struct xfs_dqblk ),
@@ -2918,9 +3057,10 @@ xlog_recover_dquot_pass2(
29183057 ASSERT (bp -> b_target -> bt_mount == mp );
29193058 bp -> b_iodone = xlog_recover_iodone ;
29203059 xfs_buf_delwri_queue (bp , buffer_list );
2921- xfs_buf_relse (bp );
29223060
2923- return (0 );
3061+ out_release :
3062+ xfs_buf_relse (bp );
3063+ return 0 ;
29243064}
29253065
29263066/*
@@ -3267,15 +3407,18 @@ xlog_recover_commit_pass2(
32673407
32683408 switch (ITEM_TYPE (item )) {
32693409 case XFS_LI_BUF :
3270- return xlog_recover_buffer_pass2 (log , buffer_list , item );
3410+ return xlog_recover_buffer_pass2 (log , buffer_list , item ,
3411+ trans -> r_lsn );
32713412 case XFS_LI_INODE :
3272- return xlog_recover_inode_pass2 (log , buffer_list , item );
3413+ return xlog_recover_inode_pass2 (log , buffer_list , item ,
3414+ trans -> r_lsn );
32733415 case XFS_LI_EFI :
32743416 return xlog_recover_efi_pass2 (log , item , trans -> r_lsn );
32753417 case XFS_LI_EFD :
32763418 return xlog_recover_efd_pass2 (log , item );
32773419 case XFS_LI_DQUOT :
3278- return xlog_recover_dquot_pass2 (log , buffer_list , item );
3420+ return xlog_recover_dquot_pass2 (log , buffer_list , item ,
3421+ trans -> r_lsn );
32793422 case XFS_LI_ICREATE :
32803423 return xlog_recover_do_icreate_pass2 (log , buffer_list , item );
32813424 case XFS_LI_QUOTAOFF :
0 commit comments