@@ -8808,90 +8808,90 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
88088808; GFX12-NEXT: v_lshrrev_b16 v2, 8, s6
88098809; GFX12-NEXT: v_lshrrev_b16 v4, 8, s5
88108810; GFX12-NEXT: v_lshrrev_b16 v8, 8, s2
8811- ; GFX12-NEXT: s_lshr_b32 s24 , s7, 16
8811+ ; GFX12-NEXT: s_lshr_b32 s22 , s7, 16
88128812; GFX12-NEXT: v_bfe_i32 v31, v1, 0, 8
8813- ; GFX12-NEXT: s_lshr_b32 s42 , s2, 24
8814- ; GFX12-NEXT: s_mov_b32 s48 , s7
8813+ ; GFX12-NEXT: s_lshr_b32 s40 , s2, 24
8814+ ; GFX12-NEXT: s_mov_b32 s46 , s7
88158815; GFX12-NEXT: v_lshrrev_b16 v5, 8, s4
88168816; GFX12-NEXT: v_lshrrev_b16 v7, 8, s1
8817- ; GFX12-NEXT: s_lshr_b32 s26 , s6, 16
8818- ; GFX12-NEXT: s_lshr_b32 s44 , s1, 16
8817+ ; GFX12-NEXT: s_lshr_b32 s24 , s6, 16
8818+ ; GFX12-NEXT: s_lshr_b32 s42 , s1, 16
88198819; GFX12-NEXT: s_ashr_i64 s[58:59], s[6:7], 56
8820- ; GFX12-NEXT: s_bfe_i64 s[48:49 ], s[48:49 ], 0x80000
8821- ; GFX12-NEXT: s_bfe_i64 s[42:43 ], s[42:43 ], 0x80000
8822- ; GFX12-NEXT: s_bfe_i64 s[24:25 ], s[24:25 ], 0x80000
8820+ ; GFX12-NEXT: s_bfe_i64 s[46:47 ], s[46:47 ], 0x80000
8821+ ; GFX12-NEXT: s_bfe_i64 s[40:41 ], s[40:41 ], 0x80000
8822+ ; GFX12-NEXT: s_bfe_i64 s[22:23 ], s[22:23 ], 0x80000
88238823; GFX12-NEXT: v_lshrrev_b16 v6, 8, s3
88248824; GFX12-NEXT: v_lshrrev_b16 v3, 8, s0
8825- ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v33, s24
8826- ; GFX12-NEXT: s_lshr_b32 s28 , s6, 24
8827- ; GFX12-NEXT: s_lshr_b32 s30 , s5, 16
8828- ; GFX12-NEXT: s_lshr_b32 s40 , s2, 16
8825+ ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v33, s22
8826+ ; GFX12-NEXT: s_lshr_b32 s26 , s6, 24
8827+ ; GFX12-NEXT: s_lshr_b32 s28 , s5, 16
8828+ ; GFX12-NEXT: s_lshr_b32 s38 , s2, 16
88298829; GFX12-NEXT: v_bfe_i32 v11, v8, 0, 8
88308830; GFX12-NEXT: v_bfe_i32 v23, v4, 0, 8
88318831; GFX12-NEXT: v_bfe_i32 v27, v2, 0, 8
88328832; GFX12-NEXT: v_ashrrev_i32_e32 v32, 31, v31
8833- ; GFX12-NEXT: s_bfe_i64 s[44:45 ], s[44:45 ], 0x80000
8834- ; GFX12-NEXT: s_bfe_i64 s[26:27 ], s[26:27 ], 0x80000
8835- ; GFX12-NEXT: v_dual_mov_b32 v34, s25 :: v_dual_mov_b32 v35, s58
8836- ; GFX12-NEXT: v_dual_mov_b32 v36, s59 :: v_dual_mov_b32 v37, s26
8837- ; GFX12-NEXT: v_dual_mov_b32 v56, s43 :: v_dual_mov_b32 v29, s48
8838- ; GFX12-NEXT: v_mov_b32_e32 v30, s49
8839- ; GFX12-NEXT: s_lshr_b32 s46 , s0, 24
8840- ; GFX12-NEXT: s_mov_b32 s50 , s5
8841- ; GFX12-NEXT: s_mov_b32 s52 , s3
8842- ; GFX12-NEXT: s_lshr_b32 s34 , s4, 16
8843- ; GFX12-NEXT: s_lshr_b32 s36 , s4, 24
8844- ; GFX12-NEXT: s_ashr_i64 s[22:23 ], s[2:3], 56
8833+ ; GFX12-NEXT: s_bfe_i64 s[42:43 ], s[42:43 ], 0x80000
8834+ ; GFX12-NEXT: s_bfe_i64 s[24:25 ], s[24:25 ], 0x80000
8835+ ; GFX12-NEXT: v_dual_mov_b32 v34, s23 :: v_dual_mov_b32 v35, s58
8836+ ; GFX12-NEXT: v_dual_mov_b32 v36, s59 :: v_dual_mov_b32 v37, s24
8837+ ; GFX12-NEXT: v_dual_mov_b32 v56, s41 :: v_dual_mov_b32 v29, s46
8838+ ; GFX12-NEXT: v_mov_b32_e32 v30, s47
8839+ ; GFX12-NEXT: s_lshr_b32 s44 , s0, 24
8840+ ; GFX12-NEXT: s_mov_b32 s48 , s5
8841+ ; GFX12-NEXT: s_mov_b32 s50 , s3
8842+ ; GFX12-NEXT: s_lshr_b32 s30 , s4, 16
8843+ ; GFX12-NEXT: s_lshr_b32 s34 , s4, 24
8844+ ; GFX12-NEXT: s_ashr_i64 s[54:55 ], s[2:3], 56
88458845; GFX12-NEXT: s_ashr_i64 s[56:57], s[4:5], 56
88468846; GFX12-NEXT: v_bfe_i32 v7, v7, 0, 8
88478847; GFX12-NEXT: v_bfe_i32 v19, v5, 0, 8
8848- ; GFX12-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x80000
8849- ; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x80000
8848+ ; GFX12-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x80000
88508849; GFX12-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x80000
8851- ; GFX12-NEXT: s_lshr_b32 s38, s3, 16
8852- ; GFX12-NEXT: s_mov_b32 s54, s1
8850+ ; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000
8851+ ; GFX12-NEXT: s_lshr_b32 s36, s3, 16
8852+ ; GFX12-NEXT: s_mov_b32 s52, s1
88538853; GFX12-NEXT: s_bfe_i64 s[12:13], s[2:3], 0x80000
88548854; GFX12-NEXT: s_bfe_i64 s[14:15], s[4:5], 0x80000
88558855; GFX12-NEXT: s_bfe_i64 s[16:17], s[6:7], 0x80000
8856- ; GFX12-NEXT: s_bfe_i64 s[2:3], s[52:53 ], 0x80000
8857- ; GFX12-NEXT: s_bfe_i64 s[4:5], s[50:51 ], 0x80000
8858- ; GFX12-NEXT: s_bfe_i64 s[6:7], s[46:47 ], 0x80000
8856+ ; GFX12-NEXT: s_bfe_i64 s[2:3], s[50:51 ], 0x80000
8857+ ; GFX12-NEXT: s_bfe_i64 s[4:5], s[48:49 ], 0x80000
8858+ ; GFX12-NEXT: s_bfe_i64 s[6:7], s[44:45 ], 0x80000
88598859; GFX12-NEXT: s_lshr_b32 s20, s0, 16
88608860; GFX12-NEXT: s_ashr_i64 s[18:19], s[0:1], 56
88618861; GFX12-NEXT: v_bfe_i32 v3, v3, 0, 8
88628862; GFX12-NEXT: v_bfe_i32 v15, v6, 0, 8
8863- ; GFX12-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x80000
88648863; GFX12-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x80000
8865- ; GFX12-NEXT: v_dual_mov_b32 v38, s27 :: v_dual_mov_b32 v39, s28
8866- ; GFX12-NEXT: v_dual_mov_b32 v40, s29 :: v_dual_mov_b32 v41, s30
8867- ; GFX12-NEXT: v_dual_mov_b32 v42, s31 :: v_dual_mov_b32 v43, s56
8868- ; GFX12-NEXT: v_dual_mov_b32 v44, s57 :: v_dual_mov_b32 v45, s34
8869- ; GFX12-NEXT: v_dual_mov_b32 v52, s23 :: v_dual_mov_b32 v53, s40
8870- ; GFX12-NEXT: v_dual_mov_b32 v54, s41 :: v_dual_mov_b32 v55, s42
8864+ ; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x80000
8865+ ; GFX12-NEXT: v_dual_mov_b32 v38, s25 :: v_dual_mov_b32 v39, s26
8866+ ; GFX12-NEXT: v_dual_mov_b32 v40, s27 :: v_dual_mov_b32 v41, s28
8867+ ; GFX12-NEXT: v_dual_mov_b32 v42, s29 :: v_dual_mov_b32 v43, s56
8868+ ; GFX12-NEXT: v_dual_mov_b32 v44, s57 :: v_dual_mov_b32 v45, s30
8869+ ; GFX12-NEXT: v_dual_mov_b32 v52, s55 :: v_dual_mov_b32 v53, s38
8870+ ; GFX12-NEXT: v_dual_mov_b32 v54, s39 :: v_dual_mov_b32 v55, s40
88718871; GFX12-NEXT: s_bfe_i64 s[10:11], s[0:1], 0x80000
8872- ; GFX12-NEXT: s_bfe_i64 s[0:1], s[54:55 ], 0x80000
8872+ ; GFX12-NEXT: s_bfe_i64 s[0:1], s[52:53 ], 0x80000
88738873; GFX12-NEXT: v_ashrrev_i32_e32 v12, 31, v11
88748874; GFX12-NEXT: v_ashrrev_i32_e32 v24, 31, v23
88758875; GFX12-NEXT: v_ashrrev_i32_e32 v28, 31, v27
88768876; GFX12-NEXT: global_store_b128 v0, v[33:36], s[8:9] offset:240
8877- ; GFX12-NEXT: v_mov_b32_e32 v33, s44
8877+ ; GFX12-NEXT: v_mov_b32_e32 v33, s42
88788878; GFX12-NEXT: global_store_b128 v0, v[29:32], s[8:9] offset:224
88798879; GFX12-NEXT: v_dual_mov_b32 v25, s16 :: v_dual_mov_b32 v26, s17
88808880; GFX12-NEXT: v_dual_mov_b32 v32, s7 :: v_dual_mov_b32 v21, s4
88818881; GFX12-NEXT: v_dual_mov_b32 v22, s5 :: v_dual_mov_b32 v17, s14
88828882; GFX12-NEXT: v_dual_mov_b32 v14, s3 :: v_dual_mov_b32 v9, s12
88838883; GFX12-NEXT: v_dual_mov_b32 v10, s13 :: v_dual_mov_b32 v5, s0
88848884; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000
8885- ; GFX12-NEXT: s_bfe_i64 s[38:39 ], s[38:39 ], 0x80000
8886- ; GFX12-NEXT: v_dual_mov_b32 v46, s35 :: v_dual_mov_b32 v47, s36
8887- ; GFX12-NEXT: v_dual_mov_b32 v48, s37 :: v_dual_mov_b32 v49, s38
8888- ; GFX12-NEXT: v_dual_mov_b32 v34, s45 :: v_dual_mov_b32 v35, s18
8885+ ; GFX12-NEXT: s_bfe_i64 s[36:37 ], s[36:37 ], 0x80000
8886+ ; GFX12-NEXT: v_dual_mov_b32 v46, s31 :: v_dual_mov_b32 v47, s34
8887+ ; GFX12-NEXT: v_dual_mov_b32 v48, s35 :: v_dual_mov_b32 v49, s36
8888+ ; GFX12-NEXT: v_dual_mov_b32 v34, s43 :: v_dual_mov_b32 v35, s18
88898889; GFX12-NEXT: v_dual_mov_b32 v36, s19 :: v_dual_mov_b32 v29, s20
88908890; GFX12-NEXT: v_ashrrev_i32_e32 v8, 31, v7
88918891; GFX12-NEXT: v_ashrrev_i32_e32 v20, 31, v19
88928892; GFX12-NEXT: v_dual_mov_b32 v18, s15 :: v_dual_mov_b32 v13, s2
88938893; GFX12-NEXT: v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v1, s10
8894- ; GFX12-NEXT: v_dual_mov_b32 v50, s39 :: v_dual_mov_b32 v51, s22
8894+ ; GFX12-NEXT: v_dual_mov_b32 v50, s37 :: v_dual_mov_b32 v51, s54
88958895; GFX12-NEXT: v_dual_mov_b32 v30, s21 :: v_dual_mov_b32 v31, s6
88968896; GFX12-NEXT: v_ashrrev_i32_e32 v4, 31, v3
88978897; GFX12-NEXT: v_ashrrev_i32_e32 v16, 31, v15
0 commit comments