@@ -69,33 +69,29 @@ define amdgpu_kernel void @preload_block_count_x(ptr addrspace(1) inreg noundef
6969; GFX942-NEXT: s_mul_i32 s11, s5, s11
7070; GFX942-NEXT: s_add_u32 s11, s14, s11
7171; GFX942-NEXT: s_addc_u32 s14, 0, s15
72- ; GFX942-NEXT: s_add_u32 s15, s10, s11
73- ; GFX942-NEXT: s_cselect_b64 s[10:11], -1, 0
74- ; GFX942-NEXT: s_cmp_lg_u64 s[10:11], 0
72+ ; GFX942-NEXT: s_add_u32 s10, s10, s11
7573; GFX942-NEXT: s_addc_u32 s5, s5, s14
76- ; GFX942-NEXT: s_mul_i32 s10 , s1, s5
77- ; GFX942-NEXT: s_mul_hi_u32 s11 , s1, s15
78- ; GFX942-NEXT: s_add_i32 s10, s11, s10
79- ; GFX942-NEXT: s_mul_i32 s3, s3, s15
80- ; GFX942-NEXT: s_add_i32 s10, s10 , s3
81- ; GFX942-NEXT: s_mul_i32 s1, s1, s15
82- ; GFX942-NEXT: s_mul_hi_u32 s11 , s5, s1
83- ; GFX942-NEXT: s_mul_i32 s14 , s5, s1
84- ; GFX942-NEXT: s_mul_i32 s17, s15, s10
85- ; GFX942-NEXT: s_mul_hi_u32 s1, s15 , s1
86- ; GFX942-NEXT: s_mul_hi_u32 s16, s15, s10
74+ ; GFX942-NEXT: s_mul_i32 s11 , s1, s5
75+ ; GFX942-NEXT: s_mul_hi_u32 s14 , s1, s10
76+ ; GFX942-NEXT: s_add_i32 s11, s14, s11
77+ ; GFX942-NEXT: s_mul_i32 s3, s3, s10
78+ ; GFX942-NEXT: s_add_i32 s11, s11 , s3
79+ ; GFX942-NEXT: s_mul_i32 s1, s1, s10
80+ ; GFX942-NEXT: s_mul_hi_u32 s14 , s5, s1
81+ ; GFX942-NEXT: s_mul_i32 s15 , s5, s1
82+ ; GFX942-NEXT: s_mul_i32 s17, s10, s11
83+ ; GFX942-NEXT: s_mul_hi_u32 s1, s10 , s1
84+ ; GFX942-NEXT: s_mul_hi_u32 s16, s10, s11
8785; GFX942-NEXT: s_add_u32 s1, s1, s17
8886; GFX942-NEXT: s_addc_u32 s16, 0, s16
89- ; GFX942-NEXT: s_add_u32 s1, s1, s14
90- ; GFX942-NEXT: s_mul_hi_u32 s3, s5, s10
91- ; GFX942-NEXT: s_addc_u32 s1, s16, s11
87+ ; GFX942-NEXT: s_add_u32 s1, s1, s15
88+ ; GFX942-NEXT: s_mul_hi_u32 s3, s5, s11
89+ ; GFX942-NEXT: s_addc_u32 s1, s16, s14
9290; GFX942-NEXT: s_addc_u32 s3, s3, 0
93- ; GFX942-NEXT: s_mul_i32 s10 , s5, s10
94- ; GFX942-NEXT: s_add_u32 s1, s1, s10
91+ ; GFX942-NEXT: s_mul_i32 s11 , s5, s11
92+ ; GFX942-NEXT: s_add_u32 s1, s1, s11
9593; GFX942-NEXT: s_addc_u32 s3, 0, s3
96- ; GFX942-NEXT: s_add_u32 s1, s15, s1
97- ; GFX942-NEXT: s_cselect_b64 s[10:11], -1, 0
98- ; GFX942-NEXT: s_cmp_lg_u64 s[10:11], 0
94+ ; GFX942-NEXT: s_add_u32 s1, s10, s1
9995; GFX942-NEXT: s_addc_u32 s3, s5, s3
10096; GFX942-NEXT: s_mul_i32 s10, s6, s3
10197; GFX942-NEXT: s_mul_hi_u32 s11, s6, s1
@@ -118,37 +114,34 @@ define amdgpu_kernel void @preload_block_count_x(ptr addrspace(1) inreg noundef
118114; GFX942-NEXT: s_add_i32 s5, s5, s10
119115; GFX942-NEXT: s_sub_i32 s14, s7, s5
120116; GFX942-NEXT: s_mul_i32 s10, s12, s1
121- ; GFX942-NEXT: s_sub_u32 s16 , s6, s10
117+ ; GFX942-NEXT: s_sub_u32 s15 , s6, s10
122118; GFX942-NEXT: s_cselect_b64 s[10:11], -1, 0
123- ; GFX942-NEXT: s_cmp_lg_u64 s[10:11], 0
124- ; GFX942-NEXT: s_subb_u32 s17, s14, s13
125- ; GFX942-NEXT: s_sub_u32 s18, s16, s12
126- ; GFX942-NEXT: s_cselect_b64 s[14:15], -1, 0
127- ; GFX942-NEXT: s_cmp_lg_u64 s[14:15], 0
128- ; GFX942-NEXT: s_subb_u32 s14, s17, 0
119+ ; GFX942-NEXT: s_subb_u32 s14, s14, s13
120+ ; GFX942-NEXT: s_sub_u32 s16, s15, s12
121+ ; GFX942-NEXT: s_subb_u32 s14, s14, 0
129122; GFX942-NEXT: s_cmp_ge_u32 s14, s13
130- ; GFX942-NEXT: s_cselect_b32 s15, -1, 0
131- ; GFX942-NEXT: s_cmp_ge_u32 s18, s12
132123; GFX942-NEXT: s_cselect_b32 s17, -1, 0
124+ ; GFX942-NEXT: s_cmp_ge_u32 s16, s12
125+ ; GFX942-NEXT: s_cselect_b32 s16, -1, 0
133126; GFX942-NEXT: s_cmp_eq_u32 s14, s13
134- ; GFX942-NEXT: s_cselect_b32 s14, s17, s15
135- ; GFX942-NEXT: s_add_u32 s15 , s1, 1
127+ ; GFX942-NEXT: s_cselect_b32 s14, s16, s17
128+ ; GFX942-NEXT: s_add_u32 s16 , s1, 1
136129; GFX942-NEXT: s_addc_u32 s17, s3, 0
137130; GFX942-NEXT: s_add_u32 s18, s1, 2
138131; GFX942-NEXT: s_addc_u32 s19, s3, 0
139132; GFX942-NEXT: s_cmp_lg_u32 s14, 0
140- ; GFX942-NEXT: s_cselect_b32 s14, s18, s15
141- ; GFX942-NEXT: s_cselect_b32 s15 , s19, s17
133+ ; GFX942-NEXT: s_cselect_b32 s14, s18, s16
134+ ; GFX942-NEXT: s_cselect_b32 s16 , s19, s17
142135; GFX942-NEXT: s_cmp_lg_u64 s[10:11], 0
143136; GFX942-NEXT: s_subb_u32 s5, s7, s5
144137; GFX942-NEXT: s_cmp_ge_u32 s5, s13
145138; GFX942-NEXT: s_cselect_b32 s10, -1, 0
146- ; GFX942-NEXT: s_cmp_ge_u32 s16 , s12
139+ ; GFX942-NEXT: s_cmp_ge_u32 s15 , s12
147140; GFX942-NEXT: s_cselect_b32 s11, -1, 0
148141; GFX942-NEXT: s_cmp_eq_u32 s5, s13
149142; GFX942-NEXT: s_cselect_b32 s5, s11, s10
150143; GFX942-NEXT: s_cmp_lg_u32 s5, 0
151- ; GFX942-NEXT: s_cselect_b32 s11, s15 , s3
144+ ; GFX942-NEXT: s_cselect_b32 s11, s16 , s3
152145; GFX942-NEXT: s_cselect_b32 s10, s14, s1
153146; GFX942-NEXT: s_cbranch_execnz .LBB0_3
154147; GFX942-NEXT: .LBB0_2:
0 commit comments