|
4 | 4 | define amdgpu_kernel void @global_atomic_fadd_ret_f32_wrong_subtarget(ptr addrspace(1) %ptr) #1 {
|
5 | 5 | ; GCN-LABEL: global_atomic_fadd_ret_f32_wrong_subtarget:
|
6 | 6 | ; GCN: ; %bb.0:
|
7 |
| -; GCN-NEXT: s_mov_b64 s[2:3], exec |
8 |
| -; GCN-NEXT: v_bfrev_b32_e32 v1, 1 |
9 |
| -; GCN-NEXT: v_mov_b32_e32 v2, 4.0 |
10 |
| -; GCN-NEXT: ; implicit-def: $vgpr0 |
11 |
| -; GCN-NEXT: .LBB0_1: ; %ComputeLoop |
12 |
| -; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 |
13 |
| -; GCN-NEXT: s_ff1_i32_b64 s6, s[2:3] |
14 |
| -; GCN-NEXT: s_lshl_b64 s[4:5], 1, s6 |
15 |
| -; GCN-NEXT: v_readfirstlane_b32 s7, v1 |
16 |
| -; GCN-NEXT: v_readlane_b32 s8, v2, s6 |
17 |
| -; GCN-NEXT: s_mov_b32 m0, s6 |
18 |
| -; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5] |
19 |
| -; GCN-NEXT: v_writelane_b32 v0, s7, m0 |
20 |
| -; GCN-NEXT: s_cmp_lg_u64 s[2:3], 0 |
21 |
| -; GCN-NEXT: v_add_f32_e32 v1, s8, v1 |
22 |
| -; GCN-NEXT: s_cbranch_scc1 .LBB0_1 |
23 |
| -; GCN-NEXT: ; %bb.2: ; %ComputeEnd |
24 |
| -; GCN-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 |
25 |
| -; GCN-NEXT: v_mbcnt_hi_u32_b32 v2, exec_hi, v2 |
26 |
| -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 |
27 |
| -; GCN-NEXT: ; implicit-def: $vgpr2 |
| 7 | +; GCN-NEXT: s_mov_b64 s[6:7], exec |
| 8 | +; GCN-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0 |
| 9 | +; GCN-NEXT: v_mbcnt_hi_u32_b32 v0, s7, v0 |
| 10 | +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| 11 | +; GCN-NEXT: ; implicit-def: $vgpr1 |
28 | 12 | ; GCN-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
29 |
| -; GCN-NEXT: s_xor_b64 s[2:3], exec, s[2:3] |
30 |
| -; GCN-NEXT: s_cbranch_execz .LBB0_6 |
31 |
| -; GCN-NEXT: ; %bb.3: |
32 |
| -; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 |
33 |
| -; GCN-NEXT: s_mov_b64 s[4:5], 0 |
34 |
| -; GCN-NEXT: v_mov_b32_e32 v3, 0 |
| 13 | +; GCN-NEXT: s_cbranch_execz .LBB0_4 |
| 14 | +; GCN-NEXT: ; %bb.1: |
| 15 | +; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 |
| 16 | +; GCN-NEXT: s_bcnt1_i32_b64 s1, s[6:7] |
| 17 | +; GCN-NEXT: v_cvt_f32_ubyte0_e32 v1, s1 |
| 18 | +; GCN-NEXT: s_mov_b64 s[6:7], 0 |
| 19 | +; GCN-NEXT: v_mul_f32_e32 v2, 4.0, v1 |
35 | 20 | ; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
36 |
| -; GCN-NEXT: s_load_dword s6, s[0:1], 0x0 |
| 21 | +; GCN-NEXT: s_load_dword s0, s[4:5], 0x0 |
| 22 | +; GCN-NEXT: v_mov_b32_e32 v3, 0 |
37 | 23 | ; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
38 |
| -; GCN-NEXT: v_mov_b32_e32 v2, s6 |
39 |
| -; GCN-NEXT: .LBB0_4: ; %atomicrmw.start |
| 24 | +; GCN-NEXT: v_mov_b32_e32 v1, s0 |
| 25 | +; GCN-NEXT: .LBB0_2: ; %atomicrmw.start |
40 | 26 | ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
|
41 |
| -; GCN-NEXT: v_mov_b32_e32 v5, v2 |
42 |
| -; GCN-NEXT: v_add_f32_e32 v4, v5, v1 |
43 |
| -; GCN-NEXT: global_atomic_cmpswap v2, v3, v[4:5], s[0:1] glc |
| 27 | +; GCN-NEXT: v_mov_b32_e32 v5, v1 |
| 28 | +; GCN-NEXT: v_add_f32_e32 v4, v5, v2 |
| 29 | +; GCN-NEXT: global_atomic_cmpswap v1, v3, v[4:5], s[4:5] glc |
44 | 30 | ; GCN-NEXT: s_waitcnt vmcnt(0)
|
45 | 31 | ; GCN-NEXT: buffer_wbinvl1
|
46 |
| -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v2, v5 |
47 |
| -; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5] |
48 |
| -; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5] |
49 |
| -; GCN-NEXT: s_cbranch_execnz .LBB0_4 |
50 |
| -; GCN-NEXT: ; %bb.5: ; %Flow |
51 |
| -; GCN-NEXT: s_or_b64 exec, exec, s[4:5] |
52 |
| -; GCN-NEXT: .LBB0_6: ; %Flow4 |
| 32 | +; GCN-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v5 |
| 33 | +; GCN-NEXT: s_or_b64 s[6:7], s[0:1], s[6:7] |
| 34 | +; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7] |
| 35 | +; GCN-NEXT: s_cbranch_execnz .LBB0_2 |
| 36 | +; GCN-NEXT: ; %bb.3: ; %Flow |
| 37 | +; GCN-NEXT: s_or_b64 exec, exec, s[6:7] |
| 38 | +; GCN-NEXT: .LBB0_4: ; %Flow2 |
53 | 39 | ; GCN-NEXT: s_or_b64 exec, exec, s[2:3]
|
54 |
| -; GCN-NEXT: v_readfirstlane_b32 s0, v2 |
| 40 | +; GCN-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 |
| 41 | +; GCN-NEXT: v_readfirstlane_b32 s0, v1 |
| 42 | +; GCN-NEXT: v_mul_f32_e32 v0, 4.0, v0 |
| 43 | +; GCN-NEXT: v_bfrev_b32_e32 v1, 1 |
| 44 | +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc |
55 | 45 | ; GCN-NEXT: v_add_f32_e32 v0, s0, v0
|
56 | 46 | ; GCN-NEXT: global_store_dword v[0:1], v0, off
|
57 | 47 | ; GCN-NEXT: s_endpgm
|
|
0 commit comments