@@ -264,6 +264,142 @@ ret:
264264 ret void
265265}
266266
; Test function @trap_with_use_after.
; The kernel volatile-loads an i32 from %arg0, calls llvm.trap, and then
; volatile-stores the loaded value to %arg1, so the loaded value is still
; used after the trap call.
;
; What the expected assembly below shows for each check prefix
;   * HSA-TRAP-GFX803 and HSA-TRAP-GFX900 expect "s_trap 2" emitted inline,
;     between the load and the store.
;   * NOHSA-TRAP-GFX900 and HSA-NOTRAP-GFX900 expect no s_trap at all; an
;     s_cbranch_execnz jumps over the store directly to s_endpgm.
;   * HSA-TRAP-GFX1100 and HSA-TRAP-GFX1100-O0 expect an s_cbranch_execnz to a
;     separate block holding "s_trap 2", a MSG_RTN_GET_DOORBELL /
;     MSG_INTERRUPT sequence, and an endless "s_sethalt 5" loop. The O0
;     variant additionally spills and reloads values through scratch memory.
;
; NOTE(review): the RUN lines that define these prefixes are outside this
; view. The check lines look autogenerated - presumably they should be
; regenerated with the update script rather than edited by hand; confirm.
267+ define amdgpu_kernel void @trap_with_use_after (ptr addrspace (1 ) %arg0 , ptr addrspace (1 ) %arg1 ) {
268+ ; NOHSA-TRAP-GFX900-LABEL: trap_with_use_after:
269+ ; NOHSA-TRAP-GFX900: ; %bb.0:
270+ ; NOHSA-TRAP-GFX900-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
271+ ; NOHSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0
272+ ; NOHSA-TRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0)
273+ ; NOHSA-TRAP-GFX900-NEXT: global_load_dword v1, v0, s[0:1] glc
274+ ; NOHSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
275+ ; NOHSA-TRAP-GFX900-NEXT: s_cbranch_execnz .LBB2_2
276+ ; NOHSA-TRAP-GFX900-NEXT: ; %bb.1:
277+ ; NOHSA-TRAP-GFX900-NEXT: global_store_dword v0, v1, s[2:3]
278+ ; NOHSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
279+ ; NOHSA-TRAP-GFX900-NEXT: .LBB2_2:
280+ ; NOHSA-TRAP-GFX900-NEXT: s_endpgm
281+ ;
282+ ; HSA-TRAP-GFX803-LABEL: trap_with_use_after:
283+ ; HSA-TRAP-GFX803: ; %bb.0:
284+ ; HSA-TRAP-GFX803-NEXT: s_mov_b64 s[0:1], s[4:5]
285+ ; HSA-TRAP-GFX803-NEXT: s_load_dwordx4 s[4:7], s[6:7], 0x0
286+ ; HSA-TRAP-GFX803-NEXT: s_waitcnt lgkmcnt(0)
287+ ; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v0, s4
288+ ; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v1, s5
289+ ; HSA-TRAP-GFX803-NEXT: flat_load_dword v2, v[0:1] glc
290+ ; HSA-TRAP-GFX803-NEXT: s_waitcnt vmcnt(0)
291+ ; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v0, s6
292+ ; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v1, s7
293+ ; HSA-TRAP-GFX803-NEXT: s_trap 2
294+ ; HSA-TRAP-GFX803-NEXT: flat_store_dword v[0:1], v2
295+ ; HSA-TRAP-GFX803-NEXT: s_waitcnt vmcnt(0)
296+ ; HSA-TRAP-GFX803-NEXT: s_endpgm
297+ ;
298+ ; HSA-TRAP-GFX900-LABEL: trap_with_use_after:
299+ ; HSA-TRAP-GFX900: ; %bb.0:
300+ ; HSA-TRAP-GFX900-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
301+ ; HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0
302+ ; HSA-TRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0)
303+ ; HSA-TRAP-GFX900-NEXT: global_load_dword v1, v0, s[0:1] glc
304+ ; HSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
305+ ; HSA-TRAP-GFX900-NEXT: s_trap 2
306+ ; HSA-TRAP-GFX900-NEXT: global_store_dword v0, v1, s[2:3]
307+ ; HSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
308+ ; HSA-TRAP-GFX900-NEXT: s_endpgm
309+ ;
310+ ; HSA-NOTRAP-GFX900-LABEL: trap_with_use_after:
311+ ; HSA-NOTRAP-GFX900: ; %bb.0:
312+ ; HSA-NOTRAP-GFX900-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
313+ ; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0
314+ ; HSA-NOTRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0)
315+ ; HSA-NOTRAP-GFX900-NEXT: global_load_dword v1, v0, s[0:1] glc
316+ ; HSA-NOTRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
317+ ; HSA-NOTRAP-GFX900-NEXT: s_cbranch_execnz .LBB2_2
318+ ; HSA-NOTRAP-GFX900-NEXT: ; %bb.1:
319+ ; HSA-NOTRAP-GFX900-NEXT: global_store_dword v0, v1, s[2:3]
320+ ; HSA-NOTRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
321+ ; HSA-NOTRAP-GFX900-NEXT: .LBB2_2:
322+ ; HSA-NOTRAP-GFX900-NEXT: s_endpgm
323+ ;
324+ ; HSA-TRAP-GFX1100-LABEL: trap_with_use_after:
325+ ; HSA-TRAP-GFX1100: ; %bb.0:
326+ ; HSA-TRAP-GFX1100-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
327+ ; HSA-TRAP-GFX1100-NEXT: v_mov_b32_e32 v0, 0
328+ ; HSA-TRAP-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
329+ ; HSA-TRAP-GFX1100-NEXT: global_load_b32 v1, v0, s[0:1] glc dlc
330+ ; HSA-TRAP-GFX1100-NEXT: s_waitcnt vmcnt(0)
331+ ; HSA-TRAP-GFX1100-NEXT: s_cbranch_execnz .LBB2_2
332+ ; HSA-TRAP-GFX1100-NEXT: ; %bb.1:
333+ ; HSA-TRAP-GFX1100-NEXT: global_store_b32 v0, v1, s[2:3] dlc
334+ ; HSA-TRAP-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
335+ ; HSA-TRAP-GFX1100-NEXT: s_nop 0
336+ ; HSA-TRAP-GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
337+ ; HSA-TRAP-GFX1100-NEXT: s_endpgm
338+ ; HSA-TRAP-GFX1100-NEXT: .LBB2_2:
339+ ; HSA-TRAP-GFX1100-NEXT: s_trap 2
340+ ; HSA-TRAP-GFX1100-NEXT: s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_DOORBELL)
341+ ; HSA-TRAP-GFX1100-NEXT: s_mov_b32 ttmp2, m0
342+ ; HSA-TRAP-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
343+ ; HSA-TRAP-GFX1100-NEXT: s_and_b32 s0, s0, 0x3ff
344+ ; HSA-TRAP-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
345+ ; HSA-TRAP-GFX1100-NEXT: s_bitset1_b32 s0, 10
346+ ; HSA-TRAP-GFX1100-NEXT: s_mov_b32 m0, s0
347+ ; HSA-TRAP-GFX1100-NEXT: s_sendmsg sendmsg(MSG_INTERRUPT)
348+ ; HSA-TRAP-GFX1100-NEXT: s_mov_b32 m0, ttmp2
349+ ; HSA-TRAP-GFX1100-NEXT: .LBB2_3: ; =>This Inner Loop Header: Depth=1
350+ ; HSA-TRAP-GFX1100-NEXT: s_sethalt 5
351+ ; HSA-TRAP-GFX1100-NEXT: s_branch .LBB2_3
352+ ;
353+ ; HSA-TRAP-GFX1100-O0-LABEL: trap_with_use_after:
354+ ; HSA-TRAP-GFX1100-O0: ; %bb.0:
355+ ; HSA-TRAP-GFX1100-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
356+ ; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v0, 0
357+ ; HSA-TRAP-GFX1100-O0-NEXT: scratch_store_b32 off, v0, off offset:8 ; 4-byte Folded Spill
358+ ; HSA-TRAP-GFX1100-O0-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
359+ ; HSA-TRAP-GFX1100-O0-NEXT: s_load_b64 s[2:3], s[4:5], 0x8
360+ ; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt lgkmcnt(0)
361+ ; HSA-TRAP-GFX1100-O0-NEXT: v_writelane_b32 v1, s2, 0
362+ ; HSA-TRAP-GFX1100-O0-NEXT: v_writelane_b32 v1, s3, 1
363+ ; HSA-TRAP-GFX1100-O0-NEXT: s_or_saveexec_b32 s6, -1
364+ ; HSA-TRAP-GFX1100-O0-NEXT: scratch_store_b32 off, v1, off offset:4 ; 4-byte Folded Spill
365+ ; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b32 exec_lo, s6
366+ ; HSA-TRAP-GFX1100-O0-NEXT: global_load_b32 v0, v0, s[0:1] glc dlc
367+ ; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt vmcnt(0)
368+ ; HSA-TRAP-GFX1100-O0-NEXT: scratch_store_b32 off, v0, off ; 4-byte Folded Spill
369+ ; HSA-TRAP-GFX1100-O0-NEXT: s_cbranch_execnz .LBB2_2
370+ ; HSA-TRAP-GFX1100-O0-NEXT: ; %bb.1:
371+ ; HSA-TRAP-GFX1100-O0-NEXT: s_or_saveexec_b32 s6, -1
372+ ; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v0, off, off offset:4 ; 4-byte Folded Reload
373+ ; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b32 exec_lo, s6
374+ ; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt vmcnt(0)
375+ ; HSA-TRAP-GFX1100-O0-NEXT: v_readlane_b32 s0, v0, 0
376+ ; HSA-TRAP-GFX1100-O0-NEXT: v_readlane_b32 s1, v0, 1
377+ ; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v1, off, off offset:8 ; 4-byte Folded Reload
378+ ; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v2, off, off ; 4-byte Folded Reload
379+ ; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt vmcnt(0)
380+ ; HSA-TRAP-GFX1100-O0-NEXT: global_store_b32 v1, v2, s[0:1] dlc
381+ ; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt_vscnt null, 0x0
382+ ; HSA-TRAP-GFX1100-O0-NEXT: ; kill: killed $vgpr0
383+ ; HSA-TRAP-GFX1100-O0-NEXT: s_endpgm
384+ ; HSA-TRAP-GFX1100-O0-NEXT: .LBB2_2:
385+ ; HSA-TRAP-GFX1100-O0-NEXT: s_trap 2
386+ ; HSA-TRAP-GFX1100-O0-NEXT: s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_DOORBELL)
387+ ; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b32 ttmp2, m0
388+ ; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt lgkmcnt(0)
389+ ; HSA-TRAP-GFX1100-O0-NEXT: s_and_b32 s0, s0, 0x3ff
390+ ; HSA-TRAP-GFX1100-O0-NEXT: s_or_b32 s0, s0, 0x400
391+ ; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b32 m0, s0
392+ ; HSA-TRAP-GFX1100-O0-NEXT: s_sendmsg sendmsg(MSG_INTERRUPT)
393+ ; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b32 m0, ttmp2
394+ ; HSA-TRAP-GFX1100-O0-NEXT: .LBB2_3: ; =>This Inner Loop Header: Depth=1
395+ ; HSA-TRAP-GFX1100-O0-NEXT: s_sethalt 5
396+ ; HSA-TRAP-GFX1100-O0-NEXT: s_branch .LBB2_3
; IR under test. The result of the volatile load (%tmp) is consumed by the
; volatile store that is placed after the llvm.trap call.
397+ %tmp = load volatile i32 , ptr addrspace (1 ) %arg0
398+ call void @llvm.trap ()
399+ store volatile i32 %tmp , ptr addrspace (1 ) %arg1
400+ ret void
401+ }
402+
267403define amdgpu_kernel void @debugtrap (ptr addrspace (1 ) nocapture readonly %arg0 ) {
268404; NOHSA-TRAP-GFX900-LABEL: debugtrap:
269405; NOHSA-TRAP-GFX900: ; %bb.0:
@@ -334,6 +470,20 @@ define amdgpu_kernel void @debugtrap(ptr addrspace(1) nocapture readonly %arg0)
334470; HSA-TRAP-GFX1100-NEXT: s_nop 0
335471; HSA-TRAP-GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
336472; HSA-TRAP-GFX1100-NEXT: s_endpgm
473+ ;
474+ ; HSA-TRAP-GFX1100-O0-LABEL: debugtrap:
475+ ; HSA-TRAP-GFX1100-O0: ; %bb.0:
476+ ; HSA-TRAP-GFX1100-O0-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
477+ ; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v0, 0
478+ ; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v1, 1
479+ ; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt lgkmcnt(0)
480+ ; HSA-TRAP-GFX1100-O0-NEXT: global_store_b32 v0, v1, s[0:1] dlc
481+ ; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt_vscnt null, 0x0
482+ ; HSA-TRAP-GFX1100-O0-NEXT: s_trap 3
483+ ; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v1, 2
484+ ; HSA-TRAP-GFX1100-O0-NEXT: global_store_b32 v0, v1, s[0:1] dlc
485+ ; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt_vscnt null, 0x0
486+ ; HSA-TRAP-GFX1100-O0-NEXT: s_endpgm
337487 store volatile i32 1 , ptr addrspace (1 ) %arg0
338488 call void @llvm.debugtrap ()
339489 store volatile i32 2 , ptr addrspace (1 ) %arg0
0 commit comments