Skip to content

Commit 3454564

Browse files
committed
[AMDGPU][SDAG] Test ISD::PTRADD handling in various special cases
Pre-committing tests to show improvements in a follow-up PR.
1 parent 5b54ca1 commit 3454564

File tree

2 files changed

+269
-0
lines changed

2 files changed

+269
-0
lines changed
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-use-sdag-ptradd=1 < %s | FileCheck --check-prefixes=GFX6,GFX6_PTRADD %s
3+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-use-sdag-ptradd=0 < %s | FileCheck --check-prefixes=GFX6,GFX6_LEGACY %s
4+
5+
; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectMUBUF.
6+
7+
; Kernel that loads two adjacent i32 values starting at %in[workitem id x],
; adds them, and stores the sum to %out.
define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
8+
; GFX6_PTRADD-LABEL: v_add_i32:
9+
; GFX6_PTRADD: ; %bb.0:
10+
; GFX6_PTRADD-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
11+
; GFX6_PTRADD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
12+
; GFX6_PTRADD-NEXT: s_mov_b32 s7, 0x100f000
13+
; GFX6_PTRADD-NEXT: s_mov_b32 s10, 0
14+
; GFX6_PTRADD-NEXT: s_mov_b32 s11, s7
15+
; GFX6_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
16+
; GFX6_PTRADD-NEXT: v_mov_b32_e32 v1, s3
17+
; GFX6_PTRADD-NEXT: v_add_i32_e32 v0, vcc, s2, v0
18+
; GFX6_PTRADD-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
19+
; GFX6_PTRADD-NEXT: s_mov_b32 s8, s10
20+
; GFX6_PTRADD-NEXT: s_mov_b32 s9, s10
21+
; GFX6_PTRADD-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
22+
; GFX6_PTRADD-NEXT: s_waitcnt vmcnt(0)
23+
; GFX6_PTRADD-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc
24+
; GFX6_PTRADD-NEXT: s_waitcnt vmcnt(0)
25+
; GFX6_PTRADD-NEXT: s_mov_b32 s6, -1
26+
; GFX6_PTRADD-NEXT: s_mov_b32 s4, s0
27+
; GFX6_PTRADD-NEXT: s_mov_b32 s5, s1
28+
; GFX6_PTRADD-NEXT: v_add_i32_e32 v0, vcc, v2, v0
29+
; GFX6_PTRADD-NEXT: buffer_store_dword v0, off, s[4:7], 0
30+
; GFX6_PTRADD-NEXT: s_endpgm
31+
;
32+
; GFX6_LEGACY-LABEL: v_add_i32:
33+
; GFX6_LEGACY: ; %bb.0:
34+
; GFX6_LEGACY-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
35+
; GFX6_LEGACY-NEXT: s_mov_b32 s7, 0x100f000
36+
; GFX6_LEGACY-NEXT: s_mov_b32 s10, 0
37+
; GFX6_LEGACY-NEXT: s_mov_b32 s11, s7
38+
; GFX6_LEGACY-NEXT: v_lshlrev_b32_e32 v0, 2, v0
39+
; GFX6_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
40+
; GFX6_LEGACY-NEXT: s_mov_b64 s[8:9], s[2:3]
41+
; GFX6_LEGACY-NEXT: v_mov_b32_e32 v1, 0
42+
; GFX6_LEGACY-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
43+
; GFX6_LEGACY-NEXT: s_waitcnt vmcnt(0)
44+
; GFX6_LEGACY-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc
45+
; GFX6_LEGACY-NEXT: s_waitcnt vmcnt(0)
46+
; GFX6_LEGACY-NEXT: s_mov_b32 s6, -1
47+
; GFX6_LEGACY-NEXT: s_mov_b32 s4, s0
48+
; GFX6_LEGACY-NEXT: s_mov_b32 s5, s1
49+
; GFX6_LEGACY-NEXT: v_add_i32_e32 v0, vcc, v2, v0
50+
; GFX6_LEGACY-NEXT: buffer_store_dword v0, off, s[4:7], 0
51+
; GFX6_LEGACY-NEXT: s_endpgm
; Difference between the two check blocks above: the GFX6_PTRADD output adds
; the base pointer (s[2:3]) to the scaled index in v[0:1] with
; v_add_i32/v_addc_u32 and zeroes s[8:9], whereas the GFX6_LEGACY output copies
; the base pointer into s[8:9] and leaves only the scaled index in v[0:1].
; Both use offset:4 on the second load.
52+
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; %gep = &%in[%tid] (index scaled by the 4-byte i32 element size).
53+
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
; %b_ptr is the i32 immediately after %gep, i.e. a constant +4 byte offset.
54+
%b_ptr = getelementptr i32, ptr addrspace(1) %gep, i32 1
; Both loads are volatile; the checks expect two separate buffer_load_dword
; instructions.
55+
%a = load volatile i32, ptr addrspace(1) %gep
56+
%b = load volatile i32, ptr addrspace(1) %b_ptr
57+
%result = add i32 %a, %b
58+
store i32 %result, ptr addrspace(1) %out
59+
ret void
60+
}
61+
62+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
63+
; GFX6: {{.*}}

llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,3 +290,209 @@ define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
290290
%gep = getelementptr inbounds i8, ptr %base, i64 %mul
291291
ret ptr %gep
292292
}
293+
294+
; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectGlobalSAddr.
295+
; Kernel that stores the constant i32 1 to %p + zext(workitem id x << 2) + 16.
define amdgpu_kernel void @uniform_base_varying_offset_imm(ptr addrspace(1) %p) {
296+
; GFX942_PTRADD-LABEL: uniform_base_varying_offset_imm:
297+
; GFX942_PTRADD: ; %bb.0: ; %entry
298+
; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
299+
; GFX942_PTRADD-NEXT: v_and_b32_e32 v0, 0x3ff, v0
300+
; GFX942_PTRADD-NEXT: v_mov_b32_e32 v1, 0
301+
; GFX942_PTRADD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
302+
; GFX942_PTRADD-NEXT: v_mov_b32_e32 v2, 1
303+
; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
304+
; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
305+
; GFX942_PTRADD-NEXT: global_store_dword v[0:1], v2, off offset:16
306+
; GFX942_PTRADD-NEXT: s_endpgm
307+
;
308+
; GFX942_LEGACY-LABEL: uniform_base_varying_offset_imm:
309+
; GFX942_LEGACY: ; %bb.0: ; %entry
310+
; GFX942_LEGACY-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
311+
; GFX942_LEGACY-NEXT: v_and_b32_e32 v0, 0x3ff, v0
312+
; GFX942_LEGACY-NEXT: v_lshlrev_b32_e32 v0, 2, v0
313+
; GFX942_LEGACY-NEXT: v_mov_b32_e32 v1, 1
314+
; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
315+
; GFX942_LEGACY-NEXT: global_store_dword v0, v1, s[0:1] offset:16
316+
; GFX942_LEGACY-NEXT: s_endpgm
; Difference between the two check blocks above: the GFX942_PTRADD output adds
; base and offset with v_lshl_add_u64 and stores through the VGPR pair v[0:1]
; with "off", whereas the GFX942_LEGACY output keeps the base in s[0:1] and
; passes the 32-bit offset in v0. Both keep the constant 16 as offset:16.
317+
entry:
318+
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; Per-workitem byte offset: tid * 4, zero-extended to 64 bits.
319+
%shift = shl i32 %tid, 2
320+
%voffset = zext i32 %shift to i64
; Two chained byte GEPs: first the varying offset, then the constant 16.
321+
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %p, i64 %voffset
322+
%gep2 = getelementptr inbounds i8, ptr addrspace(1) %gep1, i64 16
323+
store i32 1, ptr addrspace(1) %gep2
324+
ret void
325+
}
326+
327+
; Adjusted from global-saddr-load.ll. Tests PTRADD handling in
328+
; AMDGPUDAGToDAGISel::SelectSMRDBaseOffset.
329+
; Kernel that loads an i32 from %sbase + zext(%soffset) and stores it,
; bitcast to float, to %r.
define amdgpu_kernel void @global_load_saddr_i32_uniform_offset(ptr addrspace(1) %sbase, i32 %soffset, ptr addrspace(1) %r) {
330+
; GFX942_PTRADD-LABEL: global_load_saddr_i32_uniform_offset:
331+
; GFX942_PTRADD: ; %bb.0:
332+
; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
333+
; GFX942_PTRADD-NEXT: s_load_dword s6, s[4:5], 0x8
334+
; GFX942_PTRADD-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10
335+
; GFX942_PTRADD-NEXT: v_mov_b32_e32 v0, 0
336+
; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
337+
; GFX942_PTRADD-NEXT: s_add_u32 s0, s0, s6
338+
; GFX942_PTRADD-NEXT: s_addc_u32 s1, s1, 0
339+
; GFX942_PTRADD-NEXT: s_load_dword s0, s[0:1], 0x0
340+
; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
341+
; GFX942_PTRADD-NEXT: v_mov_b32_e32 v1, s0
342+
; GFX942_PTRADD-NEXT: global_store_dword v0, v1, s[2:3]
343+
; GFX942_PTRADD-NEXT: s_endpgm
344+
;
345+
; GFX942_LEGACY-LABEL: global_load_saddr_i32_uniform_offset:
346+
; GFX942_LEGACY: ; %bb.0:
347+
; GFX942_LEGACY-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
348+
; GFX942_LEGACY-NEXT: s_load_dword s6, s[4:5], 0x8
349+
; GFX942_LEGACY-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10
350+
; GFX942_LEGACY-NEXT: v_mov_b32_e32 v0, 0
351+
; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
352+
; GFX942_LEGACY-NEXT: s_load_dword s0, s[0:1], s6 offset:0x0
353+
; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
354+
; GFX942_LEGACY-NEXT: v_mov_b32_e32 v1, s0
355+
; GFX942_LEGACY-NEXT: global_store_dword v0, v1, s[2:3]
356+
; GFX942_LEGACY-NEXT: s_endpgm
; Difference between the two check blocks above: the GFX942_PTRADD output
; computes the address with a separate s_add_u32/s_addc_u32 pair before the
; s_load_dword, whereas the GFX942_LEGACY output passes the offset register
; s6 directly as an operand of the s_load_dword.
; The 32-bit offset is zero-extended before being added to the 64-bit base.
357+
%zext.offset = zext i32 %soffset to i64
358+
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
359+
%load = load i32, ptr addrspace(1) %gep0
; The loaded value is reinterpreted as float and written to %r.
360+
%to.vgpr = bitcast i32 %load to float
361+
store float %to.vgpr, ptr addrspace(1) %r
362+
ret void
363+
}
364+
365+
; Adjusted from llvm.amdgcn.global.load.lds.ll, tests the offset lowering for
366+
; Intrinsic::amdgcn_global_load_lds.
367+
; Calls llvm.amdgcn.global.load.lds with global address
; %gptr + zext(%voffset) and LDS pointer %lptr. %gptr is passed inreg.
define void @global_load_lds_dword_saddr_and_vaddr(ptr addrspace(1) nocapture inreg %gptr, ptr addrspace(3) nocapture %lptr, i32 %voffset) {
368+
; GFX942_PTRADD-LABEL: global_load_lds_dword_saddr_and_vaddr:
369+
; GFX942_PTRADD: ; %bb.0: ; %main_body
370+
; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
371+
; GFX942_PTRADD-NEXT: v_mov_b32_e32 v2, v1
372+
; GFX942_PTRADD-NEXT: v_mov_b32_e32 v3, 0
373+
; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[2:3], s[0:1], 0, v[2:3]
374+
; GFX942_PTRADD-NEXT: v_readfirstlane_b32 s0, v0
375+
; GFX942_PTRADD-NEXT: s_mov_b32 m0, s0
376+
; GFX942_PTRADD-NEXT: s_nop 0
377+
; GFX942_PTRADD-NEXT: global_load_lds_dword v[2:3], off offset:48 sc1
378+
; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
379+
; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
380+
;
381+
; GFX942_LEGACY-LABEL: global_load_lds_dword_saddr_and_vaddr:
382+
; GFX942_LEGACY: ; %bb.0: ; %main_body
383+
; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384+
; GFX942_LEGACY-NEXT: v_readfirstlane_b32 s2, v0
385+
; GFX942_LEGACY-NEXT: s_mov_b32 m0, s2
386+
; GFX942_LEGACY-NEXT: s_nop 0
387+
; GFX942_LEGACY-NEXT: global_load_lds_dword v1, s[0:1] offset:48 sc1
388+
; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
389+
; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
; Difference between the two check blocks above: the GFX942_PTRADD output
; first adds s[0:1] and the zero-extended offset with v_lshl_add_u64 and then
; addresses through v[2:3] with "off", whereas the GFX942_LEGACY output keeps
; s[0:1] as the base and v1 as the offset in the global_load_lds_dword itself.
390+
main_body:
391+
%voffset.64 = zext i32 %voffset to i64
392+
%gep = getelementptr i8, ptr addrspace(1) %gptr, i64 %voffset.64
; The i32 48 operand matches offset:48 in both expected instructions.
; NOTE(review): the i32 4 and i32 16 operands are presumably the access size
; and auxiliary/cache-policy bits - confirm against the intrinsic definition.
393+
call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gep, ptr addrspace(3) %lptr, i32 4, i32 48, i32 16)
394+
ret void
395+
}
396+
397+
; Taken from shl_add_ptr_global.ll, tests PTRADD handling in
398+
; SITargetLowering::performSHLPtrCombine.
399+
; Takes the integer value of &%ptr[0][32], shifts it left by 2, and uses the
; result as the address of an atomic fadd of 100.0. The unshifted integer is
; also stored to %extra.use, so it has a second use besides the shift.
define void @shl_base_global_ptr_global_atomic_fadd(ptr addrspace(1) %out, ptr addrspace(1) %extra.use, ptr addrspace(1) %ptr) {
400+
; GFX942_PTRADD-LABEL: shl_base_global_ptr_global_atomic_fadd:
401+
; GFX942_PTRADD: ; %bb.0:
402+
; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
403+
; GFX942_PTRADD-NEXT: s_mov_b64 s[0:1], 0x80
404+
; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 0, s[0:1]
405+
; GFX942_PTRADD-NEXT: v_lshlrev_b64 v[4:5], 2, v[0:1]
406+
; GFX942_PTRADD-NEXT: v_mov_b32_e32 v6, 0x42c80000
407+
; GFX942_PTRADD-NEXT: global_atomic_add_f32 v[4:5], v6, off
408+
; GFX942_PTRADD-NEXT: global_store_dwordx2 v[2:3], v[0:1], off sc0 sc1
409+
; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0)
410+
; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
411+
;
412+
; GFX942_LEGACY-LABEL: shl_base_global_ptr_global_atomic_fadd:
413+
; GFX942_LEGACY: ; %bb.0:
414+
; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
415+
; GFX942_LEGACY-NEXT: v_lshlrev_b64 v[0:1], 2, v[4:5]
416+
; GFX942_LEGACY-NEXT: v_mov_b32_e32 v6, 0x42c80000
417+
; GFX942_LEGACY-NEXT: global_atomic_add_f32 v[0:1], v6, off offset:512
418+
; GFX942_LEGACY-NEXT: s_mov_b64 s[0:1], 0x80
419+
; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 0, s[0:1]
420+
; GFX942_LEGACY-NEXT: global_store_dwordx2 v[2:3], v[0:1], off sc0 sc1
421+
; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0)
422+
; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
; Difference between the two check blocks above: the GFX942_LEGACY output
; shifts the raw pointer and puts the shifted constant on the atomic as
; offset:512 (0x80 << 2), whereas the GFX942_PTRADD output adds 0x80 first,
; shifts the sum, and issues the atomic with no immediate offset.
; Element 32 of an i32 array: byte offset 32 * 4 = 128 = 0x80.
423+
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 32
424+
%cast = ptrtoint ptr addrspace(1) %arrayidx0 to i64
425+
%shl = shl i64 %cast, 2
426+
%castback = inttoptr i64 %shl to ptr addrspace(1)
; The atomic's result is unused; 100.0 appears as 0x42c80000 in the checks.
427+
%unused = atomicrmw fadd ptr addrspace(1) %castback, float 100.0 syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
; Second use of %cast (the unshifted base + 0x80).
428+
store volatile i64 %cast, ptr addrspace(1) %extra.use, align 4
429+
ret void
430+
}
431+
432+
; Test PTRADD handling in TargetLowering::SimplifyDemandedBits and
433+
; TargetLowering::ShrinkDemandedOp.
434+
; Computes %src + %offset in address space 4, casts the result to address
; space 6, and returns the i32 loaded through that pointer.
define i32 @gep_in_const_as_cast_to_const32_as(ptr addrspace(4) %src, i64 %offset) {
435+
; GFX942_PTRADD-LABEL: gep_in_const_as_cast_to_const32_as:
436+
; GFX942_PTRADD: ; %bb.0: ; %entry
437+
; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
438+
; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
439+
; GFX942_PTRADD-NEXT: s_mov_b32 s1, 0
440+
; GFX942_PTRADD-NEXT: v_readfirstlane_b32 s0, v0
441+
; GFX942_PTRADD-NEXT: s_load_dword s0, s[0:1], 0x0
442+
; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
443+
; GFX942_PTRADD-NEXT: v_mov_b32_e32 v0, s0
444+
; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
445+
;
446+
; GFX942_LEGACY-LABEL: gep_in_const_as_cast_to_const32_as:
447+
; GFX942_LEGACY: ; %bb.0: ; %entry
448+
; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
449+
; GFX942_LEGACY-NEXT: v_add_u32_e32 v0, v0, v2
450+
; GFX942_LEGACY-NEXT: s_mov_b32 s1, 0
451+
; GFX942_LEGACY-NEXT: v_readfirstlane_b32 s0, v0
452+
; GFX942_LEGACY-NEXT: s_load_dword s0, s[0:1], 0x0
453+
; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
454+
; GFX942_LEGACY-NEXT: v_mov_b32_e32 v0, s0
455+
; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
; Difference between the two check blocks above: the GFX942_LEGACY output adds
; only the low 32 bits (v_add_u32 on v0 and v2), whereas the GFX942_PTRADD
; output performs the full 64-bit add (v_lshl_add_u64). In both, only v0 (the
; low half) feeds the load address; s1 is set to 0.
456+
entry:
457+
%gep = getelementptr i8, ptr addrspace(4) %src, i64 %offset
458+
%gep.cast = addrspacecast ptr addrspace(4) %gep to ptr addrspace(6)
459+
%l = load i32, ptr addrspace(6) %gep.cast
460+
ret i32 %l
461+
}
462+
463+
; Constant array of 16 zero-initialized i32 values (64 bytes) in address
; space 4; it is the memcpy source in replace_const0_memcpy_by_memset.
@CG = addrspace(4) constant [16 x i32] zeroinitializer, align 4
464+
465+
; Test PTRADD handling in isMemSrcFromConstant.
466+
; Copies 8 bytes from @CG + 4 to %dst with llvm.memcpy. @CG is an all-zero
; constant, so the copied bytes are known to be zero.
define void @replace_const0_memcpy_by_memset(ptr align 4 %dst) {
467+
; GFX942_PTRADD-LABEL: replace_const0_memcpy_by_memset:
468+
; GFX942_PTRADD: ; %bb.0: ; %entry
469+
; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
470+
; GFX942_PTRADD-NEXT: s_getpc_b64 s[0:1]
471+
; GFX942_PTRADD-NEXT: s_add_u32 s0, s0, CG@gotpcrel32@lo+4
472+
; GFX942_PTRADD-NEXT: s_addc_u32 s1, s1, CG@gotpcrel32@hi+12
473+
; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
474+
; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
475+
; GFX942_PTRADD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4
476+
; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
477+
; GFX942_PTRADD-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
478+
; GFX942_PTRADD-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
479+
; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
480+
; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
481+
;
482+
; GFX942_LEGACY-LABEL: replace_const0_memcpy_by_memset:
483+
; GFX942_LEGACY: ; %bb.0: ; %entry
484+
; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
485+
; GFX942_LEGACY-NEXT: v_mov_b32_e32 v2, 0
486+
; GFX942_LEGACY-NEXT: v_mov_b32_e32 v3, v2
487+
; GFX942_LEGACY-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
488+
; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
489+
; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
; Difference between the two check blocks above: the GFX942_LEGACY output
; stores an immediate zero pair without reading @CG, whereas the
; GFX942_PTRADD output loads the address of CG through the GOT, loads 8 bytes
; at offset 0x4 from it, and stores the loaded value.
490+
entry:
; Source pointer: 4 bytes into @CG.
491+
%gep = getelementptr i8, ptr addrspace(4) @CG, i64 4
; 8-byte, non-volatile (i1 false) copy from the constant into %dst.
492+
tail call void @llvm.memcpy.p0.p4.i64(ptr noundef nonnull align 4 %dst, ptr addrspace(4) noundef nonnull align 4 %gep, i64 8, i1 false)
493+
ret void
494+
}
495+
496+
; Declaration of the memcpy intrinsic variant called by
; replace_const0_memcpy_by_memset: generic-pointer destination, addrspace(4)
; source, i64 length, and an immediate i1 flag.
declare void @llvm.memcpy.p0.p4.i64(ptr noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg)
497+
498+
; Empty metadata node; referenced by the !amdgpu.no.fine.grained.memory and
; !amdgpu.ignore.denormal.mode annotations on the atomicrmw in
; shl_base_global_ptr_global_atomic_fadd.
!0 = !{}

0 commit comments

Comments
 (0)