Skip to content

Commit ac27b24

Browse files
authored
AMDGPU: Add baseline test for load-select to load select of pointer combine (#167908)
1 parent 50f16ff commit ac27b24

File tree

1 file changed

+123
-0
lines changed

1 file changed

+123
-0
lines changed
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
3+
4+
; Select between two i32 values loaded from default-address-space pointers
; %a and %b, keyed on %cond. In the checked gfx900 output the two v_cndmask
; instructions pick the 64-bit pointer (v[1:2]) and only one flat_load_dword
; is issued.
define i32 @select_load_i32_p0(i1 %cond, ptr %a, ptr %b) {
5+
; CHECK-LABEL: select_load_i32_p0:
6+
; CHECK: ; %bb.0:
7+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8+
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
9+
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
10+
; CHECK-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
11+
; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
12+
; CHECK-NEXT: flat_load_dword v0, v[1:2]
13+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
14+
; CHECK-NEXT: s_setpc_b64 s[30:31]
15+
%ld0 = load i32, ptr %a
16+
%ld1 = load i32, ptr %b
17+
%select = select i1 %cond, i32 %ld0, i32 %ld1
18+
ret i32 %select
19+
}
20+
21+
; Select between two i32 values loaded from addrspace(1) pointers %a and %b,
; keyed on %cond. The checked gfx900 output issues both global_load_dword
; instructions and applies v_cndmask to the loaded values (v5/v6), i.e. the
; select is on the data, not on the pointer.
define i32 @select_load_i32_p1(i1 %cond, ptr addrspace(1) %a, ptr addrspace(1) %b) {
22+
; CHECK-LABEL: select_load_i32_p1:
23+
; CHECK: ; %bb.0:
24+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25+
; CHECK-NEXT: global_load_dword v5, v[1:2], off
26+
; CHECK-NEXT: global_load_dword v6, v[3:4], off
27+
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
28+
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
29+
; CHECK-NEXT: s_waitcnt vmcnt(0)
30+
; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v5, vcc
31+
; CHECK-NEXT: s_setpc_b64 s[30:31]
32+
%ld0 = load i32, ptr addrspace(1) %a
33+
%ld1 = load i32, ptr addrspace(1) %b
34+
%select = select i1 %cond, i32 %ld0, i32 %ld1
35+
ret i32 %select
36+
}
37+
38+
; Select between two i32 values loaded from addrspace(3) pointers %a and %b,
; keyed on %cond. The checked gfx900 output issues two ds_read_b32
; instructions and applies v_cndmask to the loaded values (v1/v2).
define i32 @select_load_i32_p3(i1 %cond, ptr addrspace(3) %a, ptr addrspace(3) %b) {
39+
; CHECK-LABEL: select_load_i32_p3:
40+
; CHECK: ; %bb.0:
41+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42+
; CHECK-NEXT: ds_read_b32 v1, v1
43+
; CHECK-NEXT: ds_read_b32 v2, v2
44+
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
45+
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
46+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
47+
; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
48+
; CHECK-NEXT: s_setpc_b64 s[30:31]
49+
%ld0 = load i32, ptr addrspace(3) %a
50+
%ld1 = load i32, ptr addrspace(3) %b
51+
%select = select i1 %cond, i32 %ld0, i32 %ld1
52+
ret i32 %select
53+
}
54+
55+
; Mixed address spaces: %a is a default-address-space pointer and %b is an
; addrspace(1) pointer. The checked gfx900 output keeps one flat_load_dword
; and one global_load_dword and applies v_cndmask to the loaded values.
define i32 @select_load_i32_p0_p1(i1 %cond, ptr %a, ptr addrspace(1) %b) {
56+
; CHECK-LABEL: select_load_i32_p0_p1:
57+
; CHECK: ; %bb.0:
58+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
59+
; CHECK-NEXT: flat_load_dword v5, v[1:2]
60+
; CHECK-NEXT: global_load_dword v6, v[3:4], off
61+
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
62+
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
63+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
64+
; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v5, vcc
65+
; CHECK-NEXT: s_setpc_b64 s[30:31]
66+
%ld0 = load i32, ptr %a
67+
%ld1 = load i32, ptr addrspace(1) %b
68+
%select = select i1 %cond, i32 %ld0, i32 %ld1
69+
ret i32 %select
70+
}
71+
72+
; Mixed address spaces, operands swapped relative to select_load_i32_p0_p1:
; %a is an addrspace(1) pointer and %b is a default-address-space pointer.
; The checked gfx900 output keeps one global_load_dword and one
; flat_load_dword and applies v_cndmask to the loaded values.
define i32 @select_load_i32_p1_p0(i1 %cond, ptr addrspace(1) %a, ptr %b) {
73+
; CHECK-LABEL: select_load_i32_p1_p0:
74+
; CHECK: ; %bb.0:
75+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76+
; CHECK-NEXT: global_load_dword v5, v[1:2], off
77+
; CHECK-NEXT: flat_load_dword v6, v[3:4]
78+
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
79+
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
80+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
81+
; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v5, vcc
82+
; CHECK-NEXT: s_setpc_b64 s[30:31]
83+
%ld0 = load i32, ptr addrspace(1) %a
84+
%ld1 = load i32, ptr %b
85+
%select = select i1 %cond, i32 %ld0, i32 %ld1
86+
ret i32 %select
87+
}
88+
89+
; i8 variant of the addrspace(1) case: select between two i8 values loaded
; from %a and %b, keyed on %cond. The checked gfx900 output issues two
; global_load_ubyte instructions and applies v_cndmask to the loaded values.
define i8 @select_load_i8_p1(i1 %cond, ptr addrspace(1) %a, ptr addrspace(1) %b) {
90+
; CHECK-LABEL: select_load_i8_p1:
91+
; CHECK: ; %bb.0:
92+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
93+
; CHECK-NEXT: global_load_ubyte v5, v[1:2], off
94+
; CHECK-NEXT: global_load_ubyte v6, v[3:4], off
95+
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
96+
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
97+
; CHECK-NEXT: s_waitcnt vmcnt(0)
98+
; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
99+
; CHECK-NEXT: s_setpc_b64 s[30:31]
100+
%ld0 = load i8, ptr addrspace(1) %a
101+
%ld1 = load i8, ptr addrspace(1) %b
102+
%select = select i1 %cond, i8 %ld0, i8 %ld1
103+
ret i8 %select
104+
}
105+
106+
; Select between two i32 values loaded from constant byte offsets 256 and 512
; off the same addrspace(1) base pointer %a, keyed on %cond. The checked
; gfx900 output issues two global_load_dword instructions from v[1:2] with
; offset:256 and offset:512 and applies v_cndmask to the loaded values.
; NOTE(review): %b is never referenced in the body; both GEPs use %a. Confirm
; whether %gep.b was meant to be based on %b. Changing it requires
; regenerating the autogenerated assertions below.
define i32 @select_load_i32_p1_offset(i1 %cond, ptr addrspace(1) %a, ptr addrspace(1) %b) {
107+
; CHECK-LABEL: select_load_i32_p1_offset:
108+
; CHECK: ; %bb.0:
109+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110+
; CHECK-NEXT: global_load_dword v3, v[1:2], off offset:256
111+
; CHECK-NEXT: global_load_dword v4, v[1:2], off offset:512
112+
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
113+
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
114+
; CHECK-NEXT: s_waitcnt vmcnt(0)
115+
; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
116+
; CHECK-NEXT: s_setpc_b64 s[30:31]
117+
%gep.a = getelementptr i8, ptr addrspace(1) %a, i64 256
118+
%gep.b = getelementptr i8, ptr addrspace(1) %a, i64 512
119+
%ld0 = load i32, ptr addrspace(1) %gep.a
120+
%ld1 = load i32, ptr addrspace(1) %gep.b
121+
%select = select i1 %cond, i32 %ld0, i32 %ld1
122+
ret i32 %select
123+
}

0 commit comments

Comments
 (0)