11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2- ; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
3- ; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
4- ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
5- ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
6- ; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
7- ; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
8- ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-GISEL,GFX12-GISEL-FAKE16 %s
9- ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
2+ ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-FAKE16 %s
3+ ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-TRUE16 %s
4+ ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-FAKE16 %s
5+ ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-TRUE16 %s
106
117define amdgpu_ps half @fadd_s16_uniform(half inreg %a, half inreg %b) {
12- ; GFX11-SDAG-FAKE16-LABEL: fadd_s16_uniform:
13- ; GFX11-SDAG-FAKE16: ; %bb.0:
14- ; GFX11-SDAG-FAKE16-NEXT: v_add_f16_e64 v0, s0, s1
15- ; GFX11-SDAG-FAKE16-NEXT: ; return to shader part epilog
8+ ; GFX11-FAKE16-LABEL: fadd_s16_uniform:
9+ ; GFX11-FAKE16: ; %bb.0:
10+ ; GFX11-FAKE16-NEXT: v_add_f16_e64 v0, s0, s1
11+ ; GFX11-FAKE16-NEXT: ; return to shader part epilog
1612;
17- ; GFX11-SDAG-TRUE16-LABEL: fadd_s16_uniform:
18- ; GFX11-SDAG-TRUE16: ; %bb.0:
19- ; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e64 v0.l, s0, s1
20- ; GFX11-SDAG-TRUE16-NEXT: ; return to shader part epilog
21- ;
22- ; GFX11-GISEL-FAKE16-LABEL: fadd_s16_uniform:
23- ; GFX11-GISEL-FAKE16: ; %bb.0:
24- ; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e64 v0, s0, s1
25- ; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
26- ; GFX11-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
27- ; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s0
28- ; GFX11-GISEL-FAKE16-NEXT: ; return to shader part epilog
29- ;
30- ; GFX11-GISEL-TRUE16-LABEL: fadd_s16_uniform:
31- ; GFX11-GISEL-TRUE16: ; %bb.0:
32- ; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e64 v0.l, s0, s1
33- ; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
34- ; GFX11-GISEL-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
35- ; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s0
36- ; GFX11-GISEL-TRUE16-NEXT: ; return to shader part epilog
13+ ; GFX11-TRUE16-LABEL: fadd_s16_uniform:
14+ ; GFX11-TRUE16: ; %bb.0:
15+ ; GFX11-TRUE16-NEXT: v_add_f16_e64 v0.l, s0, s1
16+ ; GFX11-TRUE16-NEXT: ; return to shader part epilog
3717;
3818; GFX12-LABEL: fadd_s16_uniform:
3919; GFX12: ; %bb.0:
@@ -46,45 +26,25 @@ define amdgpu_ps half @fadd_s16_uniform(half inreg %a, half inreg %b) {
4626}
4727
4828define amdgpu_ps half @fadd_s16_div(half %a, half %b) {
49- ; GFX11-SDAG-FAKE16-LABEL: fadd_s16_div:
50- ; GFX11-SDAG-FAKE16: ; %bb.0:
51- ; GFX11-SDAG-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
52- ; GFX11-SDAG-FAKE16-NEXT: ; return to shader part epilog
53- ;
54- ; GFX11-SDAG-TRUE16-LABEL: fadd_s16_div:
55- ; GFX11-SDAG-TRUE16: ; %bb.0:
56- ; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
57- ; GFX11-SDAG-TRUE16-NEXT: ; return to shader part epilog
58- ;
59- ; GFX11-GISEL-FAKE16-LABEL: fadd_s16_div:
60- ; GFX11-GISEL-FAKE16: ; %bb.0:
61- ; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
62- ; GFX11-GISEL-FAKE16-NEXT: ; return to shader part epilog
63- ;
64- ; GFX11-GISEL-TRUE16-LABEL: fadd_s16_div:
65- ; GFX11-GISEL-TRUE16: ; %bb.0:
66- ; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
67- ; GFX11-GISEL-TRUE16-NEXT: ; return to shader part epilog
68- ;
69- ; GFX12-SDAG-FAKE16-LABEL: fadd_s16_div:
70- ; GFX12-SDAG-FAKE16: ; %bb.0:
71- ; GFX12-SDAG-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
72- ; GFX12-SDAG-FAKE16-NEXT: ; return to shader part epilog
73- ;
74- ; GFX12-SDAG-TRUE16-LABEL: fadd_s16_div:
75- ; GFX12-SDAG-TRUE16: ; %bb.0:
76- ; GFX12-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
77- ; GFX12-SDAG-TRUE16-NEXT: ; return to shader part epilog
78- ;
79- ; GFX12-GISEL-FAKE16-LABEL: fadd_s16_div:
80- ; GFX12-GISEL-FAKE16: ; %bb.0:
81- ; GFX12-GISEL-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
82- ; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog
83- ;
84- ; GFX12-GISEL-TRUE16-LABEL: fadd_s16_div:
85- ; GFX12-GISEL-TRUE16: ; %bb.0:
86- ; GFX12-GISEL-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
87- ; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog
29+ ; GFX11-FAKE16-LABEL: fadd_s16_div:
30+ ; GFX11-FAKE16: ; %bb.0:
31+ ; GFX11-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
32+ ; GFX11-FAKE16-NEXT: ; return to shader part epilog
33+ ;
34+ ; GFX11-TRUE16-LABEL: fadd_s16_div:
35+ ; GFX11-TRUE16: ; %bb.0:
36+ ; GFX11-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
37+ ; GFX11-TRUE16-NEXT: ; return to shader part epilog
38+ ;
39+ ; GFX12-FAKE16-LABEL: fadd_s16_div:
40+ ; GFX12-FAKE16: ; %bb.0:
41+ ; GFX12-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
42+ ; GFX12-FAKE16-NEXT: ; return to shader part epilog
43+ ;
44+ ; GFX12-TRUE16-LABEL: fadd_s16_div:
45+ ; GFX12-TRUE16: ; %bb.0:
46+ ; GFX12-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
47+ ; GFX12-TRUE16-NEXT: ; return to shader part epilog
8848 %fadd = fadd half %a, %b
8949 ret half %fadd
9050}
@@ -155,92 +115,58 @@ define amdgpu_ps double @fadd_s64_div(double %a, double %b) {
155115 ret double %fadd
156116}
157117
158- define <2 x half> @fadd_v2s16_uniform(<2 x half> inreg %a, <2 x half> inreg %b) {
118+ define amdgpu_ps <2 x half> @fadd_v2s16_uniform(<2 x half> inreg %a, <2 x half> inreg %b) {
159119; GFX11-LABEL: fadd_v2s16_uniform:
160120; GFX11: ; %bb.0:
161- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
162121; GFX11-NEXT: v_pk_add_f16 v0, s0, s1
163- ; GFX11-NEXT: s_setpc_b64 s[30:31]
122+ ; GFX11-NEXT: ; return to shader part epilog
164123;
165124; GFX12-LABEL: fadd_v2s16_uniform:
166125; GFX12: ; %bb.0:
167- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
168- ; GFX12-NEXT: s_wait_expcnt 0x0
169- ; GFX12-NEXT: s_wait_samplecnt 0x0
170- ; GFX12-NEXT: s_wait_bvhcnt 0x0
171- ; GFX12-NEXT: s_wait_kmcnt 0x0
172- ; GFX12-NEXT: v_pk_add_f16 v0, s0, s1
173- ; GFX12-NEXT: s_setpc_b64 s[30:31]
126+ ; GFX12-NEXT: s_lshr_b32 s2, s0, 16
127+ ; GFX12-NEXT: s_lshr_b32 s3, s1, 16
128+ ; GFX12-NEXT: s_add_f16 s0, s0, s1
129+ ; GFX12-NEXT: s_add_f16 s1, s2, s3
130+ ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1)
131+ ; GFX12-NEXT: s_pack_ll_b32_b16 s0, s0, s1
132+ ; GFX12-NEXT: v_mov_b32_e32 v0, s0
133+ ; GFX12-NEXT: ; return to shader part epilog
174134 %fadd = fadd <2 x half> %a, %b
175135 ret <2 x half> %fadd
176136}
177137
178- define <2 x half> @fadd_v2s16_div(<2 x half> %a, <2 x half> %b) {
179- ; GFX11-LABEL: fadd_v2s16_div:
180- ; GFX11: ; %bb.0:
181- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
182- ; GFX11-NEXT: v_pk_add_f16 v0, v0, v1
183- ; GFX11-NEXT: s_setpc_b64 s[30:31]
184- ;
185- ; GFX12-LABEL: fadd_v2s16_div:
186- ; GFX12: ; %bb.0:
187- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
188- ; GFX12-NEXT: s_wait_expcnt 0x0
189- ; GFX12-NEXT: s_wait_samplecnt 0x0
190- ; GFX12-NEXT: s_wait_bvhcnt 0x0
191- ; GFX12-NEXT: s_wait_kmcnt 0x0
192- ; GFX12-NEXT: v_pk_add_f16 v0, v0, v1
193- ; GFX12-NEXT: s_setpc_b64 s[30:31]
138+ define amdgpu_ps <2 x half> @fadd_v2s16_div(<2 x half> %a, <2 x half> %b) {
139+ ; GCN-LABEL: fadd_v2s16_div:
140+ ; GCN: ; %bb.0:
141+ ; GCN-NEXT: v_pk_add_f16 v0, v0, v1
142+ ; GCN-NEXT: ; return to shader part epilog
194143 %fadd = fadd <2 x half> %a, %b
195144 ret <2 x half> %fadd
196145}
197146
198- define <2 x float> @fadd_v2s32_uniform(<2 x float> inreg %a, <2 x float> inreg %b) {
147+ define amdgpu_ps <2 x float> @fadd_v2s32_uniform(<2 x float> inreg %a, <2 x float> inreg %b) {
199148; GFX11-LABEL: fadd_v2s32_uniform:
200149; GFX11: ; %bb.0:
201- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
202150; GFX11-NEXT: v_add_f32_e64 v0, s0, s2
203151; GFX11-NEXT: v_add_f32_e64 v1, s1, s3
204- ; GFX11-NEXT: s_setpc_b64 s[30:31]
152+ ; GFX11-NEXT: ; return to shader part epilog
205153;
206154; GFX12-LABEL: fadd_v2s32_uniform:
207155; GFX12: ; %bb.0:
208- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
209- ; GFX12-NEXT: s_wait_expcnt 0x0
210- ; GFX12-NEXT: s_wait_samplecnt 0x0
211- ; GFX12-NEXT: s_wait_bvhcnt 0x0
212- ; GFX12-NEXT: s_wait_kmcnt 0x0
213156; GFX12-NEXT: s_add_f32 s0, s0, s2
214157; GFX12-NEXT: s_add_f32 s1, s1, s3
215- ; GFX12-NEXT: s_wait_alu 0xfffe
216- ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
158+ ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
217159; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
218- ; GFX12-NEXT: s_setpc_b64 s[30:31]
160+ ; GFX12-NEXT: ; return to shader part epilog
219161 %fadd = fadd <2 x float> %a, %b
220162 ret <2 x float> %fadd
221163}
222164
223- define <2 x float> @fadd_v2s32_div(<2 x float> %a, <2 x float> %b) {
224- ; GFX11-LABEL: fadd_v2s32_div:
225- ; GFX11: ; %bb.0:
226- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
227- ; GFX11-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
228- ; GFX11-NEXT: s_setpc_b64 s[30:31]
229- ;
230- ; GFX12-LABEL: fadd_v2s32_div:
231- ; GFX12: ; %bb.0:
232- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
233- ; GFX12-NEXT: s_wait_expcnt 0x0
234- ; GFX12-NEXT: s_wait_samplecnt 0x0
235- ; GFX12-NEXT: s_wait_bvhcnt 0x0
236- ; GFX12-NEXT: s_wait_kmcnt 0x0
237- ; GFX12-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
238- ; GFX12-NEXT: s_setpc_b64 s[30:31]
165+ define amdgpu_ps <2 x float> @fadd_v2s32_div(<2 x float> %a, <2 x float> %b) {
166+ ; GCN-LABEL: fadd_v2s32_div:
167+ ; GCN: ; %bb.0:
168+ ; GCN-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
169+ ; GCN-NEXT: ; return to shader part epilog
239170 %fadd = fadd <2 x float> %a, %b
240171 ret <2 x float> %fadd
241172}
242- ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
243- ; GFX11-GISEL: {{.*}}
244- ; GFX11-SDAG: {{.*}}
245- ; GFX12-GISEL: {{.*}}
246- ; GFX12-SDAG: {{.*}}
0 commit comments