@@ -12,123 +12,198 @@ declare void @llvm.nvvm.tcgen05.alloc.cg2(ptr %addr, i32 %ncols)
1212declare void @llvm.nvvm.tcgen05.alloc.shared.cg1 (ptr addrspace (3 ) %addr , i32 %ncols )
1313declare void @llvm.nvvm.tcgen05.alloc.shared.cg2 (ptr addrspace (3 ) %addr , i32 %ncols )
1414
15- ; CHECK-LABEL: test_tcgen05_alloc
16- define void @test_tcgen05_alloc (ptr %addr , i32 %ncols ) {
17- ; CHECK_PTX64-LABEL: test_tcgen05_alloc(
15+ define void @test_tcgen05_alloc_cg1 (ptr %addr , i32 %ncols ) {
16+ ; CHECK_PTX64-LABEL: test_tcgen05_alloc_cg1(
1817; CHECK_PTX64: {
1918; CHECK_PTX64-NEXT: .reg .b32 %r<2>;
2019; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
2120; CHECK_PTX64-EMPTY:
2221; CHECK_PTX64-NEXT: // %bb.0:
23- ; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_param_0 ];
24- ; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_param_1 ];
22+ ; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg1_param_0 ];
23+ ; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg1_param_1 ];
2524; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.b32 [%rd1], %r1;
26- ; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1;
2725; CHECK_PTX64-NEXT: ret;
2826;
29- ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc (
27+ ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_cg1 (
3028; CHECK_PTX64_SHARED32: {
3129; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>;
3230; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>;
3331; CHECK_PTX64_SHARED32-EMPTY:
3432; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
35- ; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_param_0 ];
36- ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_param_1 ];
33+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg1_param_0 ];
34+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg1_param_1 ];
3735; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.b32 [%rd1], %r1;
38- ; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1;
3936; CHECK_PTX64_SHARED32-NEXT: ret;
4037 call void @llvm.nvvm.tcgen05.alloc.cg1 (ptr %addr , i32 %ncols )
41- call void @llvm.nvvm.tcgen05.alloc.cg2 (ptr %addr , i32 %ncols )
38+ ret void
39+ }
4240
41+ define void @test_tcgen05_alloc_cg2 (ptr %addr , i32 %ncols ) {
42+ ; CHECK_PTX64-LABEL: test_tcgen05_alloc_cg2(
43+ ; CHECK_PTX64: {
44+ ; CHECK_PTX64-NEXT: .reg .b32 %r<2>;
45+ ; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
46+ ; CHECK_PTX64-EMPTY:
47+ ; CHECK_PTX64-NEXT: // %bb.0:
48+ ; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg2_param_0];
49+ ; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg2_param_1];
50+ ; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1;
51+ ; CHECK_PTX64-NEXT: ret;
52+ ;
53+ ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_cg2(
54+ ; CHECK_PTX64_SHARED32: {
55+ ; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>;
56+ ; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>;
57+ ; CHECK_PTX64_SHARED32-EMPTY:
58+ ; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
59+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg2_param_0];
60+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg2_param_1];
61+ ; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1;
62+ ; CHECK_PTX64_SHARED32-NEXT: ret;
63+ call void @llvm.nvvm.tcgen05.alloc.cg2 (ptr %addr , i32 %ncols )
4364 ret void
4465}
4566
46- ; CHECK-LABEL: test_tcgen05_alloc_shared
47- define void @test_tcgen05_alloc_shared (ptr addrspace (3 ) %addr , i32 %ncols ) {
48- ; CHECK_PTX64-LABEL: test_tcgen05_alloc_shared(
67+ define void @test_tcgen05_alloc_shared_cg1 (ptr addrspace (3 ) %addr , i32 %ncols ) {
68+ ; CHECK_PTX64-LABEL: test_tcgen05_alloc_shared_cg1(
4969; CHECK_PTX64: {
5070; CHECK_PTX64-NEXT: .reg .b32 %r<2>;
5171; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
5272; CHECK_PTX64-EMPTY:
5373; CHECK_PTX64-NEXT: // %bb.0:
54- ; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_shared_param_0 ];
55- ; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_param_1 ];
74+ ; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_shared_cg1_param_0 ];
75+ ; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg1_param_1 ];
5676; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.shared::cta.b32 [%rd1], %r1;
57- ; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%rd1], %r1;
5877; CHECK_PTX64-NEXT: ret;
5978;
60- ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_shared (
79+ ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_shared_cg1 (
6180; CHECK_PTX64_SHARED32: {
6281; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>;
6382; CHECK_PTX64_SHARED32-EMPTY:
6483; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
65- ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_param_0 ];
66- ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_alloc_shared_param_1 ];
84+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg1_param_0 ];
85+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_alloc_shared_cg1_param_1 ];
6786; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.shared::cta.b32 [%r1], %r2;
68- ; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%r1], %r2;
6987; CHECK_PTX64_SHARED32-NEXT: ret;
7088 call void @llvm.nvvm.tcgen05.alloc.shared.cg1 (ptr addrspace (3 ) %addr , i32 %ncols )
89+ ret void
90+ }
7191
92+ define void @test_tcgen05_alloc_shared_cg2 (ptr addrspace (3 ) %addr , i32 %ncols ) {
93+ ; CHECK_PTX64-LABEL: test_tcgen05_alloc_shared_cg2(
94+ ; CHECK_PTX64: {
95+ ; CHECK_PTX64-NEXT: .reg .b32 %r<2>;
96+ ; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
97+ ; CHECK_PTX64-EMPTY:
98+ ; CHECK_PTX64-NEXT: // %bb.0:
99+ ; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_shared_cg2_param_0];
100+ ; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg2_param_1];
101+ ; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%rd1], %r1;
102+ ; CHECK_PTX64-NEXT: ret;
103+ ;
104+ ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_shared_cg2(
105+ ; CHECK_PTX64_SHARED32: {
106+ ; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>;
107+ ; CHECK_PTX64_SHARED32-EMPTY:
108+ ; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
109+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg2_param_0];
110+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_alloc_shared_cg2_param_1];
111+ ; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%r1], %r2;
112+ ; CHECK_PTX64_SHARED32-NEXT: ret;
72113 call void @llvm.nvvm.tcgen05.alloc.shared.cg2 (ptr addrspace (3 ) %addr , i32 %ncols )
73114 ret void
74115}
75116
76117declare void @llvm.nvvm.tcgen05.dealloc.cg1 (ptr addrspace (6 ) %tmem_addr , i32 %ncols )
77118declare void @llvm.nvvm.tcgen05.dealloc.cg2 (ptr addrspace (6 ) %tmem_addr , i32 %ncols )
78119
79- ; CHECK-LABEL: test_tcgen05_dealloc
80- define void @test_tcgen05_dealloc (ptr addrspace (6 ) %tmem_addr , i32 %ncols ) {
81- ; CHECK_PTX64-LABEL: test_tcgen05_dealloc(
120+ define void @test_tcgen05_dealloc_cg1 (ptr addrspace (6 ) %tmem_addr , i32 %ncols ) {
121+ ; CHECK_PTX64-LABEL: test_tcgen05_dealloc_cg1(
82122; CHECK_PTX64: {
83123; CHECK_PTX64-NEXT: .reg .b32 %r<3>;
84124; CHECK_PTX64-EMPTY:
85125; CHECK_PTX64-NEXT: // %bb.0:
86- ; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_param_0 ];
87- ; CHECK_PTX64-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_param_1 ];
126+ ; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg1_param_0 ];
127+ ; CHECK_PTX64-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg1_param_1 ];
88128; CHECK_PTX64-NEXT: tcgen05.dealloc.cta_group::1.sync.aligned.b32 %r1, %r2;
89- ; CHECK_PTX64-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2;
90129; CHECK_PTX64-NEXT: ret;
91130;
92- ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_dealloc (
131+ ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_dealloc_cg1 (
93132; CHECK_PTX64_SHARED32: {
94133; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>;
95134; CHECK_PTX64_SHARED32-EMPTY:
96135; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
97- ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_param_0 ];
98- ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_param_1 ];
136+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg1_param_0 ];
137+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg1_param_1 ];
99138; CHECK_PTX64_SHARED32-NEXT: tcgen05.dealloc.cta_group::1.sync.aligned.b32 %r1, %r2;
100- ; CHECK_PTX64_SHARED32-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2;
101139; CHECK_PTX64_SHARED32-NEXT: ret;
102140 call void @llvm.nvvm.tcgen05.dealloc.cg1 (ptr addrspace (6 ) %tmem_addr , i32 %ncols )
141+ ret void
142+ }
103143
144+ define void @test_tcgen05_dealloc_cg2 (ptr addrspace (6 ) %tmem_addr , i32 %ncols ) {
145+ ; CHECK_PTX64-LABEL: test_tcgen05_dealloc_cg2(
146+ ; CHECK_PTX64: {
147+ ; CHECK_PTX64-NEXT: .reg .b32 %r<3>;
148+ ; CHECK_PTX64-EMPTY:
149+ ; CHECK_PTX64-NEXT: // %bb.0:
150+ ; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg2_param_0];
151+ ; CHECK_PTX64-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg2_param_1];
152+ ; CHECK_PTX64-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2;
153+ ; CHECK_PTX64-NEXT: ret;
154+ ;
155+ ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_dealloc_cg2(
156+ ; CHECK_PTX64_SHARED32: {
157+ ; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>;
158+ ; CHECK_PTX64_SHARED32-EMPTY:
159+ ; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
160+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg2_param_0];
161+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg2_param_1];
162+ ; CHECK_PTX64_SHARED32-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2;
163+ ; CHECK_PTX64_SHARED32-NEXT: ret;
104164 call void @llvm.nvvm.tcgen05.dealloc.cg2 (ptr addrspace (6 ) %tmem_addr , i32 %ncols )
105165 ret void
106166}
107167
108168declare void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg1 ()
109169declare void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg2 ()
110170
111- ; CHECK-LABEL: test_tcgen05_relinquish_alloc_permit
112- define void @test_tcgen05_relinquish_alloc_permit () {
113- ; CHECK_PTX64-LABEL: test_tcgen05_relinquish_alloc_permit(
171+ define void @test_tcgen05_relinquish_alloc_permit_cg1 () {
172+ ; CHECK_PTX64-LABEL: test_tcgen05_relinquish_alloc_permit_cg1(
114173; CHECK_PTX64: {
115174; CHECK_PTX64-EMPTY:
116175; CHECK_PTX64-EMPTY:
117176; CHECK_PTX64-NEXT: // %bb.0:
118177; CHECK_PTX64-NEXT: tcgen05.relinquish_alloc_permit.cta_group::1.sync.aligned;
119- ; CHECK_PTX64-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned;
120178; CHECK_PTX64-NEXT: ret;
121179;
122- ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_relinquish_alloc_permit (
180+ ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_relinquish_alloc_permit_cg1 (
123181; CHECK_PTX64_SHARED32: {
124182; CHECK_PTX64_SHARED32-EMPTY:
125183; CHECK_PTX64_SHARED32-EMPTY:
126184; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
127185; CHECK_PTX64_SHARED32-NEXT: tcgen05.relinquish_alloc_permit.cta_group::1.sync.aligned;
128- ; CHECK_PTX64_SHARED32-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned;
129186; CHECK_PTX64_SHARED32-NEXT: ret;
130187 call void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg1 ()
188+ ret void
189+ }
131190
191+ define void @test_tcgen05_relinquish_alloc_permit_cg2 () {
192+ ; CHECK_PTX64-LABEL: test_tcgen05_relinquish_alloc_permit_cg2(
193+ ; CHECK_PTX64: {
194+ ; CHECK_PTX64-EMPTY:
195+ ; CHECK_PTX64-EMPTY:
196+ ; CHECK_PTX64-NEXT: // %bb.0:
197+ ; CHECK_PTX64-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned;
198+ ; CHECK_PTX64-NEXT: ret;
199+ ;
200+ ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_relinquish_alloc_permit_cg2(
201+ ; CHECK_PTX64_SHARED32: {
202+ ; CHECK_PTX64_SHARED32-EMPTY:
203+ ; CHECK_PTX64_SHARED32-EMPTY:
204+ ; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
205+ ; CHECK_PTX64_SHARED32-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned;
206+ ; CHECK_PTX64_SHARED32-NEXT: ret;
132207 call void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg2 ()
133208 ret void
134209}
0 commit comments