Skip to content

Commit 415f512

Browse files
committed
AMDGPU: Stop using aligned VGPR classes for addRegisterClass
This is unnecessary. At use-emission time, InstrEmitter will use the common subclass of the value type's register class and the register class required by the using instruction. This removes one of the obstacles to fixing the overly conservative treatment of special-case instructions that do not have the alignment requirement.
1 parent 77b6693 commit 415f512

File tree

2 files changed

+24
-22
lines changed

2 files changed

+24
-22
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -110,52 +110,52 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
110110
addRegisterClass(MVT::Untyped, V64RegClass);
111111

112112
addRegisterClass(MVT::v3i32, &AMDGPU::SGPR_96RegClass);
113-
addRegisterClass(MVT::v3f32, TRI->getVGPRClassForBitWidth(96));
113+
addRegisterClass(MVT::v3f32, &AMDGPU::VReg_96RegClass);
114114

115115
addRegisterClass(MVT::v2i64, &AMDGPU::SGPR_128RegClass);
116116
addRegisterClass(MVT::v2f64, &AMDGPU::SGPR_128RegClass);
117117

118118
addRegisterClass(MVT::v4i32, &AMDGPU::SGPR_128RegClass);
119-
addRegisterClass(MVT::v4f32, TRI->getVGPRClassForBitWidth(128));
119+
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
120120

121121
addRegisterClass(MVT::v5i32, &AMDGPU::SGPR_160RegClass);
122-
addRegisterClass(MVT::v5f32, TRI->getVGPRClassForBitWidth(160));
122+
addRegisterClass(MVT::v5f32, &AMDGPU::VReg_160RegClass);
123123

124124
addRegisterClass(MVT::v6i32, &AMDGPU::SGPR_192RegClass);
125-
addRegisterClass(MVT::v6f32, TRI->getVGPRClassForBitWidth(192));
125+
addRegisterClass(MVT::v6f32, &AMDGPU::VReg_192RegClass);
126126

127127
addRegisterClass(MVT::v3i64, &AMDGPU::SGPR_192RegClass);
128-
addRegisterClass(MVT::v3f64, TRI->getVGPRClassForBitWidth(192));
128+
addRegisterClass(MVT::v3f64, &AMDGPU::VReg_192RegClass);
129129

130130
addRegisterClass(MVT::v7i32, &AMDGPU::SGPR_224RegClass);
131-
addRegisterClass(MVT::v7f32, TRI->getVGPRClassForBitWidth(224));
131+
addRegisterClass(MVT::v7f32, &AMDGPU::VReg_224RegClass);
132132

133133
addRegisterClass(MVT::v8i32, &AMDGPU::SGPR_256RegClass);
134-
addRegisterClass(MVT::v8f32, TRI->getVGPRClassForBitWidth(256));
134+
addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
135135

136136
addRegisterClass(MVT::v4i64, &AMDGPU::SGPR_256RegClass);
137-
addRegisterClass(MVT::v4f64, TRI->getVGPRClassForBitWidth(256));
137+
addRegisterClass(MVT::v4f64, &AMDGPU::VReg_256RegClass);
138138

139139
addRegisterClass(MVT::v9i32, &AMDGPU::SGPR_288RegClass);
140-
addRegisterClass(MVT::v9f32, TRI->getVGPRClassForBitWidth(288));
140+
addRegisterClass(MVT::v9f32, &AMDGPU::VReg_288RegClass);
141141

142142
addRegisterClass(MVT::v10i32, &AMDGPU::SGPR_320RegClass);
143-
addRegisterClass(MVT::v10f32, TRI->getVGPRClassForBitWidth(320));
143+
addRegisterClass(MVT::v10f32, &AMDGPU::VReg_320RegClass);
144144

145145
addRegisterClass(MVT::v11i32, &AMDGPU::SGPR_352RegClass);
146-
addRegisterClass(MVT::v11f32, TRI->getVGPRClassForBitWidth(352));
146+
addRegisterClass(MVT::v11f32, &AMDGPU::VReg_352RegClass);
147147

148148
addRegisterClass(MVT::v12i32, &AMDGPU::SGPR_384RegClass);
149-
addRegisterClass(MVT::v12f32, TRI->getVGPRClassForBitWidth(384));
149+
addRegisterClass(MVT::v12f32, &AMDGPU::VReg_384RegClass);
150150

151151
addRegisterClass(MVT::v16i32, &AMDGPU::SGPR_512RegClass);
152-
addRegisterClass(MVT::v16f32, TRI->getVGPRClassForBitWidth(512));
152+
addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
153153

154154
addRegisterClass(MVT::v8i64, &AMDGPU::SGPR_512RegClass);
155-
addRegisterClass(MVT::v8f64, TRI->getVGPRClassForBitWidth(512));
155+
addRegisterClass(MVT::v8f64, &AMDGPU::VReg_512RegClass);
156156

157157
addRegisterClass(MVT::v16i64, &AMDGPU::SGPR_1024RegClass);
158-
addRegisterClass(MVT::v16f64, TRI->getVGPRClassForBitWidth(1024));
158+
addRegisterClass(MVT::v16f64, &AMDGPU::VReg_1024RegClass);
159159

160160
if (Subtarget->has16BitInsts()) {
161161
if (Subtarget->useRealTrue16Insts()) {
@@ -187,7 +187,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
187187
}
188188

189189
addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
190-
addRegisterClass(MVT::v32f32, TRI->getVGPRClassForBitWidth(1024));
190+
addRegisterClass(MVT::v32f32, &AMDGPU::VReg_1024RegClass);
191191

192192
computeRegisterProperties(Subtarget->getRegisterInfo());
193193

llvm/test/CodeGen/AMDGPU/mfma-loop.ll

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2430,8 +2430,9 @@ define amdgpu_kernel void @test_mfma_nested_loop_zeroinit(ptr addrspace(1) %arg)
24302430
; GFX90A-NEXT: v_accvgpr_mov_b32 a29, a0
24312431
; GFX90A-NEXT: v_accvgpr_mov_b32 a30, a0
24322432
; GFX90A-NEXT: v_accvgpr_mov_b32 a31, a0
2433-
; GFX90A-NEXT: v_mov_b32_e32 v0, 2.0
2434-
; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
2433+
; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
2434+
; GFX90A-NEXT: v_mov_b32_e32 v1, 2.0
2435+
; GFX90A-NEXT: ; kill: def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 killed $exec
24352436
; GFX90A-NEXT: .LBB9_1: ; %for.cond.preheader
24362437
; GFX90A-NEXT: ; =>This Loop Header: Depth=1
24372438
; GFX90A-NEXT: ; Child Loop BB9_2 Depth 2
@@ -2440,7 +2441,7 @@ define amdgpu_kernel void @test_mfma_nested_loop_zeroinit(ptr addrspace(1) %arg)
24402441
; GFX90A-NEXT: ; Parent Loop BB9_1 Depth=1
24412442
; GFX90A-NEXT: ; => This Inner Loop Header: Depth=2
24422443
; GFX90A-NEXT: s_nop 0
2443-
; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
2444+
; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
24442445
; GFX90A-NEXT: s_add_i32 s1, s1, -1
24452446
; GFX90A-NEXT: s_cmp_lg_u32 s1, 0
24462447
; GFX90A-NEXT: s_cbranch_scc1 .LBB9_2
@@ -2500,8 +2501,9 @@ define amdgpu_kernel void @test_mfma_nested_loop_zeroinit(ptr addrspace(1) %arg)
25002501
; GFX942-NEXT: v_accvgpr_mov_b32 a29, a0
25012502
; GFX942-NEXT: v_accvgpr_mov_b32 a30, a0
25022503
; GFX942-NEXT: v_accvgpr_mov_b32 a31, a0
2503-
; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
2504-
; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
2504+
; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
2505+
; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
2506+
; GFX942-NEXT: ; kill: def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 killed $exec
25052507
; GFX942-NEXT: .LBB9_1: ; %for.cond.preheader
25062508
; GFX942-NEXT: ; =>This Loop Header: Depth=1
25072509
; GFX942-NEXT: ; Child Loop BB9_2 Depth 2
@@ -2510,7 +2512,7 @@ define amdgpu_kernel void @test_mfma_nested_loop_zeroinit(ptr addrspace(1) %arg)
25102512
; GFX942-NEXT: ; Parent Loop BB9_1 Depth=1
25112513
; GFX942-NEXT: ; => This Inner Loop Header: Depth=2
25122514
; GFX942-NEXT: s_nop 0
2513-
; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v1, v0, a[0:31]
2515+
; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
25142516
; GFX942-NEXT: s_add_i32 s1, s1, -1
25152517
; GFX942-NEXT: s_cmp_lg_u32 s1, 0
25162518
; GFX942-NEXT: s_cbranch_scc1 .LBB9_2

0 commit comments

Comments
 (0)