Skip to content

Commit eb3bb42

Browse files
committed
AMDGPU: Stop using aligned VGPR classes for addRegisterClass
This is unnecessary. At use emission time, InstrEmitter will use the common subclass of the value type's register class and the register class required by the use instruction. This removes one of the obstacles to no longer treating special-case instructions, which do not have the alignment requirement, overly conservatively.
1 parent 5b4d86d commit eb3bb42

File tree

2 files changed

+24
-22
lines changed

2 files changed

+24
-22
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -111,52 +111,52 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
111111
addRegisterClass(MVT::Untyped, V64RegClass);
112112

113113
addRegisterClass(MVT::v3i32, &AMDGPU::SGPR_96RegClass);
114-
addRegisterClass(MVT::v3f32, TRI->getVGPRClassForBitWidth(96));
114+
addRegisterClass(MVT::v3f32, &AMDGPU::VReg_96RegClass);
115115

116116
addRegisterClass(MVT::v2i64, &AMDGPU::SGPR_128RegClass);
117117
addRegisterClass(MVT::v2f64, &AMDGPU::SGPR_128RegClass);
118118

119119
addRegisterClass(MVT::v4i32, &AMDGPU::SGPR_128RegClass);
120-
addRegisterClass(MVT::v4f32, TRI->getVGPRClassForBitWidth(128));
120+
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
121121

122122
addRegisterClass(MVT::v5i32, &AMDGPU::SGPR_160RegClass);
123-
addRegisterClass(MVT::v5f32, TRI->getVGPRClassForBitWidth(160));
123+
addRegisterClass(MVT::v5f32, &AMDGPU::VReg_160RegClass);
124124

125125
addRegisterClass(MVT::v6i32, &AMDGPU::SGPR_192RegClass);
126-
addRegisterClass(MVT::v6f32, TRI->getVGPRClassForBitWidth(192));
126+
addRegisterClass(MVT::v6f32, &AMDGPU::VReg_192RegClass);
127127

128128
addRegisterClass(MVT::v3i64, &AMDGPU::SGPR_192RegClass);
129-
addRegisterClass(MVT::v3f64, TRI->getVGPRClassForBitWidth(192));
129+
addRegisterClass(MVT::v3f64, &AMDGPU::VReg_192RegClass);
130130

131131
addRegisterClass(MVT::v7i32, &AMDGPU::SGPR_224RegClass);
132-
addRegisterClass(MVT::v7f32, TRI->getVGPRClassForBitWidth(224));
132+
addRegisterClass(MVT::v7f32, &AMDGPU::VReg_224RegClass);
133133

134134
addRegisterClass(MVT::v8i32, &AMDGPU::SGPR_256RegClass);
135-
addRegisterClass(MVT::v8f32, TRI->getVGPRClassForBitWidth(256));
135+
addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
136136

137137
addRegisterClass(MVT::v4i64, &AMDGPU::SGPR_256RegClass);
138-
addRegisterClass(MVT::v4f64, TRI->getVGPRClassForBitWidth(256));
138+
addRegisterClass(MVT::v4f64, &AMDGPU::VReg_256RegClass);
139139

140140
addRegisterClass(MVT::v9i32, &AMDGPU::SGPR_288RegClass);
141-
addRegisterClass(MVT::v9f32, TRI->getVGPRClassForBitWidth(288));
141+
addRegisterClass(MVT::v9f32, &AMDGPU::VReg_288RegClass);
142142

143143
addRegisterClass(MVT::v10i32, &AMDGPU::SGPR_320RegClass);
144-
addRegisterClass(MVT::v10f32, TRI->getVGPRClassForBitWidth(320));
144+
addRegisterClass(MVT::v10f32, &AMDGPU::VReg_320RegClass);
145145

146146
addRegisterClass(MVT::v11i32, &AMDGPU::SGPR_352RegClass);
147-
addRegisterClass(MVT::v11f32, TRI->getVGPRClassForBitWidth(352));
147+
addRegisterClass(MVT::v11f32, &AMDGPU::VReg_352RegClass);
148148

149149
addRegisterClass(MVT::v12i32, &AMDGPU::SGPR_384RegClass);
150-
addRegisterClass(MVT::v12f32, TRI->getVGPRClassForBitWidth(384));
150+
addRegisterClass(MVT::v12f32, &AMDGPU::VReg_384RegClass);
151151

152152
addRegisterClass(MVT::v16i32, &AMDGPU::SGPR_512RegClass);
153-
addRegisterClass(MVT::v16f32, TRI->getVGPRClassForBitWidth(512));
153+
addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
154154

155155
addRegisterClass(MVT::v8i64, &AMDGPU::SGPR_512RegClass);
156-
addRegisterClass(MVT::v8f64, TRI->getVGPRClassForBitWidth(512));
156+
addRegisterClass(MVT::v8f64, &AMDGPU::VReg_512RegClass);
157157

158158
addRegisterClass(MVT::v16i64, &AMDGPU::SGPR_1024RegClass);
159-
addRegisterClass(MVT::v16f64, TRI->getVGPRClassForBitWidth(1024));
159+
addRegisterClass(MVT::v16f64, &AMDGPU::VReg_1024RegClass);
160160

161161
if (Subtarget->has16BitInsts()) {
162162
if (Subtarget->useRealTrue16Insts()) {
@@ -188,7 +188,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
188188
}
189189

190190
addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
191-
addRegisterClass(MVT::v32f32, TRI->getVGPRClassForBitWidth(1024));
191+
addRegisterClass(MVT::v32f32, &AMDGPU::VReg_1024RegClass);
192192

193193
computeRegisterProperties(Subtarget->getRegisterInfo());
194194

llvm/test/CodeGen/AMDGPU/mfma-loop.ll

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2399,8 +2399,9 @@ define amdgpu_kernel void @test_mfma_nested_loop_zeroinit(ptr addrspace(1) %arg)
23992399
; GFX90A-NEXT: v_accvgpr_mov_b32 a29, a0
24002400
; GFX90A-NEXT: v_accvgpr_mov_b32 a30, a0
24012401
; GFX90A-NEXT: v_accvgpr_mov_b32 a31, a0
2402-
; GFX90A-NEXT: v_mov_b32_e32 v0, 2.0
2403-
; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
2402+
; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
2403+
; GFX90A-NEXT: v_mov_b32_e32 v1, 2.0
2404+
; GFX90A-NEXT: ; kill: def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 killed $exec
24042405
; GFX90A-NEXT: .LBB9_1: ; %for.cond.preheader
24052406
; GFX90A-NEXT: ; =>This Loop Header: Depth=1
24062407
; GFX90A-NEXT: ; Child Loop BB9_2 Depth 2
@@ -2409,7 +2410,7 @@ define amdgpu_kernel void @test_mfma_nested_loop_zeroinit(ptr addrspace(1) %arg)
24092410
; GFX90A-NEXT: ; Parent Loop BB9_1 Depth=1
24102411
; GFX90A-NEXT: ; => This Inner Loop Header: Depth=2
24112412
; GFX90A-NEXT: s_nop 0
2412-
; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
2413+
; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
24132414
; GFX90A-NEXT: s_add_i32 s1, s1, -1
24142415
; GFX90A-NEXT: s_cmp_lg_u32 s1, 0
24152416
; GFX90A-NEXT: s_cbranch_scc1 .LBB9_2
@@ -2468,8 +2469,9 @@ define amdgpu_kernel void @test_mfma_nested_loop_zeroinit(ptr addrspace(1) %arg)
24682469
; GFX942-NEXT: v_accvgpr_mov_b32 a29, a0
24692470
; GFX942-NEXT: v_accvgpr_mov_b32 a30, a0
24702471
; GFX942-NEXT: v_accvgpr_mov_b32 a31, a0
2471-
; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
2472-
; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
2472+
; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
2473+
; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
2474+
; GFX942-NEXT: ; kill: def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 killed $exec
24732475
; GFX942-NEXT: .LBB9_1: ; %for.cond.preheader
24742476
; GFX942-NEXT: ; =>This Loop Header: Depth=1
24752477
; GFX942-NEXT: ; Child Loop BB9_2 Depth 2
@@ -2478,7 +2480,7 @@ define amdgpu_kernel void @test_mfma_nested_loop_zeroinit(ptr addrspace(1) %arg)
24782480
; GFX942-NEXT: ; Parent Loop BB9_1 Depth=1
24792481
; GFX942-NEXT: ; => This Inner Loop Header: Depth=2
24802482
; GFX942-NEXT: s_nop 0
2481-
; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v1, v0, a[0:31]
2483+
; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
24822484
; GFX942-NEXT: s_add_i32 s1, s1, -1
24832485
; GFX942-NEXT: s_cmp_lg_u32 s1, 0
24842486
; GFX942-NEXT: s_cbranch_scc1 .LBB9_2

0 commit comments

Comments
 (0)