Skip to content

Commit 207cd5f

Browse files
committed
[AMDGPU] Add the SGPR used for FP copy to block livein lists.
The temporary register used for FP copy should be live throughout the function.
1 parent 10ff24d commit 207cd5f

File tree

2 files changed

+48
-0
lines changed

2 files changed

+48
-0
lines changed

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,9 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
614614
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->SGPRForFPSaveRestoreCopy)
615615
.addReg(FramePtrReg)
616616
.setMIFlag(MachineInstr::FrameSetup);
617+
// Add the FP copy register to every block's live-in list so that it is live throughout the function.
618+
for (MachineBasicBlock &MBB : MF)
619+
MBB.addLiveIn(FuncInfo->SGPRForFPSaveRestoreCopy);
617620
}
618621

619622
for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
; RUN: llc -march=amdgcn -mcpu=gfx900 -stop-after=prologepilog -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2+
3+
; This is a small loop test that iterates over the array member of a structure argument passed byval to the function.
4+
; The loop code will keep the prologue and epilogue blocks apart.
5+
; The test primarily checks that the temp register used to preserve the earlier FP value
6+
; is live-in at every BB in the function.
7+
8+
%struct.Data = type { [20 x i32] }
9+
10+
define i32 @fp_save_restore_in_temp_sgpr(%struct.Data addrspace(5)* nocapture readonly byval(%struct.Data) align 4 %arg) #0 {
11+
; GCN-LABEL: name: fp_save_restore_in_temp_sgpr
12+
; GCN: bb.0.begin:
13+
; GCN: liveins: $sgpr30_sgpr31, $sgpr7
14+
; GCN: $sgpr7 = frame-setup COPY $sgpr33
15+
; GCN: $sgpr33 = frame-setup COPY $sgpr32
16+
; GCN: bb.1.lp_end:
17+
; GCN: liveins: $sgpr6, $vgpr1, $sgpr4_sgpr5, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31, $sgpr7
18+
; GCN: bb.2.lp_begin:
19+
; GCN: liveins: $sgpr6, $vgpr1, $sgpr4_sgpr5, $sgpr8_sgpr9, $sgpr30_sgpr31, $sgpr7
20+
; GCN: bb.3.Flow:
21+
; GCN: liveins: $sgpr6, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31, $sgpr7
22+
; GCN: bb.4.end:
23+
; GCN: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr30_sgpr31, $sgpr7
24+
; GCN: $sgpr33 = frame-setup COPY $sgpr7
25+
begin:
26+
br label %lp_begin
27+
28+
lp_end: ; preds = %lp_begin
29+
%cur_idx = add nuw nsw i32 %idx, 1
30+
%lp_term_cond = icmp eq i32 %cur_idx, 20
31+
br i1 %lp_term_cond, label %end, label %lp_begin
32+
33+
lp_begin: ; preds = %lp_end, %begin
34+
%idx = phi i32 [ 0, %begin ], [ %cur_idx, %lp_end ]
35+
%ptr = getelementptr inbounds %struct.Data, %struct.Data addrspace(5)* %arg, i32 0, i32 0, i32 %idx
36+
%data = load i32, i32 addrspace(5)* %ptr, align 4
37+
%data_cmp = icmp eq i32 %data, %idx
38+
br i1 %data_cmp, label %lp_end, label %end
39+
40+
end: ; preds = %lp_end, %lp_begin
41+
%ret_val = phi i32 [ 0, %lp_begin ], [ 1, %lp_end ]
42+
ret i32 %ret_val
43+
}
44+
45+
attributes #0 = { norecurse nounwind "frame-pointer"="all" }

0 commit comments

Comments
 (0)