Skip to content

Commit b575811

Browse files
committed
Address review comments
1 parent 4f2c25e commit b575811

File tree

4 files changed

+59
-39
lines changed

4 files changed

+59
-39
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -6724,16 +6724,18 @@ bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr(
67246724
return true;
67256725
}
67266726

6727-
bool AMDGPULegalizerInfo::legalizeTrapHsa(
6728-
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
6729-
if (!ST.requiresSimulatedTrap()) {
6730-
B.buildInstr(AMDGPU::S_TRAP)
6731-
.addImm(static_cast<unsigned>(GCNSubtarget::TrapID::LLVMAMDHSATrap));
6727+
bool AMDGPULegalizerInfo::legalizeTrapHsa(MachineInstr &MI,
6728+
MachineRegisterInfo &MRI,
6729+
MachineIRBuilder &B) const {
6730+
if (ST.requiresSimulatedTrap()) {
6731+
ST.getInstrInfo()->insertSimulatedTrap(MRI, B.getMBB(), MI,
6732+
MI.getDebugLoc());
67326733
MI.eraseFromParent();
67336734
return true;
67346735
}
67356736

6736-
ST.getInstrInfo()->insertSimulatedTrap(MRI, B.getMBB(), MI, MI.getDebugLoc());
6737+
B.buildInstr(AMDGPU::S_TRAP)
6738+
.addImm(static_cast<unsigned>(GCNSubtarget::TrapID::LLVMAMDHSATrap));
67376739
MI.eraseFromParent();
67386740
return true;
67396741
}

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
#include "Utils/AMDGPUBaseInfo.h"
2424
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
2525
#include "llvm/Support/ErrorHandling.h"
26+
#include "llvm/TargetParser/TargetParser.h"
2627

2728
#define GET_SUBTARGETINFO_HEADER
2829
#include "AMDGPUGenSubtargetInfo.inc"
@@ -443,7 +444,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
443444
return isAmdHsaOS() ? TrapHandlerAbi::AMDHSA : TrapHandlerAbi::NONE;
444445
}
445446

446-
bool requiresSimulatedTrap() const { return getGeneration() == GFX11; }
447+
// True on hardware where 's_trap 2' is treated as a nop that must be
448+
// simulated.
449+
bool requiresSimulatedTrap() const {
450+
AMDGPU::IsaVersion V = AMDGPU::getIsaVersion(getCPU());
451+
return V.Major == 11 && V.Minor <= 3;
452+
}
447453

448454
bool supportsGetDoorbellID() const {
449455
// The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2035,6 +2035,9 @@ MachineBasicBlock *SIInstrInfo::insertSimulatedTrap(MachineRegisterInfo &MRI,
20352035
MachineBasicBlock *HaltLoop = MF->CreateMachineBasicBlock();
20362036
MF->push_back(HaltLoop);
20372037

2038+
constexpr unsigned DoorbellIDMask = 0x3ff;
2039+
constexpr unsigned ECQueueWaveAbort = 0x400;
2040+
20382041
// Start with a `s_trap 2`, if we're in PRIV=1 and we need the workaround this
20392042
// will be a nop.
20402043
BuildMI(MBB, MI, DL, get(AMDGPU::S_TRAP))
@@ -2044,15 +2047,16 @@ MachineBasicBlock *SIInstrInfo::insertSimulatedTrap(MachineRegisterInfo &MRI,
20442047
.addImm(AMDGPU::SendMsg::ID_RTN_GET_DOORBELL);
20452048
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
20462049
.addUse(AMDGPU::M0);
2047-
Register And0x3ff = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
2048-
BuildMI(MBB, MI, DL, get(AMDGPU::S_AND_B32), And0x3ff)
2050+
Register DoorbellRegMasked =
2051+
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
2052+
BuildMI(MBB, MI, DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)
20492053
.addUse(DoorbellReg)
2050-
.addImm(0x3ff);
2054+
.addImm(DoorbellIDMask);
20512055
Register SetWaveAbortBit =
20522056
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
20532057
BuildMI(MBB, MI, DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
2054-
.addUse(And0x3ff)
2055-
.addImm(0x400);
2058+
.addUse(DoorbellRegMasked)
2059+
.addImm(ECQueueWaveAbort);
20562060
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
20572061
.addUse(SetWaveAbortBit);
20582062
BuildMI(MBB, MI, DL, get(AMDGPU::S_SENDMSG))

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trap-gfx11.mir

Lines changed: 35 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -1,36 +1,44 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
2-
# RUN: llc -global-isel=1 -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -o - -run-pass=legalizer %s | FileCheck -check-prefix=GCN %s
2+
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -o - -run-pass=legalizer %s | FileCheck -check-prefix=GFX1100 %s
3+
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1150 -o - -run-pass=legalizer %s | FileCheck -check-prefix=GFX1150 %s
34

45
---
56
name: test_trap
67
body: |
78
bb.0:
8-
; GCN-LABEL: name: test_trap
9-
; GCN: successors: %bb.2(0x80000000)
10-
; GCN-NEXT: {{ $}}
11-
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
12-
; GCN-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
13-
; GCN-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1)
14-
; GCN-NEXT: S_TRAP 2
15-
; GCN-NEXT: [[S_SENDMSG_RTN_B32_:%[0-9]+]]:sreg_32 = S_SENDMSG_RTN_B32 128
16-
; GCN-NEXT: $ttmp2 = S_MOV_B32 $m0
17-
; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_SENDMSG_RTN_B32_]], 1023, implicit-def $scc
18-
; GCN-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_AND_B32_]], 1024, implicit-def $scc
19-
; GCN-NEXT: $m0 = S_MOV_B32 [[S_OR_B32_]]
20-
; GCN-NEXT: S_SENDMSG 1, implicit $exec, implicit $m0
21-
; GCN-NEXT: $m0 = S_MOV_B32 $ttmp2
22-
; GCN-NEXT: S_BRANCH %bb.2
23-
; GCN-NEXT: {{ $}}
24-
; GCN-NEXT: .1:
25-
; GCN-NEXT: successors:
26-
; GCN-NEXT: {{ $}}
27-
; GCN-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1)
28-
; GCN-NEXT: {{ $}}
29-
; GCN-NEXT: .2:
30-
; GCN-NEXT: successors: %bb.2(0x80000000)
31-
; GCN-NEXT: {{ $}}
32-
; GCN-NEXT: S_SETHALT 5
33-
; GCN-NEXT: S_BRANCH %bb.2
9+
; GFX1100-LABEL: name: test_trap
10+
; GFX1100: successors: %bb.2(0x80000000)
11+
; GFX1100-NEXT: {{ $}}
12+
; GFX1100-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
13+
; GFX1100-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
14+
; GFX1100-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1)
15+
; GFX1100-NEXT: S_TRAP 2
16+
; GFX1100-NEXT: [[S_SENDMSG_RTN_B32_:%[0-9]+]]:sreg_32 = S_SENDMSG_RTN_B32 128
17+
; GFX1100-NEXT: $ttmp2 = S_MOV_B32 $m0
18+
; GFX1100-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_SENDMSG_RTN_B32_]], 1023, implicit-def $scc
19+
; GFX1100-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_AND_B32_]], 1024, implicit-def $scc
20+
; GFX1100-NEXT: $m0 = S_MOV_B32 [[S_OR_B32_]]
21+
; GFX1100-NEXT: S_SENDMSG 1, implicit $exec, implicit $m0
22+
; GFX1100-NEXT: $m0 = S_MOV_B32 $ttmp2
23+
; GFX1100-NEXT: S_BRANCH %bb.2
24+
; GFX1100-NEXT: {{ $}}
25+
; GFX1100-NEXT: .1:
26+
; GFX1100-NEXT: successors:
27+
; GFX1100-NEXT: {{ $}}
28+
; GFX1100-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1)
29+
; GFX1100-NEXT: {{ $}}
30+
; GFX1100-NEXT: .2:
31+
; GFX1100-NEXT: successors: %bb.2(0x80000000)
32+
; GFX1100-NEXT: {{ $}}
33+
; GFX1100-NEXT: S_SETHALT 5
34+
; GFX1100-NEXT: S_BRANCH %bb.2
35+
;
36+
; GFX1150-LABEL: name: test_trap
37+
; GFX1150: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
38+
; GFX1150-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
39+
; GFX1150-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1)
40+
; GFX1150-NEXT: S_TRAP 2
41+
; GFX1150-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1)
3442
%0:_(s8) = G_CONSTANT i8 0
3543
%1:_(p1) = G_CONSTANT i64 0
3644
G_STORE %0, %1 :: (store 1, addrspace 1)

0 commit comments

Comments
 (0)