Skip to content

Commit adbca59

Browse files
committed
[IR] Add CallBr intrinsics support
This commit adds support for using intrinsics with callbr. Most uses will look like this example:

```llvm
  callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
kill:
  unreachable
cont:
  ...
```
1 parent 77d655b commit adbca59

File tree

9 files changed

+313
-34
lines changed

9 files changed

+313
-34
lines changed

llvm/docs/LangRef.rst

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9788,8 +9788,12 @@ The '``callbr``' instruction causes control to transfer to a specified
97889788
function, with the possibility of control flow transfer to either the
97899789
'``fallthrough``' label or one of the '``indirect``' labels.
97909790

9791-
This instruction should only be used to implement the "goto" feature of gcc
9792-
style inline assembly. Any other usage is an error in the IR verifier.
9791+
This instruction can currently only be used:
9792+
9793+
#. to implement the "goto" feature of gcc style inline assembly or
9794+
#. to call selected intrinsics.
9795+
9796+
Any other usage is an error in the IR verifier.
97939797

97949798
Note that in order to support outputs along indirect edges, LLVM may need to
97959799
split critical edges, which may require synthesizing a replacement block for
@@ -9838,7 +9842,7 @@ This instruction requires several arguments:
98389842
indicates the function accepts a variable number of arguments, the
98399843
extra arguments can be specified.
98409844
#. '``fallthrough label``': the label reached when the inline assembly's
9841-
execution exits the bottom.
9845+
execution exits the bottom, or when the intrinsic call returns.
98429846
#. '``indirect labels``': the labels reached when a callee transfers control
98439847
to a location other than the '``fallthrough label``'. Label constraints
98449848
refer to these destinations.
@@ -9856,9 +9860,12 @@ flow goes after the call.
98569860
The output values of a '``callbr``' instruction are available both in the
98579861
'``fallthrough``' block, and any '``indirect``' block(s).
98589862

9859-
The only use of this today is to implement the "goto" feature of gcc inline
9860-
assembly where additional labels can be provided as locations for the inline
9861-
assembly to jump to.
9863+
The only current uses of this are:
9864+
9865+
#. implement the "goto" feature of gcc inline assembly where additional
9866+
labels can be provided as locations for the inline assembly to jump to.
9867+
#. support selected intrinsics which manipulate control flow and should
9868+
be chained to specific terminators, such as '``unreachable``'.
98629869

98639870
Example:
98649871
""""""""
@@ -9873,6 +9880,14 @@ Example:
98739880
<result> = callbr i32 asm "", "=r,r,!i"(i32 %x)
98749881
to label %fallthrough [label %indirect]
98759882

9883+
; intrinsic which should be followed by unreachable (the order of the
9884+
; blocks after the callbr instruction doesn't matter)
9885+
callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
9886+
cont:
9887+
...
9888+
kill:
9889+
unreachable
9890+
98769891
.. _i_resume:
98779892

98789893
'``resume``' Instruction

llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,8 @@ class IRTranslator : public MachineFunctionPass {
317317
bool translateInvoke(const User &U, MachineIRBuilder &MIRBuilder);
318318

319319
bool translateCallBr(const User &U, MachineIRBuilder &MIRBuilder);
320+
bool translateCallBrIntrinsic(const CallBrInst &I,
321+
MachineIRBuilder &MIRBuilder);
320322

321323
bool translateLandingPad(const User &U, MachineIRBuilder &MIRBuilder);
322324

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2824,7 +2824,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
28242824
IsTgtMemIntrinsic ? &Info : nullptr);
28252825
}
28262826

2827-
/// Translate a call to an intrinsic.
2827+
/// Translate a call or callbr to an intrinsic.
28282828
/// Depending on whether TLI->getTgtMemIntrinsic() is true, TgtMemIntrinsicInfo
28292829
/// is a pointer to the correspondingly populated IntrinsicInfo object.
28302830
/// Otherwise, this pointer is null.
@@ -3052,10 +3052,39 @@ bool IRTranslator::translateInvoke(const User &U,
30523052
return true;
30533053
}
30543054

3055+
/// The intrinsics currently supported by callbr are implicit control flow
3056+
/// intrinsics such as amdgcn.kill.
30553057
bool IRTranslator::translateCallBr(const User &U,
30563058
MachineIRBuilder &MIRBuilder) {
3057-
// FIXME: Implement this.
3058-
return false;
3059+
if (containsBF16Type(U))
3060+
return false; // see translateCall
3061+
3062+
const CallBrInst &I = cast<CallBrInst>(U);
3063+
MachineBasicBlock *CallBrMBB = &MIRBuilder.getMBB();
3064+
3065+
// FIXME: inline asm is not yet supported for callbr in GlobalISel. As soon as
3066+
// we add support, we need to handle the indirect asm targets, see
3067+
// SelectionDAGBuilder::visitCallBr().
3068+
Intrinsic::ID IID = I.getIntrinsicID();
3069+
if (I.isInlineAsm())
3070+
return false;
3071+
if (!translateIntrinsic(I, IID, MIRBuilder))
3072+
return false;
3073+
3074+
// Retrieve successors.
3075+
SmallPtrSet<BasicBlock *, 8> Dests = {I.getDefaultDest()};
3076+
MachineBasicBlock *Return = &getMBB(*I.getDefaultDest());
3077+
3078+
// Update successor info.
3079+
addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
3080+
// TODO: In most cases where there is an intrinsic callbr, there is
3081+
// exactly one indirect target, which will be unreachable. As soon as
3082+
// this changes, we might need to enhance
3083+
// Target->setIsInlineAsmBrIndirectTarget or add something similar for
3084+
// intrinsic indirect branches.
3085+
CallBrMBB->normalizeSuccProbs();
3086+
3087+
return true;
30593088
}
30603089

30613090
bool IRTranslator::translateLandingPad(const User &U,

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 49 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3507,16 +3507,39 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
35073507
DAG.getBasicBlock(Return)));
35083508
}
35093509

3510+
/// The intrinsics currently supported by callbr are implicit control flow
3511+
/// intrinsics such as amdgcn.kill.
3512+
/// - they should be called (no "dontcall-" attributes)
3513+
/// - they do not touch memory on the target (= !TLI.getTgtMemIntrinsic())
3514+
/// - they do not need custom argument handling (no
3515+
/// TLI.CollectTargetIntrinsicOperands())
3516+
void SelectionDAGBuilder::visitCallBrIntrinsic(const CallBrInst &I) {
3517+
auto [HasChain, OnlyLoad] = getTargetIntrinsicCallProperties(I);
3518+
3519+
SmallVector<SDValue, 8> Ops =
3520+
getTargetIntrinsicOperands(I, HasChain, OnlyLoad);
3521+
SDVTList VTs = getTargetIntrinsicVTList(I, HasChain);
3522+
3523+
// Create the node.
3524+
SDValue Result =
3525+
getTargetNonMemIntrinsicNode(*I.getType(), HasChain, Ops, VTs);
3526+
Result = handleTargetIntrinsicRet(I, HasChain, OnlyLoad, Result);
3527+
3528+
setValue(&I, Result);
3529+
}
3530+
35103531
void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
35113532
MachineBasicBlock *CallBrMBB = FuncInfo.MBB;
35123533

3513-
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
3514-
// have to do anything here to lower funclet bundles.
3515-
failForInvalidBundles(I, "callbrs",
3516-
{LLVMContext::OB_deopt, LLVMContext::OB_funclet});
3517-
3518-
assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr");
3519-
visitInlineAsm(I);
3534+
if (I.isInlineAsm()) {
3535+
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
3536+
// have to do anything here to lower funclet bundles.
3537+
failForInvalidBundles(I, "callbrs",
3538+
{LLVMContext::OB_deopt, LLVMContext::OB_funclet});
3539+
visitInlineAsm(I);
3540+
} else {
3541+
visitCallBrIntrinsic(I);
3542+
}
35203543
CopyToExportRegsIfNeeded(&I);
35213544

35223545
// Retrieve successors.
@@ -3526,18 +3549,25 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
35263549

35273550
// Update successor info.
35283551
addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
3529-
for (BasicBlock *Dest : I.getIndirectDests()) {
3530-
MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
3531-
Target->setIsInlineAsmBrIndirectTarget();
3532-
// If we introduce a type of asm goto statement that is permitted to use an
3533-
// indirect call instruction to jump to its labels, then we should add a
3534-
// call to Target->setMachineBlockAddressTaken() here, to mark the target
3535-
// block as requiring a BTI.
3536-
3537-
Target->setLabelMustBeEmitted();
3538-
// Don't add duplicate machine successors.
3539-
if (Dests.insert(Dest).second)
3540-
addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
3552+
// TODO: In most cases where there is an intrinsic callbr, there is
3553+
// exactly one indirect target, which will be unreachable. As soon as
3554+
// this changes, we might need to enhance
3555+
// Target->setIsInlineAsmBrIndirectTarget or add something similar for
3556+
// intrinsic indirect branches.
3557+
if (I.isInlineAsm()) {
3558+
for (BasicBlock *Dest : I.getIndirectDests()) {
3559+
MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
3560+
Target->setIsInlineAsmBrIndirectTarget();
3561+
// If we introduce a type of asm goto statement that is permitted to use
3562+
// an indirect call instruction to jump to its labels, then we should add
3563+
// a call to Target->setMachineBlockAddressTaken() here, to mark the
3564+
// target block as requiring a BTI.
3565+
3566+
Target->setLabelMustBeEmitted();
3567+
// Don't add duplicate machine successors.
3568+
if (Dests.insert(Dest).second)
3569+
addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
3570+
}
35413571
}
35423572
CallBrMBB->normalizeSuccProbs();
35433573

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -551,10 +551,12 @@ class SelectionDAGBuilder {
551551
private:
552552
// These all get lowered before this pass.
553553
void visitInvoke(const InvokeInst &I);
554-
void visitCallBr(const CallBrInst &I);
555554
void visitCallBrLandingPad(const CallInst &I);
556555
void visitResume(const ResumeInst &I);
557556

557+
void visitCallBr(const CallBrInst &I);
558+
void visitCallBrIntrinsic(const CallBrInst &I);
559+
558560
void visitUnary(const User &I, unsigned Opcode);
559561
void visitFNeg(const User &I) { visitUnary(I, ISD::FNEG); }
560562

llvm/lib/IR/Verifier.cpp

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3379,11 +3379,34 @@ void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
33793379
}
33803380

33813381
void Verifier::visitCallBrInst(CallBrInst &CBI) {
3382-
Check(CBI.isInlineAsm(), "Callbr is currently only used for asm-goto!", &CBI);
3383-
const InlineAsm *IA = cast<InlineAsm>(CBI.getCalledOperand());
3384-
Check(!IA->canThrow(), "Unwinding from Callbr is not allowed");
3382+
if (!CBI.isInlineAsm()) {
3383+
Check(CBI.getCalledFunction(),
3384+
"Callbr: indirect function / invalid signature");
3385+
Check(!CBI.hasOperandBundles(),
3386+
"Callbr for intrinsics currently doesn't support operand bundles");
3387+
3388+
switch (CBI.getIntrinsicID()) {
3389+
case Intrinsic::amdgcn_kill: {
3390+
Check(CBI.getNumIndirectDests() == 1,
3391+
"Callbr amdgcn_kill only supports one indirect dest");
3392+
bool Unreachable = isa<UnreachableInst>(CBI.getIndirectDest(0)->begin());
3393+
CallInst *Call = dyn_cast<CallInst>(CBI.getIndirectDest(0)->begin());
3394+
Check(Unreachable || (Call && Call->getIntrinsicID() ==
3395+
Intrinsic::amdgcn_unreachable),
3396+
"Callbr amdgcn_kill indirect dest needs to be unreachable");
3397+
break;
3398+
}
3399+
default:
3400+
CheckFailed(
3401+
"Callbr currently only supports asm-goto and selected intrinsics");
3402+
}
3403+
visitIntrinsicCall(CBI.getIntrinsicID(), CBI);
3404+
} else {
3405+
const InlineAsm *IA = cast<InlineAsm>(CBI.getCalledOperand());
3406+
Check(!IA->canThrow(), "Unwinding from Callbr is not allowed");
33853407

3386-
verifyInlineAsmCall(CBI);
3408+
verifyInlineAsmCall(CBI);
3409+
}
33873410
visitTerminator(CBI);
33883411
}
33893412

@@ -5479,7 +5502,7 @@ void Verifier::visitInstruction(Instruction &I) {
54795502
(CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i)) ||
54805503
IsAttachedCallOperand(F, CBI, i)),
54815504
"Cannot take the address of an intrinsic!", &I);
5482-
Check(!F->isIntrinsic() || isa<CallInst>(I) ||
5505+
Check(!F->isIntrinsic() || isa<CallInst>(I) || isa<CallBrInst>(I) ||
54835506
F->getIntrinsicID() == Intrinsic::donothing ||
54845507
F->getIntrinsicID() == Intrinsic::seh_try_begin ||
54855508
F->getIntrinsicID() == Intrinsic::seh_try_end ||

llvm/test/Assembler/callbr.ll

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
2+
3+
declare void @llvm.amdgcn.kill(i1)
4+
5+
define void @test_kill(i1 %c) {
6+
; CHECK-LABEL: define void @test_kill(
7+
; CHECK-SAME: i1 [[C:%.*]]) {
8+
; CHECK-NEXT: callbr void @llvm.amdgcn.kill(i1 [[C]])
9+
; CHECK-NEXT: to label %[[CONT:.*]] [label %kill]
10+
; CHECK: [[KILL:.*:]]
11+
; CHECK-NEXT: unreachable
12+
; CHECK: [[CONT]]:
13+
; CHECK-NEXT: ret void
14+
;
15+
callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
16+
kill:
17+
unreachable
18+
cont:
19+
ret void
20+
}
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s
3+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck --check-prefix=GISEL %s
4+
5+
define void @test_kill(ptr %src, ptr %dst, i1 %c) {
6+
; CHECK-LABEL: test_kill:
7+
; CHECK: ; %bb.0:
8+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9+
; CHECK-NEXT: flat_load_dword v0, v[0:1]
10+
; CHECK-NEXT: v_and_b32_e32 v1, 1, v4
11+
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
12+
; CHECK-NEXT: s_mov_b64 s[4:5], exec
13+
; CHECK-NEXT: s_andn2_b64 s[6:7], exec, vcc
14+
; CHECK-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
15+
; CHECK-NEXT: s_cbranch_scc0 .LBB0_2
16+
; CHECK-NEXT: ; %bb.1:
17+
; CHECK-NEXT: s_and_b64 exec, exec, s[4:5]
18+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
19+
; CHECK-NEXT: flat_store_dword v[2:3], v0
20+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
21+
; CHECK-NEXT: s_setpc_b64 s[30:31]
22+
; CHECK-NEXT: .LBB0_2:
23+
; CHECK-NEXT: s_mov_b64 exec, 0
24+
; CHECK-NEXT: s_endpgm
25+
;
26+
; GISEL-LABEL: test_kill:
27+
; GISEL: ; %bb.0:
28+
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
29+
; GISEL-NEXT: flat_load_dword v0, v[0:1]
30+
; GISEL-NEXT: v_and_b32_e32 v1, 1, v4
31+
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
32+
; GISEL-NEXT: s_mov_b64 s[4:5], exec
33+
; GISEL-NEXT: s_andn2_b64 s[6:7], exec, vcc
34+
; GISEL-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
35+
; GISEL-NEXT: s_cbranch_scc0 .LBB0_2
36+
; GISEL-NEXT: ; %bb.1:
37+
; GISEL-NEXT: s_and_b64 exec, exec, s[4:5]
38+
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
39+
; GISEL-NEXT: flat_store_dword v[2:3], v0
40+
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
41+
; GISEL-NEXT: s_setpc_b64 s[30:31]
42+
; GISEL-NEXT: .LBB0_2:
43+
; GISEL-NEXT: s_mov_b64 exec, 0
44+
; GISEL-NEXT: s_endpgm
45+
%a = load i32, ptr %src, align 4
46+
callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
47+
kill:
48+
unreachable
49+
cont:
50+
store i32 %a, ptr %dst, align 4
51+
ret void
52+
}
53+
54+
define void @test_kill_block_order(ptr %src, ptr %dst, i1 %c) {
55+
; CHECK-LABEL: test_kill_block_order:
56+
; CHECK: ; %bb.0:
57+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
58+
; CHECK-NEXT: flat_load_dword v0, v[0:1]
59+
; CHECK-NEXT: v_and_b32_e32 v1, 1, v4
60+
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
61+
; CHECK-NEXT: s_mov_b64 s[4:5], exec
62+
; CHECK-NEXT: s_andn2_b64 s[6:7], exec, vcc
63+
; CHECK-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
64+
; CHECK-NEXT: s_cbranch_scc0 .LBB1_2
65+
; CHECK-NEXT: ; %bb.1:
66+
; CHECK-NEXT: s_and_b64 exec, exec, s[4:5]
67+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
68+
; CHECK-NEXT: flat_store_dword v[2:3], v0
69+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
70+
; CHECK-NEXT: s_setpc_b64 s[30:31]
71+
; CHECK-NEXT: .LBB1_2:
72+
; CHECK-NEXT: s_mov_b64 exec, 0
73+
; CHECK-NEXT: s_endpgm
74+
;
75+
; GISEL-LABEL: test_kill_block_order:
76+
; GISEL: ; %bb.0:
77+
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78+
; GISEL-NEXT: flat_load_dword v0, v[0:1]
79+
; GISEL-NEXT: v_and_b32_e32 v1, 1, v4
80+
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
81+
; GISEL-NEXT: s_mov_b64 s[4:5], exec
82+
; GISEL-NEXT: s_andn2_b64 s[6:7], exec, vcc
83+
; GISEL-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
84+
; GISEL-NEXT: s_cbranch_scc0 .LBB1_2
85+
; GISEL-NEXT: ; %bb.1:
86+
; GISEL-NEXT: s_and_b64 exec, exec, s[4:5]
87+
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
88+
; GISEL-NEXT: flat_store_dword v[2:3], v0
89+
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
90+
; GISEL-NEXT: s_setpc_b64 s[30:31]
91+
; GISEL-NEXT: .LBB1_2:
92+
; GISEL-NEXT: s_mov_b64 exec, 0
93+
; GISEL-NEXT: s_endpgm
94+
%a = load i32, ptr %src, align 4
95+
callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
96+
cont:
97+
store i32 %a, ptr %dst, align 4
98+
ret void
99+
kill:
100+
unreachable
101+
}

0 commit comments

Comments
 (0)