Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 21 additions & 6 deletions llvm/docs/LangRef.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9793,8 +9793,12 @@ The '``callbr``' instruction causes control to transfer to a specified
function, with the possibility of control flow transfer to either the
'``fallthrough``' label or one of the '``indirect``' labels.

This instruction should only be used to implement the "goto" feature of gcc
style inline assembly. Any other usage is an error in the IR verifier.
This instruction can currently only be used

#. to implement the "goto" feature of gcc style inline assembly or
#. to call selected intrinsics.

Any other usage is an error in the IR verifier.

Note that in order to support outputs along indirect edges, LLVM may need to
split critical edges, which may require synthesizing a replacement block for
Expand Down Expand Up @@ -9843,7 +9847,7 @@ This instruction requires several arguments:
indicates the function accepts a variable number of arguments, the
extra arguments can be specified.
#. '``fallthrough label``': the label reached when the inline assembly's
execution exits the bottom.
execution exits the bottom / the intrinsic call returns.
#. '``indirect labels``': the labels reached when a callee transfers control
to a location other than the '``fallthrough label``'. Label constraints
refer to these destinations.
Expand All @@ -9861,9 +9865,12 @@ flow goes after the call.
The output values of a '``callbr``' instruction are available both in the
'``fallthrough``' block and in any '``indirect``' block(s).

The only use of this today is to implement the "goto" feature of gcc inline
assembly where additional labels can be provided as locations for the inline
assembly to jump to.
The only current uses of this are:

#. to implement the "goto" feature of gcc inline assembly, where additional
labels can be provided as locations for the inline assembly to jump to.
#. to support selected intrinsics which manipulate control flow and should
be chained to specific terminators, such as '``unreachable``'.

Example:
""""""""
Expand All @@ -9878,6 +9885,14 @@ Example:
<result> = callbr i32 asm "", "=r,r,!i"(i32 %x)
to label %fallthrough [label %indirect]

; intrinsic which should be followed by unreachable (the order of the
; blocks after the callbr instruction doesn't matter)
callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
cont:
...
kill:
unreachable

.. _i_resume:

'``resume``' Instruction
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,8 @@ class IRTranslator : public MachineFunctionPass {
bool translateInvoke(const User &U, MachineIRBuilder &MIRBuilder);

bool translateCallBr(const User &U, MachineIRBuilder &MIRBuilder);
bool translateCallBrIntrinsic(const CallBrInst &I,
MachineIRBuilder &MIRBuilder);

bool translateLandingPad(const User &U, MachineIRBuilder &MIRBuilder);

Expand Down
35 changes: 32 additions & 3 deletions llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2828,7 +2828,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
IsTgtMemIntrinsic ? &Info : nullptr);
}

/// Translate a call to an intrinsic.
/// Translate a call or callbr to an intrinsic.
/// Depending on whether TLI->getTgtMemIntrinsic() is true, TgtMemIntrinsicInfo
/// is a pointer to the correspondingly populated IntrinsicInfo object.
/// Otherwise, this pointer is null.
Expand Down Expand Up @@ -3056,10 +3056,39 @@ bool IRTranslator::translateInvoke(const User &U,
return true;
}

/// Translate a callbr instruction whose callee is an intrinsic.
///
/// The intrinsics currently supported by callbr are implicit control flow
/// intrinsics such as amdgcn.kill. Inline asm callees are rejected.
///
/// \param U the instruction to translate; it is cast to a CallBrInst.
/// \param MIRBuilder builder that is forwarded to translateIntrinsic() and
///        whose current block gets the successor edge.
/// \returns true if translateIntrinsic() succeeded and the default destination
///          was registered as successor; false for values containing bf16
///          types, for inline asm, or if translateIntrinsic() failed.
bool IRTranslator::translateCallBr(const User &U,
MachineIRBuilder &MIRBuilder) {
// NOTE(review): the FIXME and the unconditional return directly below look
// like the removed side of this diff hunk (the old stub) -- confirm.
// FIXME: Implement this.
return false;
if (containsBF16Type(U))
return false; // see translateCall

const CallBrInst &I = cast<CallBrInst>(U);
// Block that currently receives instructions; it gets the outgoing edge.
MachineBasicBlock *CallBrMBB = &MIRBuilder.getMBB();

// FIXME: inline asm is not yet supported for callbr in GlobalISel. As soon as
// we add support, we need to handle the indirect asm targets, see
// SelectionDAGBuilder::visitCallBr().
Intrinsic::ID IID = I.getIntrinsicID();
if (I.isInlineAsm())
return false;
if (!translateIntrinsic(I, IID, MIRBuilder))
return false;

// Retrieve successors.
// Dests is seeded with the default destination and is not consulted again in
// this function.
SmallPtrSet<BasicBlock *, 8> Dests = {I.getDefaultDest()};
MachineBasicBlock *Return = &getMBB(*I.getDefaultDest());

// Update successor info. Only the default destination is added here, with
// probability one; no edges are added for the indirect destinations.
addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
// TODO: In most cases an intrinsic callbr has exactly one indirect target,
// which is unreachable. Once this changes, we might need to enhance
// Target->setIsInlineAsmBrIndirectTarget or add something similar for
// intrinsic indirect branches.
CallBrMBB->normalizeSuccProbs();

return true;
}

bool IRTranslator::translateLandingPad(const User &U,
Expand Down
68 changes: 49 additions & 19 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3507,16 +3507,39 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
DAG.getBasicBlock(Return)));
}

/// The intrinsics currently supported by callbr are implicit control flow
/// intrinsics such as amdgcn.kill.
/// - they should be called (no "dontcall-" attributes)
/// - they do not touch memory on the target (= !TLI.getTgtMemIntrinsic())
/// - they do not need custom argument handling (no
/// TLI.CollectTargetIntrinsicOperands())
void SelectionDAGBuilder::visitCallBrIntrinsic(const CallBrInst &I) {
auto [HasChain, OnlyLoad] = getTargetIntrinsicCallProperties(I);

SmallVector<SDValue, 8> Ops =
getTargetIntrinsicOperands(I, HasChain, OnlyLoad);
SDVTList VTs = getTargetIntrinsicVTList(I, HasChain);

// Create the node.
SDValue Result =
getTargetNonMemIntrinsicNode(*I.getType(), HasChain, Ops, VTs);
Result = handleTargetIntrinsicRet(I, HasChain, OnlyLoad, Result);

setValue(&I, Result);
}

void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
MachineBasicBlock *CallBrMBB = FuncInfo.MBB;

// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
failForInvalidBundles(I, "callbrs",
{LLVMContext::OB_deopt, LLVMContext::OB_funclet});

assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr");
visitInlineAsm(I);
if (I.isInlineAsm()) {
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
failForInvalidBundles(I, "callbrs",
{LLVMContext::OB_deopt, LLVMContext::OB_funclet});
visitInlineAsm(I);
} else {
visitCallBrIntrinsic(I);
}
CopyToExportRegsIfNeeded(&I);

// Retrieve successors.
Expand All @@ -3526,18 +3549,25 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {

// Update successor info.
addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
for (BasicBlock *Dest : I.getIndirectDests()) {
MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
Target->setIsInlineAsmBrIndirectTarget();
// If we introduce a type of asm goto statement that is permitted to use an
// indirect call instruction to jump to its labels, then we should add a
// call to Target->setMachineBlockAddressTaken() here, to mark the target
// block as requiring a BTI.

Target->setLabelMustBeEmitted();
// Don't add duplicate machine successors.
if (Dests.insert(Dest).second)
addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
// TODO: For most of the cases where there is an intrinsic callbr, we're
// having exactly one indirect target, which will be unreachable. As soon as
// this changes, we might need to enhance
// Target->setIsInlineAsmBrIndirectTarget or add something similar for
// intrinsic indirect branches.
if (I.isInlineAsm()) {
for (BasicBlock *Dest : I.getIndirectDests()) {
MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
Target->setIsInlineAsmBrIndirectTarget();
// If we introduce a type of asm goto statement that is permitted to use
// an indirect call instruction to jump to its labels, then we should add
// a call to Target->setMachineBlockAddressTaken() here, to mark the
// target block as requiring a BTI.

Target->setLabelMustBeEmitted();
// Don't add duplicate machine successors.
if (Dests.insert(Dest).second)
addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
}
}
CallBrMBB->normalizeSuccProbs();

Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -551,10 +551,12 @@ class SelectionDAGBuilder {
private:
// These all get lowered before this pass.
void visitInvoke(const InvokeInst &I);
void visitCallBr(const CallBrInst &I);
void visitCallBrLandingPad(const CallInst &I);
void visitResume(const ResumeInst &I);

void visitCallBr(const CallBrInst &I);
void visitCallBrIntrinsic(const CallBrInst &I);

void visitUnary(const User &I, unsigned Opcode);
void visitFNeg(const User &I) { visitUnary(I, ISD::FNEG); }

Expand Down
33 changes: 28 additions & 5 deletions llvm/lib/IR/Verifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3375,11 +3375,34 @@ void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
}

/// Verify a callbr instruction.
///
/// Two kinds of callee are handled: inline asm (asm-goto) and selected
/// intrinsics (currently only amdgcn_kill). In both cases the generic
/// terminator checks run at the end via visitTerminator().
void Verifier::visitCallBrInst(CallBrInst &CBI) {
// NOTE(review): the next three statements look like the removed side of this
// diff hunk (the old asm-goto-only checks) -- confirm.
Check(CBI.isInlineAsm(), "Callbr is currently only used for asm-goto!", &CBI);
const InlineAsm *IA = cast<InlineAsm>(CBI.getCalledOperand());
Check(!IA->canThrow(), "Unwinding from Callbr is not allowed");
if (!CBI.isInlineAsm()) {
// Callee is not inline asm: a called function is required and operand
// bundles are not accepted.
Check(CBI.getCalledFunction(),
"Callbr: indirect function / invalid signature");
Check(!CBI.hasOperandBundles(),
"Callbr for intrinsics currently doesn't support operand bundles");

// Per-intrinsic rules; any intrinsic ID without a case is reported as
// unsupported.
switch (CBI.getIntrinsicID()) {
case Intrinsic::amdgcn_kill: {
Check(CBI.getNumIndirectDests() == 1,
"Callbr amdgcn_kill only supports one indirect dest");
// The instruction at the start of the single indirect destination has to be
// either an unreachable or a call to the amdgcn_unreachable intrinsic.
bool Unreachable = isa<UnreachableInst>(CBI.getIndirectDest(0)->begin());
CallInst *Call = dyn_cast<CallInst>(CBI.getIndirectDest(0)->begin());
Check(Unreachable || (Call && Call->getIntrinsicID() ==
Intrinsic::amdgcn_unreachable),
"Callbr amdgcn_kill indirect dest needs to be unreachable");
break;
}
default:
CheckFailed(
"Callbr currently only supports asm-goto and selected intrinsics");
}
// Run the intrinsic-call checks on the callbr as well.
visitIntrinsicCall(CBI.getIntrinsicID(), CBI);
} else {
// Inline asm callee: it must not be able to throw.
const InlineAsm *IA = cast<InlineAsm>(CBI.getCalledOperand());
Check(!IA->canThrow(), "Unwinding from Callbr is not allowed");

// NOTE(review): the duplicated call below looks like the old and the new
// (re-indented) side of the same diff line -- confirm.
verifyInlineAsmCall(CBI);
verifyInlineAsmCall(CBI);
}
visitTerminator(CBI);
}

Expand Down Expand Up @@ -5475,7 +5498,7 @@ void Verifier::visitInstruction(Instruction &I) {
(CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i)) ||
IsAttachedCallOperand(F, CBI, i)),
"Cannot take the address of an intrinsic!", &I);
Check(!F->isIntrinsic() || isa<CallInst>(I) ||
Check(!F->isIntrinsic() || isa<CallInst>(I) || isa<CallBrInst>(I) ||
F->getIntrinsicID() == Intrinsic::donothing ||
F->getIntrinsicID() == Intrinsic::seh_try_begin ||
F->getIntrinsicID() == Intrinsic::seh_try_end ||
Expand Down
20 changes: 20 additions & 0 deletions llvm/test/Assembler/callbr.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
; RUN: llvm-as < %s | llvm-dis | FileCheck %s

declare void @llvm.amdgcn.kill(i1)

; Round-trip a callbr to the amdgcn.kill intrinsic through llvm-as | llvm-dis.
; Both destination labels are captured on the callbr line and re-used at the
; block labels, so the check does not depend on the literal block names.
define void @test_kill(i1 %c) {
; CHECK-LABEL: define void @test_kill(
; CHECK-SAME: i1 [[C:%.*]]) {
; CHECK-NEXT: callbr void @llvm.amdgcn.kill(i1 [[C]])
; CHECK-NEXT: to label %[[CONT:.*]] [label %[[KILL:.*]]]
; CHECK: [[KILL]]:
; CHECK-NEXT: unreachable
; CHECK: [[CONT]]:
; CHECK-NEXT: ret void
;
callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
kill:
unreachable
cont:
ret void
}
101 changes: 101 additions & 0 deletions llvm/test/CodeGen/AMDGPU/callbr-intrinsics.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck --check-prefix=GISEL %s

; callbr to amdgcn.kill with a load before the callbr and the store of the
; loaded value in the fallthrough block (%cont). The indirect block (%kill)
; is listed before the fallthrough block and only contains unreachable.
define void @test_kill(ptr %src, ptr %dst, i1 %c) {
; CHECK-LABEL: test_kill:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dword v0, v[0:1]
; CHECK-NEXT: v_and_b32_e32 v1, 1, v4
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
; CHECK-NEXT: s_mov_b64 s[4:5], exec
; CHECK-NEXT: s_andn2_b64 s[6:7], exec, vcc
; CHECK-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
; CHECK-NEXT: s_cbranch_scc0 .LBB0_2
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: s_and_b64 exec, exec, s[4:5]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_store_dword v[2:3], v0
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: s_endpgm
;
; GISEL-LABEL: test_kill:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: flat_load_dword v0, v[0:1]
; GISEL-NEXT: v_and_b32_e32 v1, 1, v4
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
; GISEL-NEXT: s_mov_b64 s[4:5], exec
; GISEL-NEXT: s_andn2_b64 s[6:7], exec, vcc
; GISEL-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
; GISEL-NEXT: s_cbranch_scc0 .LBB0_2
; GISEL-NEXT: ; %bb.1:
; GISEL-NEXT: s_and_b64 exec, exec, s[4:5]
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GISEL-NEXT: flat_store_dword v[2:3], v0
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_setpc_b64 s[30:31]
; GISEL-NEXT: .LBB0_2:
; GISEL-NEXT: s_mov_b64 exec, 0
; GISEL-NEXT: s_endpgm
%a = load i32, ptr %src, align 4
callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
kill:
unreachable
cont:
store i32 %a, ptr %dst, align 4
ret void
}

; Same IR as @test_kill, but the fallthrough block (%cont) is listed before
; the indirect block (%kill). The expected instruction sequence is the same
; as for @test_kill; only the .LBB label numbers differ.
define void @test_kill_block_order(ptr %src, ptr %dst, i1 %c) {
; CHECK-LABEL: test_kill_block_order:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dword v0, v[0:1]
; CHECK-NEXT: v_and_b32_e32 v1, 1, v4
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
; CHECK-NEXT: s_mov_b64 s[4:5], exec
; CHECK-NEXT: s_andn2_b64 s[6:7], exec, vcc
; CHECK-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
; CHECK-NEXT: s_cbranch_scc0 .LBB1_2
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: s_and_b64 exec, exec, s[4:5]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_store_dword v[2:3], v0
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: s_endpgm
;
; GISEL-LABEL: test_kill_block_order:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: flat_load_dword v0, v[0:1]
; GISEL-NEXT: v_and_b32_e32 v1, 1, v4
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
; GISEL-NEXT: s_mov_b64 s[4:5], exec
; GISEL-NEXT: s_andn2_b64 s[6:7], exec, vcc
; GISEL-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
; GISEL-NEXT: s_cbranch_scc0 .LBB1_2
; GISEL-NEXT: ; %bb.1:
; GISEL-NEXT: s_and_b64 exec, exec, s[4:5]
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GISEL-NEXT: flat_store_dword v[2:3], v0
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_setpc_b64 s[30:31]
; GISEL-NEXT: .LBB1_2:
; GISEL-NEXT: s_mov_b64 exec, 0
; GISEL-NEXT: s_endpgm
%a = load i32, ptr %src, align 4
callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
cont:
store i32 %a, ptr %dst, align 4
ret void
kill:
unreachable
}
Loading