1
/*========================== begin_copyright_notice ============================

Copyright (C) 2025 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/
8
+
9
#include "CallMergerPass.hpp"

#include "CodeGenPublic.h"
#include "Compiler/CISACodeGen/helper.h"
#include "Compiler/IGCPassSupport.h"
#include "llvmWrapper/IR/BasicBlock.h"

#include "Probe/Assertion.h"
#include "common/LLVMWarningsPush.hpp"
#include <llvm/ADT/DenseMap.h>
#include <llvm/ADT/MapVector.h>
#include <llvm/ADT/SmallPtrSet.h>
#include <llvm/ADT/SmallVector.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/Instruction.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/Use.h>
#include <llvm/Pass.h>
#include "common/LLVMWarningsPop.hpp"

using namespace IGC;
using namespace llvm;
30
+
31
+ // Register pass to igc-opt
32
+ namespace IGC
33
+ {
34
+ #define PASS_FLAG " call-merger-pass"
35
+ #define PASS_DESCRIPTION \
36
+ " Merge mutually exclusive calls to enable further inlining."
37
+ #define PASS_CFG_ONLY false
38
+ #define PASS_ANALYSIS false
39
+ IGC_INITIALIZE_PASS_BEGIN (CallMerger, PASS_FLAG, PASS_DESCRIPTION,
40
+ PASS_CFG_ONLY, PASS_ANALYSIS)
41
+ IGC_INITIALIZE_PASS_DEPENDENCY (CodeGenContextWrapper)
42
+ IGC_INITIALIZE_PASS_DEPENDENCY (EstimateFunctionSize)
43
+ IGC_INITIALIZE_PASS_END (CallMerger, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY,
44
+ PASS_ANALYSIS)
45
+ } // namespace IGC
46
+
47
+ using CallSiteMap = DenseMap<Function *, SmallVector<CallInst *, 2 >>;
48
+
49
+ namespace {
50
+ CallSiteMap collectAllCallSites (Function &F) {
51
+ CallSiteMap callSites;
52
+ for (auto &BB : F) {
53
+ for (auto &I : BB) {
54
+ if (auto *callInst = dyn_cast<CallInst>(&I)) {
55
+ auto *calledFunc = callInst->getCalledFunction ();
56
+ if (calledFunc && !calledFunc->isIntrinsic ()) {
57
+ callSites[calledFunc].push_back (callInst);
58
+ }
59
+ }
60
+ }
61
+ }
62
+ return callSites;
63
+ }
64
+
65
+ void setNewTerminator (BasicBlock *oldBB, BasicBlock *newBB) {
66
+ auto *oldTerminator = oldBB->getTerminator ();
67
+ oldTerminator->eraseFromParent ();
68
+ IGCLLVM::pushBackInstruction (oldBB, BranchInst::Create (newBB));
69
+ }
70
+
71
+ // We assume that BBs with call instructions have terminator with single successor
72
+ // and that the list of uses of both calls is the same.
73
+ void mergeCalls (Function* F, CallInst *call1, CallInst *call2) {
74
+ auto * parentBB1 = call1->getParent ();
75
+ auto * parentBB2 = call2->getParent ();
76
+ auto * successorBB = parentBB1->getSingleSuccessor ();
77
+
78
+ auto * newBB = llvm::BasicBlock::Create (F->getContext (), " mergedCallsBB" , F, successorBB);
79
+ llvm::IRBuilder<> Builder (newBB);
80
+
81
+ IGC_ASSERT (call1->arg_size () == call2->arg_size ());
82
+
83
+ SmallVector<Value*, 4 > args;
84
+ for (unsigned i = 0 ; i < call1->arg_size (); ++i) {
85
+ auto *arg1 = call1->getArgOperand (i);
86
+ auto *arg2 = call2->getArgOperand (i);
87
+ if (arg1 == arg2) {
88
+ args.push_back (arg1);
89
+ continue ;
90
+ }
91
+ auto *PN = Builder.CreatePHI (arg1->getType (), 2 );
92
+ PN->addIncoming (arg1, parentBB1);
93
+ PN->addIncoming (arg2, parentBB2);
94
+ args.push_back (PN);
95
+ }
96
+ auto * newCall = Builder.CreateCall (call1->getCalledFunction (), args);
97
+ newCall->setCallingConv (call1->getCallingConv ());
98
+ newCall->setAttributes (call1->getAttributes ());
99
+ newCall->setTailCall (call1->isTailCall ());
100
+ Builder.CreateBr (successorBB);
101
+
102
+ setNewTerminator (parentBB1, newBB);
103
+ setNewTerminator (parentBB2, newBB);
104
+ for (auto & u: call1->uses ()) {
105
+ auto *userI = cast<Instruction>(u.getUser ());
106
+ userI->replaceUsesOfWith (call1, newCall);
107
+ }
108
+ for (auto & u: call2->uses ()) {
109
+ auto *userI = cast<Instruction>(u.getUser ());
110
+ userI->replaceUsesOfWith (call2, newCall);
111
+ }
112
+ call1->eraseFromParent ();
113
+ call2->eraseFromParent ();
114
+ }
115
+
116
+ bool haveSingleCommonSuccessor (CallInst *call1, CallInst *call2) {
117
+ auto *successor1 = call1->getParent ()->getSingleSuccessor ();
118
+ auto *successor2 = call2->getParent ()->getSingleSuccessor ();
119
+ if (!successor1 || !successor2 || successor1 != successor2) {
120
+ return false ;
121
+ }
122
+ return true ;
123
+ }
124
+
125
+ bool isAfterInstInBB (Instruction* inst1, Instruction* inst2){
126
+ for (auto & I : *inst1->getParent ()) {
127
+ if (&I == inst1) {
128
+ return false ;
129
+ }
130
+ if (&I == inst2) {
131
+ return true ;
132
+ }
133
+ }
134
+ return false ;
135
+ }
136
+
137
+ bool hasUsesInCurrentBB (CallInst *call) {
138
+ auto *currentBB = call->getParent ();
139
+
140
+ // Check if call results is used in same block as call
141
+ for (auto *user : call->users ()) {
142
+ auto * userI = cast<Instruction>(user);
143
+ if (userI->getParent () == currentBB) {
144
+ return true ;
145
+ }
146
+ }
147
+
148
+ // Check if any non const argument is used in call block
149
+ // after call
150
+ for (auto & arg : call->args ()) {
151
+ if (!arg->getType ()->isPointerTy ()) {
152
+ continue ;
153
+ }
154
+ for (auto *user : arg->users ()) {
155
+ if (auto * userI = dyn_cast<Instruction>(user)) {
156
+ if (userI == call || userI->getParent () != currentBB) {
157
+ continue ;
158
+ }
159
+ if (isAfterInstInBB (call, userI)) {
160
+ continue ;
161
+ }
162
+ return true ;
163
+ }
164
+ }
165
+ }
166
+ return false ;
167
+ }
168
+
169
+ bool hasSameUsesAs (CallInst *call1, CallInst *call2) {
170
+ if (call1->getNumUses () != call2->getNumUses ()) {
171
+ return false ;
172
+ }
173
+
174
+ for (auto &use1 : call1->uses ()) {
175
+ bool matched = false ;
176
+ for (auto &use2 : call2->uses ()) {
177
+ if (use1 == use2) {
178
+ matched = true ;
179
+ break ;
180
+ }
181
+ }
182
+ if (!matched) {
183
+ return false ;
184
+ }
185
+ }
186
+ return true ;
187
+ }
188
+
189
+ void filterCallSites (CallSiteMap &callSites, EstimateFunctionSize *EFS) {
190
+ SmallVector<Function*, 4 > elementsToErase;
191
+ size_t PerFuncThreshold = IGC_GET_FLAG_VALUE (SubroutineInlinerThreshold);
192
+
193
+ for (const auto &[calledFunc, callInsts] : callSites) {
194
+ if (callInsts.size () != 2 ) {
195
+ elementsToErase.push_back (calledFunc);
196
+ continue ;
197
+ }
198
+
199
+ // We don't need to process function that can't get inlined
200
+ if (calledFunc->hasFnAttribute (llvm::Attribute::NoInline) ||
201
+ calledFunc->hasFnAttribute (" igc-force-stackcall" ) ||
202
+ calledFunc->hasFnAttribute (" KMPLOCK" )){
203
+ elementsToErase.push_back (calledFunc);
204
+ continue ;
205
+ }
206
+
207
+ // We can skip functions that are small enough to be inlined.
208
+ if (EFS->getExpandedSize (calledFunc) <= PerFuncThreshold) {
209
+ elementsToErase.push_back (calledFunc);
210
+ continue ;
211
+ }
212
+
213
+ // We can merge calls with common successor, without result or args having uses in
214
+ // call block. We also only merge function calls with same use list.
215
+ if (!haveSingleCommonSuccessor (callInsts[0 ], callInsts[1 ]) ||
216
+ hasUsesInCurrentBB (callInsts[0 ]) ||
217
+ hasUsesInCurrentBB (callInsts[1 ]) ||
218
+ !hasSameUsesAs (callInsts[0 ], callInsts[1 ])) {
219
+ elementsToErase.push_back (calledFunc);
220
+ continue ;
221
+ }
222
+ }
223
+
224
+ for (auto *calledFunc : elementsToErase) {
225
+ callSites.erase (calledFunc);
226
+ }
227
+ }
228
+ } // anonymous namespace
229
+
230
+ char CallMerger::ID = 0 ;
231
+
232
+ CallMerger::CallMerger () : ModulePass(ID) {
233
+ initializeCallMergerPass (*PassRegistry::getPassRegistry ());
234
+ }
235
+
236
+ void CallMerger::getAnalysisUsage (llvm::AnalysisUsage &AU) const {
237
+ AU.addRequired <CodeGenContextWrapper>();
238
+ AU.addRequired <EstimateFunctionSize>();
239
+ }
240
+
241
+ bool CallMerger::runOnFunction (Function& F) {
242
+ auto callSites = collectAllCallSites (F);
243
+
244
+ filterCallSites (callSites, EFS);
245
+ if (callSites.empty ()) {
246
+ return false ;
247
+ }
248
+
249
+ for (auto &[calledFunc, callInsts] : callSites) {
250
+ mergeCalls (&F, callInsts[0 ], callInsts[1 ]);
251
+ }
252
+
253
+ return true ;
254
+ }
255
+
256
+ bool CallMerger::runOnModule (Module &M) {
257
+ EFS = &getAnalysis<EstimateFunctionSize>();
258
+ CTX = getAnalysis<CodeGenContextWrapper>().getCodeGenContext ();
259
+
260
+ // We don't need to do any work if all functions will get inlined
261
+ // or function control is not default.
262
+ if (IGC::ForceAlwaysInline (CTX) ||
263
+ CTX->m_enableSubroutine == false ||
264
+ getFunctionControl (CTX) != FLAG_FCALL_DEFAULT ||
265
+ !EFS->shouldEnableSubroutine ()) {
266
+ return false ;
267
+ }
268
+
269
+ bool changed = false ;
270
+ for (auto &F : M) {
271
+ if (F.isDeclaration () || F.isIntrinsic () || F.hasOptNone ()) {
272
+ continue ;
273
+ }
274
+ changed |= runOnFunction (F);
275
+ }
276
+ return changed;
277
+ }
0 commit comments