Skip to content

Commit 3727f8e

Browse files
fhossein-quicrutkoorBrendon CahoonabhikranSumanth Gundapaneni
committed
Hexagon QFP Optimizer
Patch By: Fateme Hosseini Co-authored-by: Rahul Utkoor <[email protected]> Co-authored-by: Brendon Cahoon <[email protected]> Co-authored-by: abhikran <[email protected]> Co-authored-by: Sumanth Gundapaneni <[email protected]> Co-authored-by: Ikhlas Ajbar <[email protected]> Co-authored-by: Anirudh Sundar <[email protected]> Co-authored-by: Yashas Andaluri <[email protected]> Co-authored-by: quic-santdas <[email protected]>
1 parent d58b5a6 commit 3727f8e

File tree

12 files changed

+660
-0
lines changed

12 files changed

+660
-0
lines changed

llvm/lib/Target/Hexagon/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ add_llvm_target(HexagonCodeGen
5454
HexagonOptAddrMode.cpp
5555
HexagonOptimizeSZextends.cpp
5656
HexagonPeephole.cpp
57+
HexagonQFPoptimizer.cpp
5758
HexagonRDFOpt.cpp
5859
HexagonRegisterInfo.cpp
5960
HexagonSelectionDAGInfo.cpp

llvm/lib/Target/Hexagon/Hexagon.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ void initializeHexagonPeepholePass(PassRegistry &);
6767
void initializeHexagonSplitConst32AndConst64Pass(PassRegistry &);
6868
void initializeHexagonVectorPrintPass(PassRegistry &);
6969

70+
void initializeHexagonQFPoptimizerPass(PassRegistry &);
71+
7072
Pass *createHexagonLoopIdiomPass();
7173
Pass *createHexagonVectorLoopCarriedReuseLegacyPass();
7274

@@ -112,6 +114,7 @@ FunctionPass *createHexagonVectorCombineLegacyPass();
112114
FunctionPass *createHexagonVectorPrint();
113115
FunctionPass *createHexagonVExtract();
114116
FunctionPass *createHexagonExpandCondsets();
117+
FunctionPass *createHexagonQFPoptimizer();
115118

116119
} // end namespace llvm;
117120

Lines changed: 321 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,321 @@
1+
//===----- HexagonQFPOptimizer.cpp - Qualcomm-FP to IEEE-FP conversions
2+
// optimizer ------------------===//
3+
//
4+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
// Basic infrastructure for optimizing intermediate conversion instructions
11+
// generated while performing vector floating point operations.
12+
// Currently run at the start of code generation for Hexagon; it cleans
13+
// up redundant conversion instructions and replaces the uses of conversion
14+
// with appropriate machine operand. Liveness is preserved after this pass.
15+
//
16+
// @note: The redundant conversion instructions are not eliminated in this pass.
17+
// In this pass, we are only trying to replace the uses of conversion
18+
// instructions with its appropriate QFP instruction. We are leaving the job to
19+
// Dead instruction Elimination pass to remove redundant conversion
20+
// instructions.
21+
//
22+
// Brief overview of working of this QFP optimizer.
23+
// This version of Hexagon QFP optimizer basically iterates over each
24+
// instruction and checks whether it belongs to the Hexagon floating point HVX
25+
// arithmetic instruction category(Add, Sub, Mul). And then it finds the unique
26+
// definition for the machine operands corresponding to the instruction.
27+
//
28+
// Example:
29+
// MachineInstruction *MI be the HVX vadd instruction
30+
// MI -> $v0 = V6_vadd_sf $v1, $v2
31+
// MachineInstr *DefMI1 = MRI->getVRegDef(MI->getOperand(1).getReg());
32+
// MachineInstr *DefMI2 = MRI->getVRegDef(MI->getOperand(2).getReg());
33+
//
34+
// In the above example, DefMI1 and DefMI2 gives the unique definitions
35+
// corresponding to the operands ($v1 and $v2 respectively) of instruction MI.
36+
//
37+
// If neither of the definitions is a conversion instruction (V6_vconv_sf_qf32,
38+
// V6_vconv_hf_qf16), then it will skip optimizing the current instruction and
39+
// iterates over next instruction.
40+
//
41+
// If one of the definitions is a conversion instruction then our pass will replace
42+
// the arithmetic instruction with its corresponding mix variant.
43+
// In the above example, if $v1 is conversion instruction
44+
// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
45+
// After Transformation:
46+
// MI -> $v0 = V6_vadd_qf32_mix $v3, $v2 ($v1 is replaced with $v3)
47+
//
48+
// If both the definitions are conversion instructions then the instruction will
49+
// be replaced with its qf variant
50+
// In the above example, if $v1 and $v2 are conversion instructions
51+
// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
52+
// DefMI2 -> $v2 = V6_vconv_sf_qf32 $v4
53+
// After Transformation:
54+
// MI -> $v0 = V6_vadd_qf32 $v3, $v4 ($v1 is replaced with $v3, $v2 is replaced
55+
// with $v4)
56+
//
57+
// Currently, in this pass, we are not handling the case when the definitions
58+
// are PHI inst.
59+
//
60+
//===----------------------------------------------------------------------===//
61+
#define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass"
62+
63+
#include "Hexagon.h"
64+
#include "HexagonInstrInfo.h"
65+
#include "HexagonSubtarget.h"
66+
#include "llvm/ADT/SmallVector.h"
67+
#include "llvm/ADT/StringRef.h"
68+
#include "llvm/CodeGen/MachineBasicBlock.h"
69+
#include "llvm/CodeGen/MachineFunction.h"
70+
#include "llvm/CodeGen/MachineFunctionPass.h"
71+
#include "llvm/CodeGen/MachineInstr.h"
72+
#include "llvm/CodeGen/MachineOperand.h"
73+
#include "llvm/CodeGen/Passes.h"
74+
#include "llvm/Pass.h"
75+
#include "llvm/Support/CommandLine.h"
76+
#include "llvm/Support/Debug.h"
77+
#include "llvm/Support/raw_ostream.h"
78+
#include <map>
79+
#include <vector>
80+
81+
#define DEBUG_TYPE "hexagon-qfp-optimizer"
82+
83+
using namespace llvm;
84+
85+
// Command-line switch "disable-qfp-opt" (off by default). When it is set,
// runOnMachineFunction returns immediately without touching the function.
cl::opt<bool>
86+
DisableQFOptimizer("disable-qfp-opt", cl::init(false),
87+
cl::desc("Disable optimization of Qfloat operations."));
88+
89+
// Opcodes the pass looks at. runOnMachineFunction only hands an instruction to
// optimizeQfp when its opcode appears in this list; the two conversion opcodes
// V6_vconv_sf_qf32 and V6_vconv_hf_qf16 are listed here but are filtered out
// again by the caller before optimizeQfp is invoked.
std::vector<unsigned short> QFPInst = {
90+
Hexagon::V6_vadd_hf, Hexagon::V6_vadd_qf16,
91+
Hexagon::V6_vadd_qf16_mix, Hexagon::V6_vadd_qf32,
92+
Hexagon::V6_vadd_qf32_mix, Hexagon::V6_vadd_sf,
93+
Hexagon::V6_vconv_hf_qf16, Hexagon::V6_vconv_hf_qf32,
94+
Hexagon::V6_vconv_sf_qf32, Hexagon::V6_vmpy_qf16,
95+
Hexagon::V6_vmpy_qf16_hf, Hexagon::V6_vmpy_qf16_mix_hf,
96+
Hexagon::V6_vmpy_qf32, Hexagon::V6_vmpy_qf32_hf,
97+
Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16,
98+
Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vsub_hf,
99+
Hexagon::V6_vsub_qf16, Hexagon::V6_vsub_qf16_mix,
100+
Hexagon::V6_vsub_qf32, Hexagon::V6_vsub_qf32_mix,
101+
Hexagon::V6_vsub_sf};
102+
103+
// Opcode promotion table used by optimizeQfp. Looking an arithmetic opcode up
// once gives the opcode used when one source operand is taken directly from
// the input of a conversion (the "mix" form); looking that result up again
// gives the opcode used when both source operands are taken that way.
// V6_vmpy_qf32_sf is the exception: it maps straight to V6_vmpy_qf32, and
// optimizeQfp only rewrites it when both operands come from conversions.
std::map<unsigned short, unsigned short> QFPInstMap{
104+
{Hexagon::V6_vadd_hf, Hexagon::V6_vadd_qf16_mix},
105+
{Hexagon::V6_vadd_qf16_mix, Hexagon::V6_vadd_qf16},
106+
{Hexagon::V6_vadd_sf, Hexagon::V6_vadd_qf32_mix},
107+
{Hexagon::V6_vadd_qf32_mix, Hexagon::V6_vadd_qf32},
108+
{Hexagon::V6_vsub_hf, Hexagon::V6_vsub_qf16_mix},
109+
{Hexagon::V6_vsub_qf16_mix, Hexagon::V6_vsub_qf16},
110+
{Hexagon::V6_vsub_sf, Hexagon::V6_vsub_qf32_mix},
111+
{Hexagon::V6_vsub_qf32_mix, Hexagon::V6_vsub_qf32},
112+
{Hexagon::V6_vmpy_qf16_hf, Hexagon::V6_vmpy_qf16_mix_hf},
113+
{Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16},
114+
{Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf},
115+
{Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16},
116+
{Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}};
117+
118+
// Forward declarations of the pass factory (defined later in this file) and
// of the pass-registry initializer. Both are also declared in Hexagon.h,
// which this file includes.
namespace llvm {
119+
120+
FunctionPass *createHexagonQFPoptimizer();
121+
void initializeHexagonQFPoptimizerPass(PassRegistry &);
122+
123+
} // namespace llvm
124+
125+
namespace {
126+
127+
struct HexagonQFPoptimizer : public MachineFunctionPass {
128+
public:
129+
static char ID;
130+
131+
HexagonQFPoptimizer() : MachineFunctionPass(ID) {}
132+
133+
bool runOnMachineFunction(MachineFunction &MF) override;
134+
135+
bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB);
136+
137+
StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; }
138+
139+
void getAnalysisUsage(AnalysisUsage &AU) const override {
140+
AU.setPreservesCFG();
141+
MachineFunctionPass::getAnalysisUsage(AU);
142+
}
143+
144+
private:
145+
const HexagonSubtarget *HST = nullptr;
146+
const HexagonInstrInfo *HII = nullptr;
147+
const MachineRegisterInfo *MRI = nullptr;
148+
};
149+
150+
char HexagonQFPoptimizer::ID = 0;
151+
} // namespace
152+
153+
// Registers the pass under the name "hexagon-qfp-optimizer" with the
// description HEXAGON_QFP_OPTIMIZER.
// NOTE(review): this macro presumably provides the definition of
// initializeHexagonQFPoptimizerPass declared above, and the two `false`
// arguments presumably mean "not CFG-only" and "not an analysis" — confirm
// against the LLVM pass-registration macros.
INITIALIZE_PASS(HexagonQFPoptimizer, "hexagon-qfp-optimizer",
154+
HEXAGON_QFP_OPTIMIZER, false, false)
155+
156+
/// Factory for the QFP optimizer pass. Returns a freshly allocated pass
/// object; the caller takes it over (HexagonPassConfig hands it to addPass).
FunctionPass *llvm::createHexagonQFPoptimizer() {
  FunctionPass *QFPOpt = new HexagonQFPoptimizer();
  return QFPOpt;
}
159+
160+
/// Try to replace \p MI with a variant that reads qf operands directly.
///
/// The two source operands of \p MI are traced to their defining
/// instructions. If one or both are V6_vconv_sf_qf32 / V6_vconv_hf_qf16
/// conversions, a new instruction is inserted in front of \p MI that defines
/// the same result register but takes the conversion's input instead of its
/// output:
///   - both operands converted -> the full qf opcode,
///   - one operand converted   -> the "mix" opcode, with the qf value placed
///     first (not done for vsub when the converted operand is the second one,
///     and never done for V6_vmpy_qf32_sf).
/// The conversions themselves are left in place for a later dead-code pass.
///
/// \param MI  Candidate instruction (result, source 1, source 2).
/// \param MBB Block that contains \p MI.
/// \returns true if a replacement was inserted; the caller must then erase
///          \p MI. Returns false and changes nothing otherwise.
bool HexagonQFPoptimizer::optimizeQfp(MachineInstr *MI,
                                      MachineBasicBlock *MBB) {
  if (MI->getNumOperands() < 3)
    return false;

  // Look the opcode up with find(): operator[] would silently insert a zero
  // entry for opcodes that have no promotion (e.g. the already-qf forms) and
  // opcode 0 would then be used to build the replacement.
  const auto MixIt = QFPInstMap.find(MI->getOpcode());
  if (MixIt == QFPInstMap.end())
    return false;
  unsigned short InstTy = MixIt->second;

  MachineOperand &Res = MI->getOperand(0);
  const MachineOperand &LHS = MI->getOperand(1);
  const MachineOperand &RHS = MI->getOperand(2);
  // getVRegDef is only meaningful for virtual registers.
  if (!Res.isReg() || !LHS.isReg() || !RHS.isReg() ||
      !LHS.getReg().isVirtual() || !RHS.getReg().isVirtual())
    return false;

  // Get the reaching defs of MI, DefMI1 and DefMI2
  MachineInstr *DefMI1 = MRI->getVRegDef(LHS.getReg());
  MachineInstr *DefMI2 = MRI->getVRegDef(RHS.getReg());
  if (!DefMI1 || !DefMI2)
    return false;
  LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump();
             DefMI2->dump());

  // Get the reaching defs of DefMI
  MachineInstr *Inst1 = nullptr;
  MachineInstr *Inst2 = nullptr;
  if (DefMI1->getNumOperands() > 1 && DefMI1->getOperand(1).isReg() &&
      DefMI1->getOperand(1).getReg().isVirtual())
    Inst1 = MRI->getVRegDef(DefMI1->getOperand(1).getReg());

  if (DefMI2->getNumOperands() > 1 && DefMI2->getOperand(1).isReg() &&
      DefMI2->getOperand(1).getReg().isVirtual())
    Inst2 = MRI->getVRegDef(DefMI2->getOperand(1).getReg());

  // True if Inst exists and its first operand is a virtual register of the
  // HvxWR class; such producers are not rewritten.
  auto DefinesHvxWR = [this](const MachineInstr *Inst) {
    if (!Inst || Inst->getNumOperands() == 0)
      return false;
    const MachineOperand &Def = Inst->getOperand(0);
    return Def.isReg() && Def.getReg().isVirtual() &&
           MRI->getRegClass(Def.getReg()) == &Hexagon::HvxWRRegClass;
  };

  const unsigned Def1OP = DefMI1->getOpcode();
  const unsigned Def2OP = DefMI2->getOpcode();

  MachineInstrBuilder MIB;
  // Check if both the reaching defs of MI are qf to sf/hf conversions
  if ((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
       Def2OP == Hexagon::V6_vconv_sf_qf32) ||
      (Def1OP == Hexagon::V6_vconv_hf_qf16 &&
       Def2OP == Hexagon::V6_vconv_hf_qf16)) {

    // If the reaching defs of DefMI are W register type, we return
    if (DefinesHvxWR(Inst1) || DefinesHvxWR(Inst2))
      return false;

    // Two conversions removed: step from the mix opcode to the full qf
    // opcode. V6_vmpy_qf32_sf already maps straight to its qf form. Resolve
    // this before touching any kill flag so a failed lookup changes nothing.
    if (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf) {
      const auto QfIt = QFPInstMap.find(InstTy);
      if (QfIt == QFPInstMap.end())
        return false;
      InstTy = QfIt->second;
    }

    // Analyze the use operands of the conversion to get their KILL status.
    // The kill moves from the conversion to the new, later use.
    MachineOperand &Src1 = DefMI1->getOperand(1);
    MachineOperand &Src2 = DefMI2->getOperand(1);

    const unsigned Op0F = getKillRegState(Src1.isKill());
    Src1.setIsKill(false);

    const unsigned Op1F = getKillRegState(Src2.isKill());
    Src2.setIsKill(false);

    MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
              .addReg(Src1.getReg(), Op0F, Src1.getSubReg())
              .addReg(Src2.getReg(), Op1F, Src2.getSubReg());
    LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
    return true;

    // Check if left operand's reaching def is a conversion to sf/hf
  } else if (((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
               Def2OP != Hexagon::V6_vconv_sf_qf32) ||
              (Def1OP == Hexagon::V6_vconv_hf_qf16 &&
               Def2OP != Hexagon::V6_vconv_hf_qf16)) &&
             !DefMI2->isPHI() &&
             (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) {

    if (DefinesHvxWR(Inst1))
      return false;

    MachineOperand &Src1 = DefMI1->getOperand(1);
    MachineOperand &Src2 = MI->getOperand(2);

    const unsigned Op0F = getKillRegState(Src1.isKill());
    Src1.setIsKill(false);
    const unsigned Op1F = getKillRegState(Src2.isKill());
    MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
              .addReg(Src1.getReg(), Op0F, Src1.getSubReg())
              .addReg(Src2.getReg(), Op1F, Src2.getSubReg());
    LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
    return true;

    // Check if right operand's reaching def is a conversion to sf/hf
  } else if (((Def1OP != Hexagon::V6_vconv_sf_qf32 &&
               Def2OP == Hexagon::V6_vconv_sf_qf32) ||
              (Def1OP != Hexagon::V6_vconv_hf_qf16 &&
               Def2OP == Hexagon::V6_vconv_hf_qf16)) &&
             !DefMI1->isPHI() &&
             (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) {
    // The second operand of original instruction is converted.
    // In "mix" instructions, "qf" operand is always the first operand.

    // Caveat: vsub is not commutative w.r.t operands.
    if (InstTy == Hexagon::V6_vsub_qf16_mix ||
        InstTy == Hexagon::V6_vsub_qf32_mix)
      return false;

    if (DefinesHvxWR(Inst2))
      return false;

    MachineOperand &Src1 = MI->getOperand(1);
    MachineOperand &Src2 = DefMI2->getOperand(1);

    const unsigned Op1F = getKillRegState(Src2.isKill());
    Src2.setIsKill(false);
    const unsigned Op0F = getKillRegState(Src1.isKill());
    MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
              .addReg(Src2.getReg(), Op1F,
                      Src2.getSubReg()) // Notice the operands are flipped.
              .addReg(Src1.getReg(), Op0F, Src1.getSubReg());
    LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
    return true;
  }

  return false;
}
282+
283+
/// Pass entry point. Walks every instruction of \p MF, offers each listed
/// QFP arithmetic instruction to optimizeQfp, and erases the original when a
/// replacement was inserted.
///
/// Nothing is done when the pass is disabled on the command line, when the
/// subtarget lacks HVX v68 ops or packet support, or when the function is
/// skipped.
///
/// \returns true if at least one instruction was replaced.
bool HexagonQFPoptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (DisableQFOptimizer)
    return false;

  HST = &MF.getSubtarget<HexagonSubtarget>();
  const bool Applicable = HST->useHVXV68Ops() && HST->usePackets() &&
                          !skipFunction(MF.getFunction());
  if (!Applicable)
    return false;

  HII = HST->getInstrInfo();
  MRI = &MF.getRegInfo();

  LLVM_DEBUG(dbgs() << "\n=== Running QFPOptimzer Pass for : " << MF.getName()
                    << " Optimize intermediate conversions ===\n");

  bool Changed = false;
  for (MachineBasicBlock &MBB : MF) {
    // Early-increment iteration: the current instruction may be erased.
    for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
      const unsigned Opc = MI.getOpcode();
      if (llvm::find(QFPInst, Opc) == QFPInst.end())
        continue;
      // The conversions themselves are never rewritten.
      if (Opc == Hexagon::V6_vconv_sf_qf32 || Opc == Hexagon::V6_vconv_hf_qf16)
        continue;

      LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI.dump());
      if (!optimizeQfp(&MI, &MBB))
        continue;

      MI.eraseFromParent();
      LLVM_DEBUG(dbgs() << "\t....Removing....");
      Changed = true;
    }
  }
  return Changed;
}

llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,8 @@ LLVMInitializeHexagonTarget() {
220220
initializeHexagonPeepholePass(PR);
221221
initializeHexagonSplitConst32AndConst64Pass(PR);
222222
initializeHexagonVectorPrintPass(PR);
223+
224+
initializeHexagonQFPoptimizerPass(PR);
223225
}
224226

225227
HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
@@ -386,6 +388,7 @@ bool HexagonPassConfig::addInstSelector() {
386388
addPass(createHexagonGenInsert());
387389
if (EnableEarlyIf)
388390
addPass(createHexagonEarlyIfConversion());
391+
addPass(createHexagonQFPoptimizer());
389392
}
390393

391394
return false;
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
; RUN: llc -mtriple=hexagon -mattr=+hvxv68,+hvx,+hvx-length128b < %s | FileCheck %s
2+
3+
; Test that the Qfloat optimization pass doesn't crash due to invalid
4+
; instructions.
5+
6+
; CHECK: v{{[0-9]+}}.hf = v{{[0-9]:[0-9]}}.qf32
7+
8+
define void @test() local_unnamed_addr #0 {
9+
entry:
10+
br label %for.body
11+
12+
for.body:
13+
%optr.068 = phi <32 x i32>* [ %ptr, %entry ], [ %incdec.ptr6, %for.body ]
14+
%0 = tail call <32 x i32> @llvm.hexagon.V6.vconv.hf.qf32.128B(<64 x i32> %input64) #2
15+
%1 = tail call <32 x i32> @llvm.hexagon.V6.vdealh.128B(<32 x i32> %0) #2
16+
%2 = tail call <128 x i1> @llvm.hexagon.V6.vgth.128B(<32 x i32> %input32a, <32 x i32> %1) #2
17+
%3 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %2, <32 x i32> %input32b, <32 x i32> %input32c) #2
18+
%4 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %3, <32 x i32> %input32d) #2
19+
%5 = tail call <32 x i32> @llvm.hexagon.V6.vpackhub.sat.128B(<32 x i32> %input32e, <32 x i32> %4) #2
20+
store <32 x i32> %5, <32 x i32>* %optr.068, align 1
21+
%incdec.ptr6 = getelementptr inbounds <32 x i32>, <32 x i32>* %optr.068, i32 1
22+
br label %for.body
23+
}
24+
25+
declare <32 x i32> @llvm.hexagon.V6.vdealh.128B(<32 x i32>) #1
26+
declare <32 x i32> @llvm.hexagon.V6.vconv.hf.qf32.128B(<64 x i32>) #1
27+
declare <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32>, <32 x i32>) #1
28+
declare <32 x i32> @llvm.hexagon.V6.vpackhub.sat.128B(<32 x i32>, <32 x i32>) #1
29+
declare <128 x i1> @llvm.hexagon.V6.vgth.128B(<32 x i32>, <32 x i32>) #1
30+
declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1

0 commit comments

Comments
 (0)