Skip to content

Commit f71503f

Browse files
committed
Hexagon QFP Optimizer
Patch By: Fateme Hosseini Co-authored-by: Rahul Utkoor <[email protected]> Co-authored-by: Brendon Cahoon <[email protected]> Co-authored-by: abhikran <[email protected]> Co-authored-by: Sumanth Gundapaneni <[email protected]> Co-authored-by: Ikhlas Ajbar <[email protected]> Co-authored-by: Anirudh Sundar <[email protected]> Co-authored-by: Yashas Andaluri <[email protected]> Co-authored-by: quic-santdas <[email protected]> Change-Id: If5a2262fd017e3e959377df35dcd3e6fc9490e00
1 parent d58b5a6 commit f71503f

File tree

12 files changed

+666
-0
lines changed

12 files changed

+666
-0
lines changed

llvm/lib/Target/Hexagon/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ add_llvm_target(HexagonCodeGen
5454
HexagonOptAddrMode.cpp
5555
HexagonOptimizeSZextends.cpp
5656
HexagonPeephole.cpp
57+
HexagonQFPoptimizer.cpp
5758
HexagonRDFOpt.cpp
5859
HexagonRegisterInfo.cpp
5960
HexagonSelectionDAGInfo.cpp

llvm/lib/Target/Hexagon/Hexagon.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ void initializeHexagonPeepholePass(PassRegistry &);
6767
void initializeHexagonSplitConst32AndConst64Pass(PassRegistry &);
6868
void initializeHexagonVectorPrintPass(PassRegistry &);
6969

70+
void initializeHexagonQFPoptimizerPass(PassRegistry &);
71+
7072
Pass *createHexagonLoopIdiomPass();
7173
Pass *createHexagonVectorLoopCarriedReuseLegacyPass();
7274

@@ -112,6 +114,7 @@ FunctionPass *createHexagonVectorCombineLegacyPass();
112114
FunctionPass *createHexagonVectorPrint();
113115
FunctionPass *createHexagonVExtract();
114116
FunctionPass *createHexagonExpandCondsets();
117+
FunctionPass *createHexagonQFPoptimizer();
115118

116119
} // end namespace llvm;
117120

Lines changed: 323 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,323 @@
1+
//===----- HexagonQFPOptimizer.cpp - Qualcomm-FP to IEEE-FP conversions
2+
// optimizer ------------------===//
3+
//
4+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
// Basic infrastructure for optimizing intermediate conversion instructions
11+
// generated while performing vector floating point operations.
12+
// Currently runs at the start of code generation for Hexagon; it cleans
13+
// up redundant conversion instructions and replaces the uses of conversion
14+
// with appropriate machine operand. Liveness is preserved after this pass.
15+
//
16+
// @note: The redundant conversion instructions are not eliminated in this pass.
17+
// In this pass, we are only trying to replace the uses of conversion
18+
// instructions with its appropriate QFP instruction. We are leaving the job to
19+
// Dead instruction Elimination pass to remove redundant conversion
20+
// instructions.
21+
//
22+
// Brief overview of working of this QFP optimizer.
23+
// This version of Hexagon QFP optimizer basically iterates over each
24+
// instruction and checks whether it belongs to the Hexagon floating-point HVX
25+
// arithmetic instruction category(Add, Sub, Mul). And then it finds the unique
26+
// definition for the machine operands corresponding to the instruction.
27+
//
28+
// Example:
29+
// MachineInstruction *MI be the HVX vadd instruction
30+
// MI -> $v0 = V6_vadd_sf $v1, $v2
31+
// MachineInstr *DefMI1 = MRI->getVRegDef(MI->getOperand(1).getReg());
32+
// MachineInstr *DefMI2 = MRI->getVRegDef(MI->getOperand(2).getReg());
33+
//
34+
// In the above example, DefMI1 and DefMI2 gives the unique definitions
35+
// corresponding to the operands ($v1 and $v2 respectively) of instruction MI.
36+
//
37+
// If both of the definitions are not conversion instructions(V6_vconv_sf_qf32,
38+
// V6_vconv_hf_qf16), then it will skip optimizing the current instruction and
39+
// iterates over next instruction.
40+
//
41+
// If one of the definitions is a conversion instruction then our pass will replace
42+
// the arithmetic instruction with its corresponding mix variant.
43+
// In the above example, if $v1 is conversion instruction
44+
// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
45+
// After Transformation:
46+
// MI -> $v0 = V6_vadd_qf32_mix $v3, $v2 ($v1 is replaced with $v3)
47+
//
48+
// If both the definitions are conversion instructions then the instruction will
49+
// be replaced with its qf variant
50+
// In the above example, if $v1 and $v2 are conversion instructions
51+
// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
52+
// DefMI2 -> $v2 = V6_vconv_sf_qf32 $v4
53+
// After Transformation:
54+
// MI -> $v0 = V6_vadd_qf32 $v3, $v4 ($v1 is replaced with $v3, $v2 is replaced
55+
// with $v4)
56+
//
57+
// Currently, in this pass, we are not handling the case when the definitions
58+
// are PHI inst.
59+
//
60+
//===----------------------------------------------------------------------===//
61+
#define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass"
62+
63+
#include "Hexagon.h"
64+
#include "HexagonInstrInfo.h"
65+
#include "HexagonSubtarget.h"
66+
#include "llvm/ADT/SmallVector.h"
67+
#include "llvm/ADT/StringRef.h"
68+
#include "llvm/CodeGen/MachineBasicBlock.h"
69+
#include "llvm/CodeGen/MachineFunction.h"
70+
#include "llvm/CodeGen/MachineFunctionPass.h"
71+
#include "llvm/CodeGen/MachineInstr.h"
72+
#include "llvm/CodeGen/MachineOperand.h"
73+
#include "llvm/CodeGen/Passes.h"
74+
#include "llvm/Pass.h"
75+
#include "llvm/Support/CommandLine.h"
76+
#include "llvm/Support/Debug.h"
77+
#include "llvm/Support/raw_ostream.h"
78+
#include <map>
79+
#include <vector>
80+
81+
#define DEBUG_TYPE "hexagon-qfp-optimizer"
82+
83+
using namespace llvm;
84+
85+
// Command-line switch "disable-qfp-opt" (default: false). When it is set,
// HexagonQFPoptimizer::runOnMachineFunction returns immediately without
// touching the function.
cl::opt<bool>
86+
DisableQFOptimizer("disable-qfp-opt", cl::init(false),
87+
cl::desc(
88+
"Disable optimization of Qfloat operations."));
89+
90+
// Opcodes that runOnMachineFunction treats as QFP-related. An instruction is
// only handed to optimizeQfp if its opcode appears in this list and it is not
// one of the V6_vconv_sf_qf32 / V6_vconv_hf_qf16 conversions (those two are
// listed here but filtered out by the caller).
std::vector<unsigned short> QFPInst = {
91+
Hexagon::V6_vadd_hf, Hexagon::V6_vadd_qf16,
92+
Hexagon::V6_vadd_qf16_mix, Hexagon::V6_vadd_qf32,
93+
Hexagon::V6_vadd_qf32_mix, Hexagon::V6_vadd_sf,
94+
Hexagon::V6_vconv_hf_qf16, Hexagon::V6_vconv_hf_qf32,
95+
Hexagon::V6_vconv_sf_qf32, Hexagon::V6_vmpy_qf16,
96+
Hexagon::V6_vmpy_qf16_hf, Hexagon::V6_vmpy_qf16_mix_hf,
97+
Hexagon::V6_vmpy_qf32, Hexagon::V6_vmpy_qf32_hf,
98+
Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16,
99+
Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vsub_hf,
100+
Hexagon::V6_vsub_qf16, Hexagon::V6_vsub_qf16_mix,
101+
Hexagon::V6_vsub_qf32, Hexagon::V6_vsub_qf32_mix,
102+
Hexagon::V6_vsub_sf};
103+
104+
// Replacement table used by optimizeQfp: each key is an opcode and the mapped
// value is the opcode that replaces it when one more source operand is taken
// directly from the input of a conversion. Most entries form two-step chains,
// e.g. V6_vadd_sf -> V6_vadd_qf32_mix -> V6_vadd_qf32, so the map is applied
// twice when both sources come from conversions. V6_vmpy_qf32_sf is the
// exception: it maps to V6_vmpy_qf32 in a single step.
// Not every opcode listed in QFPInst appears as a key here.
std::map<unsigned short, unsigned short> QFPInstMap{
105+
{Hexagon::V6_vadd_hf, Hexagon::V6_vadd_qf16_mix},
106+
{Hexagon::V6_vadd_qf16_mix, Hexagon::V6_vadd_qf16},
107+
{Hexagon::V6_vadd_sf, Hexagon::V6_vadd_qf32_mix},
108+
{Hexagon::V6_vadd_qf32_mix, Hexagon::V6_vadd_qf32},
109+
{Hexagon::V6_vsub_hf, Hexagon::V6_vsub_qf16_mix},
110+
{Hexagon::V6_vsub_qf16_mix, Hexagon::V6_vsub_qf16},
111+
{Hexagon::V6_vsub_sf, Hexagon::V6_vsub_qf32_mix},
112+
{Hexagon::V6_vsub_qf32_mix, Hexagon::V6_vsub_qf32},
113+
{Hexagon::V6_vmpy_qf16_hf, Hexagon::V6_vmpy_qf16_mix_hf},
114+
{Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16},
115+
{Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf},
116+
{Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16},
117+
{Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}};
118+
119+
// Forward declarations of the pass factory and the pass-registry initializer
// for this pass.
namespace llvm {
120+
121+
FunctionPass *createHexagonQFPoptimizer();
122+
void initializeHexagonQFPoptimizerPass(PassRegistry &);
123+
124+
} // namespace llvm
125+
126+
namespace {
127+
128+
// Machine-function pass that replaces HVX floating-point arithmetic
// instructions whose sources are produced by qf -> sf/hf conversions with
// variants that consume the unconverted values (see optimizeQfp).
struct HexagonQFPoptimizer : public MachineFunctionPass {
129+
public:
130+
static char ID;
131+
132+
HexagonQFPoptimizer() : MachineFunctionPass(ID) {}
133+
134+
// Pass entry point; returns true if any instruction was replaced.
bool runOnMachineFunction(MachineFunction &MF) override;
135+
136+
// Tries to insert a replacement for MI in MBB; returns true if it did.
bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB);
137+
138+
StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; }
139+
140+
// The pass only rewrites instructions, so it declares the CFG preserved.
void getAnalysisUsage(AnalysisUsage &AU) const override {
141+
AU.setPreservesCFG();
142+
MachineFunctionPass::getAnalysisUsage(AU);
143+
}
144+
145+
private:
146+
// Per-function state; all three are assigned in runOnMachineFunction.
const HexagonSubtarget *HST = nullptr;
147+
const HexagonInstrInfo *HII = nullptr;
148+
const MachineRegisterInfo *MRI = nullptr;
149+
};
150+
151+
char HexagonQFPoptimizer::ID = 0;
152+
} // namespace
153+
154+
INITIALIZE_PASS(HexagonQFPoptimizer, "hexagon-qfp-optimizer",
155+
HEXAGON_QFP_OPTIMIZER, false, false)
156+
157+
// Factory for the QFP optimizer pass. Returns a newly allocated pass object;
// NOTE(review): ownership presumably passes to the pass manager that receives
// it via addPass -- confirm against the caller.
FunctionPass *llvm::createHexagonQFPoptimizer() {
158+
return new HexagonQFPoptimizer();
159+
}
160+
161+
/// Try to fold the qf -> sf/hf conversions that feed \p MI into \p MI itself.
///
/// Operand 0 of \p MI is treated as the result and operands 1 and 2 as the
/// sources. Depending on which sources are defined by V6_vconv_sf_qf32 or
/// V6_vconv_hf_qf16, a replacement instruction is inserted into \p MBB
/// immediately before \p MI:
///   - both sources converted: the "qf" variant (two steps through QFPInstMap,
///     or one step for V6_vmpy_qf32_sf);
///   - exactly one source converted: the "mix" variant (one step through
///     QFPInstMap), with the unconverted qf value as the first operand.
/// The replacement reads the conversion's input register directly and takes
/// over that operand's kill flag.
///
/// \returns true if a replacement was inserted. \p MI is not erased here.
/// Returns false, leaving everything untouched, when \p MI has no entry in
/// QFPInstMap, when its operands are not virtual registers with a known
/// definition, or when none of the patterns applies.
bool HexagonQFPoptimizer::optimizeQfp(MachineInstr *MI,
                                      MachineBasicBlock *MBB) {
  if (MI->getNumOperands() < 3)
    return false;

  // Only opcodes with an entry in QFPInstMap can be rewritten. Use find()
  // instead of operator[]: operator[] inserts a zero-valued entry for an
  // unknown key, which would later be used as the replacement opcode.
  const unsigned Opc = MI->getOpcode();
  const auto MixIt = QFPInstMap.find(Opc);
  if (MixIt == QFPInstMap.end())
    return false;
  unsigned short InstTy = MixIt->second;

  MachineOperand &Res = MI->getOperand(0);
  MachineOperand &MIOp1 = MI->getOperand(1);
  MachineOperand &MIOp2 = MI->getOperand(2);
  // getVRegDef is only meaningful for virtual registers.
  if (!Res.isReg() || !MIOp1.isReg() || !MIOp2.isReg() ||
      !MIOp1.getReg().isVirtual() || !MIOp2.getReg().isVirtual())
    return false;

  // Get the reaching defs of MI, DefMI1 and DefMI2.
  MachineInstr *DefMI1 = MRI->getVRegDef(MIOp1.getReg());
  MachineInstr *DefMI2 = MRI->getVRegDef(MIOp2.getReg());
  if (!DefMI1 || !DefMI2)
    return false;
  LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: ";
             DefMI1->dump(); DefMI2->dump());

  // Get the reaching def of a def's first source operand, if that operand is
  // a virtual register; nullptr otherwise.
  auto SourceDef = [this](const MachineInstr *Def) -> MachineInstr * {
    if (Def->getNumOperands() > 1 && Def->getOperand(1).isReg() &&
        Def->getOperand(1).getReg().isVirtual())
      return MRI->getVRegDef(Def->getOperand(1).getReg());
    return nullptr;
  };
  MachineInstr *Inst1 = SourceDef(DefMI1);
  MachineInstr *Inst2 = SourceDef(DefMI2);

  // True if Inst exists and its operand 0 is a W (vector pair) register.
  auto DefinesWideReg = [this](const MachineInstr *Inst) {
    return Inst && MRI->getRegClass(Inst->getOperand(0).getReg()) ==
                       &Hexagon::HvxWRRegClass;
  };

  // Read the kill state of a conversion's source operand and clear it there,
  // because the replacement instruction becomes the later user.
  auto TakeKill = [](MachineOperand &MO) {
    const unsigned Flags = getKillRegState(MO.isKill());
    MO.setIsKill(false);
    return Flags;
  };

  // Insert the replacement instruction right before MI.
  auto Emit = [&](unsigned NewOpc, const MachineOperand &A, unsigned AFlags,
                  const MachineOperand &B, unsigned BFlags) {
    MachineInstrBuilder MIB =
        BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(NewOpc), Res.getReg())
            .addReg(A.getReg(), AFlags, A.getSubReg())
            .addReg(B.getReg(), BFlags, B.getSubReg());
    LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
    (void)MIB;
  };

  const unsigned Def1OP = DefMI1->getOpcode();
  const unsigned Def2OP = DefMI2->getOpcode();
  const bool IsVmpyQf32Sf = Opc == Hexagon::V6_vmpy_qf32_sf;

  // Check if both the reaching defs of MI are qf to sf/hf conversions.
  if ((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
       Def2OP == Hexagon::V6_vconv_sf_qf32) ||
      (Def1OP == Hexagon::V6_vconv_hf_qf16 &&
       Def2OP == Hexagon::V6_vconv_hf_qf16)) {

    // If the reaching defs of DefMI are W register type, we return.
    if (DefinesWideReg(Inst1) || DefinesWideReg(Inst2))
      return false;

    // Both sources are converted, so step from the "mix" to the "qf" variant.
    // V6_vmpy_qf32_sf already maps to its qf variant in a single step.
    // Resolve the opcode before touching any kill flag so that bailing out
    // leaves the function unmodified.
    if (!IsVmpyQf32Sf) {
      const auto QfIt = QFPInstMap.find(InstTy);
      if (QfIt == QFPInstMap.end())
        return false;
      InstTy = QfIt->second;
    }

    // Analyze the use operands of the conversion to get their KILL status.
    MachineOperand &Src1 = DefMI1->getOperand(1);
    MachineOperand &Src2 = DefMI2->getOperand(1);
    const unsigned Op0F = TakeKill(Src1);
    const unsigned Op1F = TakeKill(Src2);

    Emit(InstTy, Src1, Op0F, Src2, Op1F);
    return true;
  }

  // Check if left operand's reaching def is a conversion to sf/hf.
  if (((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
        Def2OP != Hexagon::V6_vconv_sf_qf32) ||
       (Def1OP == Hexagon::V6_vconv_hf_qf16 &&
        Def2OP != Hexagon::V6_vconv_hf_qf16)) &&
      !DefMI2->isPHI() && !IsVmpyQf32Sf) {

    if (DefinesWideReg(Inst1))
      return false;

    MachineOperand &Src1 = DefMI1->getOperand(1);
    MachineOperand &Src2 = MIOp2;
    const unsigned Op0F = TakeKill(Src1);
    const unsigned Op1F = getKillRegState(Src2.isKill());

    Emit(InstTy, Src1, Op0F, Src2, Op1F);
    return true;
  }

  // Check if right operand's reaching def is a conversion to sf/hf.
  if (((Def1OP != Hexagon::V6_vconv_sf_qf32 &&
        Def2OP == Hexagon::V6_vconv_sf_qf32) ||
       (Def1OP != Hexagon::V6_vconv_hf_qf16 &&
        Def2OP == Hexagon::V6_vconv_hf_qf16)) &&
      !DefMI1->isPHI() && !IsVmpyQf32Sf) {
    // The second operand of original instruction is converted.
    // In "mix" instructions, "qf" operand is always the first operand.

    // Caveat: vsub is not commutative w.r.t operands.
    if (InstTy == Hexagon::V6_vsub_qf16_mix ||
        InstTy == Hexagon::V6_vsub_qf32_mix)
      return false;

    if (DefinesWideReg(Inst2))
      return false;

    MachineOperand &Src1 = MIOp1;
    MachineOperand &Src2 = DefMI2->getOperand(1);
    const unsigned Op1F = TakeKill(Src2);
    const unsigned Op0F = getKillRegState(Src1.isKill());

    // Notice the operands are flipped: the qf value goes first.
    Emit(InstTy, Src2, Op1F, Src1, Op0F);
    return true;
  }

  return false;
}
284+
285+
/// Pass entry point: visit every instruction of \p MF and, for each opcode
/// listed in QFPInst other than the V6_vconv_sf_qf32 / V6_vconv_hf_qf16
/// conversions, let optimizeQfp try to insert a replacement. When it does,
/// the original instruction is erased.
///
/// \returns true if at least one instruction was replaced.
bool HexagonQFPoptimizer::runOnMachineFunction(MachineFunction &MF) {
  // Honour the "disable-qfp-opt" command-line switch.
  if (DisableQFOptimizer)
    return false;

  HST = &MF.getSubtarget<HexagonSubtarget>();
  const bool Applicable = HST->useHVXV68Ops() && HST->usePackets() &&
                          !skipFunction(MF.getFunction());
  if (!Applicable)
    return false;

  HII = HST->getInstrInfo();
  MRI = &MF.getRegInfo();

  LLVM_DEBUG(dbgs() << "\n=== Running QFPOptimzer Pass for : " << MF.getName()
                    << " Optimize intermediate conversions ===\n");

  bool Modified = false;
  for (MachineBasicBlock &Block : MF) {
    // The iterator is advanced before the body runs, so erasing the current
    // instruction is safe.
    for (MachineInstr &Inst : llvm::make_early_inc_range(Block)) {
      const unsigned Opc = Inst.getOpcode();
      if (!llvm::is_contained(QFPInst, Opc))
        continue;
      // The conversions themselves are never rewritten.
      if (Opc == Hexagon::V6_vconv_sf_qf32 || Opc == Hexagon::V6_vconv_hf_qf16)
        continue;

      LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; Inst.dump());
      if (!optimizeQfp(&Inst, &Block))
        continue;

      Inst.eraseFromParent();
      LLVM_DEBUG(dbgs() << "\t....Removing....");
      Modified = true;
    }
  }
  return Modified;
}

llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,8 @@ LLVMInitializeHexagonTarget() {
220220
initializeHexagonPeepholePass(PR);
221221
initializeHexagonSplitConst32AndConst64Pass(PR);
222222
initializeHexagonVectorPrintPass(PR);
223+
224+
initializeHexagonQFPoptimizerPass(PR);
223225
}
224226

225227
HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
@@ -386,6 +388,7 @@ bool HexagonPassConfig::addInstSelector() {
386388
addPass(createHexagonGenInsert());
387389
if (EnableEarlyIf)
388390
addPass(createHexagonEarlyIfConversion());
391+
addPass(createHexagonQFPoptimizer());
389392
}
390393

391394
return false;
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
; RUN: llc -mtriple=hexagon -mattr=+hvxv68,+hvx,+hvx-length128b < %s | FileCheck %s
2+
3+
; Test that the Qfloat optimization pass doesn't crash due to invalid
4+
; instructions.
5+
6+
; CHECK: v{{[0-9]+}}.hf = v{{[0-9]:[0-9]}}.qf32
7+
8+
define void @test() local_unnamed_addr #0 {
9+
entry:
10+
br label %for.body
11+
12+
for.body:
13+
%optr.068 = phi <32 x i32>* [ undef, %entry ], [ %incdec.ptr6, %for.body ]
14+
%0 = tail call <32 x i32> @llvm.hexagon.V6.vconv.hf.qf32.128B(<64 x i32> undef) #2
15+
%1 = tail call <32 x i32> @llvm.hexagon.V6.vdealh.128B(<32 x i32> %0) #2
16+
%2 = tail call <128 x i1> @llvm.hexagon.V6.vgth.128B(<32 x i32> undef, <32 x i32> %1) #2
17+
%3 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %2, <32 x i32> undef, <32 x i32> undef) #2
18+
%4 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %3, <32 x i32> undef) #2
19+
%5 = tail call <32 x i32> @llvm.hexagon.V6.vpackhub.sat.128B(<32 x i32> undef, <32 x i32> %4) #2
20+
store <32 x i32> %5, <32 x i32>* %optr.068, align 1
21+
%incdec.ptr6 = getelementptr inbounds <32 x i32>, <32 x i32>* %optr.068, i32 1
22+
br label %for.body
23+
}
24+
25+
declare <32 x i32> @llvm.hexagon.V6.vdealh.128B(<32 x i32>) #1
26+
declare <32 x i32> @llvm.hexagon.V6.vconv.hf.qf32.128B(<64 x i32>) #1
27+
declare <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32>, <32 x i32>) #1
28+
declare <32 x i32> @llvm.hexagon.V6.vpackhub.sat.128B(<32 x i32>, <32 x i32>) #1
29+
declare <128 x i1> @llvm.hexagon.V6.vgth.128B(<32 x i32>, <32 x i32>) #1
30+
declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
31+
32+

0 commit comments

Comments
 (0)