Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,12 @@ class TargetTransformInfo {
/// this factor, it is very likely to be predicted correctly.
BranchProbability getPredictableBranchThreshold() const;

/// Returns the estimated penalty of a branch misprediction, expressed as
/// latency. Indicates how aggressively the target wants unpredictable
/// branches to be eliminated. A return value of zero means that extra
/// optimization applied to such branches should be minimal.
InstructionCost getBranchMispredictPenalty() const;

/// Return true if branch divergence exists.
///
/// Branch divergence has a significantly negative impact on GPU performance
Expand Down Expand Up @@ -1832,6 +1838,7 @@ class TargetTransformInfo::Concept {
ArrayRef<const Value *> Operands,
TargetCostKind CostKind) = 0;
virtual BranchProbability getPredictableBranchThreshold() = 0;
virtual InstructionCost getBranchMispredictPenalty() = 0;
virtual bool hasBranchDivergence(const Function *F = nullptr) = 0;
virtual bool isSourceOfDivergence(const Value *V) = 0;
virtual bool isAlwaysUniform(const Value *V) = 0;
Expand Down Expand Up @@ -2243,6 +2250,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
/// Forwards the predictable-branch-threshold query to the wrapped Impl.
BranchProbability getPredictableBranchThreshold() override {
return Impl.getPredictableBranchThreshold();
}
/// Forwards the branch-misprediction-penalty query to the wrapped Impl.
InstructionCost getBranchMispredictPenalty() override {
return Impl.getBranchMispredictPenalty();
}
/// Forwards the branch-divergence query for \p F (which may be null) to the
/// wrapped Impl.
bool hasBranchDivergence(const Function *F = nullptr) override {
return Impl.hasBranchDivergence(F);
}
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ class TargetTransformInfoImplBase {
return BranchProbability(99, 100);
}

/// Default implementation: reports a branch misprediction penalty of zero.
InstructionCost getBranchMispredictPenalty() const { return 0; }

/// Default implementation: reports no branch divergence; \p F is not
/// inspected.
bool hasBranchDivergence(const Function *F = nullptr) const { return false; }

/// Default implementation: no value is reported as a source of divergence;
/// \p V is not inspected.
bool isSourceOfDivergence(const Value *V) const { return false; }
Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ struct SimplifyCFGOptions {
bool SinkCommonInsts = false;
bool SimplifyCondBranch = true;
bool SpeculateBlocks = true;
bool SpeculateUnpredictables = false;

AssumptionCache *AC = nullptr;

Expand Down Expand Up @@ -75,6 +76,10 @@ struct SimplifyCFGOptions {
SpeculateBlocks = B;
return *this;
}
/// Sets the SpeculateUnpredictables flag to \p B and returns this options
/// object so that further setter calls can be chained.
SimplifyCFGOptions &speculateUnpredictables(bool B) {
SpeculateUnpredictables = B;
return *this;
}
};

} // namespace llvm
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,10 @@ BranchProbability TargetTransformInfo::getPredictableBranchThreshold() const {
: TTIImpl->getPredictableBranchThreshold();
}

/// Returns the branch misprediction penalty reported by the underlying
/// TTIImpl.
InstructionCost TargetTransformInfo::getBranchMispredictPenalty() const {
return TTIImpl->getBranchMispredictPenalty();
}

/// Returns the branch-divergence answer for \p F reported by the underlying
/// TTIImpl.
bool TargetTransformInfo::hasBranchDivergence(const Function *F) const {
return TTIImpl->hasBranchDivergence(F);
}
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Passes/PassBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,8 @@ Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
Result.hoistCommonInsts(Enable);
} else if (ParamName == "sink-common-insts") {
Result.sinkCommonInsts(Enable);
} else if (ParamName == "speculate-unpredictables") {
Result.speculateUnpredictables(Enable);
} else if (Enable && ParamName.consume_front("bonus-inst-threshold=")) {
APInt BonusInstThreshold;
if (ParamName.getAsInteger(0, BonusInstThreshold))
Expand Down
12 changes: 7 additions & 5 deletions llvm/lib/Passes/PassBuilderPipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1505,8 +1505,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,

// LoopSink (and other loop passes since the last simplifyCFG) might have
// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
OptimizePM.addPass(
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
.convertSwitchRangeToICmp(true)
.speculateUnpredictables(true)));

// Add the core optimizing pipeline.
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
Expand Down Expand Up @@ -2024,9 +2025,10 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
LateFPM.addPass(DivRemPairsPass());

// Delete basic blocks, which optimization passes may have killed.
LateFPM.addPass(SimplifyCFGPass(
SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
true)));
LateFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
.convertSwitchRangeToICmp(true)
.hoistCommonInsts(true)
.speculateUnpredictables(true)));
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));

// Drop bodies of available externally objects to improve GlobalDCE.
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/X86/X86TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6756,3 +6756,8 @@ InstructionCost X86TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
return AM.Scale != 0;
return -1;
}

InstructionCost X86TTIImpl::getBranchMispredictPenalty() const {
// TODO: Hook MispredictPenalty of SchedMachineModel into this.
return 14;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How did you arrive at the value 14? Why is the TODO a TODO and not implemented in this PR?

}
2 changes: 2 additions & 0 deletions llvm/lib/Target/X86/X86TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,8 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
bool supportsEfficientVectorElementLoadStore() const;
bool enableInterleavedAccessVectorization();

InstructionCost getBranchMispredictPenalty() const;

private:
bool supportsGather() const;
InstructionCost getGSVectorCost(unsigned Opcode, TTI::TargetCostKind CostKind,
Expand Down
9 changes: 8 additions & 1 deletion llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ static cl::opt<bool> UserSinkCommonInsts(
"sink-common-insts", cl::Hidden, cl::init(false),
cl::desc("Sink common instructions (default = false)"));

// Hidden command-line flag. When it is given explicitly, its value replaces
// SimplifyCFGOptions::SpeculateUnpredictables (see
// applyCommandLineOverridesToOptions).
static cl::opt<bool> UserSpeculateUnpredictables(
"speculate-unpredictables", cl::Hidden, cl::init(false),
cl::desc("Speculate unpredictable branches (default = false)"));

STATISTIC(NumSimpl, "Number of blocks simplified");

Expand Down Expand Up @@ -325,6 +328,8 @@ static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) {
Options.HoistCommonInsts = UserHoistCommonInsts;
if (UserSinkCommonInsts.getNumOccurrences())
Options.SinkCommonInsts = UserSinkCommonInsts;
if (UserSpeculateUnpredictables.getNumOccurrences())
Options.SpeculateUnpredictables = UserSpeculateUnpredictables;
}

SimplifyCFGPass::SimplifyCFGPass() {
Expand All @@ -351,7 +356,9 @@ void SimplifyCFGPass::printPipeline(
OS << (Options.HoistCommonInsts ? "" : "no-") << "hoist-common-insts;";
OS << (Options.SinkCommonInsts ? "" : "no-") << "sink-common-insts;";
OS << (Options.SpeculateBlocks ? "" : "no-") << "speculate-blocks;";
OS << (Options.SimplifyCondBranch ? "" : "no-") << "simplify-cond-branch";
OS << (Options.SimplifyCondBranch ? "" : "no-") << "simplify-cond-branch;";
OS << (Options.SpeculateUnpredictables ? "" : "no-")
<< "speculate-unpredictables";
OS << '>';
}

Expand Down
16 changes: 11 additions & 5 deletions llvm/lib/Transforms/Utils/SimplifyCFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3476,7 +3476,8 @@ static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI,
/// Given a BB that starts with the specified two-entry PHI node,
/// see if we can eliminate it.
static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
DomTreeUpdater *DTU, const DataLayout &DL) {
DomTreeUpdater *DTU, const DataLayout &DL,
bool SpeculateUnpredictables) {
// Ok, this is a two entry PHI node. Check to see if this is a simple "if
// statement", which has a very simple dominance structure. Basically, we
// are trying to find the condition that is being branched on, which
Expand Down Expand Up @@ -3508,7 +3509,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// jump to one specific 'then' block (if we have two of them).
// It isn't beneficial to speculatively execute the code
// from the block that we know is predictably not entered.
if (!DomBI->getMetadata(LLVMContext::MD_unpredictable)) {
bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
if (!IsUnpredictable) {
uint64_t TWeight, FWeight;
if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
(TWeight + FWeight) != 0) {
Expand Down Expand Up @@ -3551,6 +3553,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
InstructionCost Cost = 0;
InstructionCost Budget =
TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
if (SpeculateUnpredictables && IsUnpredictable)
Budget += TTI.getBranchMispredictPenalty();

bool Changed = false;
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
Expand Down Expand Up @@ -3620,8 +3624,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
[](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
return Changed;

LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond
<< " T: " << IfTrue->getName()
LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
if (IsUnpredictable) dbgs() << " (unpredictable)";
dbgs() << " T: " << IfTrue->getName()
<< " F: " << IfFalse->getName() << "\n");

// If we can still promote the PHI nodes after this gauntlet of tests,
Expand Down Expand Up @@ -7814,7 +7819,8 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
// eliminate it, do so now.
if (auto *PN = dyn_cast<PHINode>(BB->begin()))
if (PN->getNumIncomingValues() == 2)
if (FoldTwoEntryPHINode(PN, TTI, DTU, DL))
if (FoldTwoEntryPHINode(PN, TTI, DTU, DL,
Options.SpeculateUnpredictables))
return true;
}

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Other/new-pm-print-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@
; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(print<stack-lifetime><may>,print<stack-lifetime><must>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-17
; CHECK-17: function(print<stack-lifetime><may>,print<stack-lifetime><must>)

; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-18
; CHECK-18: function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;no-switch-range-to-icmp;switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch>)
; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-18
; CHECK-18: function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;no-switch-range-to-icmp;switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)

; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only>,loop-vectorize<interleave-forced-only;vectorize-forced-only>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-19
; CHECK-19: function(loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,loop-vectorize<interleave-forced-only;vectorize-forced-only;>)
Expand Down
100 changes: 100 additions & 0 deletions llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-unpredictable.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; Two-entry phi nodes with unpredictable conditions may get increased budget for folding.
; RUN: opt < %s -S -passes=simplifycfg | FileCheck --check-prefix=CHECK-NOFOLD %s
; RUN: opt < %s -S -passes='simplifycfg<speculate-unpredictables>' | FileCheck --check-prefix=CHECK-NOFOLD %s
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s -S -passes=simplifycfg | FileCheck --check-prefix=CHECK-NOFOLD %s
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s -S -passes='simplifycfg<speculate-unpredictables>' | FileCheck --check-prefix=CHECK-FOLD %s

define { <2 x float>, <2 x float> } @foo(float %arg, <2 x float> %arg1, <2 x float> %arg2) #0 {
; CHECK-NOFOLD-LABEL: define { <2 x float>, <2 x float> } @foo(
; CHECK-NOFOLD-SAME: float [[ARG:%.*]], <2 x float> [[ARG1:%.*]], <2 x float> [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NOFOLD-NEXT: [[BB:.*]]:
; CHECK-NOFOLD-NEXT: [[I:%.*]] = fcmp fast ogt float [[ARG]], 0x3F747AE140000000
; CHECK-NOFOLD-NEXT: br i1 [[I]], label %[[BB3:.*]], label %[[BB20:.*]], !unpredictable [[META0:![0-9]+]]
; CHECK-NOFOLD: [[BB3]]:
; CHECK-NOFOLD-NEXT: [[I4:%.*]] = extractelement <2 x float> [[ARG1]], i64 0
; CHECK-NOFOLD-NEXT: [[I5:%.*]] = fmul fast float [[I4]], [[I4]]
; CHECK-NOFOLD-NEXT: [[I6:%.*]] = extractelement <2 x float> [[ARG1]], i64 1
; CHECK-NOFOLD-NEXT: [[I7:%.*]] = fmul fast float [[I6]], [[I6]]
; CHECK-NOFOLD-NEXT: [[I8:%.*]] = fadd fast float [[I7]], [[I5]]
; CHECK-NOFOLD-NEXT: [[I9:%.*]] = extractelement <2 x float> [[ARG2]], i64 0
; CHECK-NOFOLD-NEXT: [[I10:%.*]] = fmul fast float [[I9]], [[I9]]
; CHECK-NOFOLD-NEXT: [[I11:%.*]] = fadd fast float [[I8]], [[I10]]
; CHECK-NOFOLD-NEXT: [[I12:%.*]] = tail call fast noundef float @llvm.sqrt.f32(float [[I11]])
; CHECK-NOFOLD-NEXT: [[I13:%.*]] = fdiv fast float 0x3FEFD70A40000000, [[I12]]
; CHECK-NOFOLD-NEXT: [[I14:%.*]] = fmul fast float [[I13]], [[I4]]
; CHECK-NOFOLD-NEXT: [[I15:%.*]] = insertelement <2 x float> poison, float [[I14]], i64 0
; CHECK-NOFOLD-NEXT: [[I16:%.*]] = fmul fast float [[I13]], [[I6]]
; CHECK-NOFOLD-NEXT: [[I17:%.*]] = insertelement <2 x float> [[I15]], float [[I16]], i64 1
; CHECK-NOFOLD-NEXT: [[I18:%.*]] = fmul fast float [[I13]], [[I9]]
; CHECK-NOFOLD-NEXT: [[I19:%.*]] = insertelement <2 x float> [[ARG2]], float [[I18]], i64 0
; CHECK-NOFOLD-NEXT: br label %[[BB20]]
; CHECK-NOFOLD: [[BB20]]:
; CHECK-NOFOLD-NEXT: [[I21:%.*]] = phi nsz <2 x float> [ [[I17]], %[[BB3]] ], [ zeroinitializer, %[[BB]] ]
; CHECK-NOFOLD-NEXT: [[I22:%.*]] = phi nsz <2 x float> [ [[I19]], %[[BB3]] ], [ zeroinitializer, %[[BB]] ]
; CHECK-NOFOLD-NEXT: [[I23:%.*]] = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> [[I21]], 0
; CHECK-NOFOLD-NEXT: [[I24:%.*]] = insertvalue { <2 x float>, <2 x float> } [[I23]], <2 x float> [[I22]], 1
; CHECK-NOFOLD-NEXT: ret { <2 x float>, <2 x float> } [[I24]]
;
; CHECK-FOLD-LABEL: define { <2 x float>, <2 x float> } @foo(
; CHECK-FOLD-SAME: float [[ARG:%.*]], <2 x float> [[ARG1:%.*]], <2 x float> [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-FOLD-NEXT: [[BB:.*:]]
; CHECK-FOLD-NEXT: [[I:%.*]] = fcmp fast ogt float [[ARG]], 0x3F747AE140000000
; CHECK-FOLD-NEXT: [[I4:%.*]] = extractelement <2 x float> [[ARG1]], i64 0
; CHECK-FOLD-NEXT: [[I5:%.*]] = fmul fast float [[I4]], [[I4]]
; CHECK-FOLD-NEXT: [[I6:%.*]] = extractelement <2 x float> [[ARG1]], i64 1
; CHECK-FOLD-NEXT: [[I7:%.*]] = fmul fast float [[I6]], [[I6]]
; CHECK-FOLD-NEXT: [[I8:%.*]] = fadd fast float [[I7]], [[I5]]
; CHECK-FOLD-NEXT: [[I9:%.*]] = extractelement <2 x float> [[ARG2]], i64 0
; CHECK-FOLD-NEXT: [[I10:%.*]] = fmul fast float [[I9]], [[I9]]
; CHECK-FOLD-NEXT: [[I11:%.*]] = fadd fast float [[I8]], [[I10]]
; CHECK-FOLD-NEXT: [[I12:%.*]] = tail call fast float @llvm.sqrt.f32(float [[I11]])
; CHECK-FOLD-NEXT: [[I13:%.*]] = fdiv fast float 0x3FEFD70A40000000, [[I12]]
; CHECK-FOLD-NEXT: [[I14:%.*]] = fmul fast float [[I13]], [[I4]]
; CHECK-FOLD-NEXT: [[I15:%.*]] = insertelement <2 x float> poison, float [[I14]], i64 0
; CHECK-FOLD-NEXT: [[I16:%.*]] = fmul fast float [[I13]], [[I6]]
; CHECK-FOLD-NEXT: [[I17:%.*]] = insertelement <2 x float> [[I15]], float [[I16]], i64 1
; CHECK-FOLD-NEXT: [[I18:%.*]] = fmul fast float [[I13]], [[I9]]
; CHECK-FOLD-NEXT: [[I19:%.*]] = insertelement <2 x float> [[ARG2]], float [[I18]], i64 0
; CHECK-FOLD-NEXT: [[I21:%.*]] = select nsz i1 [[I]], <2 x float> [[I17]], <2 x float> zeroinitializer, !unpredictable [[META0:![0-9]+]]
; CHECK-FOLD-NEXT: [[I22:%.*]] = select nsz i1 [[I]], <2 x float> [[I19]], <2 x float> zeroinitializer, !unpredictable [[META0]]
; CHECK-FOLD-NEXT: [[I23:%.*]] = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> [[I21]], 0
; CHECK-FOLD-NEXT: [[I24:%.*]] = insertvalue { <2 x float>, <2 x float> } [[I23]], <2 x float> [[I22]], 1
; CHECK-FOLD-NEXT: ret { <2 x float>, <2 x float> } [[I24]]
;
bb:
%i = fcmp fast ogt float %arg, 0x3F747AE140000000
br i1 %i, label %bb3, label %bb20, !unpredictable !0

bb3: ; preds = %bb
%i4 = extractelement <2 x float> %arg1, i64 0
%i5 = fmul fast float %i4, %i4
%i6 = extractelement <2 x float> %arg1, i64 1
%i7 = fmul fast float %i6, %i6
%i8 = fadd fast float %i7, %i5
%i9 = extractelement <2 x float> %arg2, i64 0
%i10 = fmul fast float %i9, %i9
%i11 = fadd fast float %i8, %i10
%i12 = tail call fast noundef float @llvm.sqrt.f32(float %i11)
%i13 = fdiv fast float 0x3FEFD70A40000000, %i12
%i14 = fmul fast float %i13, %i4
%i15 = insertelement <2 x float> poison, float %i14, i64 0
%i16 = fmul fast float %i13, %i6
%i17 = insertelement <2 x float> %i15, float %i16, i64 1
%i18 = fmul fast float %i13, %i9
%i19 = insertelement <2 x float> %arg2, float %i18, i64 0
br label %bb20

bb20: ; preds = %bb3, %bb
%i21 = phi nsz <2 x float> [ %i17, %bb3 ], [ zeroinitializer, %bb ]
%i22 = phi nsz <2 x float> [ %i19, %bb3 ], [ zeroinitializer, %bb ]
%i23 = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> %i21, 0
%i24 = insertvalue { <2 x float>, <2 x float> } %i23, <2 x float> %i22, 1
ret { <2 x float>, <2 x float> } %i24
}

declare float @llvm.sqrt.f32(float)

attributes #0 = { nounwind }

!0 = !{}