From 4ae12ca7c2dea0246c887bbacaa6c20a20aaaf36 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Thu, 29 Aug 2024 15:07:02 -0400 Subject: [PATCH 1/3] [LegacyPM][DirectX] Add the scalarizer pass for DXIL legalization --- llvm/include/llvm/InitializePasses.h | 1 + llvm/include/llvm/LinkAllPasses.h | 1 + .../llvm/Transforms/Scalar/Scalarizer.h | 14 ++++++++ llvm/lib/Target/DirectX/DXILOpLowering.cpp | 2 ++ .../Target/DirectX/DirectXTargetMachine.cpp | 4 +++ llvm/lib/Transforms/Scalar/Scalar.cpp | 1 + llvm/lib/Transforms/Scalar/Scalarizer.cpp | 32 ++++++++++++++++++- llvm/test/CodeGen/DirectX/sin.ll | 16 +++++----- 8 files changed, 62 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 6605c6fde9251..f8b9f42d9dbec 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -276,6 +276,7 @@ void initializeSafepointIRVerifierPass(PassRegistry &); void initializeSelectOptimizePass(PassRegistry &); void initializeScalarEvolutionWrapperPassPass(PassRegistry &); void initializeScalarizeMaskedMemIntrinLegacyPassPass(PassRegistry &); +void initializeScalarizerLegacyPassPass(PassRegistry&); void initializeScavengerTestPass(PassRegistry &); void initializeScopedNoAliasAAWrapperPassPass(PassRegistry &); void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 1da02153d846f..92b59a66567c9 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -130,6 +130,7 @@ struct ForcePassLinking { (void)llvm::createLowerAtomicPass(); (void)llvm::createLoadStoreVectorizerPass(); (void)llvm::createPartiallyInlineLibCallsPass(); + (void)llvm::createScalarizerPass(); (void)llvm::createSeparateConstOffsetFromGEPPass(); (void)llvm::createSpeculativeExecutionPass(); (void)llvm::createSpeculativeExecutionIfHasBranchDivergencePass(); diff --git 
a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h index 45e25cbf28214..7454f00c2ea35 100644 --- a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h +++ b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h @@ -19,6 +19,7 @@ #define LLVM_TRANSFORMS_SCALAR_SCALARIZER_H #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" #include namespace llvm { @@ -50,6 +51,19 @@ class ScalarizerPass : public PassInfoMixin { void setScalarizeLoadStore(bool Value) { Options.ScalarizeLoadStore = Value; } void setScalarizeMinBits(unsigned Value) { Options.ScalarizeMinBits = Value; } }; + +/// Create a legacy pass manager instance of the Scalarizer pass +FunctionPass *createScalarizerPass(); + +class ScalarizerLegacyPass : public FunctionPass { +public: + static char ID; + ScalarizerPassOptions Options; + ScalarizerLegacyPass(); + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage& AU) const override; +}; + } #endif /* LLVM_TRANSFORMS_SCALAR_SCALARIZER_H */ diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index d98d0bfde04fc..32126612d1481 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -24,6 +24,7 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Transforms/Scalar/Scalarizer.h" #define DEBUG_TYPE "dxil-op-lower" @@ -521,6 +522,7 @@ class DXILOpLoweringLegacy : public ModulePass { static char ID; // Pass identification. 
void getAnalysisUsage(llvm::AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addPreserved(); } diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp index a29fc21042163..531e74eda04bc 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/InitializePasses.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/MC/MCSectionDXContainer.h" @@ -36,6 +37,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Transforms/Scalar/Scalarizer.h" #include using namespace llvm; @@ -44,6 +46,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() { RegisterTargetMachine X(getTheDirectXTarget()); auto *PR = PassRegistry::getPassRegistry(); initializeDXILIntrinsicExpansionLegacyPass(*PR); + initializeScalarizerLegacyPassPass(*PR); initializeDXILPrepareModulePass(*PR); initializeEmbedDXILPassPass(*PR); initializeWriteDXILPassPass(*PR); @@ -83,6 +86,7 @@ class DirectXPassConfig : public TargetPassConfig { FunctionPass *createTargetRegisterAllocator(bool) override { return nullptr; } void addCodeGenPrepare() override { addPass(createDXILIntrinsicExpansionLegacyPass()); + addPass(createScalarizerPass()); addPass(createDXILOpLoweringLegacyPass()); addPass(createDXILFinalizeLinkageLegacyPass()); addPass(createDXILTranslateMetadataLegacyPass()); diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp index 7aeee1d31f7e7..fa6e671830d96 100644 --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -21,6 +21,7 @@ using namespace llvm; void 
llvm::initializeScalarOpts(PassRegistry &Registry) { initializeConstantHoistingLegacyPassPass(Registry); initializeDCELegacyPassPass(Registry); + initializeScalarizerLegacyPassPass(Registry); initializeGVNLegacyPassPass(Registry); initializeEarlyCSELegacyPassPass(Registry); initializeEarlyCSEMemSSALegacyPassPass(Registry); diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp index 2bed3480da1cd..ad441914428c4 100644 --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/Local.h" @@ -339,9 +340,25 @@ class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> { const bool ScalarizeLoadStore; const unsigned ScalarizeMinBits; }; - } // end anonymous namespace +ScalarizerLegacyPass::ScalarizerLegacyPass() : FunctionPass(ID) { + Options.ScalarizeVariableInsertExtract = true; + Options.ScalarizeLoadStore = true; +} + +void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage& AU) const { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); +} + +char ScalarizerLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer", + "Scalarize vector operations", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer", + "Scalarize vector operations", false, false) + Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, const VectorSplit &VS, ValueVector *cachePtr) : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) { @@ -414,6 +431,19 @@ Value *Scatterer::operator[](unsigned Frag) { return CV[Frag]; } +bool ScalarizerLegacyPass::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + + DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + ScalarizerVisitor Impl(DT, 
Options); + return Impl.visit(F); +} + +FunctionPass *llvm::createScalarizerPass() { + return new ScalarizerLegacyPass(); +} + bool ScalarizerVisitor::visit(Function &F) { assert(Gathered.empty() && Scattered.empty()); diff --git a/llvm/test/CodeGen/DirectX/sin.ll b/llvm/test/CodeGen/DirectX/sin.ll index f309a36c6b8e6..79143bfa0a529 100644 --- a/llvm/test/CodeGen/DirectX/sin.ll +++ b/llvm/test/CodeGen/DirectX/sin.ll @@ -7,19 +7,19 @@ ; Function Attrs: noinline nounwind optnone define noundef float @sin_float(float noundef %a) #0 { entry: - %a.addr = alloca float, align 4 - store float %a, ptr %a.addr, align 4 - %0 = load float, ptr %a.addr, align 4 - %1 = call float @llvm.sin.f32(float %0) + %1 = call float @llvm.sin.f32(float %a) ret float %1 } ; Function Attrs: noinline nounwind optnone define noundef half @sin_half(half noundef %a) #0 { entry: - %a.addr = alloca half, align 2 - store half %a, ptr %a.addr, align 2 - %0 = load half, ptr %a.addr, align 2 - %1 = call half @llvm.sin.f16(half %0) + %1 = call half @llvm.sin.f16(half %a) ret half %1 } + +define noundef <4 x float> @sin_float4(<4 x float> noundef %a) #0 { +entry: + %2 = call <4 x float> @llvm.sin.v4f32(<4 x float> %a) + ret <4 x float> %2 +} From c554589afef069a3e957f1316898f1883e0ab636 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Wed, 4 Sep 2024 12:50:59 -0400 Subject: [PATCH 2/3] - Update tests - Modify constructor to take options --- llvm/include/llvm/InitializePasses.h | 2 +- .../llvm/Transforms/Scalar/Scalarizer.h | 15 ++---- llvm/lib/Target/DirectX/DXILOpLowering.cpp | 2 - .../Target/DirectX/DirectXTargetMachine.cpp | 7 ++- llvm/lib/Transforms/Scalar/Scalarizer.cpp | 27 ++++++---- llvm/test/CodeGen/DirectX/acos.ll | 25 ++++++++-- llvm/test/CodeGen/DirectX/asin.ll | 25 ++++++++-- llvm/test/CodeGen/DirectX/atan.ll | 25 ++++++++-- llvm/test/CodeGen/DirectX/ceil.ll | 21 +++++++- llvm/test/CodeGen/DirectX/cos.ll | 21 +++++++- llvm/test/CodeGen/DirectX/cosh.ll | 25 ++++++++-- 
llvm/test/CodeGen/DirectX/exp2.ll | 50 +++++++++++-------- llvm/test/CodeGen/DirectX/fabs.ll | 22 +++++++- llvm/test/CodeGen/DirectX/floor.ll | 23 ++++++++- llvm/test/CodeGen/DirectX/isinf.ll | 24 ++++----- llvm/test/CodeGen/DirectX/reversebits.ll | 21 +++++++- llvm/test/CodeGen/DirectX/round.ll | 22 +++++++- llvm/test/CodeGen/DirectX/saturate.ll | 17 +++---- llvm/test/CodeGen/DirectX/scalar-store.ll | 17 +++++++ .../DirectX/scalarization_pass_order.ll | 45 +++++++++++++++++ llvm/test/CodeGen/DirectX/sin.ll | 30 ++++++++--- llvm/test/CodeGen/DirectX/sinh.ll | 25 ++++++++-- llvm/test/CodeGen/DirectX/sqrt.ll | 25 ++++++++-- llvm/test/CodeGen/DirectX/tan.ll | 21 +++++++- llvm/test/CodeGen/DirectX/tanh.ll | 21 +++++++- llvm/test/CodeGen/DirectX/trunc.ll | 21 +++++++- llvm/tools/opt/optdriver.cpp | 1 + 27 files changed, 472 insertions(+), 108 deletions(-) create mode 100644 llvm/test/CodeGen/DirectX/scalar-store.ll create mode 100644 llvm/test/CodeGen/DirectX/scalarization_pass_order.ll diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index f8b9f42d9dbec..4352099d6dbb9 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -276,7 +276,7 @@ void initializeSafepointIRVerifierPass(PassRegistry &); void initializeSelectOptimizePass(PassRegistry &); void initializeScalarEvolutionWrapperPassPass(PassRegistry &); void initializeScalarizeMaskedMemIntrinLegacyPassPass(PassRegistry &); -void initializeScalarizerLegacyPassPass(PassRegistry&); +void initializeScalarizerLegacyPassPass(PassRegistry &); void initializeScavengerTestPass(PassRegistry &); void initializeScopedNoAliasAAWrapperPassPass(PassRegistry &); void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &); diff --git a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h index 7454f00c2ea35..4d2a1a2f889a3 100644 --- a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h +++ 
b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h @@ -19,12 +19,12 @@ #define LLVM_TRANSFORMS_SCALAR_SCALARIZER_H #include "llvm/IR/PassManager.h" -#include "llvm/Pass.h" #include namespace llvm { class Function; +class FunctionPass; struct ScalarizerPassOptions { // These options correspond 1:1 to cl::opt options defined in @@ -53,17 +53,8 @@ class ScalarizerPass : public PassInfoMixin { }; /// Create a legacy pass manager instance of the Scalarizer pass -FunctionPass *createScalarizerPass(); - -class ScalarizerLegacyPass : public FunctionPass { -public: - static char ID; - ScalarizerPassOptions Options; - ScalarizerLegacyPass(); - bool runOnFunction(Function &F) override; - void getAnalysisUsage(AnalysisUsage& AU) const override; -}; - +FunctionPass *createScalarizerPass( + const ScalarizerPassOptions &Options = ScalarizerPassOptions()); } #endif /* LLVM_TRANSFORMS_SCALAR_SCALARIZER_H */ diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index 32126612d1481..d98d0bfde04fc 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -24,7 +24,6 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Transforms/Scalar/Scalarizer.h" #define DEBUG_TYPE "dxil-op-lower" @@ -522,7 +521,6 @@ class DXILOpLoweringLegacy : public ModulePass { static char ID; // Pass identification. 
void getAnalysisUsage(llvm::AnalysisUsage &AU) const override { AU.addRequired(); - AU.addRequired(); AU.addRequired(); AU.addPreserved(); } diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp index 531e74eda04bc..f021e24ac7e26 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -26,9 +26,9 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/InitializePasses.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCSectionDXContainer.h" #include "llvm/MC/SectionKind.h" #include "llvm/MC/TargetRegistry.h" @@ -86,7 +86,10 @@ class DirectXPassConfig : public TargetPassConfig { FunctionPass *createTargetRegisterAllocator(bool) override { return nullptr; } void addCodeGenPrepare() override { addPass(createDXILIntrinsicExpansionLegacyPass()); - addPass(createScalarizerPass()); + ScalarizerPassOptions DxilScalarOptions; + // The only non-default option we need to set is ScalarizeLoadStore. 
+ DxilScalarOptions.ScalarizeLoadStore = true; + addPass(createScalarizerPass(DxilScalarOptions)); addPass(createDXILOpLoweringLegacyPass()); addPass(createDXILFinalizeLinkageLegacyPass()); addPass(createDXILTranslateMetadataLegacyPass()); diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp index ad441914428c4..01d24335df226 100644 --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -340,16 +340,25 @@ class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> { const bool ScalarizeLoadStore; const unsigned ScalarizeMinBits; }; + +class ScalarizerLegacyPass : public FunctionPass { +public: + static char ID; + ScalarizerPassOptions Options; + ScalarizerLegacyPass() : FunctionPass(ID), Options() {} + ScalarizerLegacyPass(const ScalarizerPassOptions &Options); + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + } // end anonymous namespace -ScalarizerLegacyPass::ScalarizerLegacyPass() : FunctionPass(ID) { - Options.ScalarizeVariableInsertExtract = true; - Options.ScalarizeLoadStore = true; -} +ScalarizerLegacyPass::ScalarizerLegacyPass(const ScalarizerPassOptions &Options) + : FunctionPass(ID), Options(Options) {} -void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage& AU) const { - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); +void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); } char ScalarizerLegacyPass::ID = 0; @@ -440,8 +449,8 @@ bool ScalarizerLegacyPass::runOnFunction(Function &F) { return Impl.visit(F); } -FunctionPass *llvm::createScalarizerPass() { - return new ScalarizerLegacyPass(); +FunctionPass *llvm::createScalarizerPass(const ScalarizerPassOptions &Options) { + return new ScalarizerLegacyPass(Options); } bool ScalarizerVisitor::visit(Function &F) { diff --git a/llvm/test/CodeGen/DirectX/acos.ll b/llvm/test/CodeGen/DirectX/acos.ll index 
cc32182395627..f4a10eb368ebf 100644 --- a/llvm/test/CodeGen/DirectX/acos.ll +++ b/llvm/test/CodeGen/DirectX/acos.ll @@ -1,20 +1,39 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for acos are generated for float and half. -define noundef float @tan_float(float noundef %a) { +define noundef float @acos_float(float noundef %a) { entry: ; CHECK:call float @dx.op.unary.f32(i32 15, float %{{.*}}) %elt.acos = call float @llvm.acos.f32(float %a) ret float %elt.acos } -define noundef half @tan_half(half noundef %a) { +define noundef half @acos_half(half noundef %a) { entry: ; CHECK:call half @dx.op.unary.f16(i32 15, half %{{.*}}) %elt.acos = call half @llvm.acos.f16(half %a) ret half %elt.acos } +define noundef <4 x float> @acos_float4(<4 x float> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.acos.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half @llvm.acos.f16(half) declare float @llvm.acos.f32(float) +declare <4 x float> @llvm.acos.v4f32(<4 x float>) diff --git 
a/llvm/test/CodeGen/DirectX/asin.ll b/llvm/test/CodeGen/DirectX/asin.ll index 06e3bab545a6a..bd948f593c24e 100644 --- a/llvm/test/CodeGen/DirectX/asin.ll +++ b/llvm/test/CodeGen/DirectX/asin.ll @@ -1,20 +1,39 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for asin are generated for float and half. -define noundef float @tan_float(float noundef %a) { +define noundef float @asin_float(float noundef %a) { entry: ; CHECK:call float @dx.op.unary.f32(i32 16, float %{{.*}}) %elt.asin = call float @llvm.asin.f32(float %a) ret float %elt.asin } -define noundef half @tan_half(half noundef %a) { +define noundef half @asin_half(half noundef %a) { entry: ; CHECK:call half @dx.op.unary.f16(i32 16, half %{{.*}}) %elt.asin = call half @llvm.asin.f16(half %a) ret half %elt.asin } +define noundef <4 x float> @asin_float4(<4 x float> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.asin.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half @llvm.asin.f16(half) declare float 
@llvm.asin.f32(float) +declare <4 x float> @llvm.asin.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/atan.ll b/llvm/test/CodeGen/DirectX/atan.ll index d7c4cd00e286a..58899ab49bdb8 100644 --- a/llvm/test/CodeGen/DirectX/atan.ll +++ b/llvm/test/CodeGen/DirectX/atan.ll @@ -1,20 +1,39 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for atan are generated for float and half. -define noundef float @tan_float(float noundef %a) { +define noundef float @atan_float(float noundef %a) { entry: ; CHECK:call float @dx.op.unary.f32(i32 17, float %{{.*}}) %elt.atan = call float @llvm.atan.f32(float %a) ret float %elt.atan } -define noundef half @tan_half(half noundef %a) { +define noundef half @atan_half(half noundef %a) { entry: ; CHECK:call half @dx.op.unary.f16(i32 17, half %{{.*}}) %elt.atan = call half @llvm.atan.f16(half %a) ret half %elt.atan } +define noundef <4 x float> @atan_float4(<4 x float> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.atan.v4f32(<4 x float> %a) + 
ret <4 x float> %2 +} + declare half @llvm.atan.f16(half) declare float @llvm.atan.f32(float) +declare <4 x float> @llvm.atan.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/ceil.ll b/llvm/test/CodeGen/DirectX/ceil.ll index 48bc5495a8e05..bd6e747c2fbf5 100644 --- a/llvm/test/CodeGen/DirectX/ceil.ll +++ b/llvm/test/CodeGen/DirectX/ceil.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for ceil are generated for float and half. @@ -16,5 +16,24 @@ entry: ret half %elt.ceil } +define noundef <4 x float> @ceil_float4(<4 x float> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half @llvm.ceil.f16(half) declare float @llvm.ceil.f32(float) +declare <4 x float> @llvm.ceil.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/cos.ll b/llvm/test/CodeGen/DirectX/cos.ll index 72f4bfca23f9d..85f5db25570b9 100644 --- a/llvm/test/CodeGen/DirectX/cos.ll +++ b/llvm/test/CodeGen/DirectX/cos.ll @@ -1,4 +1,4 
@@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for cos are generated for float and half. @@ -16,5 +16,24 @@ entry: ret half %elt.cos } +define noundef <4 x float> @cos_float4(<4 x float> noundef %a) #0 { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.cos.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half @llvm.cos.f16(half) declare float @llvm.cos.f32(float) +declare <4 x float> @llvm.cos.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/cosh.ll b/llvm/test/CodeGen/DirectX/cosh.ll index 91aaf893f3997..670a8a3eae086 100644 --- a/llvm/test/CodeGen/DirectX/cosh.ll +++ b/llvm/test/CodeGen/DirectX/cosh.ll @@ -1,20 +1,39 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for cosh are generated for float and half. 
-define noundef float @tan_float(float noundef %a) { +define noundef float @cosh_float(float noundef %a) { entry: ; CHECK:call float @dx.op.unary.f32(i32 18, float %{{.*}}) %elt.cosh = call float @llvm.cosh.f32(float %a) ret float %elt.cosh } -define noundef half @tan_half(half noundef %a) { +define noundef half @cosh_half(half noundef %a) { entry: ; CHECK:call half @dx.op.unary.f16(i32 18, half %{{.*}}) %elt.cosh = call half @llvm.cosh.f16(half %a) ret half %elt.cosh } +define noundef <4 x float> @cosh_float4(<4 x float> noundef %a) #0 { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.cosh.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half @llvm.cosh.f16(half) declare float @llvm.cosh.f32(float) +declare <4 x float> @llvm.cosh.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/exp2.ll b/llvm/test/CodeGen/DirectX/exp2.ll index b70b87dedc4d1..6d16af6a5413e 100644 --- a/llvm/test/CodeGen/DirectX/exp2.ll +++ b/llvm/test/CodeGen/DirectX/exp2.ll @@ -1,31 +1,39 @@ -; RUN: opt -S -dxil-op-lower < %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.7-library %s | FileCheck %s ; Make sure dxil operation function calls for exp2 
are generated for float and half. -; CHECK:call float @dx.op.unary.f32(i32 21, float %{{.*}}) -; CHECK:call half @dx.op.unary.f16(i32 21, half %{{.*}}) -target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" -target triple = "dxil-pc-shadermodel6.7-library" - -; Function Attrs: noinline nounwind optnone -define noundef float @exp2_float(float noundef %a) #0 { +define noundef float @exp2_float(float noundef %a) { entry: - %a.addr = alloca float, align 4 - store float %a, ptr %a.addr, align 4 - %0 = load float, ptr %a.addr, align 4 - %elt.exp2 = call float @llvm.exp2.f32(float %0) + ; CHECK:call float @dx.op.unary.f32(i32 21, float %{{.*}}) + %elt.exp2 = call float @llvm.exp2.f32(float %a) ret float %elt.exp2 } -; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn -declare float @llvm.exp2.f32(float) #1 - -; Function Attrs: noinline nounwind optnone -define noundef half @exp2_half(half noundef %a) #0 { +define noundef half @exp2_half(half noundef %a) { entry: - %a.addr = alloca half, align 2 - store half %a, ptr %a.addr, align 2 - %0 = load half, ptr %a.addr, align 2 - %elt.exp2 = call half @llvm.exp2.f16(half %0) + ; CHECK:call half @dx.op.unary.f16(i32 21, half %{{.*}}) + %elt.exp2 = call half @llvm.exp2.f16(half %a) ret half %elt.exp2 } + +define noundef <4 x float> @exp2_float4(<4 x float> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 21, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 21, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 21, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 21, float [[ee3]]) + ; CHECK: insertelement <4 x 
float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + +declare float @llvm.exp2.f32(float) +declare half @llvm.exp2.f16(half) +declare <4 x float> @llvm.exp2.v4f32(<4 x float> %a) diff --git a/llvm/test/CodeGen/DirectX/fabs.ll b/llvm/test/CodeGen/DirectX/fabs.ll index becbdf8d68aeb..6d903f1c927ac 100644 --- a/llvm/test/CodeGen/DirectX/fabs.ll +++ b/llvm/test/CodeGen/DirectX/fabs.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for abs are generated for float, half, and double. @@ -27,6 +27,26 @@ entry: ret double %elt.abs } +; CHECK-LABEL: fabs_float4 +define noundef <4 x float> @fabs_float4(<4 x float> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 6, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 6, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 6, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 6, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + 
declare half @llvm.fabs.f16(half) declare float @llvm.fabs.f32(float) declare double @llvm.fabs.f64(double) +declare <4 x float> @llvm.fabs.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/floor.ll b/llvm/test/CodeGen/DirectX/floor.ll index f79f160e51e3b..8ad81e1459a5b 100644 --- a/llvm/test/CodeGen/DirectX/floor.ll +++ b/llvm/test/CodeGen/DirectX/floor.ll @@ -2,19 +2,38 @@ ; Make sure dxil operation function calls for floor are generated for float and half. -define noundef float @floor_float(float noundef %a) #0 { +define noundef float @floor_float(float noundef %a) { entry: ; CHECK:call float @dx.op.unary.f32(i32 27, float %{{.*}}) %elt.floor = call float @llvm.floor.f32(float %a) ret float %elt.floor } -define noundef half @floor_half(half noundef %a) #0 { +define noundef half @floor_half(half noundef %a) { entry: ; CHECK:call half @dx.op.unary.f16(i32 27, half %{{.*}}) %elt.floor = call half @llvm.floor.f16(half %a) ret half %elt.floor } +define noundef <4 x float> @floor_float4(<4 x float> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.floor.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half @llvm.floor.f16(half) declare float 
@llvm.floor.f32(float) +declare <4 x float> @llvm.floor.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/isinf.ll b/llvm/test/CodeGen/DirectX/isinf.ll index 295776b089347..03a00c40498d5 100644 --- a/llvm/test/CodeGen/DirectX/isinf.ll +++ b/llvm/test/CodeGen/DirectX/isinf.ll @@ -1,25 +1,21 @@ ; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for isinf are generated for float and half. -; CHECK: call i1 @dx.op.isSpecialFloat.f32(i32 9, float %{{.*}}) -; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half %{{.*}}) -; Function Attrs: noinline nounwind optnone -define noundef i1 @isinf_float(float noundef %a) #0 { +define noundef i1 @isinf_float(float noundef %a) { entry: - %a.addr = alloca float, align 4 - store float %a, ptr %a.addr, align 4 - %0 = load float, ptr %a.addr, align 4 - %dx.isinf = call i1 @llvm.dx.isinf.f32(float %0) + ; CHECK: call i1 @dx.op.isSpecialFloat.f32(i32 9, float %{{.*}}) + %dx.isinf = call i1 @llvm.dx.isinf.f32(float %a) ret i1 %dx.isinf } -; Function Attrs: noinline nounwind optnone -define noundef i1 @isinf_half(half noundef %p0) #0 { +define noundef i1 @isinf_half(half noundef %a) { entry: - %p0.addr = alloca half, align 2 - store half %p0, ptr %p0.addr, align 2 - %0 = load half, ptr %p0.addr, align 2 - %dx.isinf = call i1 @llvm.dx.isinf.f16(half %0) + ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half %{{.*}}) + %dx.isinf = call i1 @llvm.dx.isinf.f16(half %a) ret i1 %dx.isinf } + + +declare i1 @llvm.dx.isinf.f16(half) +declare i1 @llvm.dx.isinf.f32(float) diff --git a/llvm/test/CodeGen/DirectX/reversebits.ll b/llvm/test/CodeGen/DirectX/reversebits.ll index 1ade57b40100f..b5530d0850e66 100644 --- a/llvm/test/CodeGen/DirectX/reversebits.ll +++ b/llvm/test/CodeGen/DirectX/reversebits.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower 
-mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for reversebits are generated for all integer types. @@ -26,6 +26,25 @@ entry: ret i64 %elt.bitreverse } +define noundef <4 x i32> @reversebits_int324(<4 x i32> noundef %a) #0 { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee3]]) + ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie3]], i64 3 + %2 = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a) + ret <4 x i32> %2 +} + declare i16 @llvm.bitreverse.i16(i16) declare i32 @llvm.bitreverse.i32(i32) declare i64 @llvm.bitreverse.i64(i64) +declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) diff --git a/llvm/test/CodeGen/DirectX/round.ll b/llvm/test/CodeGen/DirectX/round.ll index db953fb29c204..b08cbac5f42e9 100644 --- a/llvm/test/CodeGen/DirectX/round.ll +++ b/llvm/test/CodeGen/DirectX/round.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for round are generated for float and half.
@@ -18,5 +18,25 @@ entry: ret float %elt.roundeven } +define noundef <4 x float> @round_float4(<4 x float> noundef %a) #0 { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + + declare half @llvm.roundeven.f16(half) declare float @llvm.roundeven.f32(float) +declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/saturate.ll b/llvm/test/CodeGen/DirectX/saturate.ll index a8557351756f2..404cab7b665d0 100644 --- a/llvm/test/CodeGen/DirectX/saturate.ll +++ b/llvm/test/CodeGen/DirectX/saturate.ll @@ -2,7 +2,7 @@ ; Make sure the intrinsic dx.saturate is to appropriate DXIL op for half/float/double data types. 
; CHECK-LABEL: test_saturate_half -define noundef half @test_saturate_half(half noundef %p0) #0 { +define noundef half @test_saturate_half(half noundef %p0) { entry: ; CHECK: call half @dx.op.unary.f16(i32 7, half %p0) %hlsl.saturate = call half @llvm.dx.saturate.f16(half %p0) @@ -10,11 +10,8 @@ entry: ret half %hlsl.saturate } -; Function Attrs: nocallback nofree nosync nounwind willreturn -declare half @llvm.dx.saturate.f16(half) #1 - ; CHECK-LABEL: test_saturate_float -define noundef float @test_saturate_float(float noundef %p0) #0 { +define noundef float @test_saturate_float(float noundef %p0) { entry: ; CHECK: call float @dx.op.unary.f32(i32 7, float %p0) %hlsl.saturate = call float @llvm.dx.saturate.f32(float %p0) @@ -22,11 +19,8 @@ entry: ret float %hlsl.saturate } -; Function Attrs: nocallback nofree nosync nounwind willreturn -declare float @llvm.dx.saturate.f32(float) #1 - ; CHECK-LABEL: test_saturate_double -define noundef double @test_saturate_double(double noundef %p0) #0 { +define noundef double @test_saturate_double(double noundef %p0) { entry: ; CHECK: call double @dx.op.unary.f64(i32 7, double %p0) %hlsl.saturate = call double @llvm.dx.saturate.f64(double %p0) @@ -34,6 +28,7 @@ entry: ret double %hlsl.saturate } -; Function Attrs: nocallback nofree nosync nounwind willreturn -declare double @llvm.dx.saturate.f64(double) #1 +declare half @llvm.dx.saturate.f16(half) +declare float @llvm.dx.saturate.f32(float) +declare double @llvm.dx.saturate.f64(double) diff --git a/llvm/test/CodeGen/DirectX/scalar-store.ll b/llvm/test/CodeGen/DirectX/scalar-store.ll new file mode 100644 index 0000000000000..b970a2842e5a8 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/scalar-store.ll @@ -0,0 +1,17 @@ +; RUN: opt -S -scalarizer -scalarize-load-store -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s + +@"sharedData" = local_unnamed_addr addrspace(3) global 
[2 x <3 x float>] zeroinitializer, align 16 +; CHECK-LABEL: store_test +define void @store_test () local_unnamed_addr { + ; CHECK: store float 1.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}} + ; CHECK: store float 2.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}} + ; CHECK: store float 3.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}} + ; CHECK: store float 2.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}} + ; CHECK: store float 4.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}} + ; CHECK: store float 6.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}} + + store <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, ptr addrspace(3) @"sharedData", align 16 + store <3 x float> <float 2.000000e+00, float 4.000000e+00, float 6.000000e+00>, ptr addrspace(3) getelementptr inbounds (i8, ptr addrspace(3) @"sharedData", i32 16), align 16 + ret void + } diff --git a/llvm/test/CodeGen/DirectX/scalarization_pass_order.ll b/llvm/test/CodeGen/DirectX/scalarization_pass_order.ll new file mode 100644 index 0000000000000..f33c2a7ccdcef --- /dev/null +++ b/llvm/test/CodeGen/DirectX/scalarization_pass_order.ll @@ -0,0 +1,45 @@ +; RUN: llc -mtriple=dxil-pc-shadermodel6.3-library -debug-pass=Structure < %s -o /dev/null 2>&1 | \ +; RUN: grep -v "Verify generated machine code" | FileCheck %s +; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s --check-prefixes=CHECKIR +; CHECK-LABEL: Pass Arguments: +; CHECK-NEXT: Target Library Information +; CHECK-NEXT: ModulePass Manager +; CHECK-NEXT: DXIL Intrinsic Expansion +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Scalarize vector operations +; CHECK-NEXT: DXIL Intrinsic Expansion +; CHECK-NEXT: DXIL Resource analysis +; CHECK-NEXT: DXIL Op Lowering +; CHECK-NEXT: DXIL Finalize Linkage +; CHECK-NEXT: DXIL Resource analysis +; CHECK-NEXT: DXIL resource Information +; CHECK-NEXT: DXIL Shader Flag Analysis +; CHECK-NEXT: DXIL Translate Metadata +; CHECK-NEXT: DXIL Prepare Module +; CHECK-NEXT: DXIL Resource analysis +; CHECK-NEXT: DXIL Metadata
Pretty Printer +; CHECK-NEXT: Print Module IR +; CHECKIR: target triple = "dxilv1.3-pc-shadermodel6.3-library" +; CHECKIR-LABEL: cos_sin_float_test +define noundef <4 x float> @cos_sin_float_test(<4 x float> noundef %a) { + ; CHECKIR: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECKIR: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee0]]) + ; CHECKIR: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECKIR: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee1]]) + ; CHECKIR: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECKIR: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee2]]) + ; CHECKIR: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECKIR: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee3]]) + ; CHECKIR: [[ie4:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie0]]) + ; CHECKIR: [[ie5:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie1]]) + ; CHECKIR: [[ie6:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie2]]) + ; CHECKIR: [[ie7:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie3]]) + ; CHECKIR: insertelement <4 x float> poison, float [[ie4]], i64 0 + ; CHECKIR: insertelement <4 x float> %{{.*}}, float [[ie5]], i64 1 + ; CHECKIR: insertelement <4 x float> %{{.*}}, float [[ie6]], i64 2 + ; CHECKIR: insertelement <4 x float> %{{.*}}, float [[ie7]], i64 3 + %2 = tail call <4 x float> @llvm.sin.v4f32(<4 x float> %a) + %3 = tail call <4 x float> @llvm.cos.v4f32(<4 x float> %2) + ret <4 x float> %3 +} diff --git a/llvm/test/CodeGen/DirectX/sin.ll b/llvm/test/CodeGen/DirectX/sin.ll index 79143bfa0a529..a0b0d2675e3b7 100644 --- a/llvm/test/CodeGen/DirectX/sin.ll +++ b/llvm/test/CodeGen/DirectX/sin.ll @@ -1,25 +1,39 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for sin are generated for 
float and half. -; CHECK:call float @dx.op.unary.f32(i32 13, float %{{.*}}) -; CHECK:call half @dx.op.unary.f16(i32 13, half %{{.*}}) -; Function Attrs: noinline nounwind optnone -define noundef float @sin_float(float noundef %a) #0 { +define noundef float @sin_float(float noundef %a) { entry: + ; CHECK:call float @dx.op.unary.f32(i32 13, float %{{.*}}) %1 = call float @llvm.sin.f32(float %a) ret float %1 } -; Function Attrs: noinline nounwind optnone -define noundef half @sin_half(half noundef %a) #0 { +define noundef half @sin_half(half noundef %a) { entry: + ; CHECK:call half @dx.op.unary.f16(i32 13, half %{{.*}}) %1 = call half @llvm.sin.f16(half %a) ret half %1 } -define noundef <4 x float> @sin_float4(<4 x float> noundef %a) #0 { +define noundef <4 x float> @sin_float4(<4 x float> noundef %a) { entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 %2 = call <4 x float> @llvm.sin.v4f32(<4 x float> %a) ret <4 x float> %2 } + +declare half @llvm.sin.f16(half) +declare float @llvm.sin.f32(float) +declare <4 x float> @llvm.sin.v4f32(<4 x float>) \ No newline at end of file diff --git a/llvm/test/CodeGen/DirectX/sinh.ll b/llvm/test/CodeGen/DirectX/sinh.ll index d4d3eda9eccb6..deba726e8d9ad 100644 --- 
a/llvm/test/CodeGen/DirectX/sinh.ll +++ b/llvm/test/CodeGen/DirectX/sinh.ll @@ -1,20 +1,39 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for sinh are generated for float and half. -define noundef float @tan_float(float noundef %a) { +define noundef float @sinh_float(float noundef %a) { entry: ; CHECK:call float @dx.op.unary.f32(i32 19, float %{{.*}}) %elt.sinh = call float @llvm.sinh.f32(float %a) ret float %elt.sinh } -define noundef half @tan_half(half noundef %a) { +define noundef half @sinh_half(half noundef %a) { entry: ; CHECK:call half @dx.op.unary.f16(i32 19, half %{{.*}}) %elt.sinh = call half @llvm.sinh.f16(half %a) ret half %elt.sinh } +define noundef <4 x float> @sinh_float4(<4 x float> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.sinh.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half @llvm.sinh.f16(half) declare float @llvm.sinh.f32(float) +declare <4 x float> @llvm.sinh.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/sqrt.ll 
b/llvm/test/CodeGen/DirectX/sqrt.ll index 792fbc8d0614d..e2955b4efa2ec 100644 --- a/llvm/test/CodeGen/DirectX/sqrt.ll +++ b/llvm/test/CodeGen/DirectX/sqrt.ll @@ -1,20 +1,39 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for sqrt are generated for float and half. -define noundef float @sqrt_float(float noundef %a) #0 { +define noundef float @sqrt_float(float noundef %a) { entry: ; CHECK:call float @dx.op.unary.f32(i32 24, float %{{.*}}) %elt.sqrt = call float @llvm.sqrt.f32(float %a) ret float %elt.sqrt } -define noundef half @sqrt_half(half noundef %a) #0 { +define noundef half @sqrt_half(half noundef %a) { entry: ; CHECK:call half @dx.op.unary.f16(i32 24, half %{{.*}}) %elt.sqrt = call half @llvm.sqrt.f16(half %a) ret half %elt.sqrt } +define noundef <4 x float> @sqrt_float4(<4 x float> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half @llvm.sqrt.f16(half) declare float @llvm.sqrt.f32(float) +declare <4 x float> 
@llvm.sqrt.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/tan.ll b/llvm/test/CodeGen/DirectX/tan.ll index 6f7beb592339a..cf6965a95c04e 100644 --- a/llvm/test/CodeGen/DirectX/tan.ll +++ b/llvm/test/CodeGen/DirectX/tan.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for tan are generated for float and half. @@ -16,5 +16,24 @@ entry: ret half %elt.tan } +define noundef <4 x float> @tan_float4(<4 x float> noundef %a) #0 { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.tan.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half @llvm.tan.f16(half) declare float @llvm.tan.f32(float) +declare <4 x float> @llvm.tan.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/tanh.ll b/llvm/test/CodeGen/DirectX/tanh.ll index e6642d9a74c8a..54ec6f29fa0c3 100644 --- a/llvm/test/CodeGen/DirectX/tanh.ll +++ b/llvm/test/CodeGen/DirectX/tanh.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer 
-dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for tanh are generated for float and half. @@ -16,5 +16,24 @@ entry: ret half %elt.tanh } +define noundef <4 x float> @tanh_float4(<4 x float> noundef %a) #0 { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.tanh.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half @llvm.tanh.f16(half) declare float @llvm.tanh.f32(float) +declare <4 x float> @llvm.tanh.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/trunc.ll b/llvm/test/CodeGen/DirectX/trunc.ll index f00b737da4dbb..6d9c222595c44 100644 --- a/llvm/test/CodeGen/DirectX/trunc.ll +++ b/llvm/test/CodeGen/DirectX/trunc.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for trunc are generated for float and half. 
@@ -16,5 +16,24 @@ entry: ret half %elt.trunc } +define noundef <4 x float> @trunc_float4(<4 x float> noundef %a) #0 { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half @llvm.trunc.f16(half) declare float @llvm.trunc.f32(float) +declare <4 x float> @llvm.trunc.v4f32(<4 x float>) diff --git a/llvm/tools/opt/optdriver.cpp b/llvm/tools/opt/optdriver.cpp index 1bdfa71830ba2..c5bc7b43e0331 100644 --- a/llvm/tools/opt/optdriver.cpp +++ b/llvm/tools/opt/optdriver.cpp @@ -375,6 +375,7 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) { "fix-irreducible", "expand-large-fp-convert", "callbrprepare", + "scalarizer", }; for (const auto &P : PassNamePrefix) if (Pass.starts_with(P)) From 070ee3688125e92d150707af0341ac6df47871d8 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Fri, 6 Sep 2024 13:42:24 -0400 Subject: [PATCH 3/3] address pr comments --- .../Target/DirectX/DirectXTargetMachine.cpp | 1 - llvm/test/CodeGen/DirectX/llc-pipeline.ll | 25 +++++++++++ .../DirectX/scalarization_pass_order.ll | 45 ------------------- .../CodeGen/DirectX/scalarize-two-calls.ll | 25 +++++++++++ llvm/test/CodeGen/DirectX/sin.ll | 2 
+- 5 files changed, 51 insertions(+), 47 deletions(-) create mode 100644 llvm/test/CodeGen/DirectX/llc-pipeline.ll delete mode 100644 llvm/test/CodeGen/DirectX/scalarization_pass_order.ll create mode 100644 llvm/test/CodeGen/DirectX/scalarize-two-calls.ll diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp index f021e24ac7e26..606022a9835f0 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -87,7 +87,6 @@ class DirectXPassConfig : public TargetPassConfig { void addCodeGenPrepare() override { addPass(createDXILIntrinsicExpansionLegacyPass()); ScalarizerPassOptions DxilScalarOptions; - // The only non-default option we need to set is ScalarizeLoadStore. DxilScalarOptions.ScalarizeLoadStore = true; addPass(createScalarizerPass(DxilScalarOptions)); addPass(createDXILOpLoweringLegacyPass()); diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll new file mode 100644 index 0000000000000..36610bef719bf --- /dev/null +++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll @@ -0,0 +1,25 @@ +; RUN: llc -mtriple=dxil-pc-shadermodel6.3-library -debug-pass=Structure < %s -o /dev/null 2>&1 | \ +; RUN: grep -v "Verify generated machine code" | FileCheck %s + +; REQUIRES: asserts + +; CHECK-LABEL: Pass Arguments: +; CHECK-NEXT: Target Library Information +; CHECK-NEXT: ModulePass Manager +; CHECK-NEXT: DXIL Intrinsic Expansion +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Scalarize vector operations +; CHECK-NEXT: DXIL Intrinsic Expansion +; CHECK-NEXT: DXIL Resource analysis +; CHECK-NEXT: DXIL Op Lowering +; CHECK-NEXT: DXIL Finalize Linkage +; CHECK-NEXT: DXIL Resource analysis +; CHECK-NEXT: DXIL resource Information +; CHECK-NEXT: DXIL Shader Flag Analysis +; CHECK-NEXT: DXIL Translate Metadata +; CHECK-NEXT: DXIL Prepare Module +; CHECK-NEXT: DXIL Resource analysis 
+; CHECK-NEXT: DXIL Metadata Pretty Printer +; CHECK-NEXT: Print Module IR + diff --git a/llvm/test/CodeGen/DirectX/scalarization_pass_order.ll b/llvm/test/CodeGen/DirectX/scalarization_pass_order.ll deleted file mode 100644 index f33c2a7ccdcef..0000000000000 --- a/llvm/test/CodeGen/DirectX/scalarization_pass_order.ll +++ /dev/null @@ -1,45 +0,0 @@ -; RUN: llc -mtriple=dxil-pc-shadermodel6.3-library -debug-pass=Structure < %s -o /dev/null 2>&1 | \ -; RUN: grep -v "Verify generated machine code" | FileCheck %s -; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s --check-prefixes=CHECKIR -; CHECK-LABEL: Pass Arguments: -; CHECK-NEXT: Target Library Information -; CHECK-NEXT: ModulePass Manager -; CHECK-NEXT: DXIL Intrinsic Expansion -; CHECK-NEXT: FunctionPass Manager -; CHECK-NEXT: Dominator Tree Construction -; CHECK-NEXT: Scalarize vector operations -; CHECK-NEXT: DXIL Intrinsic Expansion -; CHECK-NEXT: DXIL Resource analysis -; CHECK-NEXT: DXIL Op Lowering -; CHECK-NEXT: DXIL Finalize Linkage -; CHECK-NEXT: DXIL Resource analysis -; CHECK-NEXT: DXIL resource Information -; CHECK-NEXT: DXIL Shader Flag Analysis -; CHECK-NEXT: DXIL Translate Metadata -; CHECK-NEXT: DXIL Prepare Module -; CHECK-NEXT: DXIL Resource analysis -; CHECK-NEXT: DXIL Metadata Pretty Printer -; CHECK-NEXT: Print Module IR -; CHECKIR: target triple = "dxilv1.3-pc-shadermodel6.3-library" -; CHECKIR-LABEL: cos_sin_float_test -define noundef <4 x float> @cos_sin_float_test(<4 x float> noundef %a) { - ; CHECKIR: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 - ; CHECKIR: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee0]]) - ; CHECKIR: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 - ; CHECKIR: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee1]]) - ; CHECKIR: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 - ; CHECKIR: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee2]]) - ; CHECKIR: [[ee3:%.*]] = extractelement 
<4 x float> %a, i64 3 - ; CHECKIR: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee3]]) - ; CHECKIR: [[ie4:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie0]]) - ; CHECKIR: [[ie5:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie1]]) - ; CHECKIR: [[ie6:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie2]]) - ; CHECKIR: [[ie7:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie3]]) - ; CHECKIR: insertelement <4 x float> poison, float [[ie4]], i64 0 - ; CHECKIR: insertelement <4 x float> %{{.*}}, float [[ie5]], i64 1 - ; CHECKIR: insertelement <4 x float> %{{.*}}, float [[ie6]], i64 2 - ; CHECKIR: insertelement <4 x float> %{{.*}}, float [[ie7]], i64 3 - %2 = tail call <4 x float> @llvm.sin.v4f32(<4 x float> %a) - %3 = tail call <4 x float> @llvm.cos.v4f32(<4 x float> %2) - ret <4 x float> %3 -} diff --git a/llvm/test/CodeGen/DirectX/scalarize-two-calls.ll b/llvm/test/CodeGen/DirectX/scalarize-two-calls.ll new file mode 100644 index 0000000000000..a14c1de5cc420 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/scalarize-two-calls.ll @@ -0,0 +1,25 @@ +; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s + +; CHECK: target triple = "dxilv1.3-pc-shadermodel6.3-library" +; CHECK-LABEL: cos_sin_float_test +define noundef <4 x float> @cos_sin_float_test(<4 x float> noundef %a) { + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee3]]) + ; CHECK: [[ie4:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie0]]) + ; CHECK: 
[[ie5:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie1]]) + ; CHECK: [[ie6:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie2]]) + ; CHECK: [[ie7:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie4]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie5]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie6]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie7]], i64 3 + %2 = tail call <4 x float> @llvm.sin.v4f32(<4 x float> %a) + %3 = tail call <4 x float> @llvm.cos.v4f32(<4 x float> %2) + ret <4 x float> %3 +} diff --git a/llvm/test/CodeGen/DirectX/sin.ll b/llvm/test/CodeGen/DirectX/sin.ll index a0b0d2675e3b7..ac6b217be80e7 100644 --- a/llvm/test/CodeGen/DirectX/sin.ll +++ b/llvm/test/CodeGen/DirectX/sin.ll @@ -36,4 +36,4 @@ entry: declare half @llvm.sin.f16(half) declare float @llvm.sin.f32(float) -declare <4 x float> @llvm.sin.v4f32(<4 x float>) \ No newline at end of file +declare <4 x float> @llvm.sin.v4f32(<4 x float>)