diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index c47bf377f0ad5..2793f6bc30341 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4106,6 +4106,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (!Args.hasFlag(options::OPT_fsycl_std_optimizations, options::OPT_fno_sycl_std_optimizations, true)) CmdArgs.push_back("-fno-sycl-std-optimizations"); + else if (RawTriple.isSPIR()) { + // Set `sycl-opt` option to configure LLVM passes for SPIR target + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-sycl-opt"); + } // Pass the triple of host when doing SYCL auto AuxT = llvm::Triple(llvm::sys::getProcessTriple()); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 0062532e25dad..bd67026106d07 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -68,6 +68,10 @@ static cl::opt<bool> RunLoopRerolling("reroll-loops", cl::Hidden, cl::desc("Run the loop rerolling pass")); +static cl::opt<bool> + SYCLOptimizationMode("sycl-opt", cl::init(false), cl::Hidden, + cl::desc("Enable SYCL optimization mode.")); + static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass")); @@ -429,7 +433,12 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createCFGSimplificationPass()); MPM.add(createInstructionCombiningPass()); // We resume loop passes creating a second loop pipeline here. - MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + // TODO: this pass hurts performance due to promotion of induction variables + // from 32-bit to 64-bit values. I assume it's because SPIR is a virtual + // target with an unlimited number of registers and the pass doesn't take + // into account that on real HW this promotion is not beneficial. 
+ if (!SYCLOptimizationMode) + MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. addExtensionsToPM(EP_LateLoopOptimizations, MPM); MPM.add(createLoopDeletionPass()); // Delete dead loops