@@ -343,6 +343,12 @@ static cl::opt<bool> EnableStructurizerWorkarounds(
343
343
cl::desc (" Enable workarounds for the StructurizeCFG pass" ), cl::init(true ),
344
344
cl::Hidden);
345
345
346
// Hidden boolean command-line option `amdgpu-enable-sw-lower-lds`; it is
// initialized to true. When set, the software LDS lowering pass is scheduled:
// AMDGPUSwLowerLDSPass in registerPassBuilderCallbacks and the legacy
// createAMDGPUSwLowerLDSLegacyPass in AMDGPUPassConfig::addIRPasses, in both
// places immediately ahead of the LowerModuleLDS pass.
+ static cl::opt<bool >
347
+ EnableSwLowerLDS (" amdgpu-enable-sw-lower-lds" ,
348
+ cl::desc (" Enable lowering of lds to global memory pass "
349
+ " and asan instrument resulting IR." ),
350
+ cl::init(true ), cl::Hidden);
351
+
346
352
static cl::opt<bool , true > EnableLowerModuleLDS (
347
353
" amdgpu-enable-lower-module-lds" , cl::desc(" Enable lower module lds pass" ),
348
354
cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true ),
@@ -765,6 +771,8 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
765
771
// We want to support the -lto-partitions=N option as "best effort".
766
772
// For that, we need to lower LDS earlier in the pipeline before the
767
773
// module is partitioned for codegen.
774
+ if (EnableSwLowerLDS)
775
+ PM.addPass (AMDGPUSwLowerLDSPass (*this ));
768
776
if (EnableLowerModuleLDS)
769
777
PM.addPass (AMDGPULowerModuleLDSPass (*this ));
770
778
if (EnableAMDGPUAttributor && Level != OptimizationLevel::O0)
@@ -1071,6 +1079,10 @@ void AMDGPUPassConfig::addIRPasses() {
1071
1079
// Replace OpenCL enqueued block function pointers with global variables.
1072
1080
addPass (createAMDGPUOpenCLEnqueuedBlockLoweringPass ());
1073
1081
1082
+ // Lower LDS accesses to global memory if address sanitizer is enabled.
1083
+ if (EnableSwLowerLDS)
1084
+ addPass (createAMDGPUSwLowerLDSLegacyPass (&TM));
1085
+
1074
1086
// Runs before PromoteAlloca so the latter can account for function uses
1075
1087
if (EnableLowerModuleLDS) {
1076
1088
addPass (createAMDGPULowerModuleLDSLegacyPass (&TM));
0 commit comments