Disable loop pass pipeline in SYCL optimization mode.

bader · bader · commit 0d7abb7054f5 · 2020-09-11T12:07:55.000+03:00
This change appears to mask, rather than fix, the failures in the sub-group broadcast tests on CPU, so their `XFAIL: cpu` markers are removed.
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -411,47 +411,50 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
   MPM.add(createCFGSimplificationPass());      // Merge & remove BBs
   MPM.add(createReassociatePass());           // Reassociate expressions
 
-  // Begin the loop pass pipeline.
-  if (EnableSimpleLoopUnswitch) {
-    // The simple loop unswitch pass relies on separate cleanup passes. Schedule
-    // them first so when we re-process a loop they run before other loop
-    // passes.
-    MPM.add(createLoopInstSimplifyPass());
-    MPM.add(createLoopSimplifyCFGPass());
-  }
-  // Rotate Loop - disable header duplication at -Oz
-  MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
-  // TODO: Investigate promotion cap for O1.
-  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
-  if (EnableSimpleLoopUnswitch)
-    MPM.add(createSimpleLoopUnswitchLegacyPass());
-  else
-    MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
-  // FIXME: We break the loop pass pipeline here in order to do full
-  // simplify-cfg. Eventually loop-simplifycfg should be enhanced to replace the
-  // need for this.
-  MPM.add(createCFGSimplificationPass());
-  MPM.add(createInstructionCombiningPass());
-  // We resume loop passes creating a second loop pipeline here.
-  // TODO: this pass hurts performance due to promotions of induction variables
-  // from 32-bit value to 64-bit values. I assume it's because SPIR is a virtual
-  // target with unlimited # of registers and pass doesn't take into account
-  // that on real HW this promotion is not beneficial.
-  if (!SYCLOptimizationMode)
-    MPM.add(createIndVarSimplifyPass());      // Canonicalize indvars
-  MPM.add(createLoopIdiomPass());             // Recognize idioms like memset.
-  addExtensionsToPM(EP_LateLoopOptimizations, MPM);
-  MPM.add(createLoopDeletionPass());          // Delete dead loops
-
-  if (EnableLoopInterchange)
-    MPM.add(createLoopInterchangePass()); // Interchange loops
+  // Do not run loop pass pipeline in "SYCL Optimization Mode". Loop
+  // optimizations rely on TTI, which is not accurate for SPIR target.
+  if (!SYCLOptimizationMode) {
+    // Begin the loop pass pipeline.
+    if (EnableSimpleLoopUnswitch) {
+      // The simple loop unswitch pass relies on separate cleanup passes.
+      // Schedule them first so when we re-process a loop they run before other
+      // loop passes.
+      MPM.add(createLoopInstSimplifyPass());
+      MPM.add(createLoopSimplifyCFGPass());
+    }
+    // Rotate Loop - disable header duplication at -Oz
+    MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
+    // TODO: Investigate promotion cap for O1.
+    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+    if (EnableSimpleLoopUnswitch)
+      MPM.add(createSimpleLoopUnswitchLegacyPass());
+    else
+      MPM.add(
+          createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
+    // FIXME: We break the loop pass pipeline here in order to do full
+    // simplify-cfg. Eventually loop-simplifycfg should be enhanced to replace
+    // the need for this.
+    MPM.add(createCFGSimplificationPass());
+    MPM.add(createInstructionCombiningPass());
+    // We resume loop passes creating a second loop pipeline here.
+    // TODO: this pass hurts performance due to promotions of induction
+    // variables from 32-bit value to 64-bit values. I assume it's because SPIR
+    // is a virtual target with unlimited # of registers and pass doesn't take
+    // into account that on real HW this promotion is not beneficial.
+    MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
+    MPM.add(createLoopIdiomPass());      // Recognize idioms like memset.
+    addExtensionsToPM(EP_LateLoopOptimizations, MPM);
+    MPM.add(createLoopDeletionPass()); // Delete dead loops
+
+    if (EnableLoopInterchange)
+      MPM.add(createLoopInterchangePass()); // Interchange loops
 
-  // Unroll small loops
-  if (!SYCLOptimizationMode) // TODO: disable the whole loop pass pipeline?
+    // Unroll small loops
     MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
                                        ForgetAllSCEVInLoopUnroll));
-  addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
-  // This ends the loop pass pipelines.
+    addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
+    // This ends the loop pass pipelines.
+  }
 
   if (OptLevel > 1) {
     MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
diff --git a/sycl/test/sub_group/broadcast.cpp b/sycl/test/sub_group/broadcast.cpp
@@ -1,4 +1,3 @@
-// XFAIL: cpu
 // UNSUPPORTED: cuda
 // CUDA compilation and runtime do not yet support sub-groups.
 
diff --git a/sycl/test/sub_group/broadcast_fp64.cpp b/sycl/test/sub_group/broadcast_fp64.cpp
@@ -1,4 +1,3 @@
-// XFAIL: cpu
 // UNSUPPORTED: cuda
 // CUDA compilation and runtime do not yet support sub-groups.
 

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,3 @@`
`1`		`-// XFAIL: cpu`
`2`	`1`	`// UNSUPPORTED: cuda`
`3`	`2`	`// CUDA compilation and runtime do not yet support sub-groups.`
`4`	`3`