Skip to content

Conversation

@shiltian
Copy link
Contributor

@shiltian shiltian commented Jul 2, 2025

This patch introduces AAAMDGPUUniformArgument, which can infer the inreg
function argument attribute. The idea is that, for a function argument, if the
corresponding call site arguments are always uniform, we can mark it as inreg
and thus pass it in an SGPR.

In addition, this AA can propagate the inreg attribute where feasible.

Copy link
Contributor Author

shiltian commented Jul 2, 2025

This stack of pull requests is managed by Graphite. Learn more about stacking.

@llvmbot
Copy link
Member

llvmbot commented Jul 2, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Shilei Tian (shiltian)

Changes

This patch introduces AAAMDGPUUniformArgument, which can infer the inreg
function argument attribute. The idea is that, for a function argument, if the
corresponding call site arguments are always uniform, we can mark it as inreg
and thus pass it in an SGPR.

In addition, this AA can propagate the inreg attribute where feasible.


Full diff: https://github.com/llvm/llvm-project/pull/146720.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+115-1)
  • (added) llvm/test/CodeGen/AMDGPU/aa-inreg-inference.ll (+74)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index fef22c81c9391..fa54c80490602 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -14,6 +14,7 @@
 #include "GCNSubtarget.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/Analysis/CycleAnalysis.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsR600.h"
@@ -1295,6 +1296,114 @@ struct AAAMDGPUNoAGPR
 
 const char AAAMDGPUNoAGPR::ID = 0;
 
+/// Abstract attribute with a boolean state. The assumed state is reported as
+/// "uniform" when true and "divergent" when false (see getAsStr()).
+struct AAAMDGPUUniform : public StateWrapper<BooleanState, AbstractAttribute> {
+  using Base = StateWrapper<BooleanState, AbstractAttribute>;
+  /// Forward \p IRP to the state wrapper; \p A is not used here.
+  AAAMDGPUUniform(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAAMDGPUUniform &createForPosition(const IRPosition &IRP,
+                                            Attributor &A);
+
+  /// See AbstractAttribute::getName()
+  StringRef getName() const override { return "AAAMDGPUUniform"; }
+
+  /// Human-readable form of the assumed state: "uniform" or "divergent".
+  const std::string getAsStr(Attributor *A) const override {
+    return getAssumed() ? "uniform" : "divergent";
+  }
+
+  /// No statistics are tracked for this attribute.
+  void trackStatistics() const override {}
+
+  /// See AbstractAttribute::getIdAddr()
+  const char *getIdAddr() const override { return &ID; }
+
+  /// This function should return true if the type of the \p AA is
+  /// AAAMDGPUUniform
+  static bool classof(const AbstractAttribute *AA) {
+    return (AA->getIdAddr() == &ID);
+  }
+
+  /// Unique ID (due to the unique address)
+  static const char ID;
+};
+
+const char AAAMDGPUUniform::ID = 0;
+
+/// This AA infers the inreg attribute for a function argument. The state is
+/// decided from the operands passed at the call sites of the argument's
+/// function (see updateImpl()) and, if it survives, manifest() attaches inreg.
+struct AAAMDGPUUniformArgument : public AAAMDGPUUniform {
+  AAAMDGPUUniformArgument(const IRPosition &IRP, Attributor &A)
+      : AAAMDGPUUniform(IRP, A) {}
+
+  /// Settle the state up front when the answer is already known:
+  ///  - an argument that already carries inreg is fixed optimistically;
+  ///  - an argument of an entry function is fixed according to
+  ///    AMDGPU::isArgPassedInSGPR().
+  /// Otherwise no fixpoint is indicated here.
+  void initialize(Attributor &A) override {
+    Argument *Arg = getAssociatedArgument();
+    CallingConv::ID CC = Arg->getParent()->getCallingConv();
+    if (Arg->hasAttribute(Attribute::InReg)) {
+      indicateOptimisticFixpoint();
+      return;
+    }
+
+    if (AMDGPU::isEntryFunctionCC(CC)) {
+      // isArgPassedInSGPR is only consulted for entry function arguments here,
+      // so even if SGPRs are later used to pass non-uniform i1 arguments, that
+      // will not affect this check.
+      if (AMDGPU::isArgPassedInSGPR(Arg))
+        indicateOptimisticFixpoint();
+      else
+        indicatePessimisticFixpoint();
+    }
+  }
+
+  /// Visit the call sites of the associated function via
+  /// Attributor::checkForAllCallSites (all call sites required). If that check
+  /// fails, the state is fixed pessimistically.
+  ChangeStatus updateImpl(Attributor &A) override {
+    unsigned ArgNo = getAssociatedArgument()->getArgNo();
+    TargetMachine &TM =
+        static_cast<AMDGPUInformationCache &>(A.getInfoCache()).TM;
+
+    // Per-call-site predicate. If the operand is itself an Argument, ask the
+    // AAAMDGPUUniform of that argument (REQUIRED dependence) and accept it
+    // while its state is valid; otherwise defer to TTI.isAlwaysUniform().
+    auto isUniform = [&](AbstractCallSite ACS) -> bool {
+      CallBase *CB = ACS.getInstruction();
+      // NOTE(review): the operand is indexed directly on the call instruction;
+      // for callback call sites the callee argument number may not match the
+      // call operand number -- confirm whether ACS.getCallArgOperand(ArgNo)
+      // is needed here.
+      Value *V = CB->getArgOperand(ArgNo);
+      if (auto *Arg = dyn_cast<Argument>(V)) {
+        auto *AA = A.getOrCreateAAFor<AAAMDGPUUniform>(
+            IRPosition::argument(*Arg), this, DepClassTy::REQUIRED);
+        return AA && AA->isValidState();
+      }
+      // NOTE(review): a TargetTransformInfo is constructed for every call site
+      // visited; presumably this could be cached per caller -- verify cost.
+      TargetTransformInfo TTI = TM.getTargetTransformInfo(*CB->getFunction());
+      return TTI.isAlwaysUniform(V);
+    };
+
+    // NOTE(review): the flag starts as true, so unless checkForAllCallSites
+    // clears it the optimistic-fixpoint branch below is never taken -- confirm
+    // this is intended.
+    bool UsedAssumedInformation = true;
+    if (!A.checkForAllCallSites(isUniform, *this, /*RequireAllCallSites=*/true,
+                                UsedAssumedInformation))
+      return indicatePessimisticFixpoint();
+
+    if (!UsedAssumedInformation)
+      return indicateOptimisticFixpoint();
+
+    return ChangeStatus::UNCHANGED;
+  }
+
+  /// Attach inreg to the associated argument. Nothing is changed when the
+  /// argument already has inreg or when its function uses an entry function
+  /// calling convention.
+  ChangeStatus manifest(Attributor &A) override {
+    Argument *Arg = getAssociatedArgument();
+    // If the argument already has the inreg attribute, leave it untouched.
+    if (Arg->hasAttribute(Attribute::InReg))
+      return ChangeStatus::UNCHANGED;
+    if (AMDGPU::isEntryFunctionCC(Arg->getParent()->getCallingConv()))
+      return ChangeStatus::UNCHANGED;
+    LLVMContext &Ctx = Arg->getContext();
+    return A.manifestAttrs(getIRPosition(),
+                           {Attribute::get(Ctx, Attribute::InReg)});
+  }
+};
+
+/// Construct the concrete attribute for \p IRP in the Attributor's allocator.
+/// Only argument positions are handled (AAAMDGPUUniformArgument); any other
+/// position kind reaches llvm_unreachable.
+AAAMDGPUUniform &AAAMDGPUUniform::createForPosition(const IRPosition &IRP,
+                                                    Attributor &A) {
+  switch (IRP.getPositionKind()) {
+  case IRPosition::IRP_ARGUMENT:
+    return *new (A.Allocator) AAAMDGPUUniformArgument(IRP, A);
+  default:
+    llvm_unreachable("not a valid position for AAAMDGPUUniform");
+  }
+}
+
 /// Performs the final check and updates the 'amdgpu-waves-per-eu' attribute
 /// based on the finalized 'amdgpu-flat-work-group-size' attribute.
 /// Both attributes start with narrow ranges that expand during iteration.
@@ -1381,7 +1490,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
        &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
        &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
        &AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
-       &AAInstanceInfo::ID});
+       &AAInstanceInfo::ID, &AAAMDGPUUniform::ID});
 
   AttributorConfig AC(CGUpdater);
   AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1433,6 +1542,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
         A.getOrCreateAAFor<AAAddressSpace>(
             IRPosition::value(*CmpX->getPointerOperand()));
       }
+
+      if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
+        for (auto &Arg : F->args())
+          A.getOrCreateAAFor<AAAMDGPUUniform>(IRPosition::argument(Arg));
+      }
     }
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/aa-inreg-inference.ll b/llvm/test/CodeGen/AMDGPU/aa-inreg-inference.ll
new file mode 100644
index 0000000000000..b0d8d3a15605e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/aa-inreg-inference.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s -o - | FileCheck %s
+
+@g1 = protected addrspace(1) externally_initialized global i32 0, align 4
+@g2 = protected addrspace(1) externally_initialized global i32 0, align 4
+@g3 = protected addrspace(1) externally_initialized global i32 0, align 4
+@g4 = protected addrspace(1) externally_initialized global i32 0, align 4
+
+; Internal callee whose only call in this file is in
+; @kernel_with_readfirstlane. The assertions below expect both parameters to
+; gain inreg.
+define internal void @callee_with_always_uniform_argument(ptr addrspace(1) %x, i32 %y) {
+; CHECK-LABEL: define internal void @callee_with_always_uniform_argument(
+; CHECK-SAME: ptr addrspace(1) inreg [[X:%.*]], i32 inreg [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[X_VAL:%.*]] = load i32, ptr addrspace(1) [[X]], align 4
+; CHECK-NEXT:    store i32 [[X_VAL]], ptr addrspace(1) @g3, align 4
+; CHECK-NEXT:    store i32 [[Y]], ptr addrspace(1) @g4, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %x.val = load i32, ptr addrspace(1) %x, align 4
+  store i32 %x.val, ptr addrspace(1) @g3, align 4
+  store i32 %y, ptr addrspace(1) @g4, align 4
+  ret void
+}
+
+; Kernel that calls @callee_with_always_uniform_argument with the result of
+; llvm.amdgcn.readfirstlane on %p and with its own i32 kernel argument %x.
+define amdgpu_kernel void @kernel_with_readfirstlane(ptr addrspace(1) %p, i32 %x) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_with_readfirstlane(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[P0:%.*]] = call ptr addrspace(1) @llvm.amdgcn.readfirstlane.p1(ptr addrspace(1) [[P]])
+; CHECK-NEXT:    call void @callee_with_always_uniform_argument(ptr addrspace(1) [[P0]], i32 [[X]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p0 = call ptr addrspace(1) @llvm.amdgcn.readfirstlane.p1(ptr addrspace(1) %p)
+  call void @callee_with_always_uniform_argument(ptr addrspace(1) %p0, i32 %x)
+  ret void
+}
+
+; Same body as @callee_with_always_uniform_argument, but its only call in this
+; file passes values derived from llvm.amdgcn.workitem.id.x. The assertions
+; below expect neither parameter to gain inreg.
+define internal void @callee_without_always_uniform_argument(ptr addrspace(1) %x, i32 %y) {
+; CHECK-LABEL: define internal void @callee_without_always_uniform_argument(
+; CHECK-SAME: ptr addrspace(1) [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[X_VAL:%.*]] = load i32, ptr addrspace(1) [[X]], align 4
+; CHECK-NEXT:    store i32 [[X_VAL]], ptr addrspace(1) @g3, align 4
+; CHECK-NEXT:    store i32 [[Y]], ptr addrspace(1) @g4, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %x.val = load i32, ptr addrspace(1) %x, align 4
+  store i32 %x.val, ptr addrspace(1) @g3, align 4
+  store i32 %y, ptr addrspace(1) @g4, align 4
+  ret void
+}
+
+; Kernel that calls @callee_without_always_uniform_argument with a pointer
+; indexed by llvm.amdgcn.workitem.id.x and with the value loaded through it.
+; NOTE(review): despite "without_divergent" in the name, both call-site
+; arguments here depend on the workitem id -- confirm the intended name.
+define amdgpu_kernel void @kernel_without_divergent_callsite_argument(ptr addrspace(1) %p, i32 %x) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_without_divergent_callsite_argument(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[P]], i32 [[ID_X]]
+; CHECK-NEXT:    [[D:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4
+; CHECK-NEXT:    call void @callee_without_always_uniform_argument(ptr addrspace(1) [[GEP]], i32 [[D]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep = getelementptr i32, ptr addrspace(1) %p, i32 %id.x
+  %d = load i32, ptr addrspace(1) %gep
+  call void @callee_without_always_uniform_argument(ptr addrspace(1) %gep, i32 %d)
+  ret void
+}
+
+declare ptr addrspace(1) @llvm.amdgcn.readfirstlane.p1(ptr addrspace(1))
+declare noundef i32 @llvm.amdgcn.workitem.id.x()

@shiltian
Copy link
Contributor Author

shiltian commented Jul 2, 2025

This is a trivial version of #101609, as requested by @arsenm.

@shiltian shiltian force-pushed the users/shiltian/trivial-aa-uniform-argument branch from 8dd7b01 to a3a10e9 Compare July 2, 2025 15:17
@shiltian
Copy link
Contributor Author

shiltian commented Jul 7, 2025

bump

@shiltian
Copy link
Contributor Author

@arsenm ping

@shiltian shiltian force-pushed the users/shiltian/trivial-aa-uniform-argument branch from a3a10e9 to 4aabc52 Compare July 21, 2025 14:57
@shiltian shiltian force-pushed the users/shiltian/trivial-aa-uniform-argument branch 2 times, most recently from 8533188 to 5c96d70 Compare July 21, 2025 18:59
@shiltian shiltian requested a review from arsenm July 23, 2025 15:49
@shiltian shiltian force-pushed the users/shiltian/trivial-aa-uniform-argument branch from 5c96d70 to 836e590 Compare July 28, 2025 15:22
@shiltian
Copy link
Contributor Author

shiltian commented Aug 1, 2025

ping

This patch introduces `AAAMDGPUUniformArgument`, which can infer the `inreg`
function argument attribute. The idea is that, for a function argument, if the
corresponding call site arguments are always uniform, we can mark it as `inreg`
and thus pass it in an SGPR.

In addition, this AA can propagate the `inreg` attribute where feasible.
@shiltian shiltian force-pushed the users/shiltian/trivial-aa-uniform-argument branch from 836e590 to 3b36e24 Compare August 18, 2025 22:05
@shiltian shiltian merged commit 84ab301 into main Aug 19, 2025
9 checks passed
@shiltian shiltian deleted the users/shiltian/trivial-aa-uniform-argument branch August 19, 2025 02:01
@llvm-ci
Copy link
Collaborator

llvm-ci commented Aug 19, 2025

LLVM Buildbot has detected a new failure on builder clang-hip-vega20 running on hip-vega20-0 while building llvm at step 3 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/123/builds/25473

Here is the relevant piece of the build log for the reference
Step 3 (annotate) failure: '../llvm-zorg/zorg/buildbot/builders/annotated/hip-build.sh --jobs=' (failure)
...
[59/61] Linking CXX executable External/HIP/math_h-hip-6.3.0
[60/61] Building CXX object External/HIP/CMakeFiles/TheNextWeek-hip-6.3.0.dir/workload/ray-tracing/TheNextWeek/main.cc.o
[61/61] Linking CXX executable External/HIP/TheNextWeek-hip-6.3.0
+ build_step 'Testing HIP test-suite'
+ echo '@@@BUILD_STEP Testing HIP test-suite@@@'
+ ninja check-hip-simple
@@@BUILD_STEP Testing HIP test-suite@@@
[0/1] cd /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP && /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/bin/llvm-lit -sv array-hip-6.3.0.test empty-hip-6.3.0.test with-fopenmp-hip-6.3.0.test saxpy-hip-6.3.0.test memmove-hip-6.3.0.test memset-hip-6.3.0.test split-kernel-args-hip-6.3.0.test builtin-logb-scalbn-hip-6.3.0.test TheNextWeek-hip-6.3.0.test algorithm-hip-6.3.0.test cmath-hip-6.3.0.test complex-hip-6.3.0.test math_h-hip-6.3.0.test new-hip-6.3.0.test blender.test
-- Testing: 15 tests, 15 workers --
Testing:  0.. 10.. 20.. 30.. 40.. 50.. 60.. 70.. 
FAIL: test-suite :: External/HIP/TheNextWeek-hip-6.3.0.test (13 of 15)
******************** TEST 'test-suite :: External/HIP/TheNextWeek-hip-6.3.0.test' FAILED ********************

/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/tools/timeit-target --timeout 7200 --limit-core 0 --limit-cpu 7200 --limit-file-size 209715200 --limit-rss-size 838860800 --append-exitstatus --redirect-output /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP/Output/TheNextWeek-hip-6.3.0.test.out --redirect-input /dev/null --summary /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP/Output/TheNextWeek-hip-6.3.0.test.time /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP/TheNextWeek-hip-6.3.0
cd /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP ; /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/tools/fpcmp-target /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP/Output/TheNextWeek-hip-6.3.0.test.out TheNextWeek.reference_output-hip-6.3.0

+ cd /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP
+ /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/tools/fpcmp-target /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP/Output/TheNextWeek-hip-6.3.0.test.out TheNextWeek.reference_output-hip-6.3.0
/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/tools/fpcmp-target: Comparison failed, textual difference between 'M' and 'i'

Input 1:
Running quads
image width = 400 height = 400
block size = (16, 16) grid size = (25, 25)
Start rendering by GPU.
Done.
quads_gpu.ppm and quads_ref.ppm are the same.
Running earth
image width = 400 height = 225
block size = (16, 16) grid size = (25, 15)
Start rendering by GPU.
Done.
earth_gpu.ppm and earth_ref.ppm are the same.
Running two_spheres
image width = 400 height = 225
block size = (16, 16) grid size = (25, 15)
Start rendering by GPU.
Done.
two_spheres_gpu.ppm and two_spheres_ref.ppm are the same.
Running two_perlin_spheres
image width = 400 height = 225
block size = (16, 16) grid size = (25, 15)
Start rendering by GPU.
Done.
two_perlin_spheres_gpu.ppm and two_perlin_spheres_ref.ppm are the same.
Running simple_light
image width = 400 height = 225
block size = (16, 16) grid size = (25, 15)
Start rendering by GPU.
Step 12 (Testing HIP test-suite) failure: Testing HIP test-suite (failure)
@@@BUILD_STEP Testing HIP test-suite@@@
[0/1] cd /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP && /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/bin/llvm-lit -sv array-hip-6.3.0.test empty-hip-6.3.0.test with-fopenmp-hip-6.3.0.test saxpy-hip-6.3.0.test memmove-hip-6.3.0.test memset-hip-6.3.0.test split-kernel-args-hip-6.3.0.test builtin-logb-scalbn-hip-6.3.0.test TheNextWeek-hip-6.3.0.test algorithm-hip-6.3.0.test cmath-hip-6.3.0.test complex-hip-6.3.0.test math_h-hip-6.3.0.test new-hip-6.3.0.test blender.test
-- Testing: 15 tests, 15 workers --
Testing:  0.. 10.. 20.. 30.. 40.. 50.. 60.. 70.. 
FAIL: test-suite :: External/HIP/TheNextWeek-hip-6.3.0.test (13 of 15)
******************** TEST 'test-suite :: External/HIP/TheNextWeek-hip-6.3.0.test' FAILED ********************

/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/tools/timeit-target --timeout 7200 --limit-core 0 --limit-cpu 7200 --limit-file-size 209715200 --limit-rss-size 838860800 --append-exitstatus --redirect-output /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP/Output/TheNextWeek-hip-6.3.0.test.out --redirect-input /dev/null --summary /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP/Output/TheNextWeek-hip-6.3.0.test.time /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP/TheNextWeek-hip-6.3.0
cd /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP ; /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/tools/fpcmp-target /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP/Output/TheNextWeek-hip-6.3.0.test.out TheNextWeek.reference_output-hip-6.3.0

+ cd /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP
+ /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/tools/fpcmp-target /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP/Output/TheNextWeek-hip-6.3.0.test.out TheNextWeek.reference_output-hip-6.3.0
/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/tools/fpcmp-target: Comparison failed, textual difference between 'M' and 'i'

Input 1:
Running quads
image width = 400 height = 400
block size = (16, 16) grid size = (25, 25)
Start rendering by GPU.
Done.
quads_gpu.ppm and quads_ref.ppm are the same.
Running earth
image width = 400 height = 225
block size = (16, 16) grid size = (25, 15)
Start rendering by GPU.
Done.
earth_gpu.ppm and earth_ref.ppm are the same.
Running two_spheres
image width = 400 height = 225
block size = (16, 16) grid size = (25, 15)
Start rendering by GPU.
Done.
two_spheres_gpu.ppm and two_spheres_ref.ppm are the same.
Running two_perlin_spheres
image width = 400 height = 225
block size = (16, 16) grid size = (25, 15)
Start rendering by GPU.
Done.
two_perlin_spheres_gpu.ppm and two_perlin_spheres_ref.ppm are the same.
Running simple_light
image width = 400 height = 225
block size = (16, 16) grid size = (25, 15)
Start rendering by GPU.
Done.
simple_light_gpu.ppm and simple_light_ref.ppm are the same.
Running random_spheres
image width = 400 height = 225
block size = (16, 16) grid size = (25, 15)
Start rendering by GPU.
Done.

@llvm-ci
Copy link
Collaborator

llvm-ci commented Aug 19, 2025

LLVM Buildbot has detected a new failure on builder openmp-offload-amdgpu-runtime-2 running on rocm-worker-hw-02 while building llvm at step 10 "Add check check-libc-amdgcn-amd-amdhsa".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/10/builds/11685

Here is the relevant piece of the build log for the reference
Step 10 (Add check check-libc-amdgcn-amd-amdhsa) failure: test (failure)
...
[1832/3111] Building CXX object libc/test/src/math/smoke/CMakeFiles/libc.test.src.math.smoke.bfloat16_mul_test.__hermetic__.__build__.dir/bfloat16_mul_test.cpp.o
[1833/3111] Linking CXX executable libc/test/integration/startup/gpu/libc.test.integration.startup.gpu.startup_rpc_test.__build__
[1834/3111] Building CXX object libc/test/include/CMakeFiles/libc.test.include.complex_test.__hermetic__.__build__.dir/complex_test.cpp.o
[1835/3111] Linking CXX executable libc/test/integration/src/__support/GPU/libc.test.integration.src.__support.GPU.shuffle_test.__build__
[1836/3111] Building CXX object libc/test/src/stdlib/CMakeFiles/libc.test.src.stdlib.strtoint64_test.__hermetic__.__build__.dir/strtoint64_test.cpp.o
[1837/3111] Linking CXX executable libc/test/integration/src/stdlib/gpu/libc.test.integration.src.stdlib.gpu.aligned_alloc.__build__
[1838/3111] Linking CXX executable libc/test/integration/startup/gpu/libc.test.integration.startup.gpu.startup_rpc_stream_test.__build__
[1839/3111] Linking CXX executable libc/test/integration/src/__support/GPU/libc.test.integration.src.__support.GPU.scan_reduce_test.__build__
[1840/3111] Building CXX object libc/test/src/math/smoke/CMakeFiles/libc.test.src.math.smoke.bfloat16_add_test.__hermetic__.__build__.dir/bfloat16_add_test.cpp.o
[1841/3111] Running integration test libc.test.integration.startup.gpu.startup_args_test
FAILED: libc/test/integration/startup/gpu/CMakeFiles/libc.test.integration.startup.gpu.startup_args_test /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-amdgcn-amd-amdhsa-bins/libc/test/integration/startup/gpu/CMakeFiles/libc.test.integration.startup.gpu.startup_args_test 
cd /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-amdgcn-amd-amdhsa-bins/libc/test/integration/startup/gpu && FRANCE=Paris GERMANY=Berlin /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/bin/amdhsa-loader /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-amdgcn-amd-amdhsa-bins/libc/test/integration/startup/gpu/libc.test.integration.startup.gpu.startup_args_test.__build__ 1 2 3
Memory access fault by GPU node-2 (Agent handle: 0x63012b963dd0) on address 0x732200000000. Reason: Unknown.
 #0 0x0000732289dfae40 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/bin/../lib/libLLVMSupport.so.22.0git+0x1fae40)
 #1 0x0000732289df7aef llvm::sys::RunSignalHandlers() (/home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/bin/../lib/libLLVMSupport.so.22.0git+0x1f7aef)
 #2 0x0000732289df7c42 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x0000732289442520 (/lib/x86_64-linux-gnu/libc.so.6+0x42520)
 #4 0x00007322894969fc __pthread_kill_implementation ./nptl/pthread_kill.c:44:76
 #5 0x00007322894969fc __pthread_kill_internal ./nptl/pthread_kill.c:78:10
 #6 0x00007322894969fc pthread_kill ./nptl/pthread_kill.c:89:10
 #7 0x0000732289442476 gsignal ./signal/../sysdeps/posix/raise.c:27:6
 #8 0x00007322894287f3 abort ./stdlib/abort.c:81:7
 #9 0x000073228a483a96 (/opt/rocm/lib/libhsa-runtime64.so.1+0x83a96)
#10 0x000073228a481d9c (/opt/rocm/lib/libhsa-runtime64.so.1+0x81d9c)
#11 0x000073228a430bab (/opt/rocm/lib/libhsa-runtime64.so.1+0x30bab)
#12 0x0000732289494ac3 start_thread ./nptl/pthread_create.c:442:8
#13 0x0000732289526850 ./misc/../sysdeps/unix/sysv/linux/x86_64/clone3.S:83:0
Aborted (core dumped)
[1842/3111] Linking CXX executable libc/test/integration/src/stdlib/gpu/libc.test.integration.src.stdlib.gpu.malloc.__build__
[1843/3111] Running integration test libc.test.integration.src.stdio.sprintf_size_test_no_sprintf
FAILED: libc/test/integration/src/stdio/CMakeFiles/libc.test.integration.src.stdio.sprintf_size_test_no_sprintf /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-amdgcn-amd-amdhsa-bins/libc/test/integration/src/stdio/CMakeFiles/libc.test.integration.src.stdio.sprintf_size_test_no_sprintf 
cd /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-amdgcn-amd-amdhsa-bins/libc/test/integration/src/stdio && /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/bin/amdhsa-loader /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-amdgcn-amd-amdhsa-bins/libc/test/integration/src/stdio/libc.test.integration.src.stdio.sprintf_size_test_no_sprintf.__build__ %s\ %c\ %d First\ arg a 0
Memory access fault by GPU node-2 (Agent handle: 0x5f121b412ec0) on address 0x7e0e00000000. Reason: Unknown.
 #0 0x00007e0e797fae40 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/bin/../lib/libLLVMSupport.so.22.0git+0x1fae40)
 #1 0x00007e0e797f7aef llvm::sys::RunSignalHandlers() (/home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/bin/../lib/libLLVMSupport.so.22.0git+0x1f7aef)
 #2 0x00007e0e797f7c42 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x00007e0e78e42520 (/lib/x86_64-linux-gnu/libc.so.6+0x42520)
 #4 0x00007e0e78e969fc __pthread_kill_implementation ./nptl/pthread_kill.c:44:76
 #5 0x00007e0e78e969fc __pthread_kill_internal ./nptl/pthread_kill.c:78:10
 #6 0x00007e0e78e969fc pthread_kill ./nptl/pthread_kill.c:89:10
 #7 0x00007e0e78e42476 gsignal ./signal/../sysdeps/posix/raise.c:27:6
 #8 0x00007e0e78e287f3 abort ./stdlib/abort.c:81:7
 #9 0x00007e0e79e83a96 (/opt/rocm/lib/libhsa-runtime64.so.1+0x83a96)
#10 0x00007e0e79e81d9c (/opt/rocm/lib/libhsa-runtime64.so.1+0x81d9c)
#11 0x00007e0e79e30bab (/opt/rocm/lib/libhsa-runtime64.so.1+0x30bab)
#12 0x00007e0e78e94ac3 start_thread ./nptl/pthread_create.c:442:8
#13 0x00007e0e78f26850 ./misc/../sysdeps/unix/sysv/linux/x86_64/clone3.S:83:0
Aborted (core dumped)
[1844/3111] Running integration test libc.test.integration.startup.gpu.init_fini_array_test

@llvm-ci
Copy link
Collaborator

llvm-ci commented Aug 19, 2025

LLVM Buildbot has detected a new failure on builder hip-third-party-libs-test running on ext_buildbot_hw_05-hip-docker while building llvm at step 4 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/206/builds/4964

Here is the relevant piece of the build log for the reference
Step 4 (annotate) failure: '../llvm-zorg/zorg/buildbot/builders/annotated/hip-tpl.py --jobs=32' (failure)
...
-- Kokkos Backends: SERIAL;HIP
-- Configuring done
-- Generating done
-- Build files have been written to: /opt/botworker/llvm/llvm-test-suite/TS-build/External/HIP/TestKokkosHIP-prefix/src/TestKokkosHIP-build
[4/8] Performing build step for 'TestKokkosHIP'
[1/3] cd /opt/botworker/llvm/llvm-test-suite/TS-build/External/HIP/TestKokkosHIP-prefix/src/TestKokkosHIP-build && /usr/bin/cmake -DRUN_CHECK_GIT_VERSION=1 -DKOKKOS_SOURCE_DIR=/opt/botworker/llvm/llvm-test-suite/TS-build/External/HIP/TestKokkosHIP-prefix/src/TestKokkosHIP -P /opt/botworker/llvm/llvm-test-suite/TS-build/External/HIP/TestKokkosHIP-prefix/src/TestKokkosHIP/cmake/build_env_info.cmake
[5/8] No install step for 'TestKokkosHIP'
[6/8] No test step for 'TestKokkosHIP'
[7/8] Completed 'TestKokkosHIP'
[8/8] cd /opt/botworker/llvm/llvm-test-suite/TS-build/External/HIP/TestKokkosHIP-prefix/src/TestKokkosHIP-build && /usr/bin/cmake -E env GTEST_FILTER=-hip.atomics:hip.bit_manip_bit_ceil ctest
FAILED: External/HIP/CMakeFiles/test-kokkos /opt/botworker/llvm/llvm-test-suite/TS-build/External/HIP/CMakeFiles/test-kokkos 
cd /opt/botworker/llvm/llvm-test-suite/TS-build/External/HIP/TestKokkosHIP-prefix/src/TestKokkosHIP-build && /usr/bin/cmake -E env GTEST_FILTER=-hip.atomics:hip.bit_manip_bit_ceil ctest
Test project /opt/botworker/llvm/llvm-test-suite/TS-build/External/HIP/TestKokkosHIP-prefix/src/TestKokkosHIP-build
      Start  1: Kokkos_CoreUnitTest_Serial_ViewSupport
 1/54 Test  #1: Kokkos_CoreUnitTest_Serial_ViewSupport .....................   Passed    0.34 sec
      Start  2: Kokkos_CoreUnitTest_HIP_ViewSupport
 2/54 Test  #2: Kokkos_CoreUnitTest_HIP_ViewSupport ........................   Passed    0.42 sec
      Start  3: Kokkos_CoreUnitTest_Serial1
 3/54 Test  #3: Kokkos_CoreUnitTest_Serial1 ................................   Passed   14.79 sec
      Start  4: Kokkos_CoreUnitTest_Serial2
 4/54 Test  #4: Kokkos_CoreUnitTest_Serial2 ................................   Passed   17.78 sec
      Start  5: Kokkos_CoreUnitTest_HIP
 5/54 Test  #5: Kokkos_CoreUnitTest_HIP ....................................Subprocess aborted***Exception: 118.28 sec
      Start  6: Kokkos_CoreUnitTest_HIPInterOpInit
 6/54 Test  #6: Kokkos_CoreUnitTest_HIPInterOpInit .........................   Passed    0.36 sec
      Start  7: Kokkos_CoreUnitTest_HIPInterOpStreams
 7/54 Test  #7: Kokkos_CoreUnitTest_HIPInterOpStreams ......................   Passed    0.37 sec
      Start  8: Kokkos_CoreUnitTest_HIPInterOpGraph
 8/54 Test  #8: Kokkos_CoreUnitTest_HIPInterOpGraph ........................   Passed    0.37 sec
      Start  9: Kokkos_CoreUnitTest_Default
 9/54 Test  #9: Kokkos_CoreUnitTest_Default ................................   Passed    0.55 sec
      Start 10: Kokkos_CoreUnitTest_LegionInitialization
10/54 Test #10: Kokkos_CoreUnitTest_LegionInitialization ...................   Passed    0.42 sec
      Start 11: Kokkos_CoreUnitTest_PushFinalizeHook
11/54 Test #11: Kokkos_CoreUnitTest_PushFinalizeHook .......................   Passed    0.35 sec
      Start 12: Kokkos_CoreUnitTest_ScopeGuard
12/54 Test #12: Kokkos_CoreUnitTest_ScopeGuard .............................   Passed    2.38 sec
      Start 13: Kokkos_CoreUnitTest_Develop
13/54 Test #13: Kokkos_CoreUnitTest_Develop ................................   Passed    0.35 sec
      Start 14: Kokkos_CoreUnitTest_PushFinalizeHookTerminateRegex
14/54 Test #14: Kokkos_CoreUnitTest_PushFinalizeHookTerminateRegex .........   Passed    0.44 sec
      Start 15: Kokkos_CoreUnitTest_PushFinalizeHookTerminateFails
15/54 Test #15: Kokkos_CoreUnitTest_PushFinalizeHookTerminateFails .........   Passed    0.43 sec
      Start 16: Kokkos_CoreUnitTest_KokkosP
16/54 Test #16: Kokkos_CoreUnitTest_KokkosP ................................   Passed    0.36 sec
      Start 17: Kokkos_CoreUnitTest_ToolIndependence
17/54 Test #17: Kokkos_CoreUnitTest_ToolIndependence .......................   Passed    0.02 sec
      Start 18: Kokkos_ProfilingTestLibraryLoadHelp
18/54 Test #18: Kokkos_ProfilingTestLibraryLoadHelp ........................   Passed    0.34 sec
Step 11 (run kokkos test suite) failure: run kokkos test suite (failure)
...
-- Kokkos Backends: SERIAL;HIP
-- Configuring done
-- Generating done
-- Build files have been written to: /opt/botworker/llvm/llvm-test-suite/TS-build/External/HIP/TestKokkosHIP-prefix/src/TestKokkosHIP-build
[4/8] Performing build step for 'TestKokkosHIP'
[1/3] cd /opt/botworker/llvm/llvm-test-suite/TS-build/External/HIP/TestKokkosHIP-prefix/src/TestKokkosHIP-build && /usr/bin/cmake -DRUN_CHECK_GIT_VERSION=1 -DKOKKOS_SOURCE_DIR=/opt/botworker/llvm/llvm-test-suite/TS-build/External/HIP/TestKokkosHIP-prefix/src/TestKokkosHIP -P /opt/botworker/llvm/llvm-test-suite/TS-build/External/HIP/TestKokkosHIP-prefix/src/TestKokkosHIP/cmake/build_env_info.cmake
[5/8] No install step for 'TestKokkosHIP'
[6/8] No test step for 'TestKokkosHIP'
[7/8] Completed 'TestKokkosHIP'
[8/8] cd /opt/botworker/llvm/llvm-test-suite/TS-build/External/HIP/TestKokkosHIP-prefix/src/TestKokkosHIP-build && /usr/bin/cmake -E env GTEST_FILTER=-hip.atomics:hip.bit_manip_bit_ceil ctest
FAILED: External/HIP/CMakeFiles/test-kokkos /opt/botworker/llvm/llvm-test-suite/TS-build/External/HIP/CMakeFiles/test-kokkos 
cd /opt/botworker/llvm/llvm-test-suite/TS-build/External/HIP/TestKokkosHIP-prefix/src/TestKokkosHIP-build && /usr/bin/cmake -E env GTEST_FILTER=-hip.atomics:hip.bit_manip_bit_ceil ctest
Test project /opt/botworker/llvm/llvm-test-suite/TS-build/External/HIP/TestKokkosHIP-prefix/src/TestKokkosHIP-build
      Start  1: Kokkos_CoreUnitTest_Serial_ViewSupport
 1/54 Test  #1: Kokkos_CoreUnitTest_Serial_ViewSupport .....................   Passed    0.34 sec
      Start  2: Kokkos_CoreUnitTest_HIP_ViewSupport
 2/54 Test  #2: Kokkos_CoreUnitTest_HIP_ViewSupport ........................   Passed    0.42 sec
      Start  3: Kokkos_CoreUnitTest_Serial1
 3/54 Test  #3: Kokkos_CoreUnitTest_Serial1 ................................   Passed   14.79 sec
      Start  4: Kokkos_CoreUnitTest_Serial2
 4/54 Test  #4: Kokkos_CoreUnitTest_Serial2 ................................   Passed   17.78 sec
      Start  5: Kokkos_CoreUnitTest_HIP
 5/54 Test  #5: Kokkos_CoreUnitTest_HIP ....................................Subprocess aborted***Exception: 118.28 sec
      Start  6: Kokkos_CoreUnitTest_HIPInterOpInit
 6/54 Test  #6: Kokkos_CoreUnitTest_HIPInterOpInit .........................   Passed    0.36 sec
      Start  7: Kokkos_CoreUnitTest_HIPInterOpStreams
 7/54 Test  #7: Kokkos_CoreUnitTest_HIPInterOpStreams ......................   Passed    0.37 sec
      Start  8: Kokkos_CoreUnitTest_HIPInterOpGraph
 8/54 Test  #8: Kokkos_CoreUnitTest_HIPInterOpGraph ........................   Passed    0.37 sec
      Start  9: Kokkos_CoreUnitTest_Default
 9/54 Test  #9: Kokkos_CoreUnitTest_Default ................................   Passed    0.55 sec
      Start 10: Kokkos_CoreUnitTest_LegionInitialization
10/54 Test #10: Kokkos_CoreUnitTest_LegionInitialization ...................   Passed    0.42 sec
      Start 11: Kokkos_CoreUnitTest_PushFinalizeHook
11/54 Test #11: Kokkos_CoreUnitTest_PushFinalizeHook .......................   Passed    0.35 sec
      Start 12: Kokkos_CoreUnitTest_ScopeGuard
12/54 Test #12: Kokkos_CoreUnitTest_ScopeGuard .............................   Passed    2.38 sec
      Start 13: Kokkos_CoreUnitTest_Develop
13/54 Test #13: Kokkos_CoreUnitTest_Develop ................................   Passed    0.35 sec
      Start 14: Kokkos_CoreUnitTest_PushFinalizeHookTerminateRegex
14/54 Test #14: Kokkos_CoreUnitTest_PushFinalizeHookTerminateRegex .........   Passed    0.44 sec
      Start 15: Kokkos_CoreUnitTest_PushFinalizeHookTerminateFails
15/54 Test #15: Kokkos_CoreUnitTest_PushFinalizeHookTerminateFails .........   Passed    0.43 sec
      Start 16: Kokkos_CoreUnitTest_KokkosP
16/54 Test #16: Kokkos_CoreUnitTest_KokkosP ................................   Passed    0.36 sec
      Start 17: Kokkos_CoreUnitTest_ToolIndependence
17/54 Test #17: Kokkos_CoreUnitTest_ToolIndependence .......................   Passed    0.02 sec
      Start 18: Kokkos_ProfilingTestLibraryLoadHelp
18/54 Test #18: Kokkos_ProfilingTestLibraryLoadHelp ........................   Passed    0.34 sec

shiltian added a commit that referenced this pull request Aug 19, 2025
…or` (#146720)"

This reverts commit 84ab301 because it breaks
several AMDGPU test bots.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants