Skip to content

Commit ab2f610

Browse files
committed
AMDGPU: llvm.amdgcn.writelane is a source of divergence
Summary: Consider `%r = call i32 @llvm.amdgcn.writelane(i32 0, i32 1, i32 2)`. This produces a value that is 0 on lane 1 and 2 on every other lane; i.e., the result is divergent. Reported-by: Marek Olsak <[email protected]>. Reviewers: arsenm, foad, mareko. Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits. Tags: #llvm. Differential Revision: https://reviews.llvm.org/D74400
1 parent 07a5b84 commit ab2f610

File tree

2 files changed

+9
-0
lines changed

2 files changed

+9
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@ def : SourceOfDivergence<int_amdgcn_permlanex16>;
247247
def : SourceOfDivergence<int_amdgcn_mov_dpp>;
248248
def : SourceOfDivergence<int_amdgcn_mov_dpp8>;
249249
def : SourceOfDivergence<int_amdgcn_update_dpp>;
250+
def : SourceOfDivergence<int_amdgcn_writelane>;
250251

251252
def : SourceOfDivergence<int_amdgcn_mfma_f32_4x4x1f32>;
252253
def : SourceOfDivergence<int_amdgcn_mfma_f32_4x4x1f32>;

llvm/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,20 @@ define amdgpu_kernel void @mov_dpp8(i32 addrspace(1)* %out, i32 %in) #0 {
4242
ret void
4343
}
4444

45+
; CHECK: DIVERGENT: %tmp0 = call i32 @llvm.amdgcn.writelane(i32 0, i32 1, i32 2)
46+
define amdgpu_kernel void @writelane(i32 addrspace(1)* %out) #0 {
47+
%tmp0 = call i32 @llvm.amdgcn.writelane(i32 0, i32 1, i32 2)
48+
store i32 %tmp0, i32 addrspace(1)* %out
49+
ret void
50+
}
51+
4552
declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #1
4653
declare i32 @llvm.amdgcn.permlane16(i32, i32, i32, i32, i1, i1) #1
4754
declare i32 @llvm.amdgcn.permlanex16(i32, i32, i32, i32, i1, i1) #1
4855
declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32, i32, i32, i1) #1
4956
declare i32 @llvm.amdgcn.mov.dpp8.i32(i32, i32) #1
5057
declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #1
58+
declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #1
5159

5260
attributes #0 = { nounwind convergent }
5361
attributes #1 = { nounwind readnone convergent }

0 commit comments

Comments
 (0)