From f8f103029e87460d02e03072f94f4d8272c8e6d2 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Tue, 23 Jan 2024 15:57:22 +0000 Subject: [PATCH 1/2] Precommit tests --- .../AMDGPU/lower-work-group-id-intrinsics.ll | 31 ++- .../AMDGPU/workgroup-id-in-arch-sgprs.ll | 176 +++++++++++------- 2 files changed, 137 insertions(+), 70 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll index c732ff7094255..b94758d1c4ddf 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs --verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-SDAG %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel --verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s define amdgpu_cs void @_amdgpu_cs_main() { ; GFX9-SDAG-LABEL: _amdgpu_cs_main: @@ -23,6 +25,13 @@ define amdgpu_cs void @_amdgpu_cs_main() { ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 ; GFX9-GISEL-NEXT: s_endpgm +; +; GFX12-LABEL: _amdgpu_cs_main: +; GFX12: ; %bb.0: ; %.entry +; GFX12-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX12-NEXT: s_nop 0 +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-NEXT: s_endpgm .entry: %idx = call i32 @llvm.amdgcn.workgroup.id.x() %idy = call i32 @llvm.amdgcn.workgroup.id.y() @@ -68,6 +77,22 @@ define amdgpu_cs void @caller() { ; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] ; GFX9-GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-GISEL-NEXT: s_endpgm +; +; GFX12-SDAG-LABEL: caller: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_mov_b32 s1, callee@abs32@hi +; GFX12-SDAG-NEXT: s_mov_b32 s0, callee@abs32@lo +; GFX12-SDAG-NEXT: s_mov_b32 s32, 0 +; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX12-SDAG-NEXT: s_endpgm +; +; GFX12-GISEL-LABEL: caller: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_mov_b32 s0, callee@abs32@lo +; GFX12-GISEL-NEXT: s_mov_b32 s1, callee@abs32@hi +; GFX12-GISEL-NEXT: s_mov_b32 s32, 0 +; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX12-GISEL-NEXT: s_endpgm %idx = call i32 @llvm.amdgcn.workgroup.id.x() call amdgpu_gfx void @callee(i32 %idx) ret void @@ -79,3 +104,5 @@ declare i32 @llvm.amdgcn.workgroup.id.x() declare i32 @llvm.amdgcn.workgroup.id.y() declare i32 @llvm.amdgcn.workgroup.id.z() declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX9: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll index c492b54759d82..74a587a3b09ab 100644 --- a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll @@ -1,25 +1,47 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs --verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SDAG %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel --verify-machineinstrs < %s | FileCheck -check-prefix=GCN-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) { -; GCN-SDAG-LABEL: workgroup_id_x: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9 -; GCN-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GCN-SDAG-NEXT: global_store_dword v0, v1, s[0:1] -; GCN-SDAG-NEXT: s_endpgm +; GFX9-SDAG-LABEL: workgroup_id_x: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9 +; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-SDAG-NEXT: s_endpgm ; -; GCN-GISEL-LABEL: workgroup_id_x: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GCN-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GCN-GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GCN-GISEL-NEXT: s_endpgm +; GFX9-GISEL-LABEL: workgroup_id_x: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX9-GISEL-NEXT: s_endpgm +; +; GFX12-SDAG-LABEL: workgroup_id_x: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-SDAG-NEXT: s_nop 0 +; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-SDAG-NEXT: s_endpgm +; +; GFX12-GISEL-LABEL: workgroup_id_x: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX12-GISEL-NEXT: s_nop 0 +; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-GISEL-NEXT: s_endpgm %idx = call i32 @llvm.amdgcn.workgroup.id.x() store i32 %idx, ptr addrspace(1) %ptrx @@ -27,27 +49,29 @@ define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) { } define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace(1) %ptry) { -; GCN-SDAG-LABEL: workgroup_id_xy: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9 -; GCN-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GCN-SDAG-NEXT: global_store_dword v0, v1, s[0:1] -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp7 -; GCN-SDAG-NEXT: global_store_dword v0, v1, s[2:3] -; GCN-SDAG-NEXT: s_endpgm +; GFX9-LABEL: workgroup_id_xy: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, ttmp9 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: v_mov_b32_e32 v1, ttmp7 +; GFX9-NEXT: global_store_dword v0, v1, s[2:3] +; GFX9-NEXT: s_endpgm ; -; GCN-GISEL-LABEL: workgroup_id_xy: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, ttmp9 -; GCN-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GCN-GISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, ttmp7 -; GCN-GISEL-NEXT: global_store_dword v0, v1, s[2:3] -; GCN-GISEL-NEXT: s_endpgm +; GFX12-LABEL: workgroup_id_xy: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 +; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX12-NEXT: v_mov_b32_e32 v2, s3 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_clause 0x1 +; GFX12-NEXT: global_store_b32 v0, v1, s[4:5] +; GFX12-NEXT: global_store_b32 v0, v2, s[6:7] +; GFX12-NEXT: s_nop 0 +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-NEXT: s_endpgm %idx = call i32 @llvm.amdgcn.workgroup.id.x() store i32 %idx, ptr addrspace(1) %ptrx %idy = call i32 @llvm.amdgcn.workgroup.id.y() @@ -57,37 +81,53 @@ define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace } define amdgpu_kernel void @workgroup_id_xyz(ptr addrspace(1) %ptrx, ptr addrspace(1) %ptry, ptr addrspace(1) %ptrz) { -; GCN-SDAG-LABEL: workgroup_id_xyz: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GCN-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 -; GCN-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9 -; GCN-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GCN-SDAG-NEXT: global_store_dword v0, v1, s[0:1] -; GCN-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, s0 -; GCN-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16 -; GCN-SDAG-NEXT: global_store_dword v0, v1, s[2:3] -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, s0 -; GCN-SDAG-NEXT: global_store_dword v0, v1, s[6:7] -; GCN-SDAG-NEXT: s_endpgm +; GFX9-SDAG-LABEL: workgroup_id_xyz: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX9-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9 +; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s0 +; GFX9-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16 +; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[2:3] +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s0 +; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[6:7] +; GFX9-SDAG-NEXT: s_endpgm +; +; GFX9-GISEL-LABEL: workgroup_id_xyz: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX9-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX9-GISEL-NEXT: s_and_b32 s0, ttmp7, 0xffff +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-GISEL-NEXT: s_lshr_b32 s0, ttmp7, 16 +; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[6:7] +; GFX9-GISEL-NEXT: s_endpgm ; -; GCN-GISEL-LABEL: workgroup_id_xyz: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GCN-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 -; GCN-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GCN-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GCN-GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GCN-GISEL-NEXT: s_and_b32 s0, ttmp7, 0xffff -; GCN-GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GCN-GISEL-NEXT: s_lshr_b32 s0, ttmp7, 16 -; GCN-GISEL-NEXT: global_store_dword v1, v0, s[2:3] -; GCN-GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GCN-GISEL-NEXT: global_store_dword v1, v0, s[6:7] -; GCN-GISEL-NEXT: s_endpgm +; GFX12-LABEL: workgroup_id_xyz: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_clause 0x1 +; GFX12-NEXT: s_load_b128 s[8:11], s[0:1], 0x0 +; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x10 +; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX12-NEXT: v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v3, s4 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_clause 0x2 +; GFX12-NEXT: global_store_b32 v0, v1, s[8:9] +; GFX12-NEXT: global_store_b32 v0, v2, s[10:11] +; GFX12-NEXT: global_store_b32 v0, v3, s[0:1] +; GFX12-NEXT: s_nop 0 +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-NEXT: s_endpgm %idx = call i32 @llvm.amdgcn.workgroup.id.x() store i32 %idx, ptr addrspace(1) %ptrx %idy = call i32 @llvm.amdgcn.workgroup.id.y() From 1935d52a6eea036e44d4abd0ea79f0b923b45df1 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Tue, 23 Jan 2024 16:07:41 +0000 Subject: [PATCH 2/2] [AMDGPU] Enable architected SGPRs for GFX12 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 1 + .../AMDGPU/indirect-call-known-callees.ll | 30 ++++++++--------- .../AMDGPU/lower-work-group-id-intrinsics.ll | 32 +++++++++++++++---- .../AMDGPU/workgroup-id-in-arch-sgprs.ll | 27 +++++++++------- 4 files changed, 57 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 92985f971f17a..cb29d5d947598 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1496,6 +1496,7 @@ def FeatureISAVersion12 : FeatureSet< FeatureWavefrontSize32, FeatureShaderCyclesHiLoRegisters, FeatureArchitectedFlatScratch, + FeatureArchitectedSGPRs, FeatureAtomicFaddRtnInsts, FeatureAtomicFaddNoRtnInsts, FeatureAtomicDsPkAdd16Insts, diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll index 9965d214cc9b3..380a13ed16128 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll @@ -41,30 +41,30 @@ define amdgpu_kernel void @indirect_call_known_no_special_inputs() { ; ; GFX12-LABEL: indirect_call_known_no_special_inputs: ; GFX12: ; %bb.0: ; %bb +; GFX12-NEXT: s_getpc_b64 s[2:3] +; GFX12-NEXT: s_sext_i32_i16 s3, s3 +; GFX12-NEXT: s_add_co_u32 s2, s2, snork@gotpcrel32@lo+8 +; GFX12-NEXT: s_add_co_ci_u32 s3, s3, snork@gotpcrel32@hi+16 +; GFX12-NEXT: s_mov_b64 s[0:1], 0 ; GFX12-NEXT: s_getpc_b64 s[4:5] ; GFX12-NEXT: s_sext_i32_i16 s5, s5 -; GFX12-NEXT: s_add_co_u32 s4, s4, snork@gotpcrel32@lo+8 -; GFX12-NEXT: s_add_co_ci_u32 s5, s5, snork@gotpcrel32@hi+16 -; GFX12-NEXT: s_mov_b64 s[2:3], 0 -; GFX12-NEXT: s_getpc_b64 s[6:7] -; GFX12-NEXT: s_sext_i32_i16 s7, s7 -; GFX12-NEXT: s_add_co_u32 s6, s6, wobble@gotpcrel32@lo+8 -; GFX12-NEXT: s_add_co_ci_u32 s7, s7, wobble@gotpcrel32@hi+16 -; GFX12-NEXT: s_load_u8 s1, s[2:3], 0x0 +; GFX12-NEXT: s_add_co_u32 s4, s4, wobble@gotpcrel32@lo+8 +; GFX12-NEXT: s_add_co_ci_u32 s5, s5, wobble@gotpcrel32@hi+16 +; GFX12-NEXT: s_load_u8 s6, s[0:1], 0x0 +; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 ; GFX12-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 -; GFX12-NEXT: s_load_b64 s[4:5], s[6:7], 0x0 ; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 0 ; GFX12-NEXT: v_mov_b32_e32 v31, v0 +; GFX12-NEXT: s_mov_b32 s12, ttmp9 ; GFX12-NEXT: s_mov_b64 s[8:9], 0 -; GFX12-NEXT: s_mov_b32 s12, s0 ; GFX12-NEXT: s_mov_b32 s32, 0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_and_b32 s1, 1, s1 +; GFX12-NEXT: s_and_b32 s4, 1, s6 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX12-NEXT: s_cmp_eq_u32 s1, 1 -; GFX12-NEXT: s_cselect_b32 s3, s5, s3 -; GFX12-NEXT: s_cselect_b32 s2, s4, s2 -; GFX12-NEXT: s_swappc_b64 s[30:31], s[2:3] +; GFX12-NEXT: s_cmp_eq_u32 s4, 1 +; GFX12-NEXT: s_cselect_b32 s1, s3, s1 +; GFX12-NEXT: s_cselect_b32 s0, s2, s0 +; GFX12-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX12-NEXT: s_endpgm bb: diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll index b94758d1c4ddf..495b54758de04 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll @@ -26,12 +26,29 @@ define amdgpu_cs void @_amdgpu_cs_main() { ; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 ; GFX9-GISEL-NEXT: s_endpgm ; -; GFX12-LABEL: _amdgpu_cs_main: -; GFX12: ; %bb.0: ; %.entry -; GFX12-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null -; GFX12-NEXT: s_nop 0 -; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GFX12-NEXT: s_endpgm +; GFX12-SDAG-LABEL: _amdgpu_cs_main: +; GFX12-SDAG: ; %bb.0: ; %.entry +; GFX12-SDAG-NEXT: s_lshr_b32 s2, ttmp7, 16 +; GFX12-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s1 +; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s2 +; GFX12-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX12-SDAG-NEXT: s_nop 0 +; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-SDAG-NEXT: s_endpgm +; +; GFX12-GISEL-LABEL: _amdgpu_cs_main: +; GFX12-GISEL: ; %bb.0: ; %.entry +; GFX12-GISEL-NEXT: s_mov_b32 s0, ttmp9 +; GFX12-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX12-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16 +; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX12-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX12-GISEL-NEXT: s_nop 0 +; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-GISEL-NEXT: s_endpgm .entry: %idx = call i32 @llvm.amdgcn.workgroup.id.x() %idy = call i32 @llvm.amdgcn.workgroup.id.y() @@ -80,6 +97,7 @@ define amdgpu_cs void @caller() { ; ; GFX12-SDAG-LABEL: caller: ; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 ; GFX12-SDAG-NEXT: s_mov_b32 s1, callee@abs32@hi ; GFX12-SDAG-NEXT: s_mov_b32 s0, callee@abs32@lo ; GFX12-SDAG-NEXT: s_mov_b32 s32, 0 @@ -88,6 +106,7 @@ define amdgpu_cs void @caller() { ; ; GFX12-GISEL-LABEL: caller: ; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 ; GFX12-GISEL-NEXT: s_mov_b32 s0, callee@abs32@lo ; GFX12-GISEL-NEXT: s_mov_b32 s1, callee@abs32@hi ; GFX12-GISEL-NEXT: s_mov_b32 s32, 0 @@ -105,4 +124,5 @@ declare i32 @llvm.amdgcn.workgroup.id.y() declare i32 @llvm.amdgcn.workgroup.id.z() declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg) ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX12: {{.*}} ; GFX9: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll index 74a587a3b09ab..769e6b0964abd 100644 --- a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll @@ -26,7 +26,7 @@ define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) { ; GFX12-SDAG-LABEL: workgroup_id_x: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX12-SDAG-NEXT: s_nop 0 @@ -36,7 +36,7 @@ define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) { ; GFX12-GISEL-LABEL: workgroup_id_x: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 +; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX12-GISEL-NEXT: s_nop 0 @@ -62,13 +62,13 @@ define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace ; ; GFX12-LABEL: workgroup_id_xy: ; GFX12: ; %bb.0: -; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 -; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX12-NEXT: v_mov_b32_e32 v2, s3 +; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 +; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9 +; GFX12-NEXT: v_mov_b32_e32 v2, ttmp7 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_clause 0x1 -; GFX12-NEXT: global_store_b32 v0, v1, s[4:5] -; GFX12-NEXT: global_store_b32 v0, v2, s[6:7] +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: global_store_b32 v0, v2, s[2:3] ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm @@ -116,14 +116,17 @@ define amdgpu_kernel void @workgroup_id_xyz(ptr addrspace(1) %ptrx, ptr addrspac ; GFX12-LABEL: workgroup_id_xyz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_clause 0x1 -; GFX12-NEXT: s_load_b128 s[8:11], s[0:1], 0x0 +; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x10 -; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX12-NEXT: v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v3, s4 +; GFX12-NEXT: s_and_b32 s2, ttmp7, 0xffff +; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9 +; GFX12-NEXT: s_lshr_b32 s3, ttmp7, 16 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_clause 0x2 -; GFX12-NEXT: global_store_b32 v0, v1, s[8:9] -; GFX12-NEXT: global_store_b32 v0, v2, s[10:11] +; GFX12-NEXT: global_store_b32 v0, v1, s[4:5] +; GFX12-NEXT: global_store_b32 v0, v2, s[6:7] ; GFX12-NEXT: global_store_b32 v0, v3, s[0:1] ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)