From ba51ef50de89d8fe8aa16289314f0ad1e4c08a2d Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Tue, 29 Jul 2025 00:16:00 -0700 Subject: [PATCH] [AMDGPU] Support f64 atomics on gfx1250 - BUF/FLAT/GLOBAL_ADD/MIN/MAX_F64 - DS_ADD_F64 Co-authored-by: Konstantin Zhuravlyov --- llvm/lib/Target/AMDGPU/AMDGPU.td | 2 +- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 2 +- llvm/lib/Target/AMDGPU/BUFInstructions.td | 12 +- llvm/lib/Target/AMDGPU/DSInstructions.td | 3 + llvm/lib/Target/AMDGPU/FLATInstructions.td | 8 + llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 +- .../AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll | 790 +++++++++++++++++ .../CodeGen/AMDGPU/fp64-atomics-gfx90a.ll | 799 ++++++++++++++++++ llvm/test/MC/AMDGPU/gfx1250_asm_ds.s | 78 ++ .../MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s | 300 +++++++ llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s | 211 +++++ .../Disassembler/AMDGPU/gfx1250_dasm_ds.txt | 33 + .../AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt | 90 ++ .../AMDGPU/gfx1250_dasm_vflat.txt | 69 ++ 14 files changed, 2397 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index a17fb934ff66d..25e1eabb2c293 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -2429,7 +2429,7 @@ def HasAtomicFMinFMaxF64FlatInsts : def HasLdsAtomicAddF64 : Predicate<"Subtarget->hasLdsAtomicAddF64()">, - AssemblerPredicate<(any_of FeatureGFX90AInsts)>; + AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX1250Insts)>; def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">, AssemblerPredicate<(all_of FeatureFlatGlobalInsts)>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index fedfa3f9dd900..f16351fac9e2e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1682,7 +1682,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, if 
(ST.hasFlatAtomicFaddF32Inst()) Atomic.legalFor({{S32, FlatPtr}}); - if (ST.hasGFX90AInsts()) { + if (ST.hasGFX90AInsts() || ST.hasGFX1250Insts()) { // These are legal with some caveats, and should have undergone expansion in // the IR in most situations // TODO: Move atomic expansion into legalizer diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index f99e71637f70f..1956a15c57d67 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -2489,7 +2489,7 @@ multiclass VBUFFER_MTBUF_Real_gfx12<bits<4> op, string real_name> { } //===----------------------------------------------------------------------===// -// MUBUF - GFX11, GFX12. +// MUBUF - GFX11, GFX12, GFX1250. //===----------------------------------------------------------------------===// // gfx11 instruction that accept both old and new assembler name. @@ -2600,6 +2600,12 @@ multiclass MUBUF_Real_Atomic_gfx11_gfx12<bits<8> op, def : Mnem_gfx12<gfx11_name, gfx12_name>; } +multiclass MUBUF_Real_Atomic_gfx12_Renamed<bits<8> op, string real_name> : + MUBUF_Real_Atomic_gfx12_impl<op, 0, real_name>, + MUBUF_Real_Atomic_gfx12_impl<op, 1, real_name> { + def : Mnem_gfx12<get_BUF_ps<NAME>.Mnemonic, real_name>; +} + defm BUFFER_GL0_INV : MUBUF_Real_gfx11<0x02B>; defm BUFFER_GL1_INV : MUBUF_Real_gfx11<0x02C>; @@ -2678,6 +2684,10 @@ defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomic_gfx11_gfx12<0x04B, "buffer defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_gfx12<0x059>; defm BUFFER_ATOMIC_PK_ADD_BF16 : MUBUF_Real_Atomic_gfx12<0x05a>; +defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Real_Atomic_gfx12<0x055>; +defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Real_Atomic_gfx12_Renamed<0x05b, "buffer_atomic_min_num_f64">; +defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Real_Atomic_gfx12_Renamed<0x05c, "buffer_atomic_max_num_f64">; + //===----------------------------------------------------------------------===// // MUBUF - GFX10. 
//===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 319cc9d1da181..3ff675d6e5e97 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -1397,6 +1397,9 @@ defm DS_BVH_STACK_RTN_B32 : DS_Real_gfx12<0x0e0, defm DS_BVH_STACK_PUSH8_POP1_RTN_B32 : DS_Real_gfx12<0x0e1>; defm DS_BVH_STACK_PUSH8_POP2_RTN_B64 : DS_Real_gfx12<0x0e2>; +defm DS_ADD_F64 : DS_Real_gfx12<0x054>; +defm DS_ADD_RTN_F64 : DS_Real_gfx12<0x074>; + let AssemblerPredicate = HasLdsBarrierArriveAtomic in { defm DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 : DS_Real_gfx12<0x056>; defm DS_ATOMIC_BARRIER_ARRIVE_RTN_B64 : DS_Real_gfx12<0x075>; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 8ede9caead8bc..d5d1074622135 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -3488,6 +3488,14 @@ defm GLOBAL_LOAD_TR_B64_w32 : VFLAT_Real_AllAddr_gfx1250<0x058, "globa defm GLOBAL_LOAD_TR4_B64 : VFLAT_Real_AllAddr_gfx1250<0x073>; defm GLOBAL_LOAD_TR6_B96 : VFLAT_Real_AllAddr_gfx1250<0x074>; +defm FLAT_ATOMIC_ADD_F64 : VFLAT_Real_Atomics_gfx1250<0x055>; +defm FLAT_ATOMIC_MIN_F64 : VFLAT_Real_Atomics_gfx1250<0x05b, "flat_atomic_min_num_f64">; +defm FLAT_ATOMIC_MAX_F64 : VFLAT_Real_Atomics_gfx1250<0x05c, "flat_atomic_max_num_f64">; + +defm GLOBAL_ATOMIC_ADD_F64 : VFLAT_Real_Atomics_gfx1250<0x055>; +defm GLOBAL_ATOMIC_MIN_F64 : VFLAT_Real_Atomics_gfx1250<0x05b, "global_atomic_min_num_f64">; +defm GLOBAL_ATOMIC_MAX_F64 : VFLAT_Real_Atomics_gfx1250<0x05c, "global_atomic_max_num_f64">; + def True16D16Table : GenericTable { let FilterClass = "True16D16Table"; let CppTypeName = "True16D16Info"; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 10ded0e1d1c3a..5357a375ae5a9 100644 --- 
a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -715,7 +715,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasVINTERPEncoding() const { return GFX11Insts && !hasGFX1250Insts(); } // DS_ADD_F64/DS_ADD_RTN_F64 - bool hasLdsAtomicAddF64() const { return hasGFX90AInsts(); } + bool hasLdsAtomicAddF64() const { + return hasGFX90AInsts() || hasGFX1250Insts(); + } bool hasMultiDwordFlatScratchAddressing() const { return getGeneration() >= GFX9; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll index 0e132f130c844..2785b78da99e2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s -check-prefix=GFX90A ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s -check-prefix=GFX942 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck %s -check-prefix=GFX1250 declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg) declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg) @@ -37,6 +38,17 @@ define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, doub ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_buffer_atomic_add_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: 
buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -56,6 +68,13 @@ define amdgpu_ps void @raw_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, doub ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_buffer_atomic_add_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr poison @@ -92,6 +111,24 @@ define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %rsr ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_mov_b32 s6, 4 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) 
%out, align 8 @@ -120,6 +157,17 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_noret_f64(ptr addrspace(8) ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_ptr_buffer_atomic_add_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -139,6 +187,13 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inreg ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_ptr_buffer_atomic_add_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr poison @@ -175,6 +230,24 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr addrsp ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_ptr_buffer_atomic_add_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_mov_b32 s6, 4 +; GFX1250-NEXT: 
s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -203,6 +276,17 @@ define amdgpu_kernel void @struct_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, d ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_buffer_atomic_add_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) ret void @@ -222,6 +306,13 @@ define amdgpu_ps void @struct_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, d ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_buffer_atomic_add_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call 
double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr poison @@ -258,6 +349,23 @@ define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> % ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -286,6 +394,17 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_noret_f64(ptr addrspace( ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_ptr_buffer_atomic_add_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double 
@llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) ret void @@ -305,6 +424,13 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inr ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_ptr_buffer_atomic_add_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr poison @@ -341,6 +467,23 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr add ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_ptr_buffer_atomic_add_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -369,6 +512,17 @@ define amdgpu_kernel 
void @raw_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, doub ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_buffer_atomic_min_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -388,6 +542,13 @@ define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, doub ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_buffer_atomic_min_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr poison @@ -424,6 +585,24 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %rsr ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_mov_b32 s6, 4 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: 
v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -452,6 +631,17 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_noret_f64(ptr addrspace(8) ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_ptr_buffer_atomic_min_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -471,6 +661,13 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inreg ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_ptr_buffer_atomic_min_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) 
%rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr poison @@ -507,6 +704,24 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr addrsp ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_mov_b32 s6, 4 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -535,6 +750,17 @@ define amdgpu_kernel void @struct_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, d ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_buffer_atomic_min_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 
0, i32 0, i32 0) ret void @@ -554,6 +780,13 @@ define amdgpu_ps void @struct_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, d ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_buffer_atomic_min_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr poison @@ -590,6 +823,23 @@ define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> % ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -618,6 +868,17 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_noret_f64(ptr addrspace( ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: 
buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_ptr_buffer_atomic_min_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) ret void @@ -637,6 +898,13 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inr ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_ptr_buffer_atomic_min_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr poison @@ -673,6 +941,23 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr add ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_ptr_buffer_atomic_min_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, 
s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -701,6 +986,17 @@ define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, doub ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_buffer_atomic_max_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -720,6 +1016,13 @@ define amdgpu_ps void @raw_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, doub ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_buffer_atomic_max_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr poison @@ 
-756,6 +1059,24 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %rsr ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_mov_b32 s6, 4 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -784,6 +1105,17 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_noret_f64(ptr addrspace(8) ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_ptr_buffer_atomic_max_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -803,6 +1135,13 @@ define amdgpu_ps void 
@raw_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inreg ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_ptr_buffer_atomic_max_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr poison @@ -839,6 +1178,24 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrsp ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_mov_b32 s6, 4 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -867,6 +1224,17 @@ define amdgpu_kernel void @struct_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, d ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen ; 
GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_buffer_atomic_max_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) ret void @@ -886,6 +1254,13 @@ define amdgpu_ps void @struct_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, d ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_buffer_atomic_max_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr poison @@ -922,6 +1297,23 @@ define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> % ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; 
GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -950,6 +1342,17 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_max_noret_f64(ptr addrspace( ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_ptr_buffer_atomic_max_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) ret void @@ -969,6 +1372,13 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inr ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_ptr_buffer_atomic_max_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr poison @@ -1005,6 +1415,23 @@ define amdgpu_kernel void 
@struct_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr add ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_ptr_buffer_atomic_max_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -1056,6 +1483,30 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt ; GFX942-NEXT: buffer_inv sc0 sc1 ; GFX942-NEXT: .LBB36_2: ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_mov_b32 s0, exec_lo +; GFX1250-NEXT: s_mov_b32 s1, exec_lo +; GFX1250-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_cmpx_eq_u32_e32 0, v0 +; GFX1250-NEXT: s_cbranch_execz .LBB36_2 +; GFX1250-NEXT: ; %bb.1: +; GFX1250-NEXT: s_bcnt1_i32_b32 s0, s0 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1] +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 
0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: .LBB36_2: +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1104,6 +1555,28 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace( ; GFX942-NEXT: buffer_inv sc1 ; GFX942-NEXT: .LBB37_2: ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_agent: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_mov_b32 s0, exec_lo +; GFX1250-NEXT: s_mov_b32 s1, exec_lo +; GFX1250-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_cmpx_eq_u32_e32 0, v0 +; GFX1250-NEXT: s_cbranch_execz .LBB37_2 +; GFX1250-NEXT: ; %bb.1: +; GFX1250-NEXT: s_bcnt1_i32_b32 s0, s0 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1] +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: .LBB37_2: +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1154,6 +1627,30 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace ; GFX942-NEXT: buffer_inv sc0 sc1 ; GFX942-NEXT: .LBB38_2: ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_system: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_mov_b32 s0, exec_lo +; GFX1250-NEXT: s_mov_b32 s1, exec_lo +; GFX1250-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; 
GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_cmpx_eq_u32_e32 0, v0 +; GFX1250-NEXT: s_cbranch_execz .LBB38_2 +; GFX1250-NEXT: ; %bb.1: +; GFX1250-NEXT: s_bcnt1_i32_b32 s0, s0 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1] +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: .LBB38_2: +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1202,6 +1699,28 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace( ; GFX942-NEXT: buffer_inv sc1 ; GFX942-NEXT: .LBB39_2: ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_flush: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_mov_b32 s0, exec_lo +; GFX1250-NEXT: s_mov_b32 s1, exec_lo +; GFX1250-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_cmpx_eq_u32_e32 0, v0 +; GFX1250-NEXT: s_cbranch_execz .LBB39_2 +; GFX1250-NEXT: ; %bb.1: +; GFX1250-NEXT: s_bcnt1_i32_b32 s0, s0 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1] +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: .LBB39_2: +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr 
addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1229,6 +1748,19 @@ define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %dat ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: buffer_inv sc0 sc1 ; GFX942-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: global_atomic_fadd_f64_rtn_pat: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1254,6 +1786,18 @@ define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, doubl ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: buffer_inv sc1 ; GFX942-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: global_atomic_fadd_f64_rtn_pat_agent: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1281,6 +1825,19 @@ define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, doub ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: buffer_inv 
sc0 sc1 ; GFX942-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: global_atomic_fadd_f64_rtn_pat_system: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1329,6 +1886,28 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs ; GFX942-NEXT: buffer_inv sc1 ; GFX942-NEXT: .LBB43_2: ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_mov_b32 s0, exec_lo +; GFX1250-NEXT: s_mov_b32 s1, exec_lo +; GFX1250-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_cmpx_eq_u32_e32 0, v0 +; GFX1250-NEXT: s_cbranch_execz .LBB43_2 +; GFX1250-NEXT: ; %bb.1: +; GFX1250-NEXT: s_bcnt1_i32_b32 s0, s0 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1] +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: .LBB43_2: +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1360,6 +1939,19 @@ define 
amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 { ; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX942-NEXT: buffer_inv sc0 sc1 ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret void @@ -1389,6 +1981,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 { ; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX942-NEXT: buffer_inv sc1 ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat_agent: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret void @@ -1420,6 +2023,19 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 { ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: buffer_inv sc0 sc1 ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat_system: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; 
GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret void @@ -1447,6 +2063,19 @@ define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 { ; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX942-NEXT: buffer_inv sc0 sc1 ; GFX942-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: flat_atomic_fadd_f64_rtn_pat: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1472,6 +2101,18 @@ define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 { ; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX942-NEXT: buffer_inv sc1 ; GFX942-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: flat_atomic_fadd_f64_rtn_pat_agent: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: 
s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1501,6 +2142,19 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 { ; GFX942-NEXT: buffer_inv sc0 sc1 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: flat_atomic_fadd_f64_rtn_pat_system: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1530,6 +2184,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) { ; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX942-NEXT: buffer_inv sc1 ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret void @@ -1575,6 +2240,40 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr 
addrspace(3) %ptr ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: .LBB51_2: ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: local_atomic_fadd_f64_noret_pat: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_mov_b32 s1, exec_lo +; GFX1250-NEXT: s_mov_b32 s0, 0 +; GFX1250-NEXT: v_mbcnt_lo_u32_b32 v0, s1, 0 +; GFX1250-NEXT: s_mov_b32 s2, exec_lo +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_cmpx_eq_u32_e32 0, v0 +; GFX1250-NEXT: s_cbranch_execz .LBB51_3 +; GFX1250-NEXT: ; %bb.1: +; GFX1250-NEXT: s_bcnt1_i32_b32 s1, s1 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: v_cvt_f64_u32_e32 v[0:1], s1 +; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b32_e32 v4, s1 +; GFX1250-NEXT: ds_load_b64 v[2:3], v4 +; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1] +; GFX1250-NEXT: .LBB51_2: ; %atomicrmw.start +; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_add_f64_e32 v[6:7], v[2:3], v[0:1] +; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[6:7], v4, v[6:7], v[2:3] +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3] +; GFX1250-NEXT: v_mov_b64_e32 v[2:3], v[6:7] +; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 +; GFX1250-NEXT: s_cbranch_execnz .LBB51_2 +; GFX1250-NEXT: .LBB51_3: +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1620,6 +2319,40 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: .LBB52_2: ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: local_atomic_fadd_f64_noret_pat_flush: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_mov_b32 s1, exec_lo +; GFX1250-NEXT: s_mov_b32 
s0, 0 +; GFX1250-NEXT: v_mbcnt_lo_u32_b32 v0, s1, 0 +; GFX1250-NEXT: s_mov_b32 s2, exec_lo +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_cmpx_eq_u32_e32 0, v0 +; GFX1250-NEXT: s_cbranch_execz .LBB52_3 +; GFX1250-NEXT: ; %bb.1: +; GFX1250-NEXT: s_bcnt1_i32_b32 s1, s1 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: v_cvt_f64_u32_e32 v[0:1], s1 +; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b32_e32 v4, s1 +; GFX1250-NEXT: ds_load_b64 v[2:3], v4 +; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1] +; GFX1250-NEXT: .LBB52_2: ; %atomicrmw.start +; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_add_f64_e32 v[6:7], v[2:3], v[0:1] +; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[6:7], v4, v[6:7], v[2:3] +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3] +; GFX1250-NEXT: v_mov_b64_e32 v[2:3], v[6:7] +; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 +; GFX1250-NEXT: s_cbranch_execnz .LBB52_2 +; GFX1250-NEXT: .LBB52_3: +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1665,6 +2398,40 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: .LBB53_2: ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_mov_b32 s1, exec_lo +; GFX1250-NEXT: s_mov_b32 s0, 0 +; GFX1250-NEXT: v_mbcnt_lo_u32_b32 v0, s1, 0 +; GFX1250-NEXT: s_mov_b32 s2, exec_lo +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_cmpx_eq_u32_e32 0, v0 +; GFX1250-NEXT: s_cbranch_execz .LBB53_3 +; GFX1250-NEXT: ; %bb.1: +; 
GFX1250-NEXT: s_bcnt1_i32_b32 s1, s1 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: v_cvt_f64_u32_e32 v[0:1], s1 +; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b32_e32 v4, s1 +; GFX1250-NEXT: ds_load_b64 v[2:3], v4 +; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1] +; GFX1250-NEXT: .LBB53_2: ; %atomicrmw.start +; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_add_f64_e32 v[6:7], v[2:3], v[0:1] +; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[6:7], v4, v[6:7], v[2:3] +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3] +; GFX1250-NEXT: v_mov_b64_e32 v[2:3], v[6:7] +; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 +; GFX1250-NEXT: s_cbranch_execnz .LBB53_2 +; GFX1250-NEXT: .LBB53_3: +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1687,6 +2454,29 @@ define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data ; GFX942-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: local_atomic_fadd_f64_rtn_pat: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b32_e32 v2, v0 +; GFX1250-NEXT: ds_load_b64 v[0:1], v0 +; GFX1250-NEXT: s_mov_b32 s0, 0 +; GFX1250-NEXT: .LBB54_1: ; %atomicrmw.start +; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[4:5], v[0:1] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) +; GFX1250-NEXT: v_add_f64_e32 v[0:1], 4.0, v[4:5] +; GFX1250-NEXT: ds_cmpstore_rtn_b64 
v[0:1], v2, v[0:1], v[4:5] +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[4:5] +; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 +; GFX1250-NEXT: s_cbranch_execnz .LBB54_1 +; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] main_body: %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst ret double %ret diff --git a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll index 6067194d947fa..f9a24fee59692 100644 --- a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a -amdgpu-atomic-optimizer-strategy=None | FileCheck %s -check-prefix=GFX90A ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx942 -amdgpu-atomic-optimizer-strategy=None | FileCheck %s -check-prefix=GFX942 +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1250 -amdgpu-atomic-optimizer-strategy=None | FileCheck %s -check-prefix=GFX1250 declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg) declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg) @@ -38,6 +39,17 @@ define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, doub ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_buffer_atomic_add_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: 
buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -57,6 +69,13 @@ define amdgpu_ps void @raw_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, doub ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_buffer_atomic_add_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr poison @@ -93,6 +112,24 @@ define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %rsr ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_mov_b32 s6, 4 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) 
%out, align 8 @@ -121,6 +158,17 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_noret_f64(ptr addrspace(8) ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_ptr_buffer_atomic_add_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -140,6 +188,13 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inreg ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_ptr_buffer_atomic_add_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr poison @@ -176,6 +231,24 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr addrsp ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_ptr_buffer_atomic_add_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_mov_b32 s6, 4 +; GFX1250-NEXT: 
s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -204,6 +277,17 @@ define amdgpu_kernel void @struct_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, d ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_buffer_atomic_add_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) ret void @@ -223,6 +307,13 @@ define amdgpu_ps void @struct_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, d ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_buffer_atomic_add_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call 
double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr poison @@ -259,6 +350,23 @@ define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> % ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -287,6 +395,17 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_noret_f64(ptr addrspace( ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_ptr_buffer_atomic_add_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double 
@llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) ret void @@ -306,6 +425,13 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inr ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_ptr_buffer_atomic_add_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr poison @@ -342,6 +468,23 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr add ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_ptr_buffer_atomic_add_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -370,6 +513,17 @@ define amdgpu_kernel 
void @raw_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, doub ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_buffer_atomic_min_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -389,6 +543,13 @@ define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, doub ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_buffer_atomic_min_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr poison @@ -425,6 +586,24 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %rsr ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_mov_b32 s6, 4 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: 
v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -453,6 +632,17 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_noret_f64(ptr addrspace(8) ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_ptr_buffer_atomic_min_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -472,6 +662,13 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inreg ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_ptr_buffer_atomic_min_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) 
%rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr poison @@ -508,6 +705,24 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr addrsp ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_mov_b32 s6, 4 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -536,6 +751,17 @@ define amdgpu_kernel void @struct_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, d ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_buffer_atomic_min_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 
0, i32 0, i32 0) ret void @@ -555,6 +781,13 @@ define amdgpu_ps void @struct_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, d ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_buffer_atomic_min_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr poison @@ -591,6 +824,23 @@ define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> % ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -619,6 +869,17 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_noret_f64(ptr addrspace( ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: 
buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_ptr_buffer_atomic_min_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) ret void @@ -638,6 +899,13 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inr ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_ptr_buffer_atomic_min_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr poison @@ -674,6 +942,23 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr add ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_ptr_buffer_atomic_min_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, 
s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -702,6 +987,17 @@ define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, doub ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_buffer_atomic_max_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -721,6 +1017,13 @@ define amdgpu_ps void @raw_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, doub ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_buffer_atomic_max_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr poison @@ 
-757,6 +1060,24 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %rsr ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_mov_b32 s6, 4 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -785,6 +1106,17 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_noret_f64(ptr addrspace(8) ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_ptr_buffer_atomic_max_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -804,6 +1136,13 @@ define amdgpu_ps void 
@raw_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inreg ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_ptr_buffer_atomic_max_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr poison @@ -840,6 +1179,24 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrsp ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_mov_b32 s6, 4 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -868,6 +1225,17 @@ define amdgpu_kernel void @struct_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, d ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen ; 
GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_buffer_atomic_max_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) ret void @@ -887,6 +1255,13 @@ define amdgpu_ps void @struct_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, d ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_buffer_atomic_max_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr poison @@ -923,6 +1298,23 @@ define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> % ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; 
GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -951,6 +1343,17 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_max_noret_f64(ptr addrspace( ; GFX942-NEXT: v_mov_b32_e32 v2, s8 ; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_ptr_buffer_atomic_max_noret_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) ret void @@ -970,6 +1373,13 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inr ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_ptr_buffer_atomic_max_rtn_f64: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr poison @@ -1006,6 +1416,23 @@ define amdgpu_kernel void 
@struct_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr add ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9] ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: struct_ptr_buffer_atomic_max_rtn_f64_off4_slc: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v2, s10 +; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -1038,6 +1465,19 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: buffer_inv sc0 sc1 ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1067,6 +1507,17 @@ define amdgpu_kernel void 
@global_atomic_fadd_f64_noret_pat_agent(ptr addrspace( ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: buffer_inv sc1 ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_agent: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1098,6 +1549,19 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: buffer_inv sc0 sc1 ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_system: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1127,6 +1591,17 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace( ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: buffer_inv sc1 ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_flush: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; 
GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1154,6 +1629,19 @@ define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %dat ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: buffer_inv sc0 sc1 ; GFX942-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: global_atomic_fadd_f64_rtn_pat: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1179,6 +1667,18 @@ define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, doubl ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: buffer_inv sc1 ; GFX942-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: global_atomic_fadd_f64_rtn_pat_agent: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, 
double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1206,6 +1706,19 @@ define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, doub ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: buffer_inv sc0 sc1 ; GFX942-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: global_atomic_fadd_f64_rtn_pat_system: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1246,6 +1759,17 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: buffer_inv sc1 ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst ret void @@ -1277,6 +1801,19 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 { ; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX942-NEXT: buffer_inv sc0 sc1 ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat: +; GFX1250: ; 
%bb.0: ; %main_body +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret void @@ -1306,6 +1843,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 { ; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX942-NEXT: buffer_inv sc1 ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat_agent: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret void @@ -1337,6 +1885,19 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 { ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: buffer_inv sc0 sc1 ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat_system: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: 
s_wait_storecnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret void @@ -1364,6 +1925,19 @@ define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 { ; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX942-NEXT: buffer_inv sc0 sc1 ; GFX942-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: flat_atomic_fadd_f64_rtn_pat: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1389,6 +1963,18 @@ define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 { ; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX942-NEXT: buffer_inv sc1 ; GFX942-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: flat_atomic_fadd_f64_rtn_pat_agent: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1418,6 +2004,19 @@ 
define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 { ; GFX942-NEXT: buffer_inv sc0 sc1 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: flat_atomic_fadd_f64_rtn_pat_system: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1458,6 +2057,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) { ; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX942-NEXT: buffer_inv sc1 ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1 ret void @@ -1485,6 +2095,31 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret(ptr addrspace(3) %ptr, do ; GFX942-NEXT: ds_add_f64 v2, v[0:1] ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: local_atomic_fadd_f64_noret: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x24 
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x2c +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v2, s2 +; GFX1250-NEXT: s_mov_b32 s2, 0 +; GFX1250-NEXT: ds_load_b64 v[0:1], v0 +; GFX1250-NEXT: .LBB51_1: ; %atomicrmw.start +; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_add_f64_e32 v[4:5], s[0:1], v[0:1] +; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[4:5], v2, v[4:5], v[0:1] +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[4:5], v[0:1] +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], v[4:5] +; GFX1250-NEXT: s_or_b32 s2, vcc_lo, s2 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s2 +; GFX1250-NEXT: s_cbranch_execnz .LBB51_1 +; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX1250-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) ret void @@ -1508,6 +2143,30 @@ define double @local_atomic_fadd_f64_rtn(ptr addrspace(3) %ptr, double %data) { ; GFX942-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: local_atomic_fadd_f64_rtn: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v5, v2 :: v_dual_mov_b32 v2, v0 +; GFX1250-NEXT: v_mov_b32_e32 v4, v1 +; GFX1250-NEXT: ds_load_b64 v[0:1], v0 +; GFX1250-NEXT: s_mov_b32 s0, 0 +; GFX1250-NEXT: .LBB52_1: ; %atomicrmw.start +; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[6:7], v[0:1] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) +; GFX1250-NEXT: v_add_f64_e32 v[0:1], v[6:7], v[4:5] +; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[0:1], v2, v[0:1], v[6:7] +; GFX1250-NEXT: 
s_wait_dscnt 0x0 +; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[6:7] +; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 +; GFX1250-NEXT: s_cbranch_execnz .LBB52_1 +; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] main_body: %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) ret double %ret @@ -1534,6 +2193,29 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr ; GFX942-NEXT: ds_add_f64 v2, v[0:1] ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: local_atomic_fadd_f64_noret_pat: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v2, s0 +; GFX1250-NEXT: s_mov_b32 s0, 0 +; GFX1250-NEXT: ds_load_b64 v[0:1], v0 +; GFX1250-NEXT: .LBB53_1: ; %atomicrmw.start +; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_add_f64_e32 v[4:5], 4.0, v[0:1] +; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[4:5], v2, v[4:5], v[0:1] +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[4:5], v[0:1] +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], v[4:5] +; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 +; GFX1250-NEXT: s_cbranch_execnz .LBB53_1 +; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1560,6 +2242,29 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3 ; GFX942-NEXT: ds_add_f64 v2, v[0:1] ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: s_endpgm +; +; 
GFX1250-LABEL: local_atomic_fadd_f64_noret_pat_flush: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v2, s0 +; GFX1250-NEXT: s_mov_b32 s0, 0 +; GFX1250-NEXT: ds_load_b64 v[0:1], v0 +; GFX1250-NEXT: .LBB54_1: ; %atomicrmw.start +; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_add_f64_e32 v[4:5], 4.0, v[0:1] +; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[4:5], v2, v[4:5], v[0:1] +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[4:5], v[0:1] +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], v[4:5] +; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 +; GFX1250-NEXT: s_cbranch_execnz .LBB54_1 +; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1586,6 +2291,29 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp ; GFX942-NEXT: ds_add_f64 v2, v[0:1] ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: s_endpgm +; +; GFX1250-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v2, s0 +; GFX1250-NEXT: s_mov_b32 s0, 0 +; GFX1250-NEXT: ds_load_b64 v[0:1], v0 +; GFX1250-NEXT: .LBB55_1: ; %atomicrmw.start +; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_add_f64_e32 v[4:5], 4.0, v[0:1] +; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[4:5], v2, v[4:5], v[0:1] +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: v_cmp_eq_u64_e32 
vcc_lo, v[4:5], v[0:1] +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], v[4:5] +; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 +; GFX1250-NEXT: s_cbranch_execnz .LBB55_1 +; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX1250-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst ret void @@ -1608,6 +2336,29 @@ define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data ; GFX942-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: local_atomic_fadd_f64_rtn_pat: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b32_e32 v2, v0 +; GFX1250-NEXT: ds_load_b64 v[0:1], v0 +; GFX1250-NEXT: s_mov_b32 s0, 0 +; GFX1250-NEXT: .LBB56_1: ; %atomicrmw.start +; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[4:5], v[0:1] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) +; GFX1250-NEXT: v_add_f64_e32 v[0:1], 4.0, v[4:5] +; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[0:1], v2, v[0:1], v[4:5] +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[4:5] +; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 +; GFX1250-NEXT: s_cbranch_execnz .LBB56_1 +; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] main_body: %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1631,6 +2382,30 @@ define double @local_atomic_fadd_f64_rtn_ieee_unsafe(ptr addrspace(3) %ptr, doub ; GFX942-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: s_setpc_b64 
s[30:31] +; +; GFX1250-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v5, v2 :: v_dual_mov_b32 v2, v0 +; GFX1250-NEXT: v_mov_b32_e32 v4, v1 +; GFX1250-NEXT: ds_load_b64 v[0:1], v0 +; GFX1250-NEXT: s_mov_b32 s0, 0 +; GFX1250-NEXT: .LBB57_1: ; %atomicrmw.start +; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[6:7], v[0:1] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) +; GFX1250-NEXT: v_add_f64_e32 v[0:1], v[6:7], v[4:5] +; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[0:1], v2, v[0:1], v[6:7] +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[6:7] +; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 +; GFX1250-NEXT: s_cbranch_execnz .LBB57_1 +; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] main_body: %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) ret double %ret @@ -1654,6 +2429,30 @@ define double @local_atomic_fadd_f64_rtn_ieee_safe(ptr addrspace(3) %ptr, double ; GFX942-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: local_atomic_fadd_f64_rtn_ieee_safe: +; GFX1250: ; %bb.0: ; %main_body +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v5, v2 :: v_dual_mov_b32 v2, v0 +; GFX1250-NEXT: v_mov_b32_e32 v4, v1 +; GFX1250-NEXT: ds_load_b64 v[0:1], v0 +; GFX1250-NEXT: s_mov_b32 s0, 0 +; GFX1250-NEXT: .LBB58_1: ; %atomicrmw.start +; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: v_mov_b64_e32 v[6:7], v[0:1] +; GFX1250-NEXT: 
s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) +; GFX1250-NEXT: v_add_f64_e32 v[0:1], v[6:7], v[4:5] +; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[0:1], v2, v[0:1], v[6:7] +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[6:7] +; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 +; GFX1250-NEXT: s_cbranch_execnz .LBB58_1 +; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] main_body: %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) ret double %ret diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_ds.s b/llvm/test/MC/AMDGPU/gfx1250_asm_ds.s index 98436c9d6aa9c..f1641fc693b1c 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_ds.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_ds.s @@ -24,3 +24,81 @@ ds_atomic_barrier_arrive_rtn_b64 v[2:3], v2, v[4:5] offset:513 ds_atomic_barrier_arrive_rtn_b64 v[254:255], v2, v[4:5] offset:65535 // GFX1250: ds_atomic_barrier_arrive_rtn_b64 v[254:255], v2, v[4:5] offset:65535 ; encoding: [0xff,0xff,0xd4,0xd9,0x02,0x04,0x00,0xfe] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +ds_add_f64 v1, v[2:3] offset:65535 +// GFX1250: ds_add_f64 v1, v[2:3] offset:65535 ; encoding: [0xff,0xff,0x50,0xd9,0x01,0x02,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: ds_add_f64 v1, v[2:3] offset:65535 +// GFX12-ERR-NEXT:{{^}}^ + +ds_add_f64 v255, v[2:3] offset:65535 +// GFX1250: ds_add_f64 v255, v[2:3] offset:65535 ; encoding: [0xff,0xff,0x50,0xd9,0xff,0x02,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: ds_add_f64 v255, v[2:3] offset:65535 +// GFX12-ERR-NEXT:{{^}}^ + +ds_add_f64 v1, v[254:255] offset:65535 +// GFX1250: ds_add_f64 v1, v[254:255] offset:65535 ; encoding: 
[0xff,0xff,0x50,0xd9,0x01,0xfe,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: ds_add_f64 v1, v[254:255] offset:65535 +// GFX12-ERR-NEXT:{{^}}^ + +ds_add_f64 v1, v[2:3] +// GFX1250: ds_add_f64 v1, v[2:3] ; encoding: [0x00,0x00,0x50,0xd9,0x01,0x02,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: ds_add_f64 v1, v[2:3] +// GFX12-ERR-NEXT:{{^}}^ + +ds_add_f64 v1, v[2:3] +// GFX1250: ds_add_f64 v1, v[2:3] ; encoding: [0x00,0x00,0x50,0xd9,0x01,0x02,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: ds_add_f64 v1, v[2:3] +// GFX12-ERR-NEXT:{{^}}^ + +ds_add_f64 v1, v[2:3] offset:4 +// GFX1250: ds_add_f64 v1, v[2:3] offset:4 ; encoding: [0x04,0x00,0x50,0xd9,0x01,0x02,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: ds_add_f64 v1, v[2:3] offset:4 +// GFX12-ERR-NEXT:{{^}}^ + +ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:65535 +// GFX1250: ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0x01,0x02,0x00,0x04] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:65535 +// GFX12-ERR-NEXT:{{^}}^ + +ds_add_rtn_f64 v[254:255], v1, v[2:3] offset:65535 +// GFX1250: ds_add_rtn_f64 v[254:255], v1, v[2:3] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0x01,0x02,0x00,0xfe] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: ds_add_rtn_f64 v[254:255], v1, v[2:3] offset:65535 +// GFX12-ERR-NEXT:{{^}}^ + +ds_add_rtn_f64 v[4:5], v255, v[2:3] offset:65535 +// GFX1250: ds_add_rtn_f64 v[4:5], v255, v[2:3] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0xff,0x02,0x00,0x04] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// 
GFX12-ERR-NEXT: ds_add_rtn_f64 v[4:5], v255, v[2:3] offset:65535 +// GFX12-ERR-NEXT:{{^}}^ + +ds_add_rtn_f64 v[4:5], v1, v[254:255] offset:65535 +// GFX1250: ds_add_rtn_f64 v[4:5], v1, v[254:255] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0x01,0xfe,0x00,0x04] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: ds_add_rtn_f64 v[4:5], v1, v[254:255] offset:65535 +// GFX12-ERR-NEXT:{{^}}^ + +ds_add_rtn_f64 v[4:5], v1, v[2:3] +// GFX1250: ds_add_rtn_f64 v[4:5], v1, v[2:3] ; encoding: [0x00,0x00,0xd0,0xd9,0x01,0x02,0x00,0x04] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: ds_add_rtn_f64 v[4:5], v1, v[2:3] +// GFX12-ERR-NEXT:{{^}}^ + +ds_add_rtn_f64 v[4:5], v1, v[2:3] +// GFX1250: ds_add_rtn_f64 v[4:5], v1, v[2:3] ; encoding: [0x00,0x00,0xd0,0xd9,0x01,0x02,0x00,0x04] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: ds_add_rtn_f64 v[4:5], v1, v[2:3] +// GFX12-ERR-NEXT:{{^}}^ + +ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:4 +// GFX1250: ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:4 ; encoding: [0x04,0x00,0xd0,0xd9,0x01,0x02,0x00,0x04] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:4 +// GFX12-ERR-NEXT:{{^}}^ diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s index 1d14bd91a7569..7a4da255b5594 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s @@ -18,3 +18,303 @@ buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU // GFX12-ERR-NEXT:{{^}}buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv // GFX12-ERR-NEXT:{{^}} ^ + +buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:4095 +// GFX1250: 
buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT:{{^}}buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_add_f64 v[4:5], off, s[12:15], s3 offset:4095 +// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], off, s[12:15], s3 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_add_f64 v[4:5], off, s[96:99], s3 offset:4095 +// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], off, s[96:99], s3 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_add_f64 v[4:5], off, s[8:11], s101 offset:4095 +// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], off, s[8:11], s101 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_add_f64 v[4:5], off, s[8:11], m0 offset:4095 +// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], off, s[8:11], m0 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 
+// GFX1250: buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 +// GFX1250: buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 +// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 +// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:7 +// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:7 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:4095 +// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], 
s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_min_num_f64 v[4:5], off, s[12:15], s3 offset:4095 +// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[12:15], s3 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_min_num_f64 v[4:5], off, s[96:99], s3 offset:4095 +// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[96:99], s3 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s101 offset:4095 +// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s101 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_min_num_f64 v[4:5], off, s[8:11], m0 offset:4095 +// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], m0 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 idxen 
offset:4095 +// GFX1250: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 +// GFX1250: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 +// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 +// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:7 +// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:7 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 
offset:4095 +// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_max_num_f64 v[4:5], off, s[12:15], s3 offset:4095 +// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[12:15], s3 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_max_num_f64 v[4:5], off, s[96:99], s3 offset:4095 +// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[96:99], s3 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s101 offset:4095 +// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s101 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_max_num_f64 v[4:5], off, s[8:11], m0 offset:4095 +// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], m0 offset:4095 +// 
GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 +// GFX1250: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 +// GFX1250: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 +// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 +// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:7 +// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:7 +// 
GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 offset:4095 +// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_min_f64 v[4:5], off, s[12:15], s3 offset:4095 +// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[12:15], s3 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_min_f64 v[4:5], off, s[96:99], s3 offset:4095 +// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[96:99], s3 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_min_f64 v[4:5], off, s[8:11], s101 offset:4095 +// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[8:11], s101 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_min_f64 v[4:5], off, s[8:11], m0 offset:4095 +// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, 
s[8:11], m0 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_min_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 +// GFX1250: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_min_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 +// GFX1250: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 +// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 +// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 offset:7 +// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 offset:7 +// 
GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 offset:4095 +// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_max_f64 v[4:5], off, s[12:15], s3 offset:4095 +// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[12:15], s3 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_max_f64 v[4:5], off, s[96:99], s3 offset:4095 +// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[96:99], s3 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_max_f64 v[4:5], off, s[8:11], s101 offset:4095 +// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[8:11], s101 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_max_f64 v[4:5], off, s[8:11], m0 offset:4095 +// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, 
s[8:11], m0 offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_max_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 +// GFX1250: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_max_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 +// GFX1250: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 +// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 +// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 +// GFX12-ERR-NEXT:{{^}}^ + +buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 offset:7 +// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 offset:7 +// 
GFX12-ERR-NEXT:{{^}}^ diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s index c5288a76e5721..1ef40832e0460 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s @@ -285,6 +285,217 @@ flat_load_monitor_b64 v[2:3], v2, s[4:5] offset:64 scale_offset // GFX1250: flat_load_monitor_b64 v[2:3], v2, s[4:5] offset:64 scale_offset ; encoding: [0x04,0x40,0x1c,0xec,0x02,0x00,0x01,0x00,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 +// GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_add_f64 v[254:255], v[2:3] offset:4095 +// GFX1250: flat_atomic_add_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_add_f64 v[254:255], v[2:3] offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_add_f64 v[0:1], v[254:255] offset:4095 +// GFX1250: flat_atomic_add_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_add_f64 v[0:1], v[254:255] offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_add_f64 v[0:1], v[2:3] +// GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] +// 
GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_add_f64 v[0:1], v[2:3] +// GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_add_f64 v[0:1], v[2:3] offset:7 +// GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] offset:7 +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_min_num_f64 v[0:1], v[2:3] offset:4095 +// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_min_num_f64 v[254:255], v[2:3] offset:4095 +// GFX1250: flat_atomic_min_num_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_min_num_f64 v[254:255], v[2:3] offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_min_num_f64 v[0:1], v[254:255] offset:4095 +// GFX1250: flat_atomic_min_num_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_min_num_f64 v[0:1], v[254:255] offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_min_num_f64 v[0:1], v[2:3] +// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] ; encoding: 
[0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_min_num_f64 v[0:1], v[2:3] +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_min_num_f64 v[0:1], v[2:3] +// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_min_num_f64 v[0:1], v[2:3] +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_min_num_f64 v[0:1], v[2:3] offset:7 +// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:7 +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_max_num_f64 v[0:1], v[2:3] offset:4095 +// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_max_num_f64 v[254:255], v[2:3] offset:4095 +// GFX1250: flat_atomic_max_num_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_max_num_f64 v[254:255], v[2:3] offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_max_num_f64 v[0:1], v[254:255] offset:4095 +// GFX1250: flat_atomic_max_num_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: 
flat_atomic_max_num_f64 v[0:1], v[254:255] offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_max_num_f64 v[0:1], v[2:3] +// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_max_num_f64 v[0:1], v[2:3] +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_max_num_f64 v[0:1], v[2:3] +// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_max_num_f64 v[0:1], v[2:3] +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_max_num_f64 v[0:1], v[2:3] offset:7 +// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:7 +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_min_f64 v[0:1], v[2:3] offset:4095 +// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_min_f64 v[254:255], v[2:3] offset:4095 +// GFX1250: flat_atomic_min_num_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_min_f64 v[254:255], v[2:3] offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_min_f64 v[0:1], v[254:255] offset:4095 +// GFX1250: flat_atomic_min_num_f64 v[0:1], v[254:255] offset:4095 ; encoding: 
[0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_min_f64 v[0:1], v[254:255] offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_min_f64 v[0:1], v[2:3] +// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_min_f64 v[0:1], v[2:3] +// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_min_f64 v[0:1], v[2:3] offset:7 +// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] offset:7 +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_max_f64 v[0:1], v[2:3] offset:4095 +// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_max_f64 v[254:255], v[2:3] offset:4095 +// GFX1250: flat_atomic_max_num_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_max_f64 v[254:255], v[2:3] offset:4095 +// 
GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_max_f64 v[0:1], v[254:255] offset:4095 +// GFX1250: flat_atomic_max_num_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_max_f64 v[0:1], v[254:255] offset:4095 +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_max_f64 v[0:1], v[2:3] +// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_max_f64 v[0:1], v[2:3] +// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] +// GFX12-ERR-NEXT:{{^}}^ + +flat_atomic_max_f64 v[0:1], v[2:3] offset:7 +// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] offset:7 +// GFX12-ERR-NEXT:{{^}}^ + +global_atomic_add_f64 v[0:1], v[2:3], off +// GFX1250: global_atomic_add_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0x40,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: global_atomic_add_f64 v[0:1], v[2:3], off +// GFX12-ERR-NEXT:{{^}}^ + +global_atomic_min_num_f64 v[0:1], v[2:3], off +// GFX1250: global_atomic_min_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0xc0,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
instruction not supported on this GPU +// GFX12-ERR-NEXT: global_atomic_min_num_f64 v[0:1], v[2:3], off +// GFX12-ERR-NEXT:{{^}}^ + +global_atomic_max_num_f64 v[0:1], v[2:3], off +// GFX1250: global_atomic_max_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0x00,0x17,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: global_atomic_max_num_f64 v[0:1], v[2:3], off +// GFX12-ERR-NEXT:{{^}}^ + +global_atomic_min_f64 v[0:1], v[2:3], off +// GFX1250: global_atomic_min_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0xc0,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: global_atomic_min_f64 v[0:1], v[2:3], off +// GFX12-ERR-NEXT:{{^}}^ + +global_atomic_max_f64 v[0:1], v[2:3], off +// GFX1250: global_atomic_max_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0x00,0x17,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR-NEXT: global_atomic_max_f64 v[0:1], v[2:3], off +// GFX12-ERR-NEXT:{{^}}^ + + global_store_async_from_lds_b8 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS // GFX1250: global_store_async_from_lds_b8 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0xc0,0x18,0xee,0x00,0x00,0xbc,0x00,0x02,0x00,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_ds.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_ds.txt index e03c4327d9814..0870aa7ba3dc2 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_ds.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_ds.txt @@ -17,3 +17,36 @@ # GFX1250: ds_atomic_barrier_arrive_rtn_b64 v[2:3], v2, v[4:5] offset:513 ; encoding: [0x01,0x02,0xd4,0xd9,0x02,0x04,0x00,0x02] 0x01,0x02,0xd4,0xd9,0x02,0x04,0x00,0x02 + 
+# GFX1250: ds_add_f64 v1, v[254:255] offset:65535 ; encoding: [0xff,0xff,0x50,0xd9,0x01,0xfe,0x00,0x00] +0xff,0xff,0x50,0xd9,0x01,0xfe,0x00,0x00 + +# GFX1250: ds_add_f64 v1, v[2:3] ; encoding: [0x00,0x00,0x50,0xd9,0x01,0x02,0x00,0x00] +0x00,0x00,0x50,0xd9,0x01,0x02,0x00,0x00 + +# GFX1250: ds_add_f64 v1, v[2:3] offset:4 ; encoding: [0x04,0x00,0x50,0xd9,0x01,0x02,0x00,0x00] +0x04,0x00,0x50,0xd9,0x01,0x02,0x00,0x00 + +# GFX1250: ds_add_f64 v1, v[2:3] offset:65535 ; encoding: [0xff,0xff,0x50,0xd9,0x01,0x02,0x00,0x00] +0xff,0xff,0x50,0xd9,0x01,0x02,0x00,0x00 + +# GFX1250: ds_add_f64 v255, v[2:3] offset:65535 ; encoding: [0xff,0xff,0x50,0xd9,0xff,0x02,0x00,0x00] +0xff,0xff,0x50,0xd9,0xff,0x02,0x00,0x00 + +# GFX1250: ds_add_rtn_f64 v[254:255], v1, v[2:3] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0x01,0x02,0x00,0xfe] +0xff,0xff,0xd0,0xd9,0x01,0x02,0x00,0xfe + +# GFX1250: ds_add_rtn_f64 v[4:5], v1, v[254:255] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0x01,0xfe,0x00,0x04] +0xff,0xff,0xd0,0xd9,0x01,0xfe,0x00,0x04 + +# GFX1250: ds_add_rtn_f64 v[4:5], v1, v[2:3] ; encoding: [0x00,0x00,0xd0,0xd9,0x01,0x02,0x00,0x04] +0x00,0x00,0xd0,0xd9,0x01,0x02,0x00,0x04 + +# GFX1250: ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:4 ; encoding: [0x04,0x00,0xd0,0xd9,0x01,0x02,0x00,0x04] +0x04,0x00,0xd0,0xd9,0x01,0x02,0x00,0x04 + +# GFX1250: ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0x01,0x02,0x00,0x04] +0xff,0xff,0xd0,0xd9,0x01,0x02,0x00,0x04 + +# GFX1250: ds_add_rtn_f64 v[4:5], v255, v[2:3] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0xff,0x02,0x00,0x04] +0xff,0xff,0xd0,0xd9,0xff,0x02,0x00,0x04 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt index a2f12115bb64b..2499225626acc 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt @@ -8,3 +8,93 @@ # GFX1250: 
buffer_store_b128 v[2:5], v0, s[12:15], s4 idxen offset:4095 nv ; encoding: [0x84,0x40,0x07,0xc4,0x02,0x18,0x80,0x80,0x00,0xff,0x0f,0x00] 0x84,0x40,0x07,0xc4,0x02,0x18,0x80,0x80,0x00,0xff,0x0f,0x00 + +# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00 + +# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00] +0x03,0x40,0x15,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00 + +# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00] +0x03,0x40,0x15,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00 + +# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +0x65,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00 + +# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +0x7d,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00 + +# GFX1250: buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00] +0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00 + +# GFX1250: buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00] +0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00 + +# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00] +0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00 + +# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00] 
+0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00 + +# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00] +0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00 + +# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00 + +# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00] +0x03,0xc0,0x16,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00 + +# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00] +0x03,0xc0,0x16,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00 + +# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +0x65,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00 + +# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +0x7d,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00 + +# GFX1250: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00] +0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00 + +# GFX1250: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00] +0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00 + +# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00] +0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00 + +# GFX1250: 
buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00] +0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00 + +# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00] +0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00 + +# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00 + +# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00] +0x03,0x00,0x17,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00 + +# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00] +0x03,0x00,0x17,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00 + +# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +0x65,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00 + +# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00] +0x7d,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00 + +# GFX1250: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00] +0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00 + +# GFX1250: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00] +0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00 + +# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 ; encoding: 
[0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00] +0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00 + +# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00] +0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00 + +# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00] +0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt index 291192b53e320..c8eee9b36eb94 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt @@ -3177,6 +3177,75 @@ # GFX1250: global_load_monitor_b64 v[2:3], v2, s[4:5] offset:64 scale_offset ; encoding: [0x04,0x40,0x1c,0xee,0x02,0x00,0x01,0x00,0x02,0x40,0x00,0x00] 0x04,0x40,0x1c,0xee,0x02,0x00,0x01,0x00,0x02,0x40,0x00,0x00 +# GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00] +0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00 + +# GFX1250: flat_atomic_add_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00] +0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00 + +# GFX1250: flat_atomic_add_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00] +0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00 + +# GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00 + +# GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] 
+0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00 + +# GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00] +0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00 + +# GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00] +0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00 + +# GFX1250: flat_atomic_min_num_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00] +0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00 + +# GFX1250: flat_atomic_min_num_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00] +0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00 + +# GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00 + +# GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00 + +# GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00] +0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00 + +# GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00] +0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00 + +# GFX1250: flat_atomic_max_num_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00] +0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00 + +# GFX1250: flat_atomic_max_num_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00] 
+0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00 + +# GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00 + +# GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00 + +# GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00] +0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00 + +# GFX1250: global_atomic_add_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0x40,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +0x7c,0x40,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00 + +# GFX1250: global_atomic_min_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0xc0,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +0x7c,0xc0,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00 + +# GFX1250: global_atomic_max_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0x00,0x17,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +0x7c,0x00,0x17,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00 + +# GFX1250: global_atomic_min_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0xc0,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +0x7c,0xc0,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00 + +# GFX1250: global_atomic_max_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0x00,0x17,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +0x7c,0x00,0x17,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00 + # GFX1250: global_load_async_to_lds_b128 v1, v[2:3], off offset:64 ; encoding: [0x7c,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00] 0x7c,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00