diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 947ac5c27620f..4290c3a16262b 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -446,6 +446,9 @@ multiclass Ternary_i16_Pats_gfx9; } +let True16Predicate = UseRealTrue16Insts in { + defm: Ternary_i16_Pats_gfx9; +} // End True16Predicates = UseRealTrue16Insts let True16Predicate = UseFakeTrue16Insts in { defm: Ternary_i16_Pats_gfx9; } // End True16Predicates = UseFakeTrue16Insts diff --git a/llvm/test/CodeGen/AMDGPU/mad.u16.ll b/llvm/test/CodeGen/AMDGPU/mad.u16.ll index c500942840053..c4c0dc6998265 100644 --- a/llvm/test/CodeGen/AMDGPU/mad.u16.ll +++ b/llvm/test/CodeGen/AMDGPU/mad.u16.ll @@ -2,7 +2,8 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX11 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefixes=GFX11,GFX11-TRUE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefixes=GFX11,GFX11-FAKE16 %s ; FIXME: GFX9 should be producing v_mad_u16 instead of v_mad_legacy_u16. @@ -65,22 +66,44 @@ define amdgpu_kernel void @mad_u16( ; GFX10-NEXT: global_store_short v0, v1, s[8:9] ; GFX10-NEXT: s_endpgm ; -; GFX11-LABEL: mad_u16: -; GFX11: ; %bb.0: ; %entry -; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 -; GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: global_load_u16 v0, v0, s[6:7] glc dlc -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_mad_u16 v0, v1, v2, v0 -; GFX11-NEXT: global_store_b16 v3, v0, s[0:1] -; GFX11-NEXT: s_endpgm +; GFX11-TRUE16-LABEL: mad_u16: +; GFX11-TRUE16: ; %bb.0: ; %entry +; GFX11-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-TRUE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-TRUE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-TRUE16-NEXT: global_load_u16 v3, v0, s[6:7] glc dlc +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v1.l +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: s_endpgm +; +; GFX11-FAKE16-LABEL: mad_u16: +; GFX11-FAKE16: ; %bb.0: ; %entry +; GFX11-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-FAKE16-NEXT: global_load_u16 v0, v0, s[6:7] glc dlc +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-FAKE16-NEXT: v_mad_u16 v0, v1, v2, v0 +; GFX11-FAKE16-NEXT: global_store_b16 v3, v0, s[0:1] +; GFX11-FAKE16-NEXT: s_endpgm ptr addrspace(1) %r, ptr addrspace(1) %a, ptr addrspace(1) %b, @@ -121,11 +144,20 @@ define i16 @v_mad_u16(i16 %arg0, i16 %arg1, i16 %arg2) { ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mad_u16: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_mad_u16 v0, v0, v1, v2 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: v_mad_u16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v1.l +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: v_mad_u16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v2 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %mul = mul i16 %arg0, %arg1 %add = add i16 %mul, %arg2 ret i16 %add @@ -151,13 +183,23 @@ define i32 @v_mad_u16_zext(i16 %arg0, i16 %arg1, i16 %arg2) { ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mad_u16_zext: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_mad_u16 v0, v0, v1, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: v_mad_u16_zext: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v1.l +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: v_mad_u16_zext: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v2 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %mul = mul i16 %arg0, %arg1 %add = add i16 %mul, %arg2 %zext = zext i16 %add to i32 @@ -187,13 +229,23 @@ define i64 @v_mad_u16_zext64(i16 %arg0, i16 %arg1, i16 %arg2) { ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mad_u16_zext64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_mad_u16 v0, v0, v1, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0xffff, v0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: v_mad_u16_zext64: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v1.l +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0xffff, v0 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: v_mad_u16_zext64: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v2 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0xffff, v0 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %mul = mul i16 %arg0, %arg1 %add = add i16 %mul, %arg2 %zext = zext i16 %add to i64