From 6b41b18c975ebc67cf6649c6ee9786dc7bd80641 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Wed, 28 May 2025 03:06:10 -0400 Subject: [PATCH] true16 for v_cvt_pk_bf8/fp8_f32 --- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 12 +- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 1 + llvm/lib/Target/AMDGPU/VOP3Instructions.td | 70 ++++++++- .../CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll | 139 ++++++++++++------ llvm/test/MC/AMDGPU/gfx12_asm_vop3.s | 36 +++-- llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s | 78 +++++----- llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s | 38 +++-- .../Disassembler/AMDGPU/gfx12_dasm_vop3.txt | 42 +++++- .../AMDGPU/gfx12_dasm_vop3_dpp16.txt | 102 ++++++++++--- .../AMDGPU/gfx12_dasm_vop3_dpp8.txt | 52 ++++++- 10 files changed, 421 insertions(+), 149 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 95932c4932327..540bf4944660a 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -8961,10 +8961,14 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, // Adding vdst_in operand is already covered for these DPP instructions in // cvtVOP3DPP. if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) && - !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 || - Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 || - Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 || - Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 || + !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 || + Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 || + Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 || + Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 || + Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 || + Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 || + Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 || + Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 || Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 || Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 || Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 || diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 84a6aeacc226a..083345d4d1e12 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2822,6 +2822,7 @@ def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>; def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>; def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>; def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>; +def VOP_I16_F32_F32 : VOPProfile <[i16, f32, f32, untyped]>; def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=*/1>; def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>; def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 0252c4f1b0929..8d0deed26b6ff 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -570,6 +570,38 @@ def VOP3_CVT_PK_F8_F32_Profile : VOP3_Profile { let HasExtVOP3DPP = 1; } +def VOP3_CVT_PK_F8_F32_Profile_fake16 : VOP3_Profile_Fake16 { + defvar Tail = (ins VGPR_32:$vdst_in, op_sel0:$op_sel); + let InsVOP3OpSel = !con(getIns64.ret, + Tail); + let InsVOP3Base = !con(getInsVOP3Base.ret, + Tail); + let HasClamp = 0; + let HasExtVOP3DPP = 1; +} + +// This t16 profile with vdst_in operand is for backward compatibility and is used +// for user controlled packing +def VOP3_CVT_PK_F8_F32_Profile_t16 : VOP3_Profile_True16 { + defvar Tail = (ins VGPR_16:$vdst_in, op_sel0:$op_sel); + let InsVOP3OpSel = !con(getIns64.ret, + Tail); + let InsVOP3Base = !con(getInsVOP3Base.ret, + Tail); + let HasClamp = 0; + let HasExtVOP3DPP = 1; +} + def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile, VOP3_OPSEL> { let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, @@ -675,8 +707,12 @@ defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", VOP3_Profile; - defm V_CVT_PK_BF8_F32 : VOP3Inst<"v_cvt_pk_bf8_f32", VOP3_CVT_PK_F8_F32_Profile>; + defm V_CVT_PK_FP8_F32 : VOP3Inst_t16_with_profiles<"v_cvt_pk_fp8_f32", VOP3_CVT_PK_F8_F32_Profile, + VOP3_CVT_PK_F8_F32_Profile_t16, + VOP3_CVT_PK_F8_F32_Profile_fake16>; + defm V_CVT_PK_BF8_F32 : VOP3Inst_t16_with_profiles<"v_cvt_pk_bf8_f32", VOP3_CVT_PK_F8_F32_Profile, + VOP3_CVT_PK_F8_F32_Profile_t16, + VOP3_CVT_PK_F8_F32_Profile_fake16>; let SubtargetPredicate = isGFX12Plus in { defm V_CVT_SR_FP8_F32_gfx12 : VOP3Inst<"v_cvt_sr_fp8_f32_gfx12", VOP3_CVT_SR_F8_ByteSel_Profile>; @@ -698,6 +734,21 @@ class Cvt_PK_F8_F32_Pat : G (inst !if(index, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1, $old, 0) >; +multiclass Cvt_PK_F8_F32_t16_Pat { +def : GCNPat< + (i32 (node f32:$src0, f32:$src1, i32:$old, -1)), + (REG_SEQUENCE VGPR_32, + (i16 (EXTRACT_SUBREG $old, lo16)), lo16, + (i16 (inst SRCMODS.DST_OP_SEL, $src0, 0, $src1, (i16 (EXTRACT_SUBREG $old, hi16)), 0)), hi16) +>; +def : GCNPat< + (i32 (node f32:$src0, f32:$src1, i32:$old, 0)), + (REG_SEQUENCE VGPR_32, + (i16 (inst 0, $src0, 0, $src1, (i16 (EXTRACT_SUBREG $old, lo16)), 0)), lo16, + (i16 (EXTRACT_SUBREG $old, hi16)), hi16) +>; +} + class Cvt_SR_F8_F32_Pat index, VOP3_Pseudo inst> : GCNPat< (i32 (node f32:$src0, i32:$src1, i32:$old, index)), (inst !if(index{1}, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1, @@ -712,9 +763,20 @@ class Cvt_SR_F8_ByteSel_Pat; def : Cvt_PK_F8_F32_Pat; } +let True16Predicate = UseFakeTrue16Insts in { + def : Cvt_PK_F8_F32_Pat; + def : Cvt_PK_F8_F32_Pat; +} +} + +let True16Predicate = UseRealTrue16Insts in { +defm : Cvt_PK_F8_F32_t16_Pat; +defm : Cvt_PK_F8_F32_t16_Pat; +} let SubtargetPredicate = isGFX940Plus in { foreach Index = [0, 1, 2, 3] in { @@ -1642,8 +1704,8 @@ defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x368, "v_m defm V_PERMLANE16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x30f>; defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>; -defm V_CVT_PK_FP8_F32 : VOP3Only_Realtriple_gfx12<0x369>; -defm V_CVT_PK_BF8_F32 : VOP3Only_Realtriple_gfx12<0x36a>; +defm V_CVT_PK_FP8_F32 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x369, "v_cvt_pk_fp8_f32">; +defm V_CVT_PK_BF8_F32 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x36a, "v_cvt_pk_bf8_f32">; defm V_CVT_SR_FP8_F32_gfx12 : VOP3_Realtriple_with_name_gfx12<0x36b, "V_CVT_SR_FP8_F32_gfx12", "v_cvt_sr_fp8_f32" >; defm V_CVT_SR_BF8_F32_gfx12 : VOP3_Realtriple_with_name_gfx12<0x36c, "V_CVT_SR_BF8_F32_gfx12", "v_cvt_sr_bf8_f32">; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll index 1a46e6f6afcd7..16d32b73b9b0d 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll @@ -3,7 +3,8 @@ ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX942 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX950 %s ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX950 %s -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s declare float @llvm.amdgcn.cvt.f32.bf8(i32, i32) @@ -275,17 +276,29 @@ define i32 @test_cvt_pk_bf8_f32_word0(float %x, float %y, i32 %old) { ; GFX9X-NEXT: v_mov_b32_e32 v0, v2 ; GFX9X-NEXT: s_setpc_b64 s[30:31] ; -; GFX12-LABEL: test_cvt_pk_bf8_f32_word0: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_mov_b32_e32 v0, v2 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX12-TRUE16-LABEL: test_cvt_pk_bf8_f32_word0: +; GFX12-TRUE16: ; %bb.0: +; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-NEXT: v_cvt_pk_bf8_f32 v2.l, v0, v1 +; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v2 +; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-LABEL: test_cvt_pk_bf8_f32_word0: +; GFX12-FAKE16: ; %bb.0: +; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1 +; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v2 +; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] %ret = tail call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %x, float %y, i32 %old, i1 false) ret i32 %ret } @@ -299,17 +312,29 @@ define i32 @test_cvt_pk_bf8_f32_word1(float %x, float %y, i32 %old) { ; GFX9X-NEXT: v_mov_b32_e32 v0, v2 ; GFX9X-NEXT: s_setpc_b64 s[30:31] ; -; GFX12-LABEL: test_cvt_pk_bf8_f32_word1: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1 op_sel:[0,0,1] -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_mov_b32_e32 v0, v2 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX12-TRUE16-LABEL: test_cvt_pk_bf8_f32_word1: +; GFX12-TRUE16: ; %bb.0: +; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-NEXT: v_cvt_pk_bf8_f32 v2.h, v0, v1 op_sel:[0,0,1] +; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v2 +; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-LABEL: test_cvt_pk_bf8_f32_word1: +; GFX12-FAKE16: ; %bb.0: +; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1 op_sel:[0,0,1] +; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v2 +; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] %ret = tail call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %x, float %y, i32 %old, i1 true) ret i32 %ret } @@ -322,17 +347,29 @@ define i32 @test_cvt_pk_fp8_f32_word0(float %x, float %y, i32 %old) { ; GFX9X-NEXT: v_mov_b32_e32 v0, v2 ; GFX9X-NEXT: s_setpc_b64 s[30:31] ; -; GFX12-LABEL: test_cvt_pk_fp8_f32_word0: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_mov_b32_e32 v0, v2 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX12-TRUE16-LABEL: test_cvt_pk_fp8_f32_word0: +; GFX12-TRUE16: ; %bb.0: +; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-NEXT: v_cvt_pk_fp8_f32 v2.l, v0, v1 +; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v2 +; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-LABEL: test_cvt_pk_fp8_f32_word0: +; GFX12-FAKE16: ; %bb.0: +; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 +; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v2 +; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] %ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %x, float %y, i32 %old, i1 false) ret i32 %ret } @@ -346,17 +383,29 @@ define i32 @test_cvt_pk_fp8_f32_word1(float %x, float %y, i32 %old) { ; GFX9X-NEXT: v_mov_b32_e32 v0, v2 ; GFX9X-NEXT: s_setpc_b64 s[30:31] ; -; GFX12-LABEL: test_cvt_pk_fp8_f32_word1: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1] -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_mov_b32_e32 v0, v2 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX12-TRUE16-LABEL: test_cvt_pk_fp8_f32_word1: +; GFX12-TRUE16: ; %bb.0: +; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 +; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 +; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 +; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-NEXT: v_cvt_pk_fp8_f32 v2.h, v0, v1 op_sel:[0,0,1] +; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v2 +; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-FAKE16-LABEL: test_cvt_pk_fp8_f32_word1: +; GFX12-FAKE16: ; %bb.0: +; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 +; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 +; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 +; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1] +; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v2 +; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] %ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %x, float %y, i32 %old, i1 true) ret i32 %ret } diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s index afd4af92b4f27..08b6b26d1f260 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s @@ -1169,23 +1169,35 @@ v_cubetc_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 v_cubetc_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 // GFX12: v_cubetc_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x0e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_cvt_pk_fp8_f32 v1, v2, v3 -// GFX12: v_cvt_pk_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x69,0xd7,0x02,0x07,0x02,0x00] +v_cvt_pk_fp8_f32 v1.l, v2, v3 +// GFX12: v_cvt_pk_fp8_f32 v1.l, v2, v3 ; encoding: [0x01,0x00,0x69,0xd7,0x02,0x07,0x02,0x00] -v_cvt_pk_fp8_f32 v1, -v2, |v3| -// GFX12: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20] +v_cvt_pk_fp8_f32 v1.l, -v2, |v3| +// GFX12: v_cvt_pk_fp8_f32 v1.l, -v2, |v3| ; encoding: [0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20] -v_cvt_pk_fp8_f32 v1, s2, 3 -// GFX12: v_cvt_pk_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0x69,0xd7,0x02,0x06,0x01,0x00] +v_cvt_pk_fp8_f32 v1.l, s2, 3 +// GFX12: v_cvt_pk_fp8_f32 v1.l, s2, 3 ; encoding: [0x01,0x00,0x69,0xd7,0x02,0x06,0x01,0x00] -v_cvt_pk_bf8_f32 v1, v2, v3 -// GFX12: v_cvt_pk_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x6a,0xd7,0x02,0x07,0x02,0x00] +v_cvt_pk_fp8_f32 v1.h v2, v3 +// GFX12: v_cvt_pk_fp8_f32 v1.h, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x69,0xd7,0x02,0x07,0x02,0x00] -v_cvt_pk_bf8_f32 v1, -v2, |v3| -// GFX12: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20] +v_cvt_pk_fp8_f32 v255.h v2, v3 +// GFX12: v_cvt_pk_fp8_f32 v255.h, v2, v3 op_sel:[0,0,1] ; encoding: [0xff,0x40,0x69,0xd7,0x02,0x07,0x02,0x00] -v_cvt_pk_bf8_f32 v1, s2, 3 -// GFX12: v_cvt_pk_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0x6a,0xd7,0x02,0x06,0x01,0x00] +v_cvt_pk_bf8_f32 v1.l, v2, v3 +// GFX12: v_cvt_pk_bf8_f32 v1.l, v2, v3 ; encoding: [0x01,0x00,0x6a,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_pk_bf8_f32 v1.l, -v2, |v3| +// GFX12: v_cvt_pk_bf8_f32 v1.l, -v2, |v3| ; encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20] + +v_cvt_pk_bf8_f32 v1.l, s2, 3 +// GFX12: v_cvt_pk_bf8_f32 v1.l, s2, 3 ; encoding: [0x01,0x00,0x6a,0xd7,0x02,0x06,0x01,0x00] + +v_cvt_pk_bf8_f32 v1.h, v2, v3 +// GFX12: v_cvt_pk_bf8_f32 v1.h, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x6a,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_pk_bf8_f32 v255.h, -v2, |v3| +// GFX12: v_cvt_pk_bf8_f32 v255.h, -v2, |v3| op_sel:[0,0,1] ; encoding: [0xff,0x42,0x6a,0xd7,0x02,0x07,0x02,0x20] v_cvt_sr_fp8_f32 v1, v2, v3 // GFX12: v_cvt_sr_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x6b,0xd7,0x02,0x07,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s index cfd01ee02aa7e..6284ca4726928 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s @@ -1209,59 +1209,65 @@ v_cubetc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_m v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x0e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] -v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] -v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,1,2,3] -// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] +v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,1,2,3] +// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] -v_cvt_pk_bf8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_pk_bf8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +v_cvt_pk_bf8_f32_e64_dpp v6.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: v_cvt_pk_bf8_f32_e64_dpp v6.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] -v_cvt_pk_bf8_f32_e64_dpp v1, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] +v_cvt_pk_bf8_f32_e64_dpp v1.l, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] -v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] +v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] -v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] +v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd +// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] -v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd -// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] +v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd +// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] -v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 -// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] +v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 +// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] -v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 -// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 +// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] -v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] -v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,1,2,3] -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] +v_cvt_pk_fp8_f32_e64_dpp v255.h, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v255.h, -v2, |v3| op_sel:[0,0,1] quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0xff,0x42,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] -v_cvt_pk_fp8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,1,2,3] +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] -v_cvt_pk_fp8_f32_e64_dpp v1, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] +v_cvt_pk_fp8_f32_e64_dpp v6.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v6.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] -v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] +v_cvt_pk_fp8_f32_e64_dpp v1.l, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] -v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] +v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] -v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] +v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] -v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] +v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] -v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] + +v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] + +v_cvt_pk_fp8_f32_e64_dpp v255.h, -v2, |v3| quad_perm:[0,1,2,3] +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v255.h, -v2, |v3| op_sel:[0,0,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x42,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd // GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s index b6d395c8ede89..c9f64fe6d10c1 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s @@ -760,29 +760,35 @@ v_cubetc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x0e,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] -v_cvt_pk_fp8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,2,3,0,1] -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,2,3,0,1] ; encoding: [0x05,0x00,0x69,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0xa9,0x21] +v_cvt_pk_fp8_f32_e64_dpp v5.l, v1, v2 dpp8:[7,6,5,4,2,3,0,1] +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v5.l, v1, v2 dpp8:[7,6,5,4,2,3,0,1] ; encoding: [0x05,0x00,0x69,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0xa9,0x21] -v_cvt_pk_fp8_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x69,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +v_cvt_pk_fp8_f32_e64_dpp v5.l, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v5.l, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x69,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] -v_cvt_pk_fp8_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x69,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cvt_pk_fp8_f32_e64_dpp v5.l, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v5.l, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x69,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] -v_cvt_pk_fp8_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x69,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +v_cvt_pk_fp8_f32_e64_dpp v255.l, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v255.l, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x69,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_cvt_pk_bf8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cvt_pk_bf8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cvt_pk_fp8_f32_e64_dpp v255.h, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v255.h, -|v255|, -|v255| op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x43,0x69,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_cvt_pk_bf8_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cvt_pk_bf8_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x6a,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +v_cvt_pk_bf8_f32_e64_dpp v5.l, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_pk_bf8_f32_e64_dpp v5.l, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_cvt_pk_bf8_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: v_cvt_pk_bf8_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x6a,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cvt_pk_bf8_f32_e64_dpp v5.l, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_pk_bf8_f32_e64_dpp v5.l, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x6a,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] -v_cvt_pk_bf8_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] -// GFX12: v_cvt_pk_bf8_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x6a,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +v_cvt_pk_bf8_f32_e64_dpp v5.l, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cvt_pk_bf8_f32_e64_dpp v5.l, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x6a,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_bf8_f32_e64_dpp v255.l, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] +// GFX12: v_cvt_pk_bf8_f32_e64_dpp v255.l, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x6a,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pk_bf8_f32_e64_dpp v255.h, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] +// GFX12: v_cvt_pk_bf8_f32_e64_dpp v255.h, -|v255|, -|v255| op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x43,0x6a,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6b,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt index e7d92af7511f5..5980a6820a344 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt @@ -1318,22 +1318,52 @@ # GFX12: v_cubetc_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x0e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] 0x01,0x00,0x69,0xd7,0x02,0x07,0x02,0x00 -# GFX12: v_cvt_pk_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x69,0xd7,0x02,0x07,0x02,0x00] +# W32-REAL16: v_cvt_pk_fp8_f32 v1.l, v2, v3 ; encoding: [0x01,0x00,0x69,0xd7,0x02,0x07,0x02,0x00] +# W32-FAKE16: v_cvt_pk_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x69,0xd7,0x02,0x07,0x02,0x00] +# W64-REAL16: v_cvt_pk_fp8_f32 v1.l, v2, v3 ; encoding: [0x01,0x00,0x69,0xd7,0x02,0x07,0x02,0x00] +# W64-FAKE16: v_cvt_pk_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x69,0xd7,0x02,0x07,0x02,0x00] 0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20 -# GFX12: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20] +# W32-REAL16: v_cvt_pk_fp8_f32 v1.l, -v2, |v3| ; encoding: [0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20] +# W32-FAKE16: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20] +# W64-REAL16: v_cvt_pk_fp8_f32 v1.l, -v2, |v3| ; encoding: [0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20] +# W64-FAKE16: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20] 0x01,0x00,0x69,0xd7,0x02,0x06,0x01,0x00 -# GFX12: v_cvt_pk_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0x69,0xd7,0x02,0x06,0x01,0x00] +# W32-REAL16: v_cvt_pk_fp8_f32 v1.l, s2, 3 ; encoding: [0x01,0x00,0x69,0xd7,0x02,0x06,0x01,0x00] +# W32-FAKE16: v_cvt_pk_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0x69,0xd7,0x02,0x06,0x01,0x00] +# W64-REAL16: v_cvt_pk_fp8_f32 v1.l, s2, 3 ; encoding: [0x01,0x00,0x69,0xd7,0x02,0x06,0x01,0x00] +# W64-FAKE16: v_cvt_pk_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0x69,0xd7,0x02,0x06,0x01,0x00] + +0x01,0x40,0x69,0xd7,0x02,0x07,0x02,0x00 +# W32-REAL16: v_cvt_pk_fp8_f32 v1.h, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x69,0xd7,0x02,0x07,0x02,0x00] +# W32-FAKE16: v_cvt_pk_fp8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x69,0xd7,0x02,0x07,0x02,0x00] +# W64-REAL16: v_cvt_pk_fp8_f32 v1.h, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x69,0xd7,0x02,0x07,0x02,0x00] +# W64-FAKE16: v_cvt_pk_fp8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x69,0xd7,0x02,0x07,0x02,0x00] 0x01,0x00,0x6a,0xd7,0x02,0x07,0x02,0x00 -# GFX12: v_cvt_pk_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x6a,0xd7,0x02,0x07,0x02,0x00] +# W32-REAL16: v_cvt_pk_bf8_f32 v1.l, v2, v3 ; encoding: [0x01,0x00,0x6a,0xd7,0x02,0x07,0x02,0x00] +# W32-FAKE16: v_cvt_pk_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x6a,0xd7,0x02,0x07,0x02,0x00] +# W64-REAL16: v_cvt_pk_bf8_f32 v1.l, v2, v3 ; encoding: [0x01,0x00,0x6a,0xd7,0x02,0x07,0x02,0x00] +# W64-FAKE16: v_cvt_pk_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x6a,0xd7,0x02,0x07,0x02,0x00] 0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20 -# GFX12: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20] +# W32-REAL16: v_cvt_pk_bf8_f32 v1.l, -v2, |v3| ; encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20] +# W32-FAKE16: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20] +# W64-REAL16: v_cvt_pk_bf8_f32 v1.l, -v2, |v3| ; encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20] +# W64-FAKE16: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20] 0x01,0x00,0x6a,0xd7,0x02,0x06,0x01,0x00 -# GFX12: v_cvt_pk_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0x6a,0xd7,0x02,0x06,0x01,0x00] +# W32-REAL16: v_cvt_pk_bf8_f32 v1.l, s2, 3 ; encoding: [0x01,0x00,0x6a,0xd7,0x02,0x06,0x01,0x00] +# W32-FAKE16: v_cvt_pk_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0x6a,0xd7,0x02,0x06,0x01,0x00] +# W64-REAL16: v_cvt_pk_bf8_f32 v1.l, s2, 3 ; encoding: [0x01,0x00,0x6a,0xd7,0x02,0x06,0x01,0x00] +# W64-FAKE16: v_cvt_pk_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0x6a,0xd7,0x02,0x06,0x01,0x00] + +0x01,0x40,0x6a,0xd7,0x02,0x07,0x02,0x00 +# W32-REAL16: v_cvt_pk_bf8_f32 v1.h, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x6a,0xd7,0x02,0x07,0x02,0x00] +# W32-FAKE16: v_cvt_pk_bf8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x6a,0xd7,0x02,0x07,0x02,0x00] +# W64-REAL16: v_cvt_pk_bf8_f32 v1.h, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x6a,0xd7,0x02,0x07,0x02,0x00] +# W64-FAKE16: v_cvt_pk_bf8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x6a,0xd7,0x02,0x07,0x02,0x00] 0x01,0x00,0x6b,0xd7,0x02,0x07,0x02,0x00 # GFX12: v_cvt_sr_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x6b,0xd7,0x02,0x07,0x02,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt index a84e0e1734ff5..397852da6ef3b 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt @@ -1119,58 +1119,124 @@ # GFX12: v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x0e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] 0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed -# GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +# W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] 0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20 -# GFX12: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20] +# W32-REAL16: v_cvt_pk_bf8_f32 v1.l, -v2, |v3| ; encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20] +# W32-FAKE16: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20] +# W64-REAL16: v_cvt_pk_bf8_f32 v1.l, -v2, |v3| ; encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20] +# W64-FAKE16: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20] 0x06,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed -# GFX12: v_cvt_pk_bf8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v6.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +# W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v6.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] 0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed -# GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] +# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] +# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] +# W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] +# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] 0x01,0x02,0x6a,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed -# GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] +# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] +# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] +# W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] +# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] 0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed -# GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] +# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] +# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] +# W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] +# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] 0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d -# GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] +# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] +# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] +# W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] +# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] 0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5 -# GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] +# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] +# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] +# W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] +# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] 0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed -# GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +# W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] + +0x01,0x42,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed +# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.h, -v2, |v3| op_sel:[0,0,1] quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x42,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| op_sel:[0,0,1] quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x42,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +# W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.h, -v2, |v3| op_sel:[0,0,1] quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x42,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| op_sel:[0,0,1] quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x42,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] 0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed -# GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] 0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20 -# GFX12: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20] +# W32-REAL16: v_cvt_pk_fp8_f32 v1.l, -v2, |v3| ; encoding: [0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20] +# W32-FAKE16: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20] +# W64-REAL16: v_cvt_pk_fp8_f32 v1.l, -v2, |v3| ; encoding: [0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20] +# W64-FAKE16: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20] 0x06,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed -# GFX12: v_cvt_pk_fp8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v6.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v6.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] 0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed -# GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] +# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] +# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] +# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] +# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] 0x01,0x02,0x69,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed -# GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] +# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] +# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] +# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] +# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] 0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed -# GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] +# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] +# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] +# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] +# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] 0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d -# GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] +# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] +# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] +# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] +# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] 0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5 -# GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] +# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] +# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] +# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] +# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] 0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed -# GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] + +0x01,0x42,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed +# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.h, -v2, |v3| op_sel:[0,0,1] quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x42,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| op_sel:[0,0,1] quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x42,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.h, -v2, |v3| op_sel:[0,0,1] quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x42,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| op_sel:[0,0,1] quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x42,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] 0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed # GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt index e4bef0a17385c..8688410a8c9b2 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt @@ -681,28 +681,64 @@ # GFX12: v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x0e,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] 0x05,0x00,0x69,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0xa9,0x21 -# GFX12: v_cvt_pk_fp8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,2,3,0,1] ; encoding: [0x05,0x00,0x69,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0xa9,0x21] +# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v5.l, v1, v2 dpp8:[7,6,5,4,2,3,0,1] ; encoding: [0x05,0x00,0x69,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0xa9,0x21] +# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,2,3,0,1] ; encoding: [0x05,0x00,0x69,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0xa9,0x21] +# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v5.l, v1, v2 dpp8:[7,6,5,4,2,3,0,1] ; encoding: [0x05,0x00,0x69,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0xa9,0x21] +# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,2,3,0,1] ; encoding: [0x05,0x00,0x69,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0xa9,0x21] 0x05,0x01,0x69,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 -# GFX12: v_cvt_pk_fp8_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x69,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v5.l, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x69,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x69,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v5.l, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x69,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x69,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x05,0x02,0x69,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05 -# GFX12: v_cvt_pk_fp8_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x69,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v5.l, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x69,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x69,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v5.l, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x69,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x69,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] 0xff,0x03,0x69,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 -# GFX12: v_cvt_pk_fp8_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x69,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v255.l, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x69,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x69,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v255.l, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x69,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x69,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +0xff,0x43,0x69,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 +# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v255.h, -|v255|, -|v255| op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x43,0x69,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v255, -|v255|, -|v255| op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x43,0x69,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v255.h, -|v255|, -|v255| op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x43,0x69,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v255, -|v255|, -|v255| op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x43,0x69,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 0x05,0x00,0x6a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_cvt_pk_bf8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v5.l, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v5.l, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x01,0x6a,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 -# GFX12: v_cvt_pk_bf8_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x6a,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v5.l, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x6a,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x6a,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v5.l, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x6a,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x6a,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x05,0x02,0x6a,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05 -# GFX12: v_cvt_pk_bf8_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x6a,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v5.l, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x6a,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x6a,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v5.l, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x6a,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x6a,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] 0xff,0x03,0x6a,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 -# GFX12: v_cvt_pk_bf8_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x6a,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v255.l, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x6a,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x6a,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v255.l, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x6a,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x6a,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +0xff,0x43,0x6a,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 +# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v255.h, -|v255|, -|v255| op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x43,0x6a,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v255, -|v255|, -|v255| op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x43,0x6a,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v255.h, -|v255|, -|v255| op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x43,0x6a,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v255, -|v255|, -|v255| op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x43,0x6a,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 0x05,0x00,0x6b,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6b,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]