Skip to content

Conversation

@arsenm
Copy link
Contributor

@arsenm arsenm commented Nov 26, 2024

Co-authored-by: Shilei Tian [email protected]

This was referenced Nov 26, 2024
@arsenm arsenm marked this pull request as ready for review November 26, 2024 21:34
@llvmbot llvmbot added the llvm:mc Machine (object) code label Nov 26, 2024
@llvmbot
Copy link
Member

llvmbot commented Nov 26, 2024

@llvm/pr-subscribers-mc

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Co-authored-by: Shilei Tian <[email protected]>


Full diff: https://github.com/llvm/llvm-project/pull/117795.diff

5 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+4)
  • (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+2)
  • (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+31-1)
  • (modified) llvm/test/MC/AMDGPU/gfx950_asm_features.s (+96)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt (+72)
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index f63bc0ea4e7b0c..3ff588c4d0a452 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -336,6 +336,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
   }
 
+  bool isPackedFP32InputMods() const {
+    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::v2f32);
+  }
+
   bool isVReg() const {
     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
            isRegClass(AMDGPU::VReg_64RegClassID) ||
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 32ebcd06c5e53c..d08bda37292105 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1535,6 +1535,7 @@ class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
 
 def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
 def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;
+def PackedF32InputModsMatchClass : PackedFPInputModsMatchClass<32>;
 
 class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
   let PrintMethod = "printOperandAndFPInputMods";
@@ -1546,6 +1547,7 @@ class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <
 
 def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
 def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;
+def PackedF32InputMods : PackedFPInputMods<PackedF32InputModsMatchClass>;
 
 def MFMALdScaleModifierOp : TImmLeaf<i32, [{
   return isUInt<2>(Imm);
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 42475b18afecdc..237bb0faffff8b 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -983,6 +983,29 @@ class VOP3_CVT_SCALE_FP4_F16BF16_TiedInput_Profile<VOPProfile P> : VOP3_Profile<
   let HasExtVOP3DPP = 0;
 }
 
+class VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<ValueType Src0Ty> :
+    VOP3_Profile<VOPProfile<[i32, Src0Ty, i32, f32]>, VOP3_OPSEL> {
+  let InsVOP3OpSel = (ins PackedF16InputMods: $src0_modifiers, Src0RC64:$src0,
+                          Int32InputMods:     $src1_modifiers, Src1RC64:$src1,
+                          FP32InputMods:      $src2_modifiers, Src2RC64:$src2,
+                          VGPR_32:$vdst_in,   op_sel0:$op_sel);
+  let HasClamp = 0;
+  let HasExtVOP3DPP = 0;
+  let HasOpSel = 1;
+  let HasOMod = 0;
+}
+
+def VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile : VOP3_Profile<VOPProfile<[i32, v2f32, i32, f32]>, VOP3_OPSEL> {
+  let InsVOP3OpSel = (ins PackedF32InputMods: $src0_modifiers, Src0RC64:$src0,
+                          Int32InputMods:     $src1_modifiers, Src1RC64:$src1,
+                          FP32InputMods:      $src2_modifiers, Src2RC64:$src2,
+                          VGPR_32:$vdst_in,   op_sel0:$op_sel);
+  let HasClamp = 0;
+  let HasExtVOP3DPP = 0;
+  let HasOpSel = 1;
+  let HasOMod = 0;
+}
+
 class VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<ValueType DstTy> : VOP3_Profile<VOPProfile<[DstTy, i32, f32, untyped]>,
                                               VOP3_OPSEL> {
   let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
@@ -1049,8 +1072,12 @@ let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in
 
 let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in {
   defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f32>>;
-  let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in
+  let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
     defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp4_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<VOP_I32_F32_F32_F32>>;
+    defm V_CVT_SCALEF32_SR_PK_FP4_F16:  VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_f16", VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<v2f16>>;
+    defm V_CVT_SCALEF32_SR_PK_FP4_BF16: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_bf16", VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<v2bf16>>;
+    defm V_CVT_SCALEF32_SR_PK_FP4_F32:  VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_f32", VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile>;
+  }
   defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f16>>;
   defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_bf16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2bf16>>;
 
@@ -2117,6 +2144,9 @@ defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3OpSel_Real_gfx9 <0x250>;
 defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3OpSel_Real_gfx9 <0x251>;
 defm V_CVT_SCALEF32_PK_FP4_F16 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x24c>;
 defm V_CVT_SCALEF32_PK_FP4_BF16: VOP3OpSel_Real_gfx9_forced_opsel2 <0x24d>;
+defm V_CVT_SCALEF32_SR_PK_FP4_F16:  VOP3OpSel_Real_gfx9 <0x24e>;
+defm V_CVT_SCALEF32_SR_PK_FP4_BF16: VOP3OpSel_Real_gfx9 <0x24f>;
+defm V_CVT_SCALEF32_SR_PK_FP4_F32:  VOP3OpSel_Real_gfx9 <0x23e>;
 }
 let OtherPredicates = [HasFP6BF6ConversionScaleInsts] in {
 defm V_CVT_SCALEF32_PK32_F32_FP6 : VOP3_Real_gfx9<0x256, "v_cvt_scalef32_pk32_f32_fp6">;
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
index 75022d8cf0cdd0..1a776c1050578c 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -1279,3 +1279,99 @@ v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[0,0,0]
 // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
 // GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9c,0xd3,0x00,0x01,0x04,0x04]
 v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1]
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 ; encoding: [0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 ; encoding: [0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 ; encoding: [0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x4f,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,1,1]
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x4e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,1,1]
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x3e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,1,1]
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,0,1] ; encoding: [0x00,0x40,0x4f,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,0,1]
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,0,1] ; encoding: [0x00,0x40,0x4e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,0,1]
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,0,1] ; encoding: [0x00,0x40,0x3e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,0,1]
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,1,0] ; encoding: [0x00,0x20,0x4f,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,1,0]
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,1,0] ; encoding: [0x00,0x20,0x4e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,1,0]
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,1,0] ; encoding: [0x00,0x20,0x3e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,1,0]
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, -v2, v4, v5 ; encoding: [0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x24]
+v_cvt_scalef32_sr_pk_fp4_bf16 v0, -v2, v4, v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, -v5 ; encoding: [0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x84]
+v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, -v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, |v5| ; encoding: [0x00,0x04,0x4f,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, |v5|
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, |v2|, v4, v5 ; encoding: [0x00,0x01,0x4f,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_bf16 v0, |v2|, v4, v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, -v2, v4, v5 ; encoding: [0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x24]
+v_cvt_scalef32_sr_pk_fp4_f16 v0, -v2, v4, v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, -v5 ; encoding: [0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x84]
+v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, -v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, |v2|, v4, v5 ; encoding: [0x00,0x01,0x4e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f16 v0, |v2|, v4, v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, |v5| ; encoding: [0x00,0x04,0x4e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, |v5|
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, -v[2:3], v4, v5 ; encoding: [0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x24]
+v_cvt_scalef32_sr_pk_fp4_f32 v0, -v[2:3], v4, v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, -v5 ; encoding: [0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x84]
+v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, -v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, |v[2:3]|, v4, v5 ; encoding: [0x00,0x01,0x3e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f32 v0, |v[2:3]|, v4, v5
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, |v5| ; encoding: [0x00,0x04,0x3e,0xd2,0x02,0x09,0x16,0x04]
+v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, |v5|
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
index adb4f78942503e..2b5b4c7770924d 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
@@ -942,3 +942,75 @@
 
 # GFX950: v_pk_minimum3_f16 v8, v0, v1, s8        ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18]
 0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 ; encoding: [0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 ; encoding: [0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 ; encoding: [0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x4f,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x60,0x4f,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x4e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x60,0x4e,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x3e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x60,0x3e,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,0,1] ; encoding: [0x00,0x40,0x4f,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x40,0x4f,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,0,1] ; encoding: [0x00,0x40,0x4e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x40,0x4e,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,0,1] ; encoding: [0x00,0x40,0x3e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x40,0x3e,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,1,0] ; encoding: [0x00,0x20,0x4f,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x20,0x4f,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,1,0] ; encoding: [0x00,0x20,0x4e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x20,0x4e,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,1,0] ; encoding: [0x00,0x20,0x3e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x20,0x3e,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, -v2, v4, v5 ; encoding: [0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x24]
+0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x24
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, -v5 ; encoding: [0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x84]
+0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x84
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, |v5| ; encoding: [0x00,0x04,0x4f,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x04,0x4f,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, |v2|, v4, v5 ; encoding: [0x00,0x01,0x4f,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x01,0x4f,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, -v2, v4, v5 ; encoding: [0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x24]
+0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x24
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, -v5 ; encoding: [0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x84]
+0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x84
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, |v2|, v4, v5 ; encoding: [0x00,0x01,0x4e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x01,0x4e,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, |v5| ; encoding: [0x00,0x04,0x4e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x04,0x4e,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, -v[2:3], v4, v5 ; encoding: [0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x24]
+0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x24
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, -v5 ; encoding: [0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x84]
+0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x84
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, |v[2:3]|, v4, v5 ; encoding: [0x00,0x01,0x3e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x01,0x3e,0xd2,0x02,0x09,0x16,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, |v5| ; encoding: [0x00,0x04,0x3e,0xd2,0x02,0x09,0x16,0x04]
+0x00,0x04,0x3e,0xd2,0x02,0x09,0x16,0x04

Copy link
Contributor Author

arsenm commented Nov 27, 2024

Merge activity

  • Nov 26, 7:08 PM EST: A user started a stack merge that includes this pull request via Graphite.
  • Nov 26, 7:39 PM EST: Graphite rebased this pull request as part of a merge.
  • Nov 26, 7:41 PM EST: A user merged this pull request with Graphite.

@arsenm arsenm force-pushed the users/arsenm/gfx950/codegen-v_cvt_scalef32_pk_fp4_f16_bf16 branch from 9ff806f to efa35c5 Compare November 27, 2024 00:35
Base automatically changed from users/arsenm/gfx950/codegen-v_cvt_scalef32_pk_fp4_f16_bf16 to main November 27, 2024 00:38
@arsenm arsenm force-pushed the users/arsenm/gfx950/mc-v_cvt_scale_sr_fp4 branch from 50432fb to 244a560 Compare November 27, 2024 00:38
@arsenm arsenm merged commit d3c103b into main Nov 27, 2024
5 of 7 checks passed
@arsenm arsenm deleted the users/arsenm/gfx950/mc-v_cvt_scale_sr_fp4 branch November 27, 2024 00:41
searlmc1 pushed a commit to ROCm/llvm-project that referenced this pull request Feb 3, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:AMDGPU llvm:mc Machine (object) code

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants