diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index cc5255c1e6ef4..cf00910210e0b 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -899,7 +899,7 @@ def VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile : VOP3_Profile, +class VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile : VOP3_Profile, VOP3_OPSEL> { let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, FP32InputMods:$src1_modifiers, Src1RC64:$src1, @@ -929,7 +929,7 @@ let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile>; defm V_CVT_SCALEF32_F32_FP8 : VOP3Inst<"v_cvt_scalef32_f32_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile>; defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>; - defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp8", VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile>; + defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile>; defm V_CVT_SCALEF32_PK_FP8_F16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>; defm V_CVT_SCALEF32_PK_FP8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>; } @@ -938,14 +938,16 @@ let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in defm V_CVT_SCALEF32_F16_BF8 : VOP3Inst<"v_cvt_scalef32_f16_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile>; defm V_CVT_SCALEF32_F32_BF8 : VOP3Inst<"v_cvt_scalef32_f32_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile>; defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>; - defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f32_bf8", VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile>; + defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f32_bf8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile>; defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>; defm V_CVT_SCALEF32_PK_BF8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>; } let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in { - defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp4", VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile>; + defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile>; defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp4_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>; + defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile>; + defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_bf16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile>; } let SubtargetPredicate = isGFX10Plus in { @@ -1889,4 +1891,6 @@ defm V_CVT_SCALEF32_PK_BF8_BF16: VOP3OpSel_Real_gfx9 <0x245>; let OtherPredicates = [HasFP4ConversionScaleInsts] in { defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3OpSel_Real_gfx9 <0x23f>; defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3OpSel_Real_gfx9 <0x23d>; +defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3OpSel_Real_gfx9 <0x250>; +defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3OpSel_Real_gfx9 <0x251>; } diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s index 1c825c105eb21..85cd02aa71417 100644 --- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s @@ -788,4 +788,100 @@ v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,1] // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: // GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x3d,0xd2,0x01,0x05,0x0c,0x02] -v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1] \ No newline at end of file +v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0] \ No newline at end of file diff --git a/llvm/test/MC/AMDGPU/gfx950_err.s b/llvm/test/MC/AMDGPU/gfx950_err.s index 56f4eff0a2793..89167ae35e296 100644 --- a/llvm/test/MC/AMDGPU/gfx950_err.s +++ b/llvm/test/MC/AMDGPU/gfx950_err.s @@ -101,3 +101,27 @@ v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 div:2 // GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 clamp div:2 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt index 1eac7628a22e7..80b5835fab108 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt @@ -539,3 +539,75 @@ # GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x3d,0xd2,0x01,0x05,0x0c,0x02] 0x01,0x60,0x3d,0xd2,0x01,0x05,0x0c,0x02 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0x50,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x00,0x00] +0x01,0x00,0x50,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x06,0x01,0x00] +0x01,0x00,0x50,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x07,0x02,0x00] +0x01,0x08,0x50,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x07,0x00,0x00] +0x01,0x08,0x50,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x06,0x01,0x00] +0x01,0x08,0x50,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x07,0x02,0x00] +0x01,0x10,0x50,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x07,0x00,0x00] +0x01,0x10,0x50,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x06,0x01,0x00] +0x01,0x10,0x50,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x07,0x02,0x00] +0x01,0x18,0x50,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x07,0x00,0x00] +0x01,0x18,0x50,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x06,0x01,0x00] +0x01,0x18,0x50,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0x51,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x07,0x00,0x00] +0x01,0x00,0x51,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x06,0x01,0x00] +0x01,0x00,0x51,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x07,0x02,0x00] +0x01,0x08,0x51,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x07,0x00,0x00] +0x01,0x08,0x51,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x06,0x01,0x00] +0x01,0x08,0x51,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x07,0x02,0x00] +0x01,0x10,0x51,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x07,0x00,0x00] +0x01,0x10,0x51,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x06,0x01,0x00] +0x01,0x10,0x51,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x07,0x02,0x00] +0x01,0x18,0x51,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x07,0x00,0x00] +0x01,0x18,0x51,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00] +0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00