diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index dc94edf85586f..25117544d6a84 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -685,6 +685,13 @@ def FeatureFP8ConversionInsts : SubtargetFeature<"fp8-conversion-insts", "Has fp8 and bf8 conversion instructions" >; +def FeatureCvtFP8VOP1Bug : SubtargetFeature<"cvt-fp8-vop1-bug", + "HasCvtFP8Vop1Bug", + "true", + "FP8/BF8 VOP1 form of conversion to F32 is unreliable", + [FeatureFP8ConversionInsts] +>; + def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst", "HasPkFmacF16Inst", "true", @@ -1439,6 +1446,7 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureMAIInsts, FeatureFP8Insts, FeatureFP8ConversionInsts, + FeatureCvtFP8VOP1Bug, FeaturePkFmacF16Inst, FeatureAtomicFaddRtnInsts, FeatureAtomicFaddNoRtnInsts, diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index e6b7342d5fffc..1945812609316 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -158,6 +158,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasMAIInsts = false; bool HasFP8Insts = false; bool HasFP8ConversionInsts = false; + bool HasCvtFP8Vop1Bug = false; bool HasPkFmacF16Inst = false; bool HasAtomicFMinFMaxF32GlobalInsts = false; bool HasAtomicFMinFMaxF64GlobalInsts = false; @@ -1352,7 +1353,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasSplitBarriers() const { return getGeneration() >= GFX12; } // \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable. - bool hasCvtFP8VOP1Bug() const { return true; } + bool hasCvtFP8VOP1Bug() const { return HasCvtFP8Vop1Bug; } // \returns true if CSUB (a.k.a. SUB_CLAMP on GFX12) atomics support a // no-return form. diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 871a7c3c2579e..be98d201a64a7 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -582,7 +582,7 @@ class Cvt_F32_F8_Pat; -let SubtargetPredicate = isGFX9Only in { +let SubtargetPredicate = HasFP8ConversionInsts in { let OtherPredicates = [HasCvtFP8VOP1Bug] in { def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)), (V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>; @@ -590,18 +590,21 @@ let OtherPredicates = [HasCvtFP8VOP1Bug] in { (V_CVT_F32_BF8_sdwa 0, $src, 0, 0, 0)>; } -let OtherPredicates = [HasNoCvtFP8VOP1Bug] in { +let OtherPredicates = [HasNoCvtFP8VOP1Bug, HasSDWA] in { // FIXME: HasSDWA is a substitute for !gfx12 def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)), (V_CVT_F32_FP8_e32 $src)>; def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)), (V_CVT_F32_BF8_e32 $src)>; } +let OtherPredicates = [HasSDWA] in { foreach Index = [1, 2, 3] in { def : Cvt_F32_F8_Pat; def : Cvt_F32_F8_Pat; } -} // End SubtargetPredicate = isGFX9Only +} // End OtherPredicates = [HasSDWA] + +} // End SubtargetPredicate = HasFP8ConversionInsts class Cvt_PK_F32_F8_Pat : GCNPat< @@ -611,7 +614,7 @@ class Cvt_PK_F32_F8_Pat; -let SubtargetPredicate = isGFX9Only in { +let SubtargetPredicate = HasFP8ConversionInsts, OtherPredicates = [HasSDWA] in { foreach Index = [0, -1] in { def : Cvt_PK_F32_F8_Pat; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f32.fp8.err.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f32.fp8.err.ll new file mode 100644 index 0000000000000..29812993d541e --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f32.fp8.err.ll @@ -0,0 +1,100 @@ +; RUN: split-file %s %t + +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/fp8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE0-ERR %s +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/fp8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE1-ERR %s +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/bf8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE0-ERR %s +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/bf8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE1-ERR %s + +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/fp8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE0-ERR %s +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/fp8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE1-ERR %s +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/bf8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE0-ERR %s +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/bf8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE1-ERR %s + + +; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/fp8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE0-ERR-GISEL %s +; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/fp8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE1-ERR-GISEL %s +; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/bf8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE0-ERR-GISEL %s +; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/bf8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE1-ERR-GISEL %s + +; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/fp8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE0-ERR-GISEL %s +; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/fp8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-FP8-BYTE1-ERR-GISEL %s +; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/bf8-byte0-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE0-ERR-GISEL %s +; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/bf8-byte1-err.ll 2>&1 | FileCheck -check-prefix=ERR-BF8-BYTE1-ERR-GISEL %s + + + +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/pk-fp8-word0-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-FP8-WORD0-ERR %s +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/pk-fp8-word1-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-FP8-WORD1-ERR %s +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/pk-bf8-word0-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-BF8-WORD0-ERR %s +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 -filetype=null %t/pk-bf8-word1-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-BF8-WORD1-ERR %s + +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/pk-fp8-word0-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-FP8-WORD0-ERR %s +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/pk-fp8-word1-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-FP8-WORD1-ERR %s +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/pk-bf8-word0-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-BF8-WORD0-ERR %s +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %t/pk-bf8-word1-err.ll 2>&1 | FileCheck -check-prefix=ERR-PK-BF8-WORD1-ERR %s + + +;--- fp8-byte0-err.ll +; ERR-FP8-BYTE0-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.f32.fp8 +; ERR-FP8-BYTE0-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.f32.fp8), %{{[0-9]+}}:vgpr(s32), 0 + +define float @test_cvt_f32_fp8_byte0(i32 %a) { + %ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 0) + ret float %ret +} + +;--- fp8-byte1-err.ll +; ERR-FP8-BYTE1-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.f32.fp8 +; ERR-FP8-BYTE1-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.f32.fp8), %{{[0-9]+}}:vgpr(s32), 1 +define float @test_cvt_f32_fp8_byte1(i32 %a) { + %ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 1) + ret float %ret +} + +;--- bf8-byte0-err.ll +; ERR-BF8-BYTE0-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.f32.bf8 +; ERR-BF8-BYTE0-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.f32.bf8), %{{[0-9]+}}:vgpr(s32), 0 +define float @test_cvt_f32_bf8_byte0(i32 %a) { + %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 0) + ret float %ret +} + +;--- bf8-byte1-err.ll +; ERR-BF8-BYTE1-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.f32.bf8 +; ERR-BF8-BYTE1-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.f32.bf8), %{{[0-9]+}}:vgpr(s32), 1 +define float @test_cvt_f32_bf8_byte1(i32 %a) { + %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 1) + ret float %ret +} + +;--- pk-fp8-word0-err.ll +; ERR-PK-FP8-WORD0-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.pk.f32.fp8 +; ERR-PK-FP8-WORD0-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.f32.fp8), %{{[0-9]+}}:vgpr(s32), 0 +define <2 x float> @test_cvt_pk_f32_fp8_word0(i32 %a) { + %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 false) + ret <2 x float> %ret +} + +;--- pk-fp8-word1-err.ll +; ERR-PK-FP8-WORD1-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.pk.f32.fp8 +; ERR-PK-FP8-WORD1-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.f32.fp8), %{{[0-9]+}}:vgpr(s32), 1 +define <2 x float> @test_cvt_pk_f32_fp8_word1(i32 %a) { + %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 true) + ret <2 x float> %ret +} + +;--- pk-bf8-word0-err.ll +; ERR-PK-BF8-WORD0-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.pk.f32.bf8 +; ERR-PK-BF8-WORD0-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.f32.bf8), %{{[0-9]+}}:vgpr(s32), 0 +define <2 x float> @test_cvt_pk_f32_bf8_word0(i32 %a) { + %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 false) + ret <2 x float> %ret +} + +;--- pk-bf8-word1-err.ll +; ERR-PK-BF8-WORD1-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.cvt.pk.f32.bf8 +; ERR-PK-BF8-WORD1-ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.f32.bf8), %{{[0-9]+}}:vgpr(s32), 1 +define <2 x float> @test_cvt_pk_f32_bf8_word1(i32 %a) { + %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 true) + ret <2 x float> %ret +}