@@ -80,7 +80,7 @@ namespace {
80
80
// Define commonly used chipset versions for convenience.
81
81
// gfx908: chipset version components 9, 0, 8.
constexpr Chipset kGfx908{9, 0, 8};
82
82
// gfx90a: chipset version components 9, 0, 0xa (last component given in hex).
constexpr Chipset kGfx90a{9, 0, 0xa};
83
- constexpr Chipset kGfx940 = Chipset(9 , 4 , 0 );
83
+ constexpr Chipset kGfx942 = Chipset(9 , 4 , 2 );
84
84
85
85
/// Define lowering patterns for raw buffer ops
86
86
template <typename GpuOp, typename Intrinsic>
@@ -483,7 +483,7 @@ static std::optional<StringRef> mfmaOpToIntrinsic(MFMAOp mfma,
483
483
destElem = destType.getElementType ();
484
484
485
485
if (sourceElem.isF32 () && destElem.isF32 ()) {
486
- if (mfma.getReducePrecision () && chipset >= kGfx940 ) {
486
+ if (mfma.getReducePrecision () && chipset >= kGfx942 ) {
487
487
if (m == 32 && n == 32 && k == 4 && b == 1 )
488
488
return ROCDL::mfma_f32_32x32x4_xf32::getOperationName ();
489
489
if (m == 16 && n == 16 && k == 8 && b == 1 )
@@ -551,9 +551,9 @@ static std::optional<StringRef> mfmaOpToIntrinsic(MFMAOp mfma,
551
551
return ROCDL::mfma_i32_32x32x8i8::getOperationName ();
552
552
if (m == 16 && n == 16 && k == 16 && b == 1 )
553
553
return ROCDL::mfma_i32_16x16x16i8::getOperationName ();
554
- if (m == 32 && n == 32 && k == 16 && b == 1 && chipset >= kGfx940 )
554
+ if (m == 32 && n == 32 && k == 16 && b == 1 && chipset >= kGfx942 )
555
555
return ROCDL::mfma_i32_32x32x16_i8::getOperationName ();
556
- if (m == 16 && n == 16 && k == 32 && b == 1 && chipset >= kGfx940 )
556
+ if (m == 16 && n == 16 && k == 32 && b == 1 && chipset >= kGfx942 )
557
557
return ROCDL::mfma_i32_16x16x32_i8::getOperationName ();
558
558
}
559
559
@@ -565,7 +565,7 @@ static std::optional<StringRef> mfmaOpToIntrinsic(MFMAOp mfma,
565
565
}
566
566
567
567
if (isa<Float8E5M2FNUZType>(sourceElem) && destElem.isF32 () &&
568
- chipset >= kGfx940 ) {
568
+ chipset >= kGfx942 ) {
569
569
// Known to be correct because there are no scalar f8 instructions and
570
570
// because a length mismatch will have been caught by the verifier.
571
571
Type sourceBElem =
@@ -585,7 +585,7 @@ static std::optional<StringRef> mfmaOpToIntrinsic(MFMAOp mfma,
585
585
}
586
586
587
587
if (isa<Float8E4M3FNUZType>(sourceElem) && destElem.isF32 () &&
588
- chipset >= kGfx940 ) {
588
+ chipset >= kGfx942 ) {
589
589
Type sourceBElem =
590
590
cast<VectorType>(mfma.getSourceB ().getType ()).getElementType ();
591
591
if (m == 16 && n == 16 && k == 32 && b == 1 ) {
@@ -653,8 +653,8 @@ struct MFMAOpLowering : public ConvertOpToLLVMPattern<MFMAOp> {
653
653
return op->emitOpError (" MFMA only supported on gfx908+" );
654
654
uint32_t getBlgpField = static_cast <uint32_t >(op.getBlgp ());
655
655
if (op.getNegateA () || op.getNegateB () || op.getNegateC ()) {
656
- if (chipset < kGfx940 )
657
- return op.emitOpError (" negation unsupported on older than gfx940 " );
656
+ if (chipset < kGfx942 )
657
+ return op.emitOpError (" negation unsupported on older than gfx942 " );
658
658
getBlgpField |=
659
659
op.getNegateA () | (op.getNegateB () << 1 ) | (op.getNegateC () << 2 );
660
660
}
@@ -775,7 +775,7 @@ LogicalResult ExtPackedFp8OpLowering::matchAndRewrite(
775
775
ExtPackedFp8Op op, ExtPackedFp8OpAdaptor adaptor,
776
776
ConversionPatternRewriter &rewriter) const {
777
777
Location loc = op.getLoc ();
778
- if (chipset.majorVersion != 9 || chipset < kGfx940 )
778
+ if (chipset.majorVersion != 9 || chipset < kGfx942 )
779
779
return rewriter.notifyMatchFailure (
780
780
loc, " Fp8 conversion instructions are not available on target "
781
781
" architecture and their emulation is not implemented" );
@@ -819,7 +819,7 @@ LogicalResult PackedTrunc2xFp8OpLowering::matchAndRewrite(
819
819
PackedTrunc2xFp8Op op, PackedTrunc2xFp8OpAdaptor adaptor,
820
820
ConversionPatternRewriter &rewriter) const {
821
821
Location loc = op.getLoc ();
822
- if (chipset.majorVersion != 9 || chipset < kGfx940 )
822
+ if (chipset.majorVersion != 9 || chipset < kGfx942 )
823
823
return rewriter.notifyMatchFailure (
824
824
loc, " Fp8 conversion instructions are not available on target "
825
825
" architecture and their emulation is not implemented" );
@@ -856,7 +856,7 @@ LogicalResult PackedStochRoundFp8OpLowering::matchAndRewrite(
856
856
PackedStochRoundFp8Op op, PackedStochRoundFp8OpAdaptor adaptor,
857
857
ConversionPatternRewriter &rewriter) const {
858
858
Location loc = op.getLoc ();
859
- if (chipset.majorVersion != 9 || chipset < kGfx940 )
859
+ if (chipset.majorVersion != 9 || chipset < kGfx942 )
860
860
return rewriter.notifyMatchFailure (
861
861
loc, " Fp8 conversion instructions are not available on target "
862
862
" architecture and their emulation is not implemented" );
0 commit comments