@@ -1926,6 +1926,11 @@ static const fltSemantics *getFltSemantics(MVT VT) {
 
 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
   switch (OperandType) {
+  // When a floating-point immediate is used as an i16 operand, the 32-bit
+  // representation of the constant truncated to the 16 LSBs should be used.
+  case AMDGPU::OPERAND_REG_IMM_INT16:
+  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
   case AMDGPU::OPERAND_REG_IMM_INT32:
   case AMDGPU::OPERAND_REG_IMM_FP32:
   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
@@ -1949,13 +1954,10 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
     return &APFloat::IEEEdouble();
-  case AMDGPU::OPERAND_REG_IMM_INT16:
   case AMDGPU::OPERAND_REG_IMM_FP16:
   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
-  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
-  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
   case AMDGPU::OPERAND_REG_IMM_V2FP16:
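
The hunks above move the 16-bit integer operand types into the IEEEsingle bucket: a floating-point literal written on an i16 operand is encoded through its 32-bit (single-precision) bit pattern and then truncated to the 16 LSBs, as the new comment states. A minimal standalone sketch of that rule (hypothetical helper, not part of the patch):

```cpp
// Hypothetical illustration of the 32-bit-then-truncate rule described in the
// comment added above; not LLVM code.
#include <cstdint>
#include <cstdio>
#include <cstring>

static uint16_t encodeFpLiteralForI16Operand(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits)); // 32-bit (IEEE single) representation
  return static_cast<uint16_t>(Bits);   // truncated to the 16 LSBs
}

int main() {
  // 1.0f is 0x3f800000 in single precision, so the encoded 16 LSBs are 0x0000.
  std::printf("1.0f -> 0x%04x\n",
              static_cast<unsigned>(encodeFpLiteralForI16Operand(1.0f)));
  return 0;
}
```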
@@ -2001,13 +2003,15 @@ static bool isSafeTruncation(int64_t Val, unsigned Size) {
 }
 
 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
-  if (VT.getScalarType() == MVT::i16) {
-    // FP immediate values are broken.
-    return isInlinableIntLiteral(Val);
-  }
+  if (VT.getScalarType() == MVT::i16)
+    return isInlinableLiteral32(Val, HasInv2Pi);
+
+  if (VT.getScalarType() == MVT::f16)
+    return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
 
-  // f16/v2f16 operands work correctly for all values.
-  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
+  assert(VT.getScalarType() == MVT::bf16);
+
+  return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
 }
 
 bool AMDGPUOperand::isInlinableImm(MVT type) const {
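
With this change, isInlineableLiteralOp16 dispatches on the scalar type: i16 operands are checked against the 32-bit FP inline-constant set, f16 against the FP16 set, and bf16 against the BF16 set. For orientation, a sketch of what the FP16 set looks like as bit patterns (assumed values mirroring the shape of AMDGPU::isInlinableLiteralFP16, not the actual implementation):

```cpp
// Sketch of the f16 inline-constant set (assumed bit patterns); the real
// check lives in AMDGPU::isInlinableLiteralFP16.
#include <cstdint>

static bool isInlinableFP16Sketch(uint16_t Bits, bool HasInv2Pi) {
  switch (Bits) {
  case 0x0000: // 0.0
  case 0x3800: // 0.5
  case 0xB800: // -0.5
  case 0x3C00: // 1.0
  case 0xBC00: // -1.0
  case 0x4000: // 2.0
  case 0xC000: // -2.0
  case 0x4400: // 4.0
  case 0xC400: // -4.0
    return true;
  case 0x3118: // 1/(2*pi), only when the target supports the inv2pi constant
    return HasInv2Pi;
  default:
    return false;
  }
}
```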
@@ -2041,9 +2045,30 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const {
       return false;
 
     if (type.getScalarSizeInBits() == 16) {
-      return isInlineableLiteralOp16(
-          static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
-          type, AsmParser->hasInv2PiInlineImm());
+      bool Lost = false;
+      switch (type.getScalarType().SimpleTy) {
+      default:
+        llvm_unreachable("unknown 16-bit type");
+      case MVT::bf16:
+        FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
+                          &Lost);
+        break;
+      case MVT::f16:
+        FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
+                          &Lost);
+        break;
+      case MVT::i16:
+        FPLiteral.convert(APFloatBase::IEEEsingle(),
+                          APFloat::rmNearestTiesToEven, &Lost);
+        break;
+      }
+      // We need to use the 32-bit representation here because when a
+      // floating-point inline constant is used as an i16 operand, its 32-bit
+      // representation will be used. We will need the 32-bit value to check
+      // if it is an FP inline constant.
+      uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
+      return isInlineableLiteralOp16(ImmVal, type,
+                                     AsmParser->hasInv2PiInlineImm());
     }
 
     // Check if single precision literal is inlinable
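
Here the parsed literal is first converted into the operand's own semantics (BFloat for bf16, IEEEhalf for f16, and IEEEsingle for i16, matching getOpFltSemantics above), and the resulting bit pattern is handed on as a 32-bit value, following the 32-bit-then-truncate rule described earlier. A short sketch of the conversion step using LLVM's APFloat (assumes an LLVM build environment; not part of the patch):

```cpp
// Sketch: convert a parsed double literal to f16 with APFloat and read back
// the bit pattern, the value the inline-constant check operates on.
#include "llvm/ADT/APFloat.h"
#include <cstdint>
#include <cstdio>

int main() {
  llvm::APFloat FPLiteral(0.5);
  bool Lost = false;
  FPLiteral.convert(llvm::APFloat::IEEEhalf(),
                    llvm::APFloat::rmNearestTiesToEven, &Lost);
  uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
  std::printf("bits = 0x%04x, lost = %d\n", ImmVal, Lost); // 0x3800, 0
  return 0;
}
```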
@@ -2375,15 +2400,26 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
     return;
 
   case AMDGPU::OPERAND_REG_IMM_INT16:
-  case AMDGPU::OPERAND_REG_IMM_FP16:
-  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
-  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+    if (isSafeTruncation(Val, 16) &&
+        AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
+      Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
+      setImmKindConst();
+      return;
+    }
+
+    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
+    setImmKindLiteral();
+    return;
+
+  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+  case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
     if (isSafeTruncation(Val, 16) &&
-        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
-                                     AsmParser->hasInv2PiInlineImm())) {
+        AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
+                                       AsmParser->hasInv2PiInlineImm())) {
       Inst.addOperand(MCOperand::createImm(Val));
       setImmKindConst();
       return;
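
For the 16-bit integer operand types, only the integer inline constants (-16..64) are accepted as-is; any other value is emitted as a literal masked to 16 bits, while the FP16 operand types now go through the dedicated FP16 check. A standalone sketch of the integer path (hypothetical helper names, not the parser's code):

```cpp
// Hypothetical sketch of the INT16 case above: inline constants -16..64 pass
// through unchanged, everything else becomes a 16-bit literal.
#include <cstdint>

static bool isInlinableIntSketch(int64_t V) { return V >= -16 && V <= 64; }

static uint32_t encodeI16OperandSketch(int64_t Val, bool &IsLiteral) {
  if (isInlinableIntSketch(static_cast<int16_t>(Val))) {
    IsLiteral = false;                              // inline constant
    return static_cast<uint32_t>(Val & 0xffffffff);
  }
  IsLiteral = true;                                 // needs a literal
  return static_cast<uint32_t>(Val & 0xffff);       // low 16 bits only
}
```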
@@ -2410,12 +2446,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
     return;
 
   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: {
+    assert(isSafeTruncation(Val, 16));
+    assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
+    Inst.addOperand(MCOperand::createImm(Val));
+    return;
+  }
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
-  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
     assert(isSafeTruncation(Val, 16));
-    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
-                                        AsmParser->hasInv2PiInlineImm()));
+    assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
+                                          AsmParser->hasInv2PiInlineImm()));
 
     Inst.addOperand(MCOperand::createImm(Val));
     return;
@@ -3559,7 +3600,19 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
         OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
       return AMDGPU::isInlinableLiteralV2BF16(Val);
 
-    return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
+    if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
+        OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16 ||
+        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP16 ||
+        OperandType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED)
+      return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
+
+    if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 ||
+        OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16 ||
+        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_BF16 ||
+        OperandType == AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED)
+      return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
+
+    llvm_unreachable("invalid operand type");
   }
   default:
     llvm_unreachable("invalid operand size");
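
The fallback now distinguishes FP16 from BF16 operand types because the same 16-bit pattern denotes different values in the two formats, so one shared 16-bit check cannot serve both. A standalone illustration (not LLVM code):

```cpp
// Sketch: decode a bf16 pattern by widening it into the top half of an IEEE
// single. 0x3F00 is 0.5 in bf16 (an inline constant there) but reads as 1.75
// in f16, so the FP16 and BF16 inline-constant checks must stay separate.
#include <cstdint>
#include <cstdio>
#include <cstring>

static float fromBF16(uint16_t Bits) {
  uint32_t Wide = static_cast<uint32_t>(Bits) << 16;
  float F;
  std::memcpy(&F, &Wide, sizeof(F));
  return F;
}

int main() {
  std::printf("bf16 0x3F00 = %g\n", fromBF16(0x3F00)); // prints 0.5
  return 0;
}
```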