Skip to content

Commit 158eeb3

Browse files
authored
[AMDGPU] Change scale_sel to be 4 bits (#157900)
The latest SP changes updated `scale_sel` to use `OP_SEL[0:3]` instead of `OP_SEL[0:2]`, widening it from 3 bits to 4 bits (valid values 0 to 15). Fixes SWDEV-554472.
1 parent 081fe1d commit 158eeb3

File tree

10 files changed

+45
-28
lines changed

10 files changed

+45
-28
lines changed

clang/lib/Sema/SemaAMDGPU.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
100100
case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_bf16_bf6:
101101
case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_fp6:
102102
case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_bf6:
103-
return SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 7);
103+
return SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 15);
104104
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
105105
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
106106
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:

clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -75,21 +75,21 @@ void test_cvt_scale_pk(global half8 *outh8, global bfloat8 *outy8, uint2 src2,
7575
*outf16 = __builtin_amdgcn_cvt_scale_pk16_f32_fp6(src3, scale, scale_sel); // expected-error {{'__builtin_amdgcn_cvt_scale_pk16_f32_fp6' must be a constant integer}}
7676
*outf16 = __builtin_amdgcn_cvt_scale_pk16_f32_bf6(src3, scale, scale_sel); // expected-error {{'__builtin_amdgcn_cvt_scale_pk16_f32_bf6' must be a constant integer}}
7777

78-
*outh8 = __builtin_amdgcn_cvt_scale_pk8_f16_fp8(src2, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
79-
*outy8 = __builtin_amdgcn_cvt_scale_pk8_bf16_fp8(src2, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
80-
*outh8 = __builtin_amdgcn_cvt_scale_pk8_f16_bf8(src2, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
81-
*outy8 = __builtin_amdgcn_cvt_scale_pk8_bf16_bf8(src2, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
82-
*outh8 = __builtin_amdgcn_cvt_scale_pk8_f16_fp4(src1, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
83-
*outy8 = __builtin_amdgcn_cvt_scale_pk8_bf16_fp4(src1, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
84-
*outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_fp8(src2, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
85-
*outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_bf8(src2, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
86-
*outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_fp4(src1, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
87-
*outh16 = __builtin_amdgcn_cvt_scale_pk16_f16_fp6(src3, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
88-
*outy16 = __builtin_amdgcn_cvt_scale_pk16_bf16_fp6(src3, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
89-
*outh16 = __builtin_amdgcn_cvt_scale_pk16_f16_bf6(src3, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
90-
*outy16 = __builtin_amdgcn_cvt_scale_pk16_bf16_bf6(src3, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
91-
*outf16 = __builtin_amdgcn_cvt_scale_pk16_f32_fp6(src3, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
92-
*outf16 = __builtin_amdgcn_cvt_scale_pk16_f32_bf6(src3, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
78+
*outh8 = __builtin_amdgcn_cvt_scale_pk8_f16_fp8(src2, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
79+
*outy8 = __builtin_amdgcn_cvt_scale_pk8_bf16_fp8(src2, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
80+
*outh8 = __builtin_amdgcn_cvt_scale_pk8_f16_bf8(src2, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
81+
*outy8 = __builtin_amdgcn_cvt_scale_pk8_bf16_bf8(src2, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
82+
*outh8 = __builtin_amdgcn_cvt_scale_pk8_f16_fp4(src1, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
83+
*outy8 = __builtin_amdgcn_cvt_scale_pk8_bf16_fp4(src1, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
84+
*outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_fp8(src2, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
85+
*outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_bf8(src2, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
86+
*outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_fp4(src1, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
87+
*outh16 = __builtin_amdgcn_cvt_scale_pk16_f16_fp6(src3, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
88+
*outy16 = __builtin_amdgcn_cvt_scale_pk16_bf16_fp6(src3, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
89+
*outh16 = __builtin_amdgcn_cvt_scale_pk16_f16_bf6(src3, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
90+
*outy16 = __builtin_amdgcn_cvt_scale_pk16_bf16_bf6(src3, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
91+
*outf16 = __builtin_amdgcn_cvt_scale_pk16_f32_fp6(src3, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
92+
*outf16 = __builtin_amdgcn_cvt_scale_pk16_f32_bf6(src3, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
9393
}
9494

9595
void test_amdgcn_load_monitor(global int* b32gaddr, global v2i* b64gaddr, global v4i* b128gaddr, int *b32faddr, v2i* b64faddr, v4i *b128faddr,

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -663,7 +663,7 @@ def int_amdgcn_cvt_sr_bf8_f16 : DefaultAttrsIntrinsic<
663663
[IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
664664
>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f16">;
665665

666-
// llvm.amdgcn.cvt.scale.pk32.f16.bf6 v32f16 vdst, v6i32 src0, i32 scale_sel [0..7]
666+
// llvm.amdgcn.cvt.scale.pk32.f16.bf6 v32f16 vdst, v6i32 src0, i32 scale, i32 scale_sel [0..15]
667667
class AMDGPUCvtScaleIntrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
668668
[DstTy], [Src0Ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>]
669669
>, ClangBuiltin<"__builtin_amdgcn_"#name>;

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1353,7 +1353,7 @@ def MatrixAReuse : NamedBitOperand<"matrix_a_reuse">;
13531353
def MatrixBReuse : NamedBitOperand<"matrix_b_reuse">;
13541354

13551355
def ScaleSel : NamedIntOperand<"scale_sel"> {
1356-
let Validator = "isUInt<3>";
1356+
let Validator = "isUInt<4>";
13571357
}
13581358

13591359
class KImmFPOperand<ValueType vt> : ImmOperand<vt> {

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -414,10 +414,9 @@ class VOP3a_BITOP3_gfx12<bits<10> op, VOPProfile p> : VOP3e_gfx11_gfx12<op, p> {
414414
}
415415

416416
class VOP3a_ScaleSel_gfx1250<bits<10> op, VOPProfile p> : VOP3e_gfx11_gfx12<op, p> {
417-
bits<3> scale_sel;
417+
bits<4> scale_sel;
418418

419-
let Inst{13-11} = scale_sel;
420-
let Inst{14} = 0;
419+
let Inst{14-11} = scale_sel;
421420
}
422421

423422
class VOP3Interp_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> {

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scale.pk.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ define amdgpu_ps void @test_cvt_scale_pk8_f32_fp8_vv(<2 x i32> %src, i32 %scale,
106106
; GFX1250-SDAG-LABEL: test_cvt_scale_pk8_f32_fp8_vv:
107107
; GFX1250-SDAG: ; %bb.0:
108108
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v13, v4 :: v_dual_mov_b32 v12, v3
109-
; GFX1250-SDAG-NEXT: v_cvt_scale_pk8_f32_fp8 v[4:11], v[0:1], v2 scale_sel:7
109+
; GFX1250-SDAG-NEXT: v_cvt_scale_pk8_f32_fp8 v[4:11], v[0:1], v2 scale_sel:8
110110
; GFX1250-SDAG-NEXT: s_clause 0x1
111111
; GFX1250-SDAG-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
112112
; GFX1250-SDAG-NEXT: global_store_b128 v[12:13], v[4:7], off
@@ -115,12 +115,12 @@ define amdgpu_ps void @test_cvt_scale_pk8_f32_fp8_vv(<2 x i32> %src, i32 %scale,
115115
; GFX1250-GISEL-LABEL: test_cvt_scale_pk8_f32_fp8_vv:
116116
; GFX1250-GISEL: ; %bb.0:
117117
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v12, v3 :: v_dual_mov_b32 v13, v4
118-
; GFX1250-GISEL-NEXT: v_cvt_scale_pk8_f32_fp8 v[4:11], v[0:1], v2 scale_sel:7
118+
; GFX1250-GISEL-NEXT: v_cvt_scale_pk8_f32_fp8 v[4:11], v[0:1], v2 scale_sel:8
119119
; GFX1250-GISEL-NEXT: s_clause 0x1
120120
; GFX1250-GISEL-NEXT: global_store_b128 v[12:13], v[4:7], off
121121
; GFX1250-GISEL-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
122122
; GFX1250-GISEL-NEXT: s_endpgm
123-
%cvt = tail call <8 x float> @llvm.amdgcn.cvt.scale.pk8.f32.fp8(<2 x i32> %src, i32 %scale, i32 7)
123+
%cvt = tail call <8 x float> @llvm.amdgcn.cvt.scale.pk8.f32.fp8(<2 x i32> %src, i32 %scale, i32 8)
124124
store <8 x float> %cvt, ptr addrspace(1) %out, align 16
125125
ret void
126126
}
@@ -313,12 +313,12 @@ define amdgpu_ps void @test_cvt_scale_pk16_bf16_bf6_sl(<3 x i32> inreg %src, ptr
313313
; GFX1250-NEXT: v_dual_mov_b32 v10, s0 :: v_dual_mov_b32 v11, s1
314314
; GFX1250-NEXT: v_mov_b32_e32 v12, s2
315315
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
316-
; GFX1250-NEXT: v_cvt_scale_pk16_bf16_bf6 v[2:9], v[10:12], 0x64 scale_sel:7
316+
; GFX1250-NEXT: v_cvt_scale_pk16_bf16_bf6 v[2:9], v[10:12], 0x64 scale_sel:8
317317
; GFX1250-NEXT: s_clause 0x1
318318
; GFX1250-NEXT: global_store_b128 v[0:1], v[6:9], off offset:16
319319
; GFX1250-NEXT: global_store_b128 v[0:1], v[2:5], off
320320
; GFX1250-NEXT: s_endpgm
321-
%cvt = tail call <16 x bfloat> @llvm.amdgcn.cvt.scale.pk16.bf16.bf6(<3 x i32> %src, i32 100, i32 7)
321+
%cvt = tail call <16 x bfloat> @llvm.amdgcn.cvt.scale.pk16.bf16.bf6(<3 x i32> %src, i32 100, i32 8)
322322
store <16 x bfloat> %cvt, ptr addrspace(1) %out, align 8
323323
ret void
324324
}

llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -713,6 +713,9 @@ v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], 0xcf00
713713
v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], v8 scale_sel:7
714714
// GFX1250: v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], v8 scale_sel:7 ; encoding: [0x0a,0x38,0xab,0xd6,0x14,0x11,0x02,0x00]
715715

716+
v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], v8 scale_sel:8
717+
// GFX1250: v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], v8 scale_sel:8 ; encoding: [0x0a,0x40,0xab,0xd6,0x14,0x11,0x02,0x00]
718+
716719
v_cvt_scale_pk8_bf16_bf8 v[10:13], v[20:21], v8
717720
// GFX1250: v_cvt_scale_pk8_bf16_bf8 v[10:13], v[20:21], v8 ; encoding: [0x0a,0x00,0xac,0xd6,0x14,0x11,0x02,0x00]
718721

@@ -758,6 +761,9 @@ v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], 0xcf00
758761
v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], v8 scale_sel:7
759762
// GFX1250: v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], v8 scale_sel:7 ; encoding: [0x0a,0x38,0xad,0xd6,0x14,0x11,0x02,0x00]
760763

764+
v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], v8 scale_sel:8
765+
// GFX1250: v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], v8 scale_sel:8 ; encoding: [0x0a,0x40,0xad,0xd6,0x14,0x11,0x02,0x00]
766+
761767
v_cvt_scale_pk8_f32_fp4 v[10:17], v20, v8
762768
// GFX1250: v_cvt_scale_pk8_f32_fp4 v[10:17], v20, v8 ; encoding: [0x0a,0x00,0xa1,0xd6,0x14,0x11,0x02,0x00]
763769

llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -713,6 +713,9 @@ v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], 0xcf00
713713
v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], v8 scale_sel:7
714714
// GFX1250: v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], v8 scale_sel:7 ; encoding: [0x0a,0x38,0xab,0xd6,0x14,0x11,0x02,0x00]
715715

716+
v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], v8 scale_sel:8
717+
// GFX1250: v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], v8 scale_sel:8 ; encoding: [0x0a,0x40,0xab,0xd6,0x14,0x11,0x02,0x00]
718+
716719
v_cvt_scale_pk8_bf16_bf8 v[10:13], v[20:21], v8
717720
// GFX1250: v_cvt_scale_pk8_bf16_bf8 v[10:13], v[20:21], v8 ; encoding: [0x0a,0x00,0xac,0xd6,0x14,0x11,0x02,0x00]
718721

@@ -758,6 +761,9 @@ v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], 0xcf00
758761
v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], v8 scale_sel:7
759762
// GFX1250: v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], v8 scale_sel:7 ; encoding: [0x0a,0x38,0xad,0xd6,0x14,0x11,0x02,0x00]
760763

764+
v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], v8 scale_sel:8
765+
// GFX1250: v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], v8 scale_sel:8 ; encoding: [0x0a,0x40,0xad,0xd6,0x14,0x11,0x02,0x00]
766+
761767
v_cvt_scale_pk8_f32_fp4 v[10:17], v20, v8
762768
// GFX1250: v_cvt_scale_pk8_f32_fp4 v[10:17], v20, v8 ; encoding: [0x0a,0x00,0xa1,0xd6,0x14,0x11,0x02,0x00]
763769

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -277,9 +277,9 @@ v_cvt_sr_fp8_f16 v1, v2, v3 mul:2
277277
// GFX125X-ERR-NEXT:{{^}}v_cvt_sr_fp8_f16 v1, v2, v3 mul:2
278278
// GFX125X-ERR-NEXT:{{^}} ^
279279

280-
v_cvt_scale_pk8_f32_fp8 v[10:17], v[20:21], v8 scale_sel:8
280+
v_cvt_scale_pk8_f32_fp8 v[10:17], v[20:21], v8 scale_sel:16
281281
// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid scale_sel value.
282-
// GFX125X-ERR-NEXT:{{^}}v_cvt_scale_pk8_f32_fp8 v[10:17], v[20:21], v8 scale_sel:8
282+
// GFX125X-ERR-NEXT:{{^}}v_cvt_scale_pk8_f32_fp8 v[10:17], v[20:21], v8 scale_sel:16
283283
// GFX125X-ERR-NEXT:{{^}} ^
284284

285285
v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:4

llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -761,6 +761,9 @@
761761
0x0a,0x38,0xab,0xd6,0x14,0x11,0x02,0x00
762762
# GFX1250: v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], v8 scale_sel:7 ; encoding: [0x0a,0x38,0xab,0xd6,0x14,0x11,0x02,0x00]
763763

764+
0x0a,0x40,0xab,0xd6,0x14,0x11,0x02,0x00
765+
# GFX1250: v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], v8 scale_sel:8 ; encoding: [0x0a,0x40,0xab,0xd6,0x14,0x11,0x02,0x00]
766+
764767
0x0a,0x00,0xa8,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00
765768
# GFX1250: v_cvt_scale_pk8_f16_fp8 v[10:13], v[20:21], 0xcf00 ; encoding: [0x0a,0x00,0xa8,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
766769

@@ -800,6 +803,9 @@
800803
0x0a,0x38,0xad,0xd6,0x14,0x11,0x02,0x00
801804
# GFX1250: v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], v8 scale_sel:7 ; encoding: [0x0a,0x38,0xad,0xd6,0x14,0x11,0x02,0x00]
802805

806+
0x0a,0x40,0xad,0xd6,0x14,0x11,0x02,0x00
807+
# GFX1250: v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], v8 scale_sel:8 ; encoding: [0x0a,0x40,0xad,0xd6,0x14,0x11,0x02,0x00]
808+
803809
0x0a,0x00,0xaa,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00
804810
# GFX1250: v_cvt_scale_pk8_f32_fp8 v[10:17], v[20:21], 0xcf00 ; encoding: [0x0a,0x00,0xaa,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
805811

0 commit comments

Comments
 (0)