Skip to content

Commit f6b8388

Browse files
jwanggit86 and Wang, Jun authored
[AMDGPU][MC] Fix disassembly for v_permlane16_swap_b32 for GFX950 (#146600) (llvm#3004)
When targeting GFX950, disassembly of the v_permlane16_swap_b32 and v_permlane32_swap_b32 instructions produces errors when they use certain vdst operand values, e.g., v_permlane16_swap_b32 v218, v219. This patch fixes this problem. Co-authored-by: Jun Wang <[email protected]>
1 parent 6b86029 commit f6b8388

File tree

3 files changed

+62
-2
lines changed

3 files changed

+62
-2
lines changed

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -391,8 +391,8 @@ def VOP_PERMLANE_SWAP : VOPProfile<[i32, i32, untyped, untyped]> {
391391
let HasExtDPP = 0;
392392
let HasExtSDWA = 0;
393393

394-
let Ins32 = (ins Src0RC64:$vdst_in, Src0RC32:$src0);
395-
let Ins64 = (ins Src0RC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl);
394+
let Ins32 = (ins DstRC:$vdst_in, Src0RC32:$src0);
395+
let Ins64 = (ins DstRC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl);
396396
let InsVOP3OpSel = (ins Src0RC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl);
397397
let Asm64 = "$vdst, $src0$bound_ctrl$fi";
398398
let AsmVOP3OpSel = "$vdst, $src0$bound_ctrl$fi";

llvm/test/MC/AMDGPU/gfx950_asm_features.s

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,14 +41,26 @@ global_load_lds_dwordx4 v2, s[4:5] offset:4
4141
// GFX950: v_permlane16_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb3,0x02,0x7e]
4242
v_permlane16_swap_b32 v1, v2
4343

44+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
45+
// GFX950: v_permlane16_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb3,0xb4,0x7f]
46+
v_permlane16_swap_b32 v218, v219
47+
4448
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
4549
// GFX950: v_permlane16_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb3,0x02,0x7e]
4650
v_permlane16_swap_b32_e32 v1, v2
4751

52+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
53+
// GFX950: v_permlane16_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb3,0xb4,0x7f]
54+
v_permlane16_swap_b32_e32 v218, v219
55+
4856
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
4957
// GFX950: v_permlane16_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00]
5058
v_permlane16_swap_b32_e64 v1, v2
5159

60+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
61+
// GFX950: v_permlane16_swap_b32_e64 v218, v219 ; encoding: [0xda,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00]
62+
v_permlane16_swap_b32_e64 v218, v219
63+
5264
// FIXME: Parsed as bound_ctrl:1?
5365
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
5466
// GFX950: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00]
@@ -82,14 +94,26 @@ v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1
8294
// GFX950: v_permlane32_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb5,0x02,0x7e]
8395
v_permlane32_swap_b32 v1, v2
8496

97+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
98+
// GFX950: v_permlane32_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb5,0xb4,0x7f]
99+
v_permlane32_swap_b32 v218, v219
100+
85101
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
86102
// GFX950: v_permlane32_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb5,0x02,0x7e]
87103
v_permlane32_swap_b32_e32 v1, v2
88104

105+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
106+
// GFX950: v_permlane32_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb5,0xb4,0x7f]
107+
v_permlane32_swap_b32_e32 v218, v219
108+
89109
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
90110
// GFX950: v_permlane32_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00]
91111
v_permlane32_swap_b32_e64 v1, v2
92112

113+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
114+
// GFX950: v_permlane32_swap_b32_e64 v218, v219 ; encoding: [0xda,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00]
115+
v_permlane32_swap_b32_e64 v218, v219
116+
93117
// FIXME: Parsed as bound_ctrl:1?
94118
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
95119
// GFX950: v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00]

llvm/test/MC/Disassembler/AMDGPU/gfx950.txt

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,27 @@
4747
# GFX950: v_permlane16_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb3,0x02,0x7e]
4848
0x02,0xb3,0x02,0x7e
4949

50+
# GFX950: v_permlane16_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb3,0xb4,0x7f]
51+
0xdb,0xb3,0xb4,0x7f
52+
53+
# GFX950: v_permlane16_swap_b32_e32 v218, v2 ; encoding: [0x02,0xb3,0xb4,0x7f]
54+
0x02,0xb3,0xb4,0x7f
55+
56+
# GFX950: v_permlane16_swap_b32_e32 v2, v219 ; encoding: [0xdb,0xb3,0x04,0x7e]
57+
0xdb,0xb3,0x04,0x7e
58+
5059
# GFX950: v_permlane16_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00]
5160
0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00
5261

62+
# GFX950: v_permlane16_swap_b32_e64 v218, v219 ; encoding: [0xda,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00]
63+
0xda,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00
64+
65+
# GFX950: v_permlane16_swap_b32_e64 v218, v2 ; encoding: [0xda,0x00,0x99,0xd1,0x02,0x01,0x00,0x00]
66+
0xda,0x00,0x99,0xd1,0x02,0x01,0x00,0x00
67+
68+
# GFX950: v_permlane16_swap_b32_e64 v2, v219 ; encoding: [0x02,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00]
69+
0x02,0x00,0x99,0xd1,0xdb,0x01,0x00,0x00
70+
5371
# GFX950: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00]
5472
0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00
5573

@@ -63,9 +81,27 @@
6381
# GFX950: v_permlane32_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb5,0x02,0x7e]
6482
0x02,0xb5,0x02,0x7e
6583

84+
# GFX950: v_permlane32_swap_b32_e32 v218, v219 ; encoding: [0xdb,0xb5,0xb4,0x7f]
85+
0xdb,0xb5,0xb4,0x7f
86+
87+
# GFX950: v_permlane32_swap_b32_e32 v218, v2 ; encoding: [0x02,0xb5,0xb4,0x7f]
88+
0x02,0xb5,0xb4,0x7f
89+
90+
# GFX950: v_permlane32_swap_b32_e32 v2, v219 ; encoding: [0xdb,0xb5,0x04,0x7e]
91+
0xdb,0xb5,0x04,0x7e
92+
6693
# GFX950: v_permlane32_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00]
6794
0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00
6895

96+
# GFX950: v_permlane32_swap_b32_e64 v218, v219 ; encoding: [0xda,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00]
97+
0xda,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00
98+
99+
# GFX950: v_permlane32_swap_b32_e64 v218, v2 ; encoding: [0xda,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00]
100+
0xda,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00
101+
102+
# GFX950: v_permlane32_swap_b32_e64 v2, v219 ; encoding: [0x02,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00]
103+
0x02,0x00,0x9a,0xd1,0xdb,0x01,0x00,0x00
104+
69105
# GFX950: v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00]
70106
0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00
71107

0 commit comments

Comments
 (0)