@@ -1228,51 +1228,49 @@ define void @v_shuffle_v3bf16_v2bf16__3_u_1(ptr addrspace(1) inreg %ptr) {
1228
1228
; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_u_1:
1229
1229
; GFX900: ; %bb.0:
1230
1230
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1231
+ ; GFX900-NEXT: v_mov_b32_e32 v0, 0
1231
1232
; GFX900-NEXT: ;;#ASMSTART
1232
1233
; GFX900-NEXT: ; def v1
1233
1234
; GFX900-NEXT: ;;#ASMEND
1234
1235
; GFX900-NEXT: ;;#ASMSTART
1235
1236
; GFX900-NEXT: ; def v2
1236
1237
; GFX900-NEXT: ;;#ASMEND
1237
- ; GFX900-NEXT: v_mov_b32_e32 v0, 0
1238
1238
; GFX900-NEXT: v_alignbit_b32 v2, s4, v2, 16
1239
- ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1239
+ ; GFX900-NEXT: global_store_short_d16_hi v0, v1, s[16:17] offset:4
1240
1240
; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
1241
- ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
1242
1241
; GFX900-NEXT: s_waitcnt vmcnt(0)
1243
1242
; GFX900-NEXT: s_setpc_b64 s[30:31]
1244
1243
;
1245
1244
; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_u_1:
1246
1245
; GFX90A: ; %bb.0:
1247
1246
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1247
+ ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
1248
1248
; GFX90A-NEXT: ;;#ASMSTART
1249
1249
; GFX90A-NEXT: ; def v1
1250
1250
; GFX90A-NEXT: ;;#ASMEND
1251
1251
; GFX90A-NEXT: ;;#ASMSTART
1252
1252
; GFX90A-NEXT: ; def v2
1253
1253
; GFX90A-NEXT: ;;#ASMEND
1254
- ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
1255
1254
; GFX90A-NEXT: v_alignbit_b32 v2, s4, v2, 16
1256
- ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1255
+ ; GFX90A-NEXT: global_store_short_d16_hi v0, v1, s[16:17] offset:4
1257
1256
; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
1258
- ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
1259
1257
; GFX90A-NEXT: s_waitcnt vmcnt(0)
1260
1258
; GFX90A-NEXT: s_setpc_b64 s[30:31]
1261
1259
;
1262
1260
; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_u_1:
1263
1261
; GFX942: ; %bb.0:
1264
1262
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1263
+ ; GFX942-NEXT: v_mov_b32_e32 v0, 0
1265
1264
; GFX942-NEXT: ;;#ASMSTART
1266
1265
; GFX942-NEXT: ; def v1
1267
1266
; GFX942-NEXT: ;;#ASMEND
1268
1267
; GFX942-NEXT: ;;#ASMSTART
1269
1268
; GFX942-NEXT: ; def v2
1270
1269
; GFX942-NEXT: ;;#ASMEND
1271
- ; GFX942-NEXT: v_mov_b32_e32 v0, 0
1270
+ ; GFX942-NEXT: s_nop 0
1272
1271
; GFX942-NEXT: v_alignbit_b32 v2, s0, v2, 16
1273
- ; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1272
+ ; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4
1274
1273
; GFX942-NEXT: global_store_dword v0, v2, s[0:1]
1275
- ; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4
1276
1274
; GFX942-NEXT: s_waitcnt vmcnt(0)
1277
1275
; GFX942-NEXT: s_setpc_b64 s[30:31]
1278
1276
%vec0 = call <2 x bfloat> asm "; def $0" , "=v" ()
0 commit comments