Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 20 additions & 5 deletions llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
def BITOP3_32 : ComplexPattern<i32, 4, "SelectBITOP3", [and, or, xor]>;
def BITOP3_16 : ComplexPattern<i16, 4, "SelectBITOP3", [and, or, xor]>;

// Matches PTRADD as a commutative operation.
// The fragment list spells out both operand orders of ptradd, so a pattern
// written with this fragment matches regardless of which ptradd operand ends
// up bound to $src0 and which to $src1.
def ptradd_commutative : PatFrags<(ops node:$src0, node:$src1),
[(ptradd node:$src0, node:$src1), (ptradd node:$src1, node:$src0)]>;

// Special case for v_div_fmas_{f32|f64}, since it seems to be the
// only VOP instruction that implicitly reads VCC.
let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod" in {
Expand Down Expand Up @@ -938,12 +942,18 @@ def : GCNPat<
(DivergentBinFrag<mul> i32:$src0, IsPow2Plus1:$src1),
(V_LSHL_ADD_U32_e64 i32:$src0, (i32 (Log2_32 imm:$src1)), i32:$src0)>;

let SubtargetPredicate = HasLshlAddU64Inst in
let SubtargetPredicate = HasLshlAddU64Inst in {
def : GCNPat<
(ThreeOpFrag<shl_0_to_4, add> i64:$src0, i32:$src1, i64:$src2),
(V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)
>;

// Select a 64-bit ptradd whose other operand is a shifted value into a single
// V_LSHL_ADD_U64. ptradd_commutative lets the shift appear on either side of
// the ptradd.
// NOTE(review): shl_0_to_4 is defined outside this view; presumably it limits
// the shift amount to 0..4, judging by its name -- confirm.
def : GCNPat <
// (ptradd z, (shl x, y)) or (ptradd (shl x, y), z) -> ((x << y) + z)
(ThreeOpFrag<shl_0_to_4, ptradd_commutative> i64:$src0, i32:$src1, i64:$src2),
(V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)>;
} // End SubtargetPredicate = HasLshlAddU64Inst

let SubtargetPredicate = HasAddMinMaxInsts in {
def : ThreeOp_i32_Pats<add, smax, V_ADD_MAX_I32_e64>;
def : ThreeOp_i32_Pats<add, umax, V_ADD_MAX_U32_e64>;
Expand Down Expand Up @@ -1019,19 +1029,24 @@ multiclass IMAD32_Pats <VOP3_Pseudo inst> {

// Handle cases where amdgpu-codegenprepare-mul24 made a mul24 instead of a normal mul.
// We need to separate this because otherwise OtherPredicates would be overridden.
class IMAD32_Mul24_Pat<VOP3_Pseudo inst>: GCNPat <
(i64 (add (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)), i64:$src2)),
class IMAD32_Mul24_Pats_Impl<VOP3_Pseudo inst, SDPatternOperator AddOp> : GCNPat <
(i64 (AddOp (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)), i64:$src2)),
(inst $src0, $src1, $src2, 0 /* clamp */)
>;

// Instantiates IMAD32_Mul24_Pats_Impl for `inst` twice: once with a plain
// `add` as the adding operation and once with `ptradd_commutative`, so the
// mul24-plus-addend pattern is selected to `inst` for both integer adds and
// pointer adds.
multiclass IMAD32_Mul24_Pats<VOP3_Pseudo inst> {
def : IMAD32_Mul24_Pats_Impl<inst, add>;
def : IMAD32_Mul24_Pats_Impl<inst, ptradd_commutative>;
}

// exclude pre-GFX9 where it was slow
let OtherPredicates = [HasNotMADIntraFwdBug], SubtargetPredicate = isGFX9Plus in {
defm : IMAD32_Pats<V_MAD_U64_U32_e64>;
def : IMAD32_Mul24_Pat<V_MAD_U64_U32_e64>;
defm : IMAD32_Mul24_Pats<V_MAD_U64_U32_e64>;
}
let OtherPredicates = [HasMADIntraFwdBug], SubtargetPredicate = isGFX11Only in {
defm : IMAD32_Pats<V_MAD_U64_U32_gfx11_e64>;
def : IMAD32_Mul24_Pat<V_MAD_U64_U32_gfx11_e64>;
defm : IMAD32_Mul24_Pats<V_MAD_U64_U32_gfx11_e64>;
}

def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3_OPSEL> {
Expand Down
5 changes: 1 addition & 4 deletions llvm/test/CodeGen/AMDGPU/fold-gep-offset.ll
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,7 @@ define i32 @flat_offset_maybe_oob(ptr %p, i32 %i) {
; GFX942-GISEL: ; %bb.0:
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX942-GISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-GISEL-NEXT: s_nop 1
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX942-GISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 12, v0
; GFX942-GISEL-NEXT: s_nop 1
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
Expand Down
32 changes: 9 additions & 23 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.load.async.to.lds.ll
Original file line number Diff line number Diff line change
Expand Up @@ -270,29 +270,15 @@ entry:
}

define amdgpu_ps void @cluster_load_async_to_lds_b64_saddr_no_scale_offset(ptr addrspace(1) inreg %gaddr, ptr addrspace(3) %laddr, i32 inreg %mask, i32 %idx) {
; GFX1250-SDAG-LABEL: cluster_load_async_to_lds_b64_saddr_no_scale_offset:
; GFX1250-SDAG: ; %bb.0: ; %entry
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX1250-SDAG-NEXT: s_mov_b32 m0, s2
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 2, s[0:1]
; GFX1250-SDAG-NEXT: cluster_load_async_to_lds_b64 v0, v[2:3], off offset:16 th:TH_LOAD_NT
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: cluster_load_async_to_lds_b64_saddr_no_scale_offset:
; GFX1250-GISEL: ; %bb.0: ; %entry
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, v1
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
; GFX1250-GISEL-NEXT: s_mov_b32 m0, s2
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX1250-GISEL-NEXT: v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_add_co_u32 v2, vcc_lo, v4, v2
; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v3, null, v5, v3, vcc_lo
; GFX1250-GISEL-NEXT: cluster_load_async_to_lds_b64 v0, v[2:3], off offset:16 th:TH_LOAD_NT
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-LABEL: cluster_load_async_to_lds_b64_saddr_no_scale_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v2, v1
; GFX1250-NEXT: s_mov_b32 m0, s2
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX1250-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 2, s[0:1]
; GFX1250-NEXT: cluster_load_async_to_lds_b64 v0, v[2:3], off offset:16 th:TH_LOAD_NT
; GFX1250-NEXT: s_endpgm
entry:
%idxprom = sext i32 %idx to i64
%gep = getelementptr i32, ptr addrspace(1) %gaddr, i64 %idxprom
Expand Down
29 changes: 8 additions & 21 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.async.to.lds.ll
Original file line number Diff line number Diff line change
Expand Up @@ -160,27 +160,14 @@ entry:
}

define amdgpu_ps void @global_load_async_to_lds_b64_saddr_no_scale_offset(ptr addrspace(1) inreg %gaddr, ptr addrspace(3) %laddr, i32 %idx) {
; GFX1250-SDAG-LABEL: global_load_async_to_lds_b64_saddr_no_scale_offset:
; GFX1250-SDAG: ; %bb.0: ; %entry
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 2, s[0:1]
; GFX1250-SDAG-NEXT: global_load_async_to_lds_b64 v0, v[2:3], off offset:16 th:TH_LOAD_NT
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: global_load_async_to_lds_b64_saddr_no_scale_offset:
; GFX1250-GISEL: ; %bb.0: ; %entry
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, v1
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX1250-GISEL-NEXT: v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_add_co_u32 v2, vcc_lo, v4, v2
; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v3, null, v5, v3, vcc_lo
; GFX1250-GISEL-NEXT: global_load_async_to_lds_b64 v0, v[2:3], off offset:16 th:TH_LOAD_NT
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-LABEL: global_load_async_to_lds_b64_saddr_no_scale_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v2, v1
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX1250-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 2, s[0:1]
; GFX1250-NEXT: global_load_async_to_lds_b64 v0, v[2:3], off offset:16 th:TH_LOAD_NT
; GFX1250-NEXT: s_endpgm
entry:
%idxprom = sext i32 %idx to i64
%gep = getelementptr i32, ptr addrspace(1) %gaddr, i64 %idxprom
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,27 +160,14 @@ entry:
}

define amdgpu_ps void @global_store_async_from_lds_b64_saddr_no_scale_offset(ptr addrspace(1) inreg %gaddr, ptr addrspace(3) %laddr, i32 %idx) {
; GFX1250-SDAG-LABEL: global_store_async_from_lds_b64_saddr_no_scale_offset:
; GFX1250-SDAG: ; %bb.0: ; %entry
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 2, s[0:1]
; GFX1250-SDAG-NEXT: global_store_async_from_lds_b64 v[2:3], v0, off offset:16 th:TH_STORE_NT
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: global_store_async_from_lds_b64_saddr_no_scale_offset:
; GFX1250-GISEL: ; %bb.0: ; %entry
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, v1
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX1250-GISEL-NEXT: v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_add_co_u32 v2, vcc_lo, v4, v2
; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v3, null, v5, v3, vcc_lo
; GFX1250-GISEL-NEXT: global_store_async_from_lds_b64 v[2:3], v0, off offset:16 th:TH_STORE_NT
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-LABEL: global_store_async_from_lds_b64_saddr_no_scale_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v2, v1
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX1250-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 2, s[0:1]
; GFX1250-NEXT: global_store_async_from_lds_b64 v[2:3], v0, off offset:16 th:TH_STORE_NT
; GFX1250-NEXT: s_endpgm
entry:
%idxprom = sext i32 %idx to i64
%gep = getelementptr i32, ptr addrspace(1) %gaddr, i64 %idxprom
Expand Down
35 changes: 12 additions & 23 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.monitor.gfx1250.ll
Original file line number Diff line number Diff line change
Expand Up @@ -169,33 +169,22 @@ entry:
}

define amdgpu_ps void @global_load_monitor_b64_saddr_no_scale_offset(ptr addrspace(1) inreg %addr, ptr addrspace(1) %use, i32 %idx) {
; GFX1250-SDAG-LABEL: global_load_monitor_b64_saddr_no_scale_offset:
; GFX1250-SDAG: ; %bb.0: ; %entry
; GFX1250-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 2, s[0:1]
; GFX1250-SDAG-NEXT: global_load_monitor_b64 v[2:3], v[2:3], off th:TH_LOAD_NT
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: global_store_b64 v[0:1], v[2:3], off
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: global_load_monitor_b64_saddr_no_scale_offset:
; GFX1250-GISEL: ; %bb.0: ; %entry
; GFX1250-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
; GFX1250-GISEL-NEXT: v_add_co_u32 v2, vcc_lo, v4, v2
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v3, null, v5, v3, vcc_lo
; GFX1250-GISEL-NEXT: global_load_monitor_b64 v[2:3], v[2:3], off th:TH_LOAD_NT
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-LABEL: global_load_monitor_b64_saddr_no_scale_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 2, s[0:1]
; GFX1250-NEXT: global_load_monitor_b64 v[2:3], v[2:3], off th:TH_LOAD_NT
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b64 v[0:1], v[2:3], off
; GFX1250-NEXT: s_endpgm
entry:
%idxprom = sext i32 %idx to i64
%gep = getelementptr i32, ptr addrspace(1) %addr, i64 %idxprom
%val = call <2 x i32> @llvm.amdgcn.global.load.monitor.b64.v2i32(ptr addrspace(1) %gep, i32 1)
store <2 x i32> %val, ptr addrspace(1) %use
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX1250-GISEL: {{.*}}
; GFX1250-SDAG: {{.*}}
41 changes: 12 additions & 29 deletions llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
Original file line number Diff line number Diff line change
Expand Up @@ -265,42 +265,25 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p) {

; Use non-zero shift amounts in v_lshl_add_u64.
define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) {
; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
; GFX942_PTRADD: ; %bb.0:
; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942_PTRADD-NEXT: v_lshlrev_b64 v[2:3], 3, v[2:3]
; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
;
; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
; GFX942_LEGACY: ; %bb.0:
; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
; GFX942-LABEL: select_v_lshl_add_u64:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
; GFX942-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr inbounds i64, ptr %base, i64 %voffset
ret ptr %gep
}

; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
; mul into a mul24.
define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
; GFX942_PTRADD: ; %bb.0:
; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942_PTRADD-NEXT: v_and_b32_e32 v2, 0xfffff, v2
; GFX942_PTRADD-NEXT: v_and_b32_e32 v4, 0xfffff, v4
; GFX942_PTRADD-NEXT: v_mul_hi_u32_u24_e32 v3, v2, v4
; GFX942_PTRADD-NEXT: v_mul_u32_u24_e32 v2, v2, v4
; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
;
; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
; GFX942_LEGACY: ; %bb.0:
; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942_LEGACY-NEXT: v_and_b32_e32 v2, 0xfffff, v2
; GFX942_LEGACY-NEXT: v_and_b32_e32 v3, 0xfffff, v4
; GFX942_LEGACY-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
; GFX942-LABEL: fold_mul24_into_mad:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_and_b32_e32 v2, 0xfffff, v2
; GFX942-NEXT: v_and_b32_e32 v3, 0xfffff, v4
; GFX942-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
; GFX942-NEXT: s_setpc_b64 s[30:31]
%a_masked = and i64 %a, u0xfffff
%b_masked = and i64 %b, u0xfffff
%mul = mul i64 %a_masked, %b_masked
Expand Down
42 changes: 14 additions & 28 deletions llvm/test/CodeGen/AMDGPU/ptradd-sdag.ll
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,12 @@ define ptr @gep_as0(ptr %p, i64 %offset) {
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX942_PTRADD-LABEL: gep_as0:
; GFX942_PTRADD: ; %bb.0: ; %entry
; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942_PTRADD-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, 5
; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
;
; GFX942_LEGACY-LABEL: gep_as0:
; GFX942_LEGACY: ; %bb.0: ; %entry
; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, 5
; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
; GFX942-LABEL: gep_as0:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, 5
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: gep_as0:
; GFX10: ; %bb.0: ; %entry
Expand Down Expand Up @@ -188,20 +180,12 @@ define ptr @multi_gep_as0(ptr %p, i64 %offset) {
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX942_PTRADD-LABEL: multi_gep_as0:
; GFX942_PTRADD: ; %bb.0: ; %entry
; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942_PTRADD-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, 5
; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
;
; GFX942_LEGACY-LABEL: multi_gep_as0:
; GFX942_LEGACY: ; %bb.0: ; %entry
; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, 5
; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
; GFX942-LABEL: multi_gep_as0:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, 5
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: multi_gep_as0:
; GFX10: ; %bb.0: ; %entry
Expand Down Expand Up @@ -537,3 +521,5 @@ entry:
; GFX12_PTRADD: {{.*}}
; GFX8_LEGACY: {{.*}}
; GFX8_PTRADD: {{.*}}
; GFX942_LEGACY: {{.*}}
; GFX942_PTRADD: {{.*}}
Loading