Skip to content

Commit 706ec85

Browse files
committed
List suitable opcodes explicitly. Add a negative test for si_pc_add_rel_offset.
Signed-off-by: John Lu <[email protected]>
1 parent 5c801f6 commit 706ec85

File tree

2 files changed

+70
-19
lines changed

2 files changed

+70
-19
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 37 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -710,26 +710,45 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
710710
}
711711

712712
static bool setsSCCifResultIsNonZero(const MachineInstr &MI) {
713-
if (!MI.findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
714-
return false;
715-
// Compares have no result
716-
if (MI.isCompare())
717-
return false;
718713
switch (MI.getOpcode()) {
719-
default:
714+
case AMDGPU::S_ABS_I32:
715+
case AMDGPU::S_ABSDIFF_I32:
716+
case AMDGPU::S_ASHR_I32:
717+
case AMDGPU::S_ASHR_I64:
718+
case AMDGPU::S_LSHL_B32:
719+
case AMDGPU::S_LSHL_B64:
720+
case AMDGPU::S_LSHR_B32:
721+
case AMDGPU::S_LSHR_B64:
722+
case AMDGPU::S_AND_B32:
723+
case AMDGPU::S_AND_B64:
724+
case AMDGPU::S_OR_B32:
725+
case AMDGPU::S_OR_B64:
726+
case AMDGPU::S_XOR_B32:
727+
case AMDGPU::S_XOR_B64:
728+
case AMDGPU::S_NOT_B32:
729+
case AMDGPU::S_NOT_B64:
730+
case AMDGPU::S_NAND_B32:
731+
case AMDGPU::S_NAND_B64:
732+
case AMDGPU::S_NOR_B32:
733+
case AMDGPU::S_NOR_B64:
734+
case AMDGPU::S_XNOR_B32:
735+
case AMDGPU::S_XNOR_B64:
736+
case AMDGPU::S_ANDN2_B32:
737+
case AMDGPU::S_ANDN2_B64:
738+
case AMDGPU::S_ORN2_B32:
739+
case AMDGPU::S_ORN2_B64:
740+
case AMDGPU::S_BFE_I32:
741+
case AMDGPU::S_BFE_I64:
742+
case AMDGPU::S_BFE_U32:
743+
case AMDGPU::S_BFE_U64:
744+
case AMDGPU::S_BCNT0_I32_B32:
745+
case AMDGPU::S_BCNT0_I32_B64:
746+
case AMDGPU::S_BCNT1_I32_B32:
747+
case AMDGPU::S_BCNT1_I32_B64:
748+
case AMDGPU::S_QUADMASK_B32:
749+
case AMDGPU::S_QUADMASK_B64:
720750
return true;
721-
case AMDGPU::S_ADD_I32:
722-
case AMDGPU::S_ADD_U32:
723-
case AMDGPU::S_ADDC_U32:
724-
case AMDGPU::S_SUB_I32:
725-
case AMDGPU::S_SUB_U32:
726-
case AMDGPU::S_SUBB_U32:
727-
case AMDGPU::S_MIN_I32:
728-
case AMDGPU::S_MIN_U32:
729-
case AMDGPU::S_MAX_I32:
730-
case AMDGPU::S_MAX_U32:
731-
case AMDGPU::S_ADDK_I32:
732-
case AMDGPU::SI_PC_ADD_REL_OFFSET:
751+
default:
733752
return false;
734753
}
735754
}

llvm/test/CodeGen/AMDGPU/s_cmp_0.ll

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2-
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck %s
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s
33

44
declare i32 @llvm.ctpop.i32(i32)
55
declare i64 @llvm.ctpop.i64(i64)
@@ -593,3 +593,35 @@ define amdgpu_ps i32 @not64(i64 inreg %val0) {
593593
%zext = zext i1 %cmp to i32
594594
ret i32 %zext
595595
}
596+
597+
598+
; --------------------------------------------------------------------------------
599+
; Negative tests: the compare against zero must NOT be optimized away here.
600+
; --------------------------------------------------------------------------------
601+
602+
@1 = external dso_local addrspace(4) constant i32
603+
604+
define amdgpu_ps i32 @si_pc_add_rel_offset_must_not_optimize(ptr addrspace(1) %out) {
605+
; CHECK-LABEL: si_pc_add_rel_offset_must_not_optimize:
606+
; CHECK: ; %bb.0:
607+
; CHECK-NEXT: s_getpc_b64 s[0:1]
608+
; CHECK-NEXT: s_add_u32 s0, s0, __unnamed_1@rel32@lo+4
609+
; CHECK-NEXT: s_addc_u32 s1, s1, __unnamed_1@rel32@hi+12
610+
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
611+
; CHECK-NEXT: s_cbranch_scc0 .LBB35_2
612+
; CHECK-NEXT: ; %bb.1: ; %endif
613+
; CHECK-NEXT: s_mov_b32 s0, 1
614+
; CHECK-NEXT: s_branch .LBB35_3
615+
; CHECK-NEXT: .LBB35_2: ; %if
616+
; CHECK-NEXT: s_mov_b32 s0, 0
617+
; CHECK-NEXT: s_branch .LBB35_3
618+
; CHECK-NEXT: .LBB35_3:
619+
%cmp = icmp ne ptr addrspace(4) @1, null
620+
br i1 %cmp, label %endif, label %if
621+
622+
if:
623+
ret i32 0
624+
625+
endif:
626+
ret i32 1
627+
}

0 commit comments

Comments
 (0)