Skip to content

Commit b67e465

Browse files
authored
[AMDGPU] Ensure SCC is not live before shrinking to s_bitset* (#167907)
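Ensure SCC is not live before shrinking s_and*/s_or* instructions to s_bitset*. Unlike s_and*/s_or*, the s_bitset* instructions do not write SCC, so the shrink is only safe when the SCC result of the original instruction is unused.

---------

Signed-off-by: John Lu <[email protected]>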
1 parent f2336d4 commit b67e465

File tree

2 files changed

+46
-7
lines changed

2 files changed

+46
-7
lines changed

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -550,15 +550,17 @@ bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
550550
uint32_t NewImm = 0;
551551

552552
if (Opc == AMDGPU::S_AND_B32) {
553-
if (isPowerOf2_32(~Imm)) {
553+
if (isPowerOf2_32(~Imm) &&
554+
MI.findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr)->isDead()) {
554555
NewImm = llvm::countr_one(Imm);
555556
Opc = AMDGPU::S_BITSET0_B32;
556557
} else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
557558
NewImm = ~Imm;
558559
Opc = AMDGPU::S_ANDN2_B32;
559560
}
560561
} else if (Opc == AMDGPU::S_OR_B32) {
561-
if (isPowerOf2_32(Imm)) {
562+
if (isPowerOf2_32(Imm) &&
563+
MI.findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr)->isDead()) {
562564
NewImm = llvm::countr_zero(Imm);
563565
Opc = AMDGPU::S_BITSET1_B32;
564566
} else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {

llvm/test/CodeGen/AMDGPU/s_cmp_0.ll

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,43 @@ define amdgpu_ps i32 @and64(i64 inreg %val0, i64 inreg %val1) {
153153
ret i32 %zext
154154
}
155155

156+
define amdgpu_ps i32 @and32_clear_one_bit(i32 inreg %val0) {
157+
; CHECK-LABEL: and32_clear_one_bit:
158+
; CHECK: ; %bb.0:
159+
; CHECK-NEXT: s_and_b32 s0, s0, 0x7fffffff
160+
; CHECK-NEXT: ;;#ASMSTART
161+
; CHECK-NEXT: ; use s0
162+
; CHECK-NEXT: ;;#ASMEND
163+
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
164+
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
165+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
166+
; CHECK-NEXT: ; return to shader part epilog
167+
%result = and i32 %val0, 2147483647
168+
call void asm "; use $0", "s"(i32 %result)
169+
%cmp = icmp ne i32 %result, 0
170+
%zext = zext i1 %cmp to i32
171+
ret i32 %zext
172+
}
173+
174+
define amdgpu_ps i32 @and64_clear_one_bit(i64 inreg %val0) {
175+
; CHECK-LABEL: and64_clear_one_bit:
176+
; CHECK: ; %bb.0:
177+
; CHECK-NEXT: s_bitset0_b32 s0, 31
178+
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
179+
; CHECK-NEXT: ;;#ASMSTART
180+
; CHECK-NEXT: ; use s[0:1]
181+
; CHECK-NEXT: ;;#ASMEND
182+
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
183+
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
184+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
185+
; CHECK-NEXT: ; return to shader part epilog
186+
%result = and i64 %val0, -2147483649
187+
call void asm "; use $0", "s"(i64 %result)
188+
%cmp = icmp ne i64 %result, 0
189+
%zext = zext i1 %cmp to i32
190+
ret i32 %zext
191+
}
192+
156193
define amdgpu_ps i32 @or32(i32 inreg %val0, i32 inreg %val1) {
157194
; CHECK-LABEL: or32:
158195
; CHECK: ; %bb.0:
@@ -623,14 +660,14 @@ define amdgpu_ps i32 @si_pc_add_rel_offset_must_not_optimize() {
623660
; CHECK-NEXT: s_add_u32 s0, s0, __unnamed_1@rel32@lo+4
624661
; CHECK-NEXT: s_addc_u32 s1, s1, __unnamed_1@rel32@hi+12
625662
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
626-
; CHECK-NEXT: s_cbranch_scc0 .LBB36_2
663+
; CHECK-NEXT: s_cbranch_scc0 .LBB38_2
627664
; CHECK-NEXT: ; %bb.1: ; %endif
628665
; CHECK-NEXT: s_mov_b32 s0, 1
629-
; CHECK-NEXT: s_branch .LBB36_3
630-
; CHECK-NEXT: .LBB36_2: ; %if
666+
; CHECK-NEXT: s_branch .LBB38_3
667+
; CHECK-NEXT: .LBB38_2: ; %if
631668
; CHECK-NEXT: s_mov_b32 s0, 0
632-
; CHECK-NEXT: s_branch .LBB36_3
633-
; CHECK-NEXT: .LBB36_3:
669+
; CHECK-NEXT: s_branch .LBB38_3
670+
; CHECK-NEXT: .LBB38_3:
634671
%cmp = icmp ne ptr addrspace(4) @1, null
635672
br i1 %cmp, label %endif, label %if
636673

0 commit comments

Comments
 (0)