17 changes: 15 additions & 2 deletions llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1839,14 +1839,27 @@ bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
/// lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI,
const DataLayout &DL) {
if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
return false;

// Avoid sinking soft-FP comparisons, since this can move them into a loop.
if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
return false;

bool UsedInPhiOrCurrentBlock = any_of(Cmp->users(), [Cmp](User *U) {
return isa<PHINode>(U) ||
cast<Instruction>(U)->getParent() == Cmp->getParent();
});

// Avoid sinking larger-than-legal integer comparisons unless it's ONLY used
// in another BB.
if (UsedInPhiOrCurrentBlock && Cmp->getOperand(0)->getType()->isIntegerTy() &&
Cmp->getOperand(0)->getType()->getScalarSizeInBits() >
DL.getLargestLegalIntTypeSizeInBits())
return false;

// Only insert a cmp in each block once.
DenseMap<BasicBlock *, CmpInst *> InsertedCmps;

@@ -2224,7 +2237,7 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
}

bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
if (sinkCmpExpression(Cmp, *TLI))
if (sinkCmpExpression(Cmp, *TLI, *DL))
return true;

if (combineToUAddWithOverflow(Cmp, ModifiedDT))
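To make the new bail-out concrete, here is a minimal hypothetical example (not taken from the patch or its tests; it assumes a data layout whose largest legal integer type is i64, e.g. x86-64). Because the i128 compare already has a user in its defining block, the new check stops CGP from duplicating it into %t:

; Hypothetical illustration, not from the test suite.
define i64 @wide_cmp_not_sunk(i128 %a, i128 %b, i64 %x) {
entry:
  %cmp = icmp eq i128 %a, %b
  %z = zext i1 %cmp to i64          ; use in the defining block, so
  br i1 %cmp, label %t, label %f    ; UsedInPhiOrCurrentBlock is true

t:
  ; Cross-block use: CGP previously sank a copy of the expensive i128
  ; compare here; with this patch the single compare in %entry is reused.
  %z2 = zext i1 %cmp to i64
  %r = add i64 %z2, %x
  ret i64 %r

f:
  ret i64 %z
}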
48 changes: 22 additions & 26 deletions llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
@@ -232,7 +232,7 @@ define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
ret i64 %Q
}

; TODO? CGP sinks the compare before we have a chance to form the overflow intrinsic.
; Ensure CGP doesn't sink the compare before we have a chance to form the overflow intrinsic.

define i64 @uaddo4(i64 %a, i64 %b, i1 %c) nounwind ssp {
; RV32-LABEL: uaddo4:
@@ -1076,41 +1076,37 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
; RV32-NEXT: .cfi_offset s4, -24
; RV32-NEXT: .cfi_offset s5, -28
; RV32-NEXT: .cfi_offset s6, -32
; RV32-NEXT: mv s5, a5
; RV32-NEXT: mv s3, a1
; RV32-NEXT: mv s1, a5
; RV32-NEXT: mv s4, a1
; RV32-NEXT: andi a1, a5, 1
; RV32-NEXT: beqz a1, .LBB32_8
; RV32-NEXT: beqz a1, .LBB32_6
; RV32-NEXT: # %bb.1: # %t
; RV32-NEXT: mv s0, a4
; RV32-NEXT: mv s2, a3
; RV32-NEXT: mv s1, a2
; RV32-NEXT: mv s4, a0
; RV32-NEXT: beq s3, a3, .LBB32_3
; RV32-NEXT: mv s3, a3
; RV32-NEXT: mv s2, a2
; RV32-NEXT: mv s5, a0
; RV32-NEXT: beq s4, a3, .LBB32_3
; RV32-NEXT: # %bb.2: # %t
; RV32-NEXT: sltu s6, s3, s2
; RV32-NEXT: sltu s6, s4, s3
; RV32-NEXT: j .LBB32_4
; RV32-NEXT: .LBB32_3:
; RV32-NEXT: sltu s6, s4, s1
; RV32-NEXT: sltu s6, s5, s2
; RV32-NEXT: .LBB32_4: # %t
; RV32-NEXT: mv a0, s6
; RV32-NEXT: call call
; RV32-NEXT: beqz s6, .LBB32_8
; RV32-NEXT: beqz s6, .LBB32_6
; RV32-NEXT: # %bb.5: # %end
; RV32-NEXT: sltu a1, s4, s1
; RV32-NEXT: mv a0, a1
; RV32-NEXT: beq s3, s2, .LBB32_7
; RV32-NEXT: # %bb.6: # %end
; RV32-NEXT: sltu a0, s3, s2
; RV32-NEXT: .LBB32_7: # %end
; RV32-NEXT: sub a2, s3, s2
; RV32-NEXT: sub a3, s4, s1
; RV32-NEXT: sub a2, a2, a1
; RV32-NEXT: sw a3, 0(s0)
; RV32-NEXT: sw a2, 4(s0)
; RV32-NEXT: j .LBB32_9
; RV32-NEXT: .LBB32_8: # %f
; RV32-NEXT: mv a0, s5
; RV32-NEXT: .LBB32_9: # %f
; RV32-NEXT: sltu a0, s5, s2
; RV32-NEXT: sub a1, s4, s3
; RV32-NEXT: sub a2, s5, s2
; RV32-NEXT: sub a1, a1, a0
; RV32-NEXT: sw a2, 0(s0)
; RV32-NEXT: sw a1, 4(s0)
; RV32-NEXT: mv a0, s6
; RV32-NEXT: j .LBB32_7
; RV32-NEXT: .LBB32_6: # %f
; RV32-NEXT: mv a0, s1
; RV32-NEXT: .LBB32_7: # %f
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
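For reference, the usubo_ult_cmp_dominates_i64 pattern exercised above has roughly this shape (a sketch reconstructed from the signature and the %t/%f/%end labels in the CHECK lines; the test body itself is not shown in this diff):

declare void @call(i1)

; Sketch, not the verbatim test.
define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
entry:
  br i1 %cond, label %t, label %f

t:                        ; i64 compare dominates the matching sub in %end
  %ov = icmp ult i64 %x, %y
  call void @call(i1 %ov)
  br i1 %ov, label %end, label %f

f:
  ret i1 %cond

end:                      ; cross-block use of %ov
  %s = sub i64 %x, %y
  store i64 %s, ptr %p
  ret i1 %ov
}

On RV32, i64 is wider than the largest legal integer type, so the patch keeps the compare in %t instead of re-materializing it in %end; that is why the updated assembly returns the saved flag (mv a0, s6) rather than recomputing the two-word sltu sequence.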
68 changes: 16 additions & 52 deletions llvm/test/CodeGen/X86/pr166534.ll
@@ -7,100 +7,64 @@
define void @pr166534(ptr %pa, ptr %pb, ptr %pc, ptr %pd) {
; SSE2-LABEL: pr166534:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movq (%rdi), %rax
; SSE2-NEXT: movq 8(%rdi), %r8
; SSE2-NEXT: movdqu (%rdi), %xmm0
; SSE2-NEXT: movq (%rsi), %r9
; SSE2-NEXT: movq 8(%rsi), %rdi
; SSE2-NEXT: movdqu (%rsi), %xmm1
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; SSE2-NEXT: pmovmskb %xmm1, %esi
; SSE2-NEXT: xorl %r10d, %r10d
; SSE2-NEXT: xorl %eax, %eax
; SSE2-NEXT: cmpl $65535, %esi # imm = 0xFFFF
; SSE2-NEXT: sete %r10b
; SSE2-NEXT: orq %r10, (%rdx)
; SSE2-NEXT: sete %al
; SSE2-NEXT: orq %rax, (%rdx)
; SSE2-NEXT: cmpl $65535, %esi # imm = 0xFFFF
; SSE2-NEXT: jne .LBB0_2
; SSE2-NEXT: # %bb.1: # %if.then
; SSE2-NEXT: xorq %r9, %rax
; SSE2-NEXT: xorq %rdi, %r8
; SSE2-NEXT: xorl %edx, %edx
; SSE2-NEXT: orq %rax, %r8
; SSE2-NEXT: sete %dl
; SSE2-NEXT: orq %rdx, (%rcx)
; SSE2-NEXT: orq %rax, (%rcx)
; SSE2-NEXT: .LBB0_2: # %if.end
; SSE2-NEXT: retq
;
; SSE4-LABEL: pr166534:
; SSE4: # %bb.0: # %entry
; SSE4-NEXT: movq (%rdi), %rax
; SSE4-NEXT: movq 8(%rdi), %r8
; SSE4-NEXT: movdqu (%rdi), %xmm0
; SSE4-NEXT: movq (%rsi), %r9
; SSE4-NEXT: movq 8(%rsi), %rdi
; SSE4-NEXT: movdqu (%rsi), %xmm1
; SSE4-NEXT: pxor %xmm0, %xmm1
; SSE4-NEXT: xorl %esi, %esi
; SSE4-NEXT: xorl %eax, %eax
; SSE4-NEXT: ptest %xmm1, %xmm1
; SSE4-NEXT: sete %sil
; SSE4-NEXT: orq %rsi, (%rdx)
; SSE4-NEXT: sete %al
; SSE4-NEXT: orq %rax, (%rdx)
; SSE4-NEXT: ptest %xmm1, %xmm1
; SSE4-NEXT: jne .LBB0_2
; SSE4-NEXT: # %bb.1: # %if.then
; SSE4-NEXT: xorq %r9, %rax
; SSE4-NEXT: xorq %rdi, %r8
; SSE4-NEXT: xorl %edx, %edx
; SSE4-NEXT: orq %rax, %r8
; SSE4-NEXT: sete %dl
; SSE4-NEXT: orq %rdx, (%rcx)
; SSE4-NEXT: orq %rax, (%rcx)
; SSE4-NEXT: .LBB0_2: # %if.end
; SSE4-NEXT: retq
;
; AVX2-LABEL: pr166534:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: movq (%rdi), %rax
; AVX2-NEXT: movq 8(%rdi), %r8
; AVX2-NEXT: vmovdqu (%rdi), %xmm0
; AVX2-NEXT: movq (%rsi), %rdi
; AVX2-NEXT: vpxor (%rsi), %xmm0, %xmm0
; AVX2-NEXT: movq 8(%rsi), %rsi
; AVX2-NEXT: xorl %r9d, %r9d
; AVX2-NEXT: xorl %eax, %eax
; AVX2-NEXT: vptest %xmm0, %xmm0
; AVX2-NEXT: sete %r9b
; AVX2-NEXT: orq %r9, (%rdx)
; AVX2-NEXT: sete %al
; AVX2-NEXT: orq %rax, (%rdx)
; AVX2-NEXT: vptest %xmm0, %xmm0
; AVX2-NEXT: jne .LBB0_2
; AVX2-NEXT: # %bb.1: # %if.then
; AVX2-NEXT: xorq %rdi, %rax
; AVX2-NEXT: xorq %rsi, %r8
; AVX2-NEXT: xorl %edx, %edx
; AVX2-NEXT: orq %rax, %r8
; AVX2-NEXT: sete %dl
; AVX2-NEXT: orq %rdx, (%rcx)
; AVX2-NEXT: orq %rax, (%rcx)
; AVX2-NEXT: .LBB0_2: # %if.end
; AVX2-NEXT: retq
;
; AVX512-LABEL: pr166534:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: movq (%rdi), %rax
; AVX512-NEXT: movq 8(%rdi), %r8
; AVX512-NEXT: vmovdqu (%rdi), %xmm0
; AVX512-NEXT: movq (%rsi), %r9
; AVX512-NEXT: movq 8(%rsi), %rdi
; AVX512-NEXT: vpxor (%rsi), %xmm0, %xmm0
; AVX512-NEXT: xorl %esi, %esi
; AVX512-NEXT: xorl %eax, %eax
; AVX512-NEXT: vptest %xmm0, %xmm0
; AVX512-NEXT: sete %sil
; AVX512-NEXT: orq %rsi, (%rdx)
; AVX512-NEXT: sete %al
; AVX512-NEXT: orq %rax, (%rdx)
; AVX512-NEXT: vptest %xmm0, %xmm0
; AVX512-NEXT: jne .LBB0_2
; AVX512-NEXT: # %bb.1: # %if.then
; AVX512-NEXT: xorq %r9, %rax
; AVX512-NEXT: xorq %rdi, %r8
; AVX512-NEXT: xorl %edx, %edx
; AVX512-NEXT: orq %rax, %r8
; AVX512-NEXT: sete %dl
; AVX512-NEXT: orq %rdx, (%rcx)
; AVX512-NEXT: orq %rax, (%rcx)
; AVX512-NEXT: .LBB0_2: # %if.end
; AVX512-NEXT: retq
entry:
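The IR for this test is truncated above; reconstructed from the checked assembly it is roughly the following (value names are guesses, not copied from the test). The i128 equality has a second use in %if.then: CGP previously sank a duplicate compare there, which was then selected as scalar xor/or/sete code while the entry block used the vector form; that is where the extra 64-bit loads in the old assembly came from. With the new check the single compare in %entry is reused, so each target now emits one vector compare and or's the same flag register into both memory locations:

; Reconstruction from the checked assembly, not the verbatim test.
define void @pr166534(ptr %pa, ptr %pb, ptr %pc, ptr %pd) {
entry:
  %a = load i128, ptr %pa
  %b = load i128, ptr %pb
  %cmp = icmp eq i128 %a, %b
  %conv = zext i1 %cmp to i64
  %c = load i64, ptr %pc
  %or = or i64 %c, %conv
  store i64 %or, ptr %pc
  br i1 %cmp, label %if.then, label %if.end

if.then:                             ; cross-block use of the i128 compare
  %conv2 = zext i1 %cmp to i64
  %d = load i64, ptr %pd
  %or2 = or i64 %d, %conv2
  store i64 %or2, ptr %pd
  br label %if.end

if.end:
  ret void
}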