diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 0309e225d9df4..b6dd174f9be80 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1839,7 +1839,8 @@ bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
 /// lose; some adjustment may be wanted there.
 ///
 /// Return true if any changes are made.
-static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
+static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI,
+                              const DataLayout &DL) {
   if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
     return false;
 
@@ -1847,6 +1848,18 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
   if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
     return false;
 
+  bool UsedInPhiOrCurrentBlock = any_of(Cmp->users(), [Cmp](User *U) {
+    return isa<PHINode>(U) ||
+           cast<Instruction>(U)->getParent() == Cmp->getParent();
+  });
+
+  // Avoid sinking larger-than-legal integer comparisons unless it is ONLY used
+  // in another BB.
+  if (UsedInPhiOrCurrentBlock && Cmp->getOperand(0)->getType()->isIntegerTy() &&
+      Cmp->getOperand(0)->getType()->getScalarSizeInBits() >
+          DL.getLargestLegalIntTypeSizeInBits())
+    return false;
+
   // Only insert a cmp in each block once.
   DenseMap<BasicBlock *, CmpInst *> InsertedCmps;
 
@@ -2224,7 +2237,7 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
 }
 
 bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
-  if (sinkCmpExpression(Cmp, *TLI))
+  if (sinkCmpExpression(Cmp, *TLI, *DL))
     return true;
 
   if (combineToUAddWithOverflow(Cmp, ModifiedDT))
diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
index ba6769b2aa3e1..0306bb18c2aed 100644
--- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
@@ -232,7 +232,7 @@ define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
   ret i64 %Q
 }
 
-; TODO? CGP sinks the compare before we have a chance to form the overflow intrinsic.
+; Ensure CGP doesn't sink the compare before we have a chance to form the overflow intrinsic.
 
 define i64 @uaddo4(i64 %a, i64 %b, i1 %c) nounwind ssp {
 ; RV32-LABEL: uaddo4:
@@ -1076,41 +1076,37 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
 ; RV32-NEXT:    .cfi_offset s4, -24
 ; RV32-NEXT:    .cfi_offset s5, -28
 ; RV32-NEXT:    .cfi_offset s6, -32
-; RV32-NEXT:    mv s5, a5
-; RV32-NEXT:    mv s3, a1
+; RV32-NEXT:    mv s1, a5
+; RV32-NEXT:    mv s4, a1
 ; RV32-NEXT:    andi a1, a5, 1
-; RV32-NEXT:    beqz a1, .LBB32_8
+; RV32-NEXT:    beqz a1, .LBB32_6
 ; RV32-NEXT:  # %bb.1: # %t
 ; RV32-NEXT:    mv s0, a4
-; RV32-NEXT:    mv s2, a3
-; RV32-NEXT:    mv s1, a2
-; RV32-NEXT:    mv s4, a0
-; RV32-NEXT:    beq s3, a3, .LBB32_3
+; RV32-NEXT:    mv s3, a3
+; RV32-NEXT:    mv s2, a2
+; RV32-NEXT:    mv s5, a0
+; RV32-NEXT:    beq s4, a3, .LBB32_3
 ; RV32-NEXT:  # %bb.2: # %t
-; RV32-NEXT:    sltu s6, s3, s2
+; RV32-NEXT:    sltu s6, s4, s3
 ; RV32-NEXT:    j .LBB32_4
 ; RV32-NEXT:  .LBB32_3:
-; RV32-NEXT:    sltu s6, s4, s1
+; RV32-NEXT:    sltu s6, s5, s2
 ; RV32-NEXT:  .LBB32_4: # %t
 ; RV32-NEXT:    mv a0, s6
 ; RV32-NEXT:    call call
-; RV32-NEXT:    beqz s6, .LBB32_8
+; RV32-NEXT:    beqz s6, .LBB32_6
 ; RV32-NEXT:  # %bb.5: # %end
-; RV32-NEXT:    sltu a1, s4, s1
-; RV32-NEXT:    mv a0, a1
-; RV32-NEXT:    beq s3, s2, .LBB32_7
-; RV32-NEXT:  # %bb.6: # %end
-; RV32-NEXT:    sltu a0, s3, s2
-; RV32-NEXT:  .LBB32_7: # %end
-; RV32-NEXT:    sub a2, s3, s2
-; RV32-NEXT:    sub a3, s4, s1
-; RV32-NEXT:    sub a2, a2, a1
-; RV32-NEXT:    sw a3, 0(s0)
-; RV32-NEXT:    sw a2, 4(s0)
-; RV32-NEXT:    j .LBB32_9
-; RV32-NEXT:  .LBB32_8: # %f
-; RV32-NEXT:    mv a0, s5
-; RV32-NEXT:  .LBB32_9: # %f
+; RV32-NEXT:    sltu a0, s5, s2
+; RV32-NEXT:    sub a1, s4, s3
+; RV32-NEXT:    sub a2, s5, s2
+; RV32-NEXT:    sub a1, a1, a0
+; RV32-NEXT:    sw a2, 0(s0)
+; RV32-NEXT:    sw a1, 4(s0)
+; RV32-NEXT:    mv a0, s6
+; RV32-NEXT:    j .LBB32_7
+; RV32-NEXT:  .LBB32_6: # %f
+; RV32-NEXT:    mv a0, s1
+; RV32-NEXT:  .LBB32_7: # %f
 ; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/X86/pr166534.ll b/llvm/test/CodeGen/X86/pr166534.ll
index aef44cc3e40d0..162a0c93bfcf4 100644
--- a/llvm/test/CodeGen/X86/pr166534.ll
+++ b/llvm/test/CodeGen/X86/pr166534.ll
@@ -7,100 +7,64 @@
 define void @pr166534(ptr %pa, ptr %pb, ptr %pc, ptr %pd) {
 ; SSE2-LABEL: pr166534:
 ; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    movq (%rdi), %rax
-; SSE2-NEXT:    movq 8(%rdi), %r8
 ; SSE2-NEXT:    movdqu (%rdi), %xmm0
-; SSE2-NEXT:    movq (%rsi), %r9
-; SSE2-NEXT:    movq 8(%rsi), %rdi
 ; SSE2-NEXT:    movdqu (%rsi), %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
 ; SSE2-NEXT:    pmovmskb %xmm1, %esi
-; SSE2-NEXT:    xorl %r10d, %r10d
+; SSE2-NEXT:    xorl %eax, %eax
 ; SSE2-NEXT:    cmpl $65535, %esi # imm = 0xFFFF
-; SSE2-NEXT:    sete %r10b
-; SSE2-NEXT:    orq %r10, (%rdx)
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    orq %rax, (%rdx)
 ; SSE2-NEXT:    cmpl $65535, %esi # imm = 0xFFFF
 ; SSE2-NEXT:    jne .LBB0_2
 ; SSE2-NEXT:  # %bb.1: # %if.then
-; SSE2-NEXT:    xorq %r9, %rax
-; SSE2-NEXT:    xorq %rdi, %r8
-; SSE2-NEXT:    xorl %edx, %edx
-; SSE2-NEXT:    orq %rax, %r8
-; SSE2-NEXT:    sete %dl
-; SSE2-NEXT:    orq %rdx, (%rcx)
+; SSE2-NEXT:    orq %rax, (%rcx)
 ; SSE2-NEXT:  .LBB0_2: # %if.end
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: pr166534:
 ; SSE4:       # %bb.0: # %entry
-; SSE4-NEXT:    movq (%rdi), %rax
-; SSE4-NEXT:    movq 8(%rdi), %r8
 ; SSE4-NEXT:    movdqu (%rdi), %xmm0
-; SSE4-NEXT:    movq (%rsi), %r9
-; SSE4-NEXT:    movq 8(%rsi), %rdi
 ; SSE4-NEXT:    movdqu (%rsi), %xmm1
 ; SSE4-NEXT:    pxor %xmm0, %xmm1
-; SSE4-NEXT:    xorl %esi, %esi
+; SSE4-NEXT:    xorl %eax, %eax
 ; SSE4-NEXT:    ptest %xmm1, %xmm1
-; SSE4-NEXT:    sete %sil
-; SSE4-NEXT:    orq %rsi, (%rdx)
+; SSE4-NEXT:    sete %al
+; SSE4-NEXT:    orq %rax, (%rdx)
 ; SSE4-NEXT:    ptest %xmm1, %xmm1
 ; SSE4-NEXT:    jne .LBB0_2
 ; SSE4-NEXT:  # %bb.1: # %if.then
-; SSE4-NEXT:    xorq %r9, %rax
-; SSE4-NEXT:    xorq %rdi, %r8
-; SSE4-NEXT:    xorl %edx, %edx
-; SSE4-NEXT:    orq %rax, %r8
-; SSE4-NEXT:    sete %dl
-; SSE4-NEXT:    orq %rdx, (%rcx)
+; SSE4-NEXT:    orq %rax, (%rcx)
 ; SSE4-NEXT:  .LBB0_2: # %if.end
 ; SSE4-NEXT:    retq
 ;
 ; AVX2-LABEL: pr166534:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    movq (%rdi), %rax
-; AVX2-NEXT:    movq 8(%rdi), %r8
 ; AVX2-NEXT:    vmovdqu (%rdi), %xmm0
-; AVX2-NEXT:    movq (%rsi), %rdi
 ; AVX2-NEXT:    vpxor (%rsi), %xmm0, %xmm0
-; AVX2-NEXT:    movq 8(%rsi), %rsi
-; AVX2-NEXT:    xorl %r9d, %r9d
+; AVX2-NEXT:    xorl %eax, %eax
 ; AVX2-NEXT:    vptest %xmm0, %xmm0
-; AVX2-NEXT:    sete %r9b
-; AVX2-NEXT:    orq %r9, (%rdx)
+; AVX2-NEXT:    sete %al
+; AVX2-NEXT:    orq %rax, (%rdx)
 ; AVX2-NEXT:    vptest %xmm0, %xmm0
 ; AVX2-NEXT:    jne .LBB0_2
 ; AVX2-NEXT:  # %bb.1: # %if.then
-; AVX2-NEXT:    xorq %rdi, %rax
-; AVX2-NEXT:    xorq %rsi, %r8
-; AVX2-NEXT:    xorl %edx, %edx
-; AVX2-NEXT:    orq %rax, %r8
-; AVX2-NEXT:    sete %dl
-; AVX2-NEXT:    orq %rdx, (%rcx)
+; AVX2-NEXT:    orq %rax, (%rcx)
 ; AVX2-NEXT:  .LBB0_2: # %if.end
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: pr166534:
 ; AVX512:       # %bb.0: # %entry
-; AVX512-NEXT:    movq (%rdi), %rax
-; AVX512-NEXT:    movq 8(%rdi), %r8
 ; AVX512-NEXT:    vmovdqu (%rdi), %xmm0
-; AVX512-NEXT:    movq (%rsi), %r9
-; AVX512-NEXT:    movq 8(%rsi), %rdi
 ; AVX512-NEXT:    vpxor (%rsi), %xmm0, %xmm0
-; AVX512-NEXT:    xorl %esi, %esi
+; AVX512-NEXT:    xorl %eax, %eax
 ; AVX512-NEXT:    vptest %xmm0, %xmm0
-; AVX512-NEXT:    sete %sil
-; AVX512-NEXT:    orq %rsi, (%rdx)
+; AVX512-NEXT:    sete %al
+; AVX512-NEXT:    orq %rax, (%rdx)
 ; AVX512-NEXT:    vptest %xmm0, %xmm0
 ; AVX512-NEXT:    jne .LBB0_2
 ; AVX512-NEXT:  # %bb.1: # %if.then
-; AVX512-NEXT:    xorq %r9, %rax
-; AVX512-NEXT:    xorq %rdi, %r8
-; AVX512-NEXT:    xorl %edx, %edx
-; AVX512-NEXT:    orq %rax, %r8
-; AVX512-NEXT:    sete %dl
-; AVX512-NEXT:    orq %rdx, (%rcx)
+; AVX512-NEXT:    orq %rax, (%rcx)
 ; AVX512-NEXT:  .LBB0_2: # %if.end
 ; AVX512-NEXT:    retq
 entry: