From 74a1f98e1a01ded01cbb3c46574a60f16f032700 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 6 Nov 2025 14:09:45 +0000 Subject: [PATCH 1/4] [CodeGenPrepare] sinkCmpExpression - don't sink larger than legal integer comparisons A generic alternative to #166564 - make the assumption that expanding integer comparisons will be expensive if they are larger than the largest legal type. Thumb codegen seems to suffer more than most. Fixes #166534 --- llvm/lib/CodeGen/CodeGenPrepare.cpp | 11 ++- llvm/test/CodeGen/ARM/consthoist-icmpimm.ll | 40 +++++---- llvm/test/CodeGen/RISCV/branch-on-zero.ll | 7 +- .../test/CodeGen/RISCV/overflow-intrinsics.ll | 82 +++++++++---------- .../RISCV/rvv/vxrm-insert-out-of-loop.ll | 2 +- .../X86/2012-01-10-UndefExceptionEdge.ll | 55 ++++++------- llvm/test/CodeGen/X86/pr166534.ll | 68 ++++----------- 7 files changed, 116 insertions(+), 149 deletions(-) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 0309e225d9df4..9434de8f02e0a 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -1839,7 +1839,8 @@ bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) { /// lose; some adjustment may be wanted there. /// /// Return true if any changes are made. -static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) { +static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI, + const DataLayout &DL) { if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType()))) return false; @@ -1847,6 +1848,12 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) { if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp)) return false; + // Avoid sinking larger than legal integer comparisons. + if (Cmp->getOperand(0)->getType()->isIntegerTy() && + Cmp->getOperand(0)->getType()->getScalarSizeInBits() > + DL.getLargestLegalIntTypeSizeInBits()) + return false; + // Only insert a cmp in each block once. 
DenseMap InsertedCmps; @@ -2224,7 +2231,7 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) { } bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) { - if (sinkCmpExpression(Cmp, *TLI)) + if (sinkCmpExpression(Cmp, *TLI, *DL)) return true; if (combineToUAddWithOverflow(Cmp, ModifiedDT)) diff --git a/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll b/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll index 16b7403bdb932..e64707769b809 100644 --- a/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll +++ b/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll @@ -39,46 +39,50 @@ define i32 @icmp64_sge_0(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) { ; ; CHECKV7M-LABEL: icmp64_sge_0: ; CHECKV7M: @ %bb.0: +; CHECKV7M-NEXT: mvns r0, r1 +; CHECKV7M-NEXT: lsrs r2, r0, #31 ; CHECKV7M-NEXT: ldr r0, [sp, #8] ; CHECKV7M-NEXT: lsls r0, r0, #31 -; CHECKV7M-NEXT: ldrd r2, r0, [sp] +; CHECKV7M-NEXT: ldrd r1, r0, [sp] ; CHECKV7M-NEXT: beq .LBB0_2 ; CHECKV7M-NEXT: @ %bb.1: @ %then +; CHECKV7M-NEXT: cmp r2, #0 +; CHECKV7M-NEXT: mov r2, r0 +; CHECKV7M-NEXT: it ne +; CHECKV7M-NEXT: movne r2, r1 ; CHECKV7M-NEXT: cmp.w r3, #-1 -; CHECKV7M-NEXT: mov r3, r0 -; CHECKV7M-NEXT: it gt -; CHECKV7M-NEXT: movgt r3, r2 -; CHECKV7M-NEXT: cmp.w r1, #-1 ; CHECKV7M-NEXT: it gt -; CHECKV7M-NEXT: movgt r0, r2 -; CHECKV7M-NEXT: add r0, r3 +; CHECKV7M-NEXT: movgt r0, r1 +; CHECKV7M-NEXT: add r0, r2 ; CHECKV7M-NEXT: bx lr ; CHECKV7M-NEXT: .LBB0_2: @ %else -; CHECKV7M-NEXT: cmp.w r1, #-1 -; CHECKV7M-NEXT: it gt -; CHECKV7M-NEXT: movgt r0, r2 +; CHECKV7M-NEXT: cmp r2, #0 +; CHECKV7M-NEXT: it ne +; CHECKV7M-NEXT: movne r0, r1 ; CHECKV7M-NEXT: bx lr ; ; CHECKV7A-LABEL: icmp64_sge_0: ; CHECKV7A: @ %bb.0: ; CHECKV7A-NEXT: ldr r2, [sp, #8] +; CHECKV7A-NEXT: mvns r1, r1 ; CHECKV7A-NEXT: ldrd r12, r0, [sp] +; CHECKV7A-NEXT: lsrs r1, r1, #31 ; CHECKV7A-NEXT: lsls r2, r2, #31 ; CHECKV7A-NEXT: beq .LBB0_2 ; CHECKV7A-NEXT: @ %bb.1: @ %then +; CHECKV7A-NEXT: cmp r1, #0 +; CHECKV7A-NEXT: mov r1, r0 +; CHECKV7A-NEXT: it ne +; 
CHECKV7A-NEXT: movne r1, r12 ; CHECKV7A-NEXT: cmp.w r3, #-1 -; CHECKV7A-NEXT: mov r2, r0 -; CHECKV7A-NEXT: it gt -; CHECKV7A-NEXT: movgt r2, r12 -; CHECKV7A-NEXT: cmp.w r1, #-1 ; CHECKV7A-NEXT: it gt ; CHECKV7A-NEXT: movgt r0, r12 -; CHECKV7A-NEXT: add r0, r2 +; CHECKV7A-NEXT: add r0, r1 ; CHECKV7A-NEXT: bx lr ; CHECKV7A-NEXT: .LBB0_2: @ %else -; CHECKV7A-NEXT: cmp.w r1, #-1 -; CHECKV7A-NEXT: it gt -; CHECKV7A-NEXT: movgt r0, r12 +; CHECKV7A-NEXT: cmp r1, #0 +; CHECKV7A-NEXT: it ne +; CHECKV7A-NEXT: movne r0, r12 ; CHECKV7A-NEXT: bx lr br i1 %c, label %then, label %else then: diff --git a/llvm/test/CodeGen/RISCV/branch-on-zero.ll b/llvm/test/CodeGen/RISCV/branch-on-zero.ll index 02aeebdeb3775..5524300fd36be 100644 --- a/llvm/test/CodeGen/RISCV/branch-on-zero.ll +++ b/llvm/test/CodeGen/RISCV/branch-on-zero.ll @@ -41,11 +41,12 @@ define i64 @optbranch_64(i64 %Arg) { ; RV32-NEXT: seqz a2, a0 ; RV32-NEXT: add a1, a1, a2 ; RV32-NEXT: or a2, a0, a1 -; RV32-NEXT: bnez a2, .LBB1_2 -; RV32-NEXT: # %bb.1: # %bb2 +; RV32-NEXT: beqz a2, .LBB1_2 +; RV32-NEXT: # %bb.1: # %bb3 +; RV32-NEXT: ret +; RV32-NEXT: .LBB1_2: # %bb2 ; RV32-NEXT: li a0, -1 ; RV32-NEXT: li a1, -1 -; RV32-NEXT: .LBB1_2: # %bb3 ; RV32-NEXT: ret ; ; RV64-LABEL: optbranch_64: diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll index ba6769b2aa3e1..3014c2a524a5e 100644 --- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll @@ -232,22 +232,22 @@ define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp { ret i64 %Q } -; TODO? CGP sinks the compare before we have a chance to form the overflow intrinsic. +; Ensure CGP doesn't sink the compare before we have a chance to form the overflow intrinsic. 
define i64 @uaddo4(i64 %a, i64 %b, i1 %c) nounwind ssp { ; RV32-LABEL: uaddo4: ; RV32: # %bb.0: # %entry -; RV32-NEXT: andi a4, a4, 1 -; RV32-NEXT: beqz a4, .LBB6_6 -; RV32-NEXT: # %bb.1: # %next ; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a0, a2, a0 ; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: beq a3, a1, .LBB6_3 -; RV32-NEXT: # %bb.2: # %next +; RV32-NEXT: andi a4, a4, 1 +; RV32-NEXT: beq a3, a1, .LBB6_2 +; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: sltu a0, a1, a3 -; RV32-NEXT: .LBB6_3: # %next +; RV32-NEXT: .LBB6_2: # %entry +; RV32-NEXT: beqz a4, .LBB6_6 +; RV32-NEXT: # %bb.3: # %next ; RV32-NEXT: bnez a0, .LBB6_5 ; RV32-NEXT: # %bb.4: # %next ; RV32-NEXT: li a2, 42 @@ -292,19 +292,19 @@ exit: define i64 @uaddo5(i64 %a, i64 %b, ptr %ptr, i1 %c) nounwind ssp { ; RV32-LABEL: uaddo5: ; RV32: # %bb.0: # %entry +; RV32-NEXT: add a6, a3, a1 +; RV32-NEXT: add a1, a2, a0 +; RV32-NEXT: sltu a0, a1, a2 +; RV32-NEXT: add a6, a6, a0 ; RV32-NEXT: andi a5, a5, 1 -; RV32-NEXT: add a1, a3, a1 -; RV32-NEXT: add a6, a2, a0 -; RV32-NEXT: sltu a0, a6, a2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: sw a6, 0(a4) -; RV32-NEXT: sw a1, 4(a4) +; RV32-NEXT: beq a6, a3, .LBB7_2 +; RV32-NEXT: # %bb.1: # %entry +; RV32-NEXT: sltu a0, a6, a3 +; RV32-NEXT: .LBB7_2: # %entry +; RV32-NEXT: sw a1, 0(a4) +; RV32-NEXT: sw a6, 4(a4) ; RV32-NEXT: beqz a5, .LBB7_6 -; RV32-NEXT: # %bb.1: # %next -; RV32-NEXT: beq a3, a1, .LBB7_3 -; RV32-NEXT: # %bb.2: # %next -; RV32-NEXT: sltu a0, a1, a3 -; RV32-NEXT: .LBB7_3: # %next +; RV32-NEXT: # %bb.3: # %next ; RV32-NEXT: bnez a0, .LBB7_5 ; RV32-NEXT: # %bb.4: # %next ; RV32-NEXT: li a2, 42 @@ -1076,41 +1076,37 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) { ; RV32-NEXT: .cfi_offset s4, -24 ; RV32-NEXT: .cfi_offset s5, -28 ; RV32-NEXT: .cfi_offset s6, -32 -; RV32-NEXT: mv s5, a5 -; RV32-NEXT: mv s3, a1 +; RV32-NEXT: mv s1, a5 +; RV32-NEXT: mv s4, a1 ; RV32-NEXT: andi a1, a5, 1 -; RV32-NEXT: beqz a1, 
.LBB32_8 +; RV32-NEXT: beqz a1, .LBB32_6 ; RV32-NEXT: # %bb.1: # %t ; RV32-NEXT: mv s0, a4 -; RV32-NEXT: mv s2, a3 -; RV32-NEXT: mv s1, a2 -; RV32-NEXT: mv s4, a0 -; RV32-NEXT: beq s3, a3, .LBB32_3 +; RV32-NEXT: mv s3, a3 +; RV32-NEXT: mv s2, a2 +; RV32-NEXT: mv s5, a0 +; RV32-NEXT: beq s4, a3, .LBB32_3 ; RV32-NEXT: # %bb.2: # %t -; RV32-NEXT: sltu s6, s3, s2 +; RV32-NEXT: sltu s6, s4, s3 ; RV32-NEXT: j .LBB32_4 ; RV32-NEXT: .LBB32_3: -; RV32-NEXT: sltu s6, s4, s1 +; RV32-NEXT: sltu s6, s5, s2 ; RV32-NEXT: .LBB32_4: # %t ; RV32-NEXT: mv a0, s6 ; RV32-NEXT: call call -; RV32-NEXT: beqz s6, .LBB32_8 +; RV32-NEXT: beqz s6, .LBB32_6 ; RV32-NEXT: # %bb.5: # %end -; RV32-NEXT: sltu a1, s4, s1 -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: beq s3, s2, .LBB32_7 -; RV32-NEXT: # %bb.6: # %end -; RV32-NEXT: sltu a0, s3, s2 -; RV32-NEXT: .LBB32_7: # %end -; RV32-NEXT: sub a2, s3, s2 -; RV32-NEXT: sub a3, s4, s1 -; RV32-NEXT: sub a2, a2, a1 -; RV32-NEXT: sw a3, 0(s0) -; RV32-NEXT: sw a2, 4(s0) -; RV32-NEXT: j .LBB32_9 -; RV32-NEXT: .LBB32_8: # %f -; RV32-NEXT: mv a0, s5 -; RV32-NEXT: .LBB32_9: # %f +; RV32-NEXT: sltu a0, s5, s2 +; RV32-NEXT: sub a1, s4, s3 +; RV32-NEXT: sub a2, s5, s2 +; RV32-NEXT: sub a1, a1, a0 +; RV32-NEXT: sw a2, 0(s0) +; RV32-NEXT: sw a1, 4(s0) +; RV32-NEXT: mv a0, s6 +; RV32-NEXT: j .LBB32_7 +; RV32-NEXT: .LBB32_6: # %f +; RV32-NEXT: mv a0, s1 +; RV32-NEXT: .LBB32_7: # %f ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll index ead79fcf53d8b..67dac88b86560 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll @@ -59,6 +59,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV32-NEXT: sltu t5, a0, t6 ; RV32-NEXT: sltu t6, a2, t3 ; 
RV32-NEXT: and t5, t5, t6 +; RV32-NEXT: sltu t1, a6, t1 ; RV32-NEXT: sltu t4, a0, t4 ; RV32-NEXT: sltu t3, a4, t3 ; RV32-NEXT: and t3, t4, t3 @@ -66,7 +67,6 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV32-NEXT: srli t4, t4, 31 ; RV32-NEXT: or t4, t5, t4 ; RV32-NEXT: or t5, a1, a5 -; RV32-NEXT: sltu t1, a6, t1 ; RV32-NEXT: srli t5, t5, 31 ; RV32-NEXT: or t3, t3, t5 ; RV32-NEXT: or t3, t4, t3 diff --git a/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll b/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll index 1962ddebc2115..5a5feaa7734e9 100644 --- a/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll +++ b/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll @@ -34,16 +34,16 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr ; CHECK-NEXT: .cfi_offset %edi, -16 ; CHECK-NEXT: .cfi_offset %ebx, -12 ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: Ltmp0: -; CHECK-NEXT: ## implicit-def: $ebx +; CHECK-NEXT: Ltmp0: ## EH_LABEL +; CHECK-NEXT: ## implicit-def: $edi ; CHECK-NEXT: calll __Znam -; CHECK-NEXT: Ltmp1: +; CHECK-NEXT: Ltmp1: ## EH_LABEL ; CHECK-NEXT: ## %bb.1: ## %bb11 ; CHECK-NEXT: movl %eax, %esi -; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: movb $1, %bl +; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: jne LBB0_2 ; CHECK-NEXT: ## %bb.7: ## %bb31 ; CHECK-NEXT: ## implicit-def: $eax @@ -53,23 +53,20 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr ; CHECK-NEXT: ## Child Loop BB0_13 Depth 2 ; CHECK-NEXT: ## Child Loop BB0_16 Depth 3 ; CHECK-NEXT: ## Child Loop BB0_21 Depth 2 -; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: jne LBB0_9 ; CHECK-NEXT: ## %bb.10: ## %bb41 ; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1 -; CHECK-NEXT: Ltmp2: +; 
CHECK-NEXT: Ltmp2: ## EH_LABEL ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %esi, (%esp) ; CHECK-NEXT: calll _Pjii -; CHECK-NEXT: Ltmp3: +; CHECK-NEXT: Ltmp3: ## EH_LABEL ; CHECK-NEXT: ## %bb.11: ## %bb42 ; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1 -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: decl %eax -; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: jne LBB0_18 ; CHECK-NEXT: ## %bb.12: ## %bb45.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1 @@ -78,8 +75,7 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr ; CHECK-NEXT: ## Parent Loop BB0_8 Depth=1 ; CHECK-NEXT: ## => This Loop Header: Depth=2 ; CHECK-NEXT: ## Child Loop BB0_16 Depth 3 -; CHECK-NEXT: movb $1, %cl -; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: jne LBB0_19 ; CHECK-NEXT: ## %bb.14: ## %bb48 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=2 @@ -88,14 +84,14 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=2 ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: movl %esi, %edx -; CHECK-NEXT: movl %edi, %ebx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload ; CHECK-NEXT: LBB0_16: ## %bb49 ; CHECK-NEXT: ## Parent Loop BB0_8 Depth=1 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=2 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=3 ; CHECK-NEXT: incl %ecx ; CHECK-NEXT: addl $4, %edx -; CHECK-NEXT: decl %ebx +; CHECK-NEXT: decl %edi ; CHECK-NEXT: jne LBB0_16 ; CHECK-NEXT: LBB0_17: ## %bb57 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=2 @@ -107,13 +103,12 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, (%esp) ; CHECK-NEXT: calll ___bzero -; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: jne 
LBB0_22 ; CHECK-NEXT: ## %bb.20: ## %bb61.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1 ; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload ; CHECK-NEXT: LBB0_21: ## %bb61 ; CHECK-NEXT: ## Parent Loop BB0_8 Depth=1 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 @@ -126,24 +121,24 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr ; CHECK-NEXT: decl {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; CHECK-NEXT: jmp LBB0_8 ; CHECK-NEXT: LBB0_18: ## %bb43 -; CHECK-NEXT: Ltmp5: -; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: Ltmp5: ## EH_LABEL +; CHECK-NEXT: movl %esi, %edi ; CHECK-NEXT: calll _OnOverFlow -; CHECK-NEXT: Ltmp6: +; CHECK-NEXT: Ltmp6: ## EH_LABEL ; CHECK-NEXT: jmp LBB0_3 ; CHECK-NEXT: LBB0_2: ## %bb29 -; CHECK-NEXT: Ltmp7: -; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: Ltmp7: ## EH_LABEL +; CHECK-NEXT: movl %esi, %edi ; CHECK-NEXT: calll _OnOverFlow -; CHECK-NEXT: Ltmp8: +; CHECK-NEXT: Ltmp8: ## EH_LABEL ; CHECK-NEXT: LBB0_3: ## %bb30 ; CHECK-NEXT: ud2 ; CHECK-NEXT: LBB0_4: ## %bb20.loopexit -; CHECK-NEXT: Ltmp4: +; CHECK-NEXT: Ltmp4: ## EH_LABEL ; CHECK-NEXT: LBB0_9: -; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movl %esi, %edi ; CHECK-NEXT: LBB0_6: ## %bb23 -; CHECK-NEXT: testl %ebx, %ebx +; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: addl $28, %esp ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %edi @@ -151,7 +146,7 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: retl ; CHECK-NEXT: LBB0_5: ## %bb20.loopexit.split-lp -; CHECK-NEXT: Ltmp9: +; CHECK-NEXT: Ltmp9: ## EH_LABEL ; CHECK-NEXT: jmp LBB0_6 ; CHECK-NEXT: Lfunc_end0: bb: diff --git a/llvm/test/CodeGen/X86/pr166534.ll b/llvm/test/CodeGen/X86/pr166534.ll index aef44cc3e40d0..162a0c93bfcf4 100644 --- a/llvm/test/CodeGen/X86/pr166534.ll +++ b/llvm/test/CodeGen/X86/pr166534.ll @@ -7,100 +7,64 @@ define void 
@pr166534(ptr %pa, ptr %pb, ptr %pc, ptr %pd) { ; SSE2-LABEL: pr166534: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: movq (%rdi), %rax -; SSE2-NEXT: movq 8(%rdi), %r8 ; SSE2-NEXT: movdqu (%rdi), %xmm0 -; SSE2-NEXT: movq (%rsi), %r9 -; SSE2-NEXT: movq 8(%rsi), %rdi ; SSE2-NEXT: movdqu (%rsi), %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %esi -; SSE2-NEXT: xorl %r10d, %r10d +; SSE2-NEXT: xorl %eax, %eax ; SSE2-NEXT: cmpl $65535, %esi # imm = 0xFFFF -; SSE2-NEXT: sete %r10b -; SSE2-NEXT: orq %r10, (%rdx) +; SSE2-NEXT: sete %al +; SSE2-NEXT: orq %rax, (%rdx) ; SSE2-NEXT: cmpl $65535, %esi # imm = 0xFFFF ; SSE2-NEXT: jne .LBB0_2 ; SSE2-NEXT: # %bb.1: # %if.then -; SSE2-NEXT: xorq %r9, %rax -; SSE2-NEXT: xorq %rdi, %r8 -; SSE2-NEXT: xorl %edx, %edx -; SSE2-NEXT: orq %rax, %r8 -; SSE2-NEXT: sete %dl -; SSE2-NEXT: orq %rdx, (%rcx) +; SSE2-NEXT: orq %rax, (%rcx) ; SSE2-NEXT: .LBB0_2: # %if.end ; SSE2-NEXT: retq ; ; SSE4-LABEL: pr166534: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movq (%rdi), %rax -; SSE4-NEXT: movq 8(%rdi), %r8 ; SSE4-NEXT: movdqu (%rdi), %xmm0 -; SSE4-NEXT: movq (%rsi), %r9 -; SSE4-NEXT: movq 8(%rsi), %rdi ; SSE4-NEXT: movdqu (%rsi), %xmm1 ; SSE4-NEXT: pxor %xmm0, %xmm1 -; SSE4-NEXT: xorl %esi, %esi +; SSE4-NEXT: xorl %eax, %eax ; SSE4-NEXT: ptest %xmm1, %xmm1 -; SSE4-NEXT: sete %sil -; SSE4-NEXT: orq %rsi, (%rdx) +; SSE4-NEXT: sete %al +; SSE4-NEXT: orq %rax, (%rdx) ; SSE4-NEXT: ptest %xmm1, %xmm1 ; SSE4-NEXT: jne .LBB0_2 ; SSE4-NEXT: # %bb.1: # %if.then -; SSE4-NEXT: xorq %r9, %rax -; SSE4-NEXT: xorq %rdi, %r8 -; SSE4-NEXT: xorl %edx, %edx -; SSE4-NEXT: orq %rax, %r8 -; SSE4-NEXT: sete %dl -; SSE4-NEXT: orq %rdx, (%rcx) +; SSE4-NEXT: orq %rax, (%rcx) ; SSE4-NEXT: .LBB0_2: # %if.end ; SSE4-NEXT: retq ; ; AVX2-LABEL: pr166534: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: movq (%rdi), %rax -; AVX2-NEXT: movq 8(%rdi), %r8 ; AVX2-NEXT: vmovdqu (%rdi), %xmm0 -; AVX2-NEXT: movq (%rsi), %rdi ; AVX2-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; AVX2-NEXT: movq 
8(%rsi), %rsi -; AVX2-NEXT: xorl %r9d, %r9d +; AVX2-NEXT: xorl %eax, %eax ; AVX2-NEXT: vptest %xmm0, %xmm0 -; AVX2-NEXT: sete %r9b -; AVX2-NEXT: orq %r9, (%rdx) +; AVX2-NEXT: sete %al +; AVX2-NEXT: orq %rax, (%rdx) ; AVX2-NEXT: vptest %xmm0, %xmm0 ; AVX2-NEXT: jne .LBB0_2 ; AVX2-NEXT: # %bb.1: # %if.then -; AVX2-NEXT: xorq %rdi, %rax -; AVX2-NEXT: xorq %rsi, %r8 -; AVX2-NEXT: xorl %edx, %edx -; AVX2-NEXT: orq %rax, %r8 -; AVX2-NEXT: sete %dl -; AVX2-NEXT: orq %rdx, (%rcx) +; AVX2-NEXT: orq %rax, (%rcx) ; AVX2-NEXT: .LBB0_2: # %if.end ; AVX2-NEXT: retq ; ; AVX512-LABEL: pr166534: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: movq (%rdi), %rax -; AVX512-NEXT: movq 8(%rdi), %r8 ; AVX512-NEXT: vmovdqu (%rdi), %xmm0 -; AVX512-NEXT: movq (%rsi), %r9 -; AVX512-NEXT: movq 8(%rsi), %rdi ; AVX512-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; AVX512-NEXT: xorl %esi, %esi +; AVX512-NEXT: xorl %eax, %eax ; AVX512-NEXT: vptest %xmm0, %xmm0 -; AVX512-NEXT: sete %sil -; AVX512-NEXT: orq %rsi, (%rdx) +; AVX512-NEXT: sete %al +; AVX512-NEXT: orq %rax, (%rdx) ; AVX512-NEXT: vptest %xmm0, %xmm0 ; AVX512-NEXT: jne .LBB0_2 ; AVX512-NEXT: # %bb.1: # %if.then -; AVX512-NEXT: xorq %r9, %rax -; AVX512-NEXT: xorq %rdi, %r8 -; AVX512-NEXT: xorl %edx, %edx -; AVX512-NEXT: orq %rax, %r8 -; AVX512-NEXT: sete %dl -; AVX512-NEXT: orq %rdx, (%rcx) +; AVX512-NEXT: orq %rax, (%rcx) ; AVX512-NEXT: .LBB0_2: # %if.end ; AVX512-NEXT: retq entry: From 1b91173d6560289e10e18567f39b9ad74b3cdd42 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 10 Nov 2025 11:41:10 +0000 Subject: [PATCH 2/4] branch-on-zero.ll - regenerate test --- llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll index 25dfb3c53a077..d08439ea3c8d3 100644 --- a/llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll +++ 
b/llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes='require,function(codegenprepare)' < %s | FileCheck %s +; RUN: opt -S -passes="require,function(codegenprepare)" < %s | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main-none-eabi" @@ -260,7 +260,8 @@ else: define i64 @lshr64(i64 %a) { ; CHECK-LABEL: @lshr64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[L:%.*]] = lshr i64 [[A:%.*]], 40 +; CHECK-NEXT: [[C:%.*]] = icmp ult i64 [[A:%.*]], 1099511627776 +; CHECK-NEXT: [[L:%.*]] = lshr i64 [[A]], 40 ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[L]], 0 ; CHECK-NEXT: br i1 [[TMP0]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: @@ -283,7 +284,8 @@ else: define i128 @lshr128(i128 %a) { ; CHECK-LABEL: @lshr128( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[L:%.*]] = lshr i128 [[A:%.*]], 65 +; CHECK-NEXT: [[C:%.*]] = icmp ult i128 [[A:%.*]], 36893488147419103232 +; CHECK-NEXT: [[L:%.*]] = lshr i128 [[A]], 65 ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i128 [[L]], 0 ; CHECK-NEXT: br i1 [[TMP0]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: From 4e9b617ac52ff37cdd35d9ac3823ceed9b5b809a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 10 Nov 2025 12:02:55 +0000 Subject: [PATCH 3/4] Only sink larger than legal integer comparisons if its only used in other blocks --- llvm/lib/CodeGen/CodeGenPrepare.cpp | 10 ++++- llvm/test/CodeGen/ARM/consthoist-icmpimm.ll | 40 +++++++++---------- llvm/test/CodeGen/RISCV/branch-on-zero.ll | 7 ++-- .../test/CodeGen/RISCV/overflow-intrinsics.ll | 34 ++++++++-------- .../RISCV/rvv/vxrm-insert-out-of-loop.ll | 2 +- .../X86/2012-01-10-UndefExceptionEdge.ll | 35 +++++++++------- .../CodeGenPrepare/ARM/branch-on-zero.ll | 6 +-- 7 files changed, 69 insertions(+), 65 deletions(-) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 
9434de8f02e0a..b6dd174f9be80 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -1848,8 +1848,14 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI, if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp)) return false; - // Avoid sinking larger than legal integer comparisons. - if (Cmp->getOperand(0)->getType()->isIntegerTy() && + bool UsedInPhiOrCurrentBlock = any_of(Cmp->users(), [Cmp](User *U) { + return isa<PHINode>(U) || + cast<Instruction>(U)->getParent() == Cmp->getParent(); + }); + + // Avoid sinking larger than legal integer comparisons unless it's ONLY used in + // another BB. + if (UsedInPhiOrCurrentBlock && Cmp->getOperand(0)->getType()->isIntegerTy() && Cmp->getOperand(0)->getType()->getScalarSizeInBits() > DL.getLargestLegalIntTypeSizeInBits()) return false; diff --git a/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll b/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll index e64707769b809..16b7403bdb932 100644 --- a/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll +++ b/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll @@ -39,50 +39,46 @@ define i32 @icmp64_sge_0(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) { ; ; CHECKV7M-LABEL: icmp64_sge_0: ; CHECKV7M: @ %bb.0: -; CHECKV7M-NEXT: mvns r0, r1 -; CHECKV7M-NEXT: lsrs r2, r0, #31 ; CHECKV7M-NEXT: ldr r0, [sp, #8] ; CHECKV7M-NEXT: lsls r0, r0, #31 -; CHECKV7M-NEXT: ldrd r1, r0, [sp] +; CHECKV7M-NEXT: ldrd r2, r0, [sp] ; CHECKV7M-NEXT: beq .LBB0_2 ; CHECKV7M-NEXT: @ %bb.1: @ %then -; CHECKV7M-NEXT: cmp r2, #0 -; CHECKV7M-NEXT: mov r2, r0 -; CHECKV7M-NEXT: it ne -; CHECKV7M-NEXT: movne r2, r1 ; CHECKV7M-NEXT: cmp.w r3, #-1 +; CHECKV7M-NEXT: mov r3, r0 ; CHECKV7M-NEXT: it gt -; CHECKV7M-NEXT: movgt r0, r1 -; CHECKV7M-NEXT: add r0, r2 +; CHECKV7M-NEXT: movgt r3, r2 +; CHECKV7M-NEXT: cmp.w r1, #-1 +; CHECKV7M-NEXT: it gt +; CHECKV7M-NEXT: movgt r0, r2 +; CHECKV7M-NEXT: add r0, r3 ; CHECKV7M-NEXT: bx lr ; CHECKV7M-NEXT: .LBB0_2: @ %else -; CHECKV7M-NEXT: cmp r2, #0 -; CHECKV7M-NEXT: it ne -; CHECKV7M-NEXT: movne r0, 
r1 +; CHECKV7M-NEXT: cmp.w r1, #-1 +; CHECKV7M-NEXT: it gt +; CHECKV7M-NEXT: movgt r0, r2 ; CHECKV7M-NEXT: bx lr ; ; CHECKV7A-LABEL: icmp64_sge_0: ; CHECKV7A: @ %bb.0: ; CHECKV7A-NEXT: ldr r2, [sp, #8] -; CHECKV7A-NEXT: mvns r1, r1 ; CHECKV7A-NEXT: ldrd r12, r0, [sp] -; CHECKV7A-NEXT: lsrs r1, r1, #31 ; CHECKV7A-NEXT: lsls r2, r2, #31 ; CHECKV7A-NEXT: beq .LBB0_2 ; CHECKV7A-NEXT: @ %bb.1: @ %then -; CHECKV7A-NEXT: cmp r1, #0 -; CHECKV7A-NEXT: mov r1, r0 -; CHECKV7A-NEXT: it ne -; CHECKV7A-NEXT: movne r1, r12 ; CHECKV7A-NEXT: cmp.w r3, #-1 +; CHECKV7A-NEXT: mov r2, r0 +; CHECKV7A-NEXT: it gt +; CHECKV7A-NEXT: movgt r2, r12 +; CHECKV7A-NEXT: cmp.w r1, #-1 ; CHECKV7A-NEXT: it gt ; CHECKV7A-NEXT: movgt r0, r12 -; CHECKV7A-NEXT: add r0, r1 +; CHECKV7A-NEXT: add r0, r2 ; CHECKV7A-NEXT: bx lr ; CHECKV7A-NEXT: .LBB0_2: @ %else -; CHECKV7A-NEXT: cmp r1, #0 -; CHECKV7A-NEXT: it ne -; CHECKV7A-NEXT: movne r0, r12 +; CHECKV7A-NEXT: cmp.w r1, #-1 +; CHECKV7A-NEXT: it gt +; CHECKV7A-NEXT: movgt r0, r12 ; CHECKV7A-NEXT: bx lr br i1 %c, label %then, label %else then: diff --git a/llvm/test/CodeGen/RISCV/branch-on-zero.ll b/llvm/test/CodeGen/RISCV/branch-on-zero.ll index 5524300fd36be..02aeebdeb3775 100644 --- a/llvm/test/CodeGen/RISCV/branch-on-zero.ll +++ b/llvm/test/CodeGen/RISCV/branch-on-zero.ll @@ -41,12 +41,11 @@ define i64 @optbranch_64(i64 %Arg) { ; RV32-NEXT: seqz a2, a0 ; RV32-NEXT: add a1, a1, a2 ; RV32-NEXT: or a2, a0, a1 -; RV32-NEXT: beqz a2, .LBB1_2 -; RV32-NEXT: # %bb.1: # %bb3 -; RV32-NEXT: ret -; RV32-NEXT: .LBB1_2: # %bb2 +; RV32-NEXT: bnez a2, .LBB1_2 +; RV32-NEXT: # %bb.1: # %bb2 ; RV32-NEXT: li a0, -1 ; RV32-NEXT: li a1, -1 +; RV32-NEXT: .LBB1_2: # %bb3 ; RV32-NEXT: ret ; ; RV64-LABEL: optbranch_64: diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll index 3014c2a524a5e..0306bb18c2aed 100644 --- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll @@ 
-237,17 +237,17 @@ define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp { define i64 @uaddo4(i64 %a, i64 %b, i1 %c) nounwind ssp { ; RV32-LABEL: uaddo4: ; RV32: # %bb.0: # %entry +; RV32-NEXT: andi a4, a4, 1 +; RV32-NEXT: beqz a4, .LBB6_6 +; RV32-NEXT: # %bb.1: # %next ; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a0, a2, a0 ; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: andi a4, a4, 1 -; RV32-NEXT: beq a3, a1, .LBB6_2 -; RV32-NEXT: # %bb.1: # %entry +; RV32-NEXT: beq a3, a1, .LBB6_3 +; RV32-NEXT: # %bb.2: # %next ; RV32-NEXT: sltu a0, a1, a3 -; RV32-NEXT: .LBB6_2: # %entry -; RV32-NEXT: beqz a4, .LBB6_6 -; RV32-NEXT: # %bb.3: # %next +; RV32-NEXT: .LBB6_3: # %next ; RV32-NEXT: bnez a0, .LBB6_5 ; RV32-NEXT: # %bb.4: # %next ; RV32-NEXT: li a2, 42 @@ -292,19 +292,19 @@ exit: define i64 @uaddo5(i64 %a, i64 %b, ptr %ptr, i1 %c) nounwind ssp { ; RV32-LABEL: uaddo5: ; RV32: # %bb.0: # %entry -; RV32-NEXT: add a6, a3, a1 -; RV32-NEXT: add a1, a2, a0 -; RV32-NEXT: sltu a0, a1, a2 -; RV32-NEXT: add a6, a6, a0 ; RV32-NEXT: andi a5, a5, 1 -; RV32-NEXT: beq a6, a3, .LBB7_2 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: sltu a0, a6, a3 -; RV32-NEXT: .LBB7_2: # %entry -; RV32-NEXT: sw a1, 0(a4) -; RV32-NEXT: sw a6, 4(a4) +; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: add a6, a2, a0 +; RV32-NEXT: sltu a0, a6, a2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: sw a6, 0(a4) +; RV32-NEXT: sw a1, 4(a4) ; RV32-NEXT: beqz a5, .LBB7_6 -; RV32-NEXT: # %bb.3: # %next +; RV32-NEXT: # %bb.1: # %next +; RV32-NEXT: beq a3, a1, .LBB7_3 +; RV32-NEXT: # %bb.2: # %next +; RV32-NEXT: sltu a0, a1, a3 +; RV32-NEXT: .LBB7_3: # %next ; RV32-NEXT: bnez a0, .LBB7_5 ; RV32-NEXT: # %bb.4: # %next ; RV32-NEXT: li a2, 42 diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll index 67dac88b86560..ead79fcf53d8b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll @@ -59,7 +59,6 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV32-NEXT: sltu t5, a0, t6 ; RV32-NEXT: sltu t6, a2, t3 ; RV32-NEXT: and t5, t5, t6 -; RV32-NEXT: sltu t1, a6, t1 ; RV32-NEXT: sltu t4, a0, t4 ; RV32-NEXT: sltu t3, a4, t3 ; RV32-NEXT: and t3, t4, t3 @@ -67,6 +66,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV32-NEXT: srli t4, t4, 31 ; RV32-NEXT: or t4, t5, t4 ; RV32-NEXT: or t5, a1, a5 +; RV32-NEXT: sltu t1, a6, t1 ; RV32-NEXT: srli t5, t5, 31 ; RV32-NEXT: or t3, t3, t5 ; RV32-NEXT: or t3, t4, t3 diff --git a/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll b/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll index 5a5feaa7734e9..f2b4c49b1dbcd 100644 --- a/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll +++ b/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll @@ -34,16 +34,16 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr ; CHECK-NEXT: .cfi_offset %edi, -16 ; CHECK-NEXT: .cfi_offset %ebx, -12 ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: xorl %edi, %edi ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: Ltmp0: ## EH_LABEL -; CHECK-NEXT: ## implicit-def: $edi +; CHECK-NEXT: ## implicit-def: $ebx ; CHECK-NEXT: calll __Znam ; CHECK-NEXT: Ltmp1: ## EH_LABEL ; CHECK-NEXT: ## %bb.1: ## %bb11 ; CHECK-NEXT: movl %eax, %esi -; CHECK-NEXT: movb $1, %bl -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne LBB0_2 ; CHECK-NEXT: ## %bb.7: ## %bb31 ; CHECK-NEXT: ## implicit-def: $eax @@ -53,7 +53,8 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr ; CHECK-NEXT: ## Child Loop BB0_13 Depth 2 ; CHECK-NEXT: ## Child Loop BB0_16 Depth 3 ; CHECK-NEXT: ## Child Loop BB0_21 Depth 2 -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: movb $1, 
%al +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne LBB0_9 ; CHECK-NEXT: ## %bb.10: ## %bb41 ; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1 @@ -66,7 +67,9 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr ; CHECK-NEXT: Ltmp3: ## EH_LABEL ; CHECK-NEXT: ## %bb.11: ## %bb42 ; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1 -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: decl %eax +; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: jne LBB0_18 ; CHECK-NEXT: ## %bb.12: ## %bb45.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1 @@ -75,7 +78,8 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr ; CHECK-NEXT: ## Parent Loop BB0_8 Depth=1 ; CHECK-NEXT: ## => This Loop Header: Depth=2 ; CHECK-NEXT: ## Child Loop BB0_16 Depth 3 -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: movb $1, %cl +; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: jne LBB0_19 ; CHECK-NEXT: ## %bb.14: ## %bb48 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=2 @@ -84,14 +88,14 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=2 ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: movl %esi, %edx -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload +; CHECK-NEXT: movl %edi, %ebx ; CHECK-NEXT: LBB0_16: ## %bb49 ; CHECK-NEXT: ## Parent Loop BB0_8 Depth=1 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=2 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=3 ; CHECK-NEXT: incl %ecx ; CHECK-NEXT: addl $4, %edx -; CHECK-NEXT: decl %edi +; CHECK-NEXT: decl %ebx ; CHECK-NEXT: jne LBB0_16 ; CHECK-NEXT: LBB0_17: ## %bb57 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=2 @@ -103,12 +107,13 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, (%esp) ; CHECK-NEXT: calll ___bzero -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: testb %al, %al ; 
CHECK-NEXT: jne LBB0_22 ; CHECK-NEXT: ## %bb.20: ## %bb61.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1 ; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; CHECK-NEXT: movl %edi, %ecx ; CHECK-NEXT: LBB0_21: ## %bb61 ; CHECK-NEXT: ## Parent Loop BB0_8 Depth=1 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 @@ -122,13 +127,13 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr ; CHECK-NEXT: jmp LBB0_8 ; CHECK-NEXT: LBB0_18: ## %bb43 ; CHECK-NEXT: Ltmp5: ## EH_LABEL -; CHECK-NEXT: movl %esi, %edi +; CHECK-NEXT: movl %esi, %ebx ; CHECK-NEXT: calll _OnOverFlow ; CHECK-NEXT: Ltmp6: ## EH_LABEL ; CHECK-NEXT: jmp LBB0_3 ; CHECK-NEXT: LBB0_2: ## %bb29 ; CHECK-NEXT: Ltmp7: ## EH_LABEL -; CHECK-NEXT: movl %esi, %edi +; CHECK-NEXT: movl %esi, %ebx ; CHECK-NEXT: calll _OnOverFlow ; CHECK-NEXT: Ltmp8: ## EH_LABEL ; CHECK-NEXT: LBB0_3: ## %bb30 @@ -136,9 +141,9 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr ; CHECK-NEXT: LBB0_4: ## %bb20.loopexit ; CHECK-NEXT: Ltmp4: ## EH_LABEL ; CHECK-NEXT: LBB0_9: -; CHECK-NEXT: movl %esi, %edi +; CHECK-NEXT: movl %esi, %ebx ; CHECK-NEXT: LBB0_6: ## %bb23 -; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: testl %ebx, %ebx ; CHECK-NEXT: addl $28, %esp ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %edi diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll index d08439ea3c8d3..fbb24daae39e5 100644 --- a/llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll @@ -260,8 +260,7 @@ else: define i64 @lshr64(i64 %a) { ; CHECK-LABEL: @lshr64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = icmp ult i64 [[A:%.*]], 1099511627776 -; CHECK-NEXT: [[L:%.*]] = lshr i64 [[A]], 40 +; CHECK-NEXT: [[L:%.*]] = lshr i64 [[A:%.*]], 40 ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[L]], 0 ; CHECK-NEXT: br i1 
[[TMP0]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: @@ -284,8 +283,7 @@ else: define i128 @lshr128(i128 %a) { ; CHECK-LABEL: @lshr128( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = icmp ult i128 [[A:%.*]], 36893488147419103232 -; CHECK-NEXT: [[L:%.*]] = lshr i128 [[A]], 65 +; CHECK-NEXT: [[L:%.*]] = lshr i128 [[A:%.*]], 65 ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i128 [[L]], 0 ; CHECK-NEXT: br i1 [[TMP0]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: From fc9e00a5ca542c31c27aeef752ed0e38e5af3411 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 10 Nov 2025 12:07:56 +0000 Subject: [PATCH 4/4] Update branch-on-zero.ll remove unnecessary edit --- llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll index fbb24daae39e5..25dfb3c53a077 100644 --- a/llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes="require,function(codegenprepare)" < %s | FileCheck %s +; RUN: opt -S -passes='require,function(codegenprepare)' < %s | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main-none-eabi"