From 03aa99e178b06c7e70330ca5ba8b9eb932183cce Mon Sep 17 00:00:00 2001 From: Jin Huang Date: Mon, 3 Nov 2025 20:47:31 +0000 Subject: [PATCH] [profcheck] Add unknown branch weights to expand icmp loop. --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 12 ++++++-- .../AArch64/expand-atomicrmw-xchg-fp.ll | 28 ++++++++++++++----- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 53f1cfe24a68d..6412949948c07 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -38,6 +38,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/MemoryModelRelaxationAnnotations.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -1259,8 +1260,7 @@ Value *AtomicExpandImpl::insertRMWLLSCLoop( BasicBlock *BB = Builder.GetInsertBlock(); Function *F = BB->getParent(); - assert(AddrAlign >= - F->getDataLayout().getTypeStoreSize(ResultTy) && + assert(AddrAlign >= F->getDataLayout().getTypeStoreSize(ResultTy) && "Expected at least natural alignment at this point."); // Given: atomicrmw some_op iN* %addr, iN %incr ordering @@ -1295,7 +1295,13 @@ Value *AtomicExpandImpl::insertRMWLLSCLoop( TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder); Value *TryAgain = Builder.CreateICmpNE( StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); - Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); + + Instruction *CondBr = Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); + + // Atomic RMW expands to a Load-linked / Store-Conditional loop. Because it is + // hard to predict precise branch weights, we mark the branch as "unknown" + // (50/50) to prevent misleading optimizations. 
+ setExplicitlyUnknownBranchWeightsIfProfiled(*CondBr, *F, DEBUG_TYPE); Builder.SetInsertPoint(ExitBB, ExitBB->begin()); return Loaded; diff --git a/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll b/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll index 95a52aa0f7f52..b509b2469cfdc 100644 --- a/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll +++ b/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll @@ -1,8 +1,8 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt -codegen-opt-level=1 -S -mtriple=aarch64-- -passes=atomic-expand %s | FileCheck %s ; RUN: opt -codegen-opt-level=1 -S -mtriple=aarch64-- -mattr=+outline-atomics -passes=atomic-expand %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS -define void @atomic_swap_f16(ptr %ptr, half %val) nounwind { +define void @atomic_swap_f16(ptr %ptr, half %val) !prof !0 { ; CHECK-LABEL: @atomic_swap_f16( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[VAL:%.*]] to i16 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] @@ -12,7 +12,7 @@ define void @atomic_swap_f16(ptr %ptr, half %val) nounwind { ; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP1]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.aarch64.stxr.p0(i64 [[TMP4]], ptr elementtype(i16) [[PTR]]) ; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP5]], 0 -; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]] +; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to half ; CHECK-NEXT: ret void @@ -27,7 +27,7 @@ define void @atomic_swap_f16(ptr %ptr, half %val) nounwind { ret void } -define void @atomic_swap_f32(ptr %ptr, float %val) nounwind { +define void @atomic_swap_f32(ptr %ptr, float %val) 
nounwind !prof !0 { ; CHECK-LABEL: @atomic_swap_f32( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[VAL:%.*]] to i32 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] @@ -37,7 +37,7 @@ define void @atomic_swap_f32(ptr %ptr, float %val) nounwind { ; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.aarch64.stxr.p0(i64 [[TMP4]], ptr elementtype(i32) [[PTR]]) ; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP5]], 0 -; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]] +; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]], !prof [[PROF1]] ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to float ; CHECK-NEXT: ret void @@ -52,7 +52,7 @@ define void @atomic_swap_f32(ptr %ptr, float %val) nounwind { ret void } -define void @atomic_swap_f64(ptr %ptr, double %val) nounwind { +define void @atomic_swap_f64(ptr %ptr, double %val) nounwind !prof !0 { ; CHECK-LABEL: @atomic_swap_f64( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[VAL:%.*]] to i64 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] @@ -60,7 +60,7 @@ define void @atomic_swap_f64(ptr %ptr, double %val) nounwind { ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.aarch64.ldaxr.p0(ptr elementtype(i64) [[PTR:%.*]]) ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.aarch64.stxr.p0(i64 [[TMP1]], ptr elementtype(i64) [[PTR]]) ; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP3]], 0 -; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]] +; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]], !prof [[PROF1]] ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP2]] to double ; CHECK-NEXT: ret void @@ -74,3 +74,17 @@ define void @atomic_swap_f64(ptr %ptr, double %val) nounwind { %t1 = atomicrmw xchg ptr %ptr, double %val acquire ret void } + +!0 = !{!"function_entry_count", i64 1000} +;. 
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nofree nounwind willreturn } +;. +; OUTLINE-ATOMICS: attributes #[[ATTR0:[0-9]+]] = { "target-features"="+outline-atomics" } +; OUTLINE-ATOMICS: attributes #[[ATTR1:[0-9]+]] = { nounwind "target-features"="+outline-atomics" } +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF1]] = !{!"unknown", !"atomic-expand"} +;. +; OUTLINE-ATOMICS: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000} +;.