Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions clang/lib/Basic/Targets/X86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasCF = true;
} else if (Feature == "+zu") {
HasZU = true;
} else if (Feature == "+branch-hint") {
HasBranchHint = true;
}

X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature)
Expand Down Expand Up @@ -1292,6 +1294,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("nf", HasNF)
.Case("cf", HasCF)
.Case("zu", HasZU)
.Case("branch-hint", HasBranchHint)
.Default(false);
}

Expand Down
1 change: 1 addition & 0 deletions clang/lib/Basic/Targets/X86.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasCF = false;
bool HasZU = false;
bool HasInlineAsmUseGPR32 = false;
bool HasBranchHint = false;

protected:
llvm::X86::CPUKind CPU = llvm::X86::CK_None;
Expand Down
13 changes: 10 additions & 3 deletions llvm/lib/Target/X86/X86.td
Original file line number Diff line number Diff line change
Expand Up @@ -749,6 +749,11 @@ def TuningUseGLMDivSqrtCosts
: SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
"Use Goldmont specific floating point div/sqrt costs">;

// Starting with the Redwood Cove microarchitecture, conditional branches
// support a branch-taken hint (i.e., instruction prefix 3EH).
def TuningBranchHint: SubtargetFeature<"branch-hint", "HasBranchHint", "true",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why have you added this as a Tuning bit and not a Feature bit? Tuning bits are guaranteed to work on all applicable CPUs - they just might not be performant.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can always add 0x3e to JCC in 64-bit mode. So it does work on all applicable CPUs.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See Intel SDM

— Branch hints [1]:
• 2EH—Branch not taken (used only with Jcc instructions).
• 3EH—Branch taken (used only with Jcc instructions).

[1] Some earlier microarchitectures used these as branch hints, but recent generations have not and they are reserved for future hint usage.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@KanRobert, thank you for helping with this.

"Target has branch hint feature">;

//===----------------------------------------------------------------------===//
// X86 CPU Families
// TODO: Remove these - use general tuning features to determine codegen.
Expand Down Expand Up @@ -1124,6 +1129,8 @@ def ProcessorFeatures {
FeaturePREFETCHI];
list<SubtargetFeature> GNRFeatures =
!listconcat(SPRFeatures, GNRAdditionalFeatures);
list<SubtargetFeature> GNRAdditionalTuning = [TuningBranchHint];
list<SubtargetFeature> GNRTuning = !listconcat(SPRTuning, GNRAdditionalTuning);

// Graniterapids D
list<SubtargetFeature> GNRDAdditionalFeatures = [FeatureAMXCOMPLEX];
Expand Down Expand Up @@ -1815,12 +1822,12 @@ def : ProcModel<"pantherlake", AlderlakePModel,
def : ProcModel<"clearwaterforest", AlderlakePModel,
ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>;
def : ProcModel<"graniterapids", SapphireRapidsModel,
ProcessorFeatures.GNRFeatures, ProcessorFeatures.SPRTuning>;
ProcessorFeatures.GNRFeatures, ProcessorFeatures.GNRTuning>;
def : ProcModel<"emeraldrapids", SapphireRapidsModel,
ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
ProcessorFeatures.SPRFeatures, ProcessorFeatures.GNRTuning>;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't Emerald Rapids based on the Raptor Cove microarchitecture (and not Redwood Cove)? If so, wouldn't branch hints be unsupported for this subtarget?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice catch. I'll correct it.

foreach P = ["graniterapids-d", "graniterapids_d"] in {
def : ProcModel<P, SapphireRapidsModel,
ProcessorFeatures.GNRDFeatures, ProcessorFeatures.SPRTuning>;
ProcessorFeatures.GNRDFeatures, ProcessorFeatures.GNRTuning>;
}

// AMD CPUs.
Expand Down
24 changes: 24 additions & 0 deletions llvm/lib/Target/X86/X86MCInstLower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
Expand Down Expand Up @@ -54,6 +55,14 @@

using namespace llvm;

// Command-line switch that gates emission of the branch-taken hint prefix in
// X86AsmPrinter::emitInstruction. It defaults to false and is marked
// cl::Hidden; the subtarget must additionally report hasBranchHint().
static cl::opt<bool> EnableBranchHint("enable-branch-hint",
cl::desc("Enable branch hint."),
cl::init(false), cl::Hidden);
// Threshold that a conditional jump's edge probability must strictly exceed
// before the hint prefix is emitted. The value is used as the numerator of a
// BranchProbability whose denominator is 100, so the default of 50 means
// "more likely taken than not".
// NOTE(review): nothing here bounds the value to 0..100 -- confirm that
// larger values are rejected or handled where the option is consumed.
static cl::opt<unsigned> BranchHintProbabilityThreshold(
"branch-hint-probability-threshold",
cl::desc("The probability threshold of enabling branch hint."),
cl::init(50), cl::Hidden);

namespace {

/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
Expand Down Expand Up @@ -2444,6 +2453,21 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
break;
case X86::JCC_1:
// Two instruction prefixes (2EH for branch not-taken and 3EH for branch
// taken) are used as branch hints. Here we emit the branch-taken prefix
// (as X86::DS_PREFIX) ahead of a conditional jump whose edge probability
// to its destination block is strictly greater than the threshold.
// Both the subtarget feature and the -enable-branch-hint option must be on;
// otherwise the jump is emitted without any prefix.
if (getSubtarget().hasBranchHint() && EnableBranchHint) {
const MachineBranchProbabilityInfo *MBPI =
&getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
// Operand 0 of JCC_1 is read as the jump's destination basic block.
MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
BranchProbability EdgeProb =
MBPI->getEdgeProbability(MI->getParent(), DestBB);
// The option value is interpreted as a fraction of 100.
// NOTE(review): BranchProbability presumably requires numerator <=
// denominator -- confirm behavior when the option is set above 100.
BranchProbability Threshold(BranchHintProbabilityThreshold, 100);
if (EdgeProb > Threshold)
EmitAndCountInstruction(MCInstBuilder(X86::DS_PREFIX));
}
break;
}

MCInst TmpInst;
Expand Down
75 changes: 75 additions & 0 deletions llvm/test/CodeGen/X86/branch-hint.ll
Copy link
Contributor

@KanRobert KanRobert Jul 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is too verbose, not robust, and misses coverage. I would suggest the following:

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=x86_64 -mattr=+branch-hint -branch-hint | FileCheck %s
; RUN: llc < %s -mtriple=x86_64 -mattr=+branch-hint -branch-hint -branch-hint-probability-threshold=50 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64 -mattr=+branch-hint -branch-hint -branch-hint-probability-threshold=60 -tail-dup-placement=false | FileCheck --check-prefix=TH60 %s

define void @p51(i32 %x, ptr %p) {
; CHECK-LABEL: p51:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    ds
; CHECK-NEXT:    je .LBB0_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    movl %edi, (%rsi)
; CHECK-NEXT:  .LBB0_2: # %if.end
; CHECK-NEXT:    retq
;
; TH60-LABEL: p51:
; TH60:       # %bb.0: # %entry
; TH60-NEXT:    testl %edi, %edi
; TH60-NEXT:    je .LBB0_2
; TH60-NEXT:  # %bb.1: # %if.then
; TH60-NEXT:    movl %edi, (%rsi)
; TH60-NEXT:  .LBB0_2: # %if.end
; TH60-NEXT:    retq
entry:
  %tobool.not = icmp eq i32 %x, 0
  br i1 %tobool.not, label %if.end, label %if.then, !prof !0

if.then:
  store i32 %x, ptr %p, align 4
  br label %if.end

if.end:
  ret void
}

define void @p61(i32 %x, ptr %p) {
; CHECK-LABEL: p61:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    jne .LBB1_1
; CHECK-NEXT:  # %bb.2: # %if.end
; CHECK-NEXT:    retq
; CHECK-NEXT:  .LBB1_1: # %if.then
; CHECK-NEXT:    movl %edi, (%rsi)
; CHECK-NEXT:    retq
;
; TH60-LABEL: p61:
; TH60:       # %bb.0: # %entry
; TH60-NEXT:    testl %edi, %edi
; TH60-NEXT:    ds
; TH60-NEXT:    je .LBB1_2
; TH60-NEXT:  # %bb.1: # %if.then
; TH60-NEXT:    movl %edi, (%rsi)
; TH60-NEXT:  .LBB1_2: # %if.end
; TH60-NEXT:    retq
entry:
  %tobool.not = icmp eq i32 %x, 0
  br i1 %tobool.not, label %if.end, label %if.then, !prof !1

if.then:
  store i32 %x, ptr %p, align 4
  br label %if.end

if.end:
  ret void
}

!0 = !{!"branch_weights", i32 51, i32 49}
!1 = !{!"branch_weights", i32 61, i32 39}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. It's simpler. What's the metadata for "!prof !0" and "!prof !1"?

Copy link
Contributor

@KanRobert KanRobert Jul 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated. We should state the branch probability explicitly instead of letting other passes guess it.

Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=x86_64 -mattr=+branch-hint -enable-branch-hint | FileCheck %s
; RUN: llc < %s -mtriple=x86_64 -mattr=+branch-hint -enable-branch-hint -branch-hint-probability-threshold=50 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64 -mattr=+branch-hint -enable-branch-hint -branch-hint-probability-threshold=60 -tail-dup-placement=false | FileCheck --check-prefix=TH60 %s


; Design: Add a DS segment override prefix to a conditional branch that has a
; high probability of being taken (i.e., greater than the probability threshold
; for enabling branch hints).

define void @p51(i32 %x, ptr %p) {
; CHECK-LABEL: p51:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: ds
; CHECK-NEXT: je .LBB0_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: movl %edi, (%rsi)
; CHECK-NEXT: .LBB0_2: # %if.end
; CHECK-NEXT: retq
;
; TH60-LABEL: p51:
; TH60: # %bb.0: # %entry
; TH60-NEXT: testl %edi, %edi
; TH60-NEXT: je .LBB0_2
; TH60-NEXT: # %bb.1: # %if.then
; TH60-NEXT: movl %edi, (%rsi)
; TH60-NEXT: .LBB0_2: # %if.end
; TH60-NEXT: retq
entry:
%tobool.not = icmp eq i32 %x, 0
br i1 %tobool.not, label %if.end, label %if.then, !prof !0

if.then:
store i32 %x, ptr %p, align 4
br label %if.end

if.end:
ret void
}

define void @p61(i32 %x, ptr %p) {
; CHECK-LABEL: p61:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: jne .LBB1_1
; CHECK-NEXT: # %bb.2: # %if.end
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB1_1: # %if.then
; CHECK-NEXT: movl %edi, (%rsi)
; CHECK-NEXT: retq
;
; TH60-LABEL: p61:
; TH60: # %bb.0: # %entry
; TH60-NEXT: testl %edi, %edi
; TH60-NEXT: ds
; TH60-NEXT: je .LBB1_2
; TH60-NEXT: # %bb.1: # %if.then
; TH60-NEXT: movl %edi, (%rsi)
; TH60-NEXT: .LBB1_2: # %if.end
; TH60-NEXT: retq
entry:
%tobool.not = icmp eq i32 %x, 0
br i1 %tobool.not, label %if.end, label %if.then, !prof !1

if.then:
store i32 %x, ptr %p, align 4
br label %if.end

if.end:
ret void
}

!0 = !{!"branch_weights", i32 51, i32 49}
!1 = !{!"branch_weights", i32 61, i32 39}