-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[X86] Support branch hint #97721
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[X86] Support branch hint #97721
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -749,6 +749,11 @@ def TuningUseGLMDivSqrtCosts | |
| : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true", | ||
| "Use Goldmont specific floating point div/sqrt costs">; | ||
|
|
||
| // Starting with the Redwood Cove architecture, conditional branches support a | ||
| // branch-taken hint (i.e., instruction prefix 3EH). | ||
| def TuningBranchHint: SubtargetFeature<"branch-hint", "HasBranchHint", "true", | ||
| "Target has branch hint feature">; | ||
|
|
||
| //===----------------------------------------------------------------------===// | ||
| // X86 CPU Families | ||
| // TODO: Remove these - use general tuning features to determine codegen. | ||
|
|
@@ -1124,6 +1129,8 @@ def ProcessorFeatures { | |
| FeaturePREFETCHI]; | ||
| list<SubtargetFeature> GNRFeatures = | ||
| !listconcat(SPRFeatures, GNRAdditionalFeatures); | ||
| list<SubtargetFeature> GNRAdditionalTuning = [TuningBranchHint]; | ||
| list<SubtargetFeature> GNRTuning = !listconcat(SPRTuning, GNRAdditionalTuning); | ||
|
|
||
| // Graniterapids D | ||
| list<SubtargetFeature> GNRDAdditionalFeatures = [FeatureAMXCOMPLEX]; | ||
|
|
@@ -1815,12 +1822,12 @@ def : ProcModel<"pantherlake", AlderlakePModel, | |
| def : ProcModel<"clearwaterforest", AlderlakePModel, | ||
| ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>; | ||
| def : ProcModel<"graniterapids", SapphireRapidsModel, | ||
| ProcessorFeatures.GNRFeatures, ProcessorFeatures.SPRTuning>; | ||
| ProcessorFeatures.GNRFeatures, ProcessorFeatures.GNRTuning>; | ||
| def : ProcModel<"emeraldrapids", SapphireRapidsModel, | ||
| ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>; | ||
| ProcessorFeatures.SPRFeatures, ProcessorFeatures.GNRTuning>; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Isn't Emerald Rapids based on Raptor Lake microarchitecture (and not Redwood Cove)? If so, wouldn't branch hints be unsupported for this subtarget?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nice catch. I'll correct it. |
||
| foreach P = ["graniterapids-d", "graniterapids_d"] in { | ||
| def : ProcModel<P, SapphireRapidsModel, | ||
| ProcessorFeatures.GNRDFeatures, ProcessorFeatures.SPRTuning>; | ||
| ProcessorFeatures.GNRDFeatures, ProcessorFeatures.GNRTuning>; | ||
| } | ||
|
|
||
| // AMD CPUs. | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This test is too verbose, not robust, and misses coverage. I would suggest the following
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks. It's simpler. What's the metadata for "!prof !0" and "!prof !1"?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updated. We should specify the branch probability explicitly instead of letting other passes guess the probability. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,75 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: llc < %s -mtriple=x86_64 -mattr=+branch-hint -enable-branch-hint | FileCheck %s | ||
| ; RUN: llc < %s -mtriple=x86_64 -mattr=+branch-hint -enable-branch-hint -branch-hint-probability-threshold=50 | FileCheck %s | ||
| ; RUN: llc < %s -mtriple=x86_64 -mattr=+branch-hint -enable-branch-hint -branch-hint-probability-threshold=60 -tail-dup-placement=false | FileCheck --check-prefix=TH60 %s | ||
|
|
||
|
|
||
| ; Design: Add a DS segment override prefix to conditional branches that have a | ||
| ; high probability of being taken (i.e., greater than the probability | ||
| ; threshold for enabling the branch hint). | ||
|
|
||
| define void @p51(i32 %x, ptr %p) { | ||
| ; CHECK-LABEL: p51: | ||
| ; CHECK: # %bb.0: # %entry | ||
| ; CHECK-NEXT: testl %edi, %edi | ||
| ; CHECK-NEXT: ds | ||
| ; CHECK-NEXT: je .LBB0_2 | ||
| ; CHECK-NEXT: # %bb.1: # %if.then | ||
| ; CHECK-NEXT: movl %edi, (%rsi) | ||
| ; CHECK-NEXT: .LBB0_2: # %if.end | ||
| ; CHECK-NEXT: retq | ||
| ; | ||
| ; TH60-LABEL: p51: | ||
| ; TH60: # %bb.0: # %entry | ||
| ; TH60-NEXT: testl %edi, %edi | ||
| ; TH60-NEXT: je .LBB0_2 | ||
| ; TH60-NEXT: # %bb.1: # %if.then | ||
| ; TH60-NEXT: movl %edi, (%rsi) | ||
| ; TH60-NEXT: .LBB0_2: # %if.end | ||
| ; TH60-NEXT: retq | ||
| entry: | ||
| %tobool.not = icmp eq i32 %x, 0 | ||
| br i1 %tobool.not, label %if.end, label %if.then, !prof !0 | ||
|
|
||
| if.then: | ||
| store i32 %x, ptr %p, align 4 | ||
| br label %if.end | ||
|
|
||
| if.end: | ||
| ret void | ||
| } | ||
|
|
||
| define void @p61(i32 %x, ptr %p) { | ||
| ; CHECK-LABEL: p61: | ||
| ; CHECK: # %bb.0: # %entry | ||
| ; CHECK-NEXT: testl %edi, %edi | ||
| ; CHECK-NEXT: jne .LBB1_1 | ||
| ; CHECK-NEXT: # %bb.2: # %if.end | ||
| ; CHECK-NEXT: retq | ||
| ; CHECK-NEXT: .LBB1_1: # %if.then | ||
| ; CHECK-NEXT: movl %edi, (%rsi) | ||
| ; CHECK-NEXT: retq | ||
| ; | ||
| ; TH60-LABEL: p61: | ||
| ; TH60: # %bb.0: # %entry | ||
| ; TH60-NEXT: testl %edi, %edi | ||
| ; TH60-NEXT: ds | ||
| ; TH60-NEXT: je .LBB1_2 | ||
| ; TH60-NEXT: # %bb.1: # %if.then | ||
| ; TH60-NEXT: movl %edi, (%rsi) | ||
| ; TH60-NEXT: .LBB1_2: # %if.end | ||
| ; TH60-NEXT: retq | ||
| entry: | ||
| %tobool.not = icmp eq i32 %x, 0 | ||
| br i1 %tobool.not, label %if.end, label %if.then, !prof !1 | ||
|
|
||
| if.then: | ||
| store i32 %x, ptr %p, align 4 | ||
| br label %if.end | ||
|
|
||
| if.end: | ||
| ret void | ||
| } | ||
|
|
||
| !0 = !{!"branch_weights", i32 51, i32 49} | ||
| !1 = !{!"branch_weights", i32 61, i32 39} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why have you added this as a Tuning bit and not a Feature bit? Tuning bits are guaranteed to work on all applicable CPUs - they just might not be performant.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can always add `0x3e` to JCC in 64-bit mode. So it does work on all applicable CPUs.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See Intel SDM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@KanRobert, thank you for helping with this.