-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[MemProf] Ensure all callsite clones are assigned a function clone #150735
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,145 @@ | ||
| ;; Make sure we assign the original callsite to a function clone (which will be | ||
| ;; the original function clone), even when we cannot update its caller (due to | ||
| ;; missing metadata e.g. from mismatched profiles). Otherwise we will try to use | ||
| ;; the original function for a different clone, leading to confusion later when | ||
| ;; rewriting the calls. | ||
|
|
||
| ;; -stats requires asserts | ||
| ; REQUIRES: asserts | ||
|
|
||
| ; RUN: opt -thinlto-bc %s >%t.o | ||
| ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ | ||
| ; RUN: -supports-hot-cold-new \ | ||
| ; RUN: -r=%t.o,A,plx \ | ||
| ; RUN: -r=%t.o,B,plx \ | ||
| ; RUN: -r=%t.o,C,plx \ | ||
| ; RUN: -r=%t.o,D,plx \ | ||
| ; RUN: -r=%t.o,E,plx \ | ||
| ; RUN: -r=%t.o,F,plx \ | ||
| ; RUN: -r=%t.o,G,plx \ | ||
| ; RUN: -r=%t.o,A1,plx \ | ||
| ; RUN: -r=%t.o,B1,plx \ | ||
| ; RUN: -r=%t.o,_Znwm, \ | ||
| ; RUN: -memprof-verify-ccg -memprof-verify-nodes -debug-only=memprof-context-disambiguation \ | ||
| ; RUN: -stats -pass-remarks=memprof-context-disambiguation -save-temps \ | ||
| ; RUN: -o %t.out 2>&1 | FileCheck %s \ | ||
| ; RUN: --implicit-check-not="Mismatch in call clone assignment" \ | ||
| ; RUN: --implicit-check-not="Number of callsites assigned to call multiple non-matching clones" | ||
|
|
||
| ; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR | ||
|
|
||
| ; ModuleID = '<stdin>' | ||
| source_filename = "reduced.ll" | ||
| target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" | ||
| target triple = "x86_64-grtev4-linux-gnu" | ||
|
|
||
| ; IR-LABEL: define dso_local void @A() | ||
| define void @A() #0 { | ||
| ; IR: call void @C() | ||
| call void @C() | ||
| ret void | ||
| } | ||
|
|
||
| ; IR-LABEL: define dso_local void @B() | ||
| define void @B() #0 { | ||
| ; IR: call void @C.memprof.1() | ||
| call void @C(), !callsite !1 | ||
| ret void | ||
| } | ||
|
|
||
| ; IR-LABEL: define dso_local void @C() | ||
| define void @C() #0 { | ||
| ; IR: call void @F() | ||
| call void @F(), !callsite !16 | ||
| ; IR: call void @D() | ||
| call void @D(), !callsite !2 | ||
| ret void | ||
| } | ||
|
|
||
| ; IR-LABEL: define dso_local void @D() | ||
| define void @D() #0 { | ||
| ; IR: call void @E() | ||
| call void @E(), !callsite !3 | ||
| ; IR: call void @G() | ||
| call void @G(), !callsite !17 | ||
| ret void | ||
| } | ||
|
|
||
| ; IR-LABEL: define dso_local void @E() | ||
| define void @E() #0 { | ||
| ; IR: call ptr @_Znwm(i64 0) #[[NOTCOLD:[0-9]+]] | ||
| %1 = call ptr @_Znwm(i64 0), !memprof !4, !callsite !9 | ||
| ret void | ||
| } | ||
|
|
||
| ; IR-LABEL: define dso_local void @F() | ||
| define void @F() #0 { | ||
| ; IR: call void @G() | ||
| call void @G(), !callsite !17 | ||
| ret void | ||
| } | ||
|
|
||
| ; IR-LABEL: define dso_local void @G() | ||
| define void @G() #0 { | ||
| ; IR: call ptr @_Znwm(i64 0) #[[NOTCOLD]] | ||
| %2 = call ptr @_Znwm(i64 0), !memprof !10, !callsite !15 | ||
| ret void | ||
| } | ||
|
|
||
| ; IR-LABEL: define dso_local void @A1() | ||
| define void @A1() #0 { | ||
| ; IR: call void @C() | ||
| call void @C(), !callsite !18 | ||
| ret void | ||
| } | ||
|
|
||
| ; IR-LABEL: define dso_local void @B1() | ||
| define void @B1() #0 { | ||
| ; IR: call void @C.memprof.1() | ||
| call void @C(), !callsite !19 | ||
| ret void | ||
| } | ||
|
|
||
| ; IR-LABEL: define dso_local void @C.memprof.1() | ||
| ; IR: call void @F.memprof.1() | ||
| ; IR: call void @D.memprof.1() | ||
|
|
||
| ; IR-LABEL: define dso_local void @D.memprof.1() | ||
| ; IR: call void @E.memprof.1() | ||
| ; IR: call void @G() | ||
|
|
||
| ; IR-LABEL: define dso_local void @E.memprof.1() | ||
| ; IR: call ptr @_Znwm(i64 0) #[[COLD:[0-9]+]] | ||
|
|
||
| ; IR-LABEL: define dso_local void @F.memprof.1() | ||
| ; IR: call void @G.memprof.1() | ||
|
|
||
| ; IR-LABEL: define dso_local void @G.memprof.1() | ||
| ; IR: call ptr @_Znwm(i64 0) #[[COLD]] | ||
|
|
||
| declare ptr @_Znwm(i64) | ||
|
|
||
| attributes #0 = { noinline optnone } | ||
| ; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" } | ||
| ; IR: attributes #[[COLD]] = { "memprof"="cold" } | ||
|
|
||
| !0 = !{i64 123} | ||
| !1 = !{i64 234} | ||
| !2 = !{i64 345} | ||
| !3 = !{i64 456} | ||
| !4 = !{!5, !7} | ||
| !5 = !{!6, !"notcold"} | ||
| !6 = !{i64 567, i64 456, i64 345, i64 123} | ||
| !7 = !{!8, !"cold"} | ||
| !8 = !{i64 567, i64 456, i64 345, i64 234} | ||
| !9 = !{i64 567} | ||
| !10 = !{!11, !13} | ||
| !11 = !{!12, !"notcold"} | ||
| !12 = !{i64 678, i64 891, i64 789, i64 912} | ||
| !13 = !{!14, !"cold"} | ||
| !14 = !{i64 678, i64 891, i64 789, i64 812} | ||
| !15 = !{i64 678} | ||
| !16 = !{i64 789} | ||
| !17 = !{i64 891} | ||
| !18 = !{i64 912} | ||
| !19 = !{i64 812} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe if we used a struct instead of an integer to indicate the clone number as value in the map, we could hold additional data about whether it was assigned along with the assigned clone number. Would that simplify any of the logic here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not completely sure which map you are referring to, but I started out making a change to add a bool in the ContextNode as to whether its callsite had been assigned to a func clone, but that turns out to be tricky to get right, and this was more targeted and simpler to get right as a fix for the specific problem I was looking at. In particular, we want callsites that we weren't able to update callers for to get assigned to clone 0, which this change ensures. I've added a TODO though to look at adding a mechanism to better identify and distinguish callsite clones that aren't getting assigned to any clone.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wasn't referring to any particular map, rather some way of noting that the callsite has been assigned. Your response about adding it in ContextNode was helpful. Adding a TODO for now sounds good.