-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[DLCov] Origin-Tracking: Add debugify support #143594
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
✅ With the latest revision this PR passed the C/C++ code formatter. |
✅ With the latest revision this PR passed the Python code formatter. |
c6f681d
to
eff0813
Compare
4786afd
to
c973e73
Compare
c973e73
to
3facd4f
Compare
6ade803
to
37c0c68
Compare
✅ With the latest revision this PR passed the undef deprecator. |
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-debuginfo Author: Stephen Tozer (SLTozer) ChangesFull diff: https://github.com/llvm/llvm-project/pull/143594.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index 729813a92f516..a9a66baf5571f 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -15,7 +15,10 @@
#include "llvm/Transforms/Utils/Debugify.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/InstIterator.h"
@@ -28,6 +31,11 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/JSON.h"
#include <optional>
+#if LLVM_ENABLE_DEBUGLOC_ORIGIN_TRACKING
+// We need the Signals header to operate on stacktraces if we're using DebugLoc
+// origin-tracking.
+#include "llvm/Support/Signals.h"
+#endif
#define DEBUG_TYPE "debugify"
@@ -59,6 +67,49 @@ cl::opt<Level> DebugifyLevel(
raw_ostream &dbg() { return Quiet ? nulls() : errs(); }
+#if LLVM_ENABLE_DEBUGLOC_ORIGIN_TRACKING
+// These maps refer to addresses in this instance of LLVM, so we can reuse them
+// everywhere - therefore, we store them at file scope.
+static DenseMap<void *, std::string> SymbolizedAddrs;
+static DenseSet<void *> UnsymbolizedAddrs;
+
+std::string symbolizeStackTrace(const Instruction *I) {
+ // We flush the set of unsymbolized addresses at the latest possible moment,
+ // i.e. now.
+ if (!UnsymbolizedAddrs.empty()) {
+ sys::symbolizeAddresses(UnsymbolizedAddrs, SymbolizedAddrs);
+ UnsymbolizedAddrs.clear();
+ }
+ auto OriginStackTraces = I->getDebugLoc().getOriginStackTraces();
+ std::string Result;
+ raw_string_ostream OS(Result);
+ for (size_t TraceIdx = 0; TraceIdx < OriginStackTraces.size(); ++TraceIdx) {
+ if (TraceIdx != 0)
+ OS << "========================================\n";
+ auto &[Depth, StackTrace] = OriginStackTraces[TraceIdx];
+ for (int Frame = 0; Frame < Depth; ++Frame) {
+ assert(SymbolizedAddrs.contains(StackTrace[Frame]) &&
+ "Expected each address to have been symbolized.");
+ OS << right_justify(formatv("#{0}", Frame).str(), std::log10(Depth) + 2)
+ << ' ' << SymbolizedAddrs[StackTrace[Frame]];
+ }
+ }
+ return Result;
+}
+void collectStackAddresses(Instruction &I) {
+ auto &OriginStackTraces = I.getDebugLoc().getOriginStackTraces();
+ for (auto &[Depth, StackTrace] : OriginStackTraces) {
+ for (int Frame = 0; Frame < Depth; ++Frame) {
+ void *Addr = StackTrace[Frame];
+ if (!SymbolizedAddrs.contains(Addr))
+ UnsymbolizedAddrs.insert(Addr);
+ }
+ }
+}
+#else
+void collectStackAddresses(Instruction &I) {}
+#endif // LLVM_ENABLE_DEBUGLOC_ORIGIN_TRACKING
+
uint64_t getAllocSizeInBits(Module &M, Type *Ty) {
return Ty->isSized() ? M.getDataLayout().getTypeAllocSizeInBits(Ty) : 0;
}
@@ -379,6 +430,8 @@ bool llvm::collectDebugInfoMetadata(Module &M,
LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n');
DebugInfoBeforePass.InstToDelete.insert({&I, &I});
+ // Track the addresses to symbolize, if the feature is enabled.
+ collectStackAddresses(I);
DebugInfoBeforePass.DILocations.insert({&I, hasLoc(I)});
}
}
@@ -454,14 +507,20 @@ static bool checkInstructions(const DebugInstMap &DILocsBefore,
auto BBName = BB->hasName() ? BB->getName() : "no-name";
auto InstName = Instruction::getOpcodeName(Instr->getOpcode());
+ auto CreateJSONBugEntry = [&](const char *Action) {
+ Bugs.push_back(llvm::json::Object({
+ {"metadata", "DILocation"}, {"fn-name", FnName.str()},
+ {"bb-name", BBName.str()}, {"instr", InstName}, {"action", Action},
+#if LLVM_ENABLE_DEBUGLOC_ORIGIN_TRACKING
+ {"origin", symbolizeStackTrace(Instr)},
+#endif
+ }));
+ };
+
auto InstrIt = DILocsBefore.find(Instr);
if (InstrIt == DILocsBefore.end()) {
if (ShouldWriteIntoJSON)
- Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"},
- {"fn-name", FnName.str()},
- {"bb-name", BBName.str()},
- {"instr", InstName},
- {"action", "not-generate"}}));
+ CreateJSONBugEntry("not-generate");
else
dbg() << "WARNING: " << NameOfWrappedPass
<< " did not generate DILocation for " << *Instr
@@ -474,11 +533,7 @@ static bool checkInstructions(const DebugInstMap &DILocsBefore,
// If the instr had the !dbg attached before the pass, consider it as
// a debug info issue.
if (ShouldWriteIntoJSON)
- Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"},
- {"fn-name", FnName.str()},
- {"bb-name", BBName.str()},
- {"instr", InstName},
- {"action", "drop"}}));
+ CreateJSONBugEntry("drop");
else
dbg() << "WARNING: " << NameOfWrappedPass << " dropped DILocation of "
<< *Instr << " (BB: " << BBName << ", Fn: " << FnName
@@ -622,6 +677,8 @@ bool llvm::checkDebugInfoMetadata(Module &M,
LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n');
+ // Track the addresses to symbolize, if the feature is enabled.
+ collectStackAddresses(I);
DebugInfoAfterPass.DILocations.insert({&I, hasLoc(I)});
}
}
diff --git a/llvm/utils/llvm-original-di-preservation.py b/llvm/utils/llvm-original-di-preservation.py
index dc1fa518ca8e6..a8c12252d972c 100755
--- a/llvm/utils/llvm-original-di-preservation.py
+++ b/llvm/utils/llvm-original-di-preservation.py
@@ -13,14 +13,15 @@
class DILocBug:
- def __init__(self, action, bb_name, fn_name, instr):
+ def __init__(self, origin, action, bb_name, fn_name, instr):
+ self.origin = origin
self.action = action
self.bb_name = bb_name
self.fn_name = fn_name
self.instr = instr
def __str__(self):
- return self.action + self.bb_name + self.fn_name + self.instr
+ return self.action + self.bb_name + self.fn_name + self.instr + self.origin
class DISPBug:
@@ -86,6 +87,7 @@ def generate_html_report(
"Function Name",
"Basic Block Name",
"Action",
+ "Origin",
]
for column in header_di_loc:
@@ -112,6 +114,7 @@ def generate_html_report(
row.append(x.fn_name)
row.append(x.bb_name)
row.append(x.action)
+ row.append(f"<details><summary>View Origin StackTrace</summary><pre>{x.origin}</pre></details>")
row.append(" </tr>\n")
# Dump the bugs info into the table.
for column in row:
@@ -428,9 +431,9 @@ def Main():
sys.exit(1)
# Use the defaultdict in order to make multidim dicts.
- di_location_bugs = defaultdict(lambda: defaultdict(dict))
- di_subprogram_bugs = defaultdict(lambda: defaultdict(dict))
- di_variable_bugs = defaultdict(lambda: defaultdict(dict))
+ di_location_bugs = defaultdict(lambda: defaultdict(list))
+ di_subprogram_bugs = defaultdict(lambda: defaultdict(list))
+ di_variable_bugs = defaultdict(lambda: defaultdict(list))
# Use the ordered dict to make a summary.
di_location_bugs_summary = OrderedDict()
@@ -470,9 +473,9 @@ def Main():
skipped_lines += 1
continue
- di_loc_bugs = []
- di_sp_bugs = []
- di_var_bugs = []
+ di_loc_bugs = di_location_bugs[bugs_file][bugs_pass]
+ di_sp_bugs = di_subprogram_bugs[bugs_file][bugs_pass]
+ di_var_bugs = di_variable_bugs[bugs_file][bugs_pass]
# Omit duplicated bugs.
di_loc_set = set()
@@ -487,6 +490,7 @@ def Main():
if bugs_metadata == "DILocation":
try:
+ origin = bug["origin"]
action = bug["action"]
bb_name = bug["bb-name"]
fn_name = bug["fn-name"]
@@ -494,7 +498,7 @@ def Main():
except:
skipped_bugs += 1
continue
- di_loc_bug = DILocBug(action, bb_name, fn_name, instr)
+ di_loc_bug = DILocBug(origin, action, bb_name, fn_name, instr)
if not str(di_loc_bug) in di_loc_set:
di_loc_set.add(str(di_loc_bug))
if opts.compress:
|
3facd4f
to
fe9494b
Compare
37c0c68
to
3d83059
Compare
4bbd28b
to
e46273b
Compare
3d83059
to
5e56291
Compare
e46273b
to
ef6ccda
Compare
5e56291
to
2ff6e13
Compare
4410b5f
to
fb65cb7
Compare
afeb26b
to
e2ff01b
Compare
1320d5f
to
ca34df4
Compare
e2ff01b
to
a4ef5c6
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@SLTozer thanks for doing this!
Are you planning to extend documentation, e.g. https://llvm.org/docs/HowToUpdateDebugInfo.html#how-to-automatically-convert-tests-into-debug-info-tests?
assert(SymbolizedAddrs.contains(StackTrace[Frame]) && | ||
"Expected each address to have been symbolized."); | ||
for (std::string &SymbolizedFrame : SymbolizedAddrs[StackTrace[Frame]]) { | ||
OS << right_justify(formatv("#{0}", VirtualFrameNo++).str(), std::log10(Depth) + 2) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please run clang-format
.
Yes, and it probably is best if the documentation lands in this patch! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tentative LGTM, noting that you're planning on updating docs in this PR too. I've no familiarity with the python modified alas.
// These maps refer to addresses in this instance of LLVM, so we can reuse them | ||
// everywhere - therefore, we store them at file scope. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I feel the term "instance of LLVM" could be more precise: can we us the word "process" here? All other ambiguities and difficulties of fixed addresses are brought to mind with the word process.
return Result; | ||
} | ||
void collectStackAddresses(Instruction &I) { | ||
auto &OriginStackTraces = I.getDebugLoc().getOriginStackTraces(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Major nit; would we be able to name the type here rather than auto
? It'll make the resulting code a lot easier to localise and dissect for future readers. (The next auto
makes sense of course).
LGTM, thanks |
13ac38d
to
5b1762e
Compare
@djtodoro I found an error in my changes to the script, which caused a failure in the lit test, so I fixed it in the latest commit - along the way, I appear to have incidentally fixed what I think is an existing error that was baked into the test. Specifically, in the lit test that tests the output for The reason for this is at line 492, where we initialize Since this is a driveby fix, and I'm updating the tests anyway, I'd prefer to simply fix error in this PR, if you agree that this is a valid fix - WDYT? |
I see. I have nothing against it, thanks a lot for it! LGTM. |
8d2d9ad
to
b14f08d
Compare
This patch is part of a series that adds origin-tracking to the debugify source location coverage checks, allowing us to report symbolized stack traces of the point where missing source locations appear.
This patch completes the feature, having debugify handle origin stack traces by symbolizing them when an associated bug is found and printing them into the JSON report file as part of the bug entry. This patch also updates the script that parses the JSON report and creates a human-readable HTML report, adding an "Origin" entry to the table that contains an expandable textbox containing the symbolized stack trace.