From 68b6c6bcbf7bc309ab949603f15ae54a3c9bf1bb Mon Sep 17 00:00:00 2001 From: Dave Bartolomeo Date: Wed, 17 Sep 2025 16:12:53 -0400 Subject: [PATCH] [analyzer] Emit IssueHash in SARIF The change adds two new properties to each `result` object in the SARIF log: `partialFingerprints`: Contains the "issue hash" that the analyzer already generates for each result, which can help identify a result across runs even if surrounding code changes. `hostedViewUri`: If running with `-analyzer-format=sarif-html`, this property will now be emitted with the `file:` URL of the generated HTML report for that result. Along the way, I discovered an existing bug where the HTML diagnostic consumer does not record the path to the generated report if another compilation already created that report. This caused both the SARIF and Plist consumers to be missing the link to the file in all but one of the compilations in case of a warning in a header file. I added a new test to ensure that the generated SARIF for each compilation contains the property. Finally, I made a few changes to the `normalize_sarif` processing in the tests. I switched to `sed` to allow substitutions. The normalization now removes directory components from `file:` URLs, replaces the `length` property of the source file with a constant `-1`, and puts placeholders in the values of the `version` properties rather than just deleting them. The URL transformation in particular lets us verify that the right filename is generated for each HTML report. Fixes #158159 rdar://160410408 --- clang/include/clang/Analysis/PathDiagnostic.h | 4 + clang/include/clang/Basic/Sarif.h | 15 ++ clang/lib/Analysis/PathDiagnostic.cpp | 14 ++ clang/lib/Basic/Sarif.cpp | 16 +- .../StaticAnalyzer/Core/HTMLDiagnostics.cpp | 28 ++-- .../lib/StaticAnalyzer/Core/HTMLDiagnostics.h | 14 ++ .../StaticAnalyzer/Core/PlistDiagnostics.cpp | 10 +- .../StaticAnalyzer/Core/SarifDiagnostics.cpp | 46 +++++- .../sarif-diagnostics-taint-test.c.sarif | 11 +- .../sarif-multi-diagnostic-test.c.sarif | 44 +++++- .../sarif-multi-file-diagnostics.c.sarif | 144 ++++++++++++++++++ .../sarif-multi-file-diagnostics.c | 12 ++ clang/test/Analysis/lit.local.cfg | 12 +- 13 files changed, 335 insertions(+), 35 deletions(-) create mode 100644 clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.h create mode 100644 clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-file-diagnostics.c.sarif create mode 100644 clang/test/Analysis/diagnostics/sarif-multi-file-diagnostics.c diff --git a/clang/include/clang/Analysis/PathDiagnostic.h b/clang/include/clang/Analysis/PathDiagnostic.h index 5907df022e449..197920d4cd100 100644 --- a/clang/include/clang/Analysis/PathDiagnostic.h +++ b/clang/include/clang/Analysis/PathDiagnostic.h @@ -885,6 +885,10 @@ class PathDiagnostic : public llvm::FoldingSetNode { return UniqueingDecl; } + /// Get a hash that identifies the issue. + SmallString<32> getIssueHash(const SourceManager &SrcMgr, + const LangOptions &LangOpts) const; + void flattenLocations() { Loc.flatten(); for (const auto &I : pathImpl) diff --git a/clang/include/clang/Basic/Sarif.h b/clang/include/clang/Basic/Sarif.h index e6c46224b316d..a88d1ee2965a9 100644 --- a/clang/include/clang/Basic/Sarif.h +++ b/clang/include/clang/Basic/Sarif.h @@ -322,6 +322,8 @@ class SarifResult { uint32_t RuleIdx; std::string RuleId; std::string DiagnosticMessage; + std::string HostedViewerURI; + llvm::SmallDenseMap PartialFingerprints; llvm::SmallVector Locations; llvm::SmallVector ThreadFlows; std::optional LevelOverride; @@ -347,6 +349,11 @@ class SarifResult { return *this; } + SarifResult setHostedViewerURI(llvm::StringRef URI) { + HostedViewerURI = URI.str(); + return *this; + } + SarifResult setLocations(llvm::ArrayRef DiagLocs) { #ifndef NDEBUG for (const auto &Loc : DiagLocs) { @@ -366,6 +373,12 @@ class SarifResult { LevelOverride = TheLevel; return *this; } + + SarifResult addPartialFingerprint(llvm::StringRef key, + llvm::StringRef value) { + PartialFingerprints[key] = value; + return *this; + } }; /// This class handles creating a valid SARIF document given various input @@ -475,6 +488,8 @@ class SarifDocumentWriter { /// reported diagnostics, resulting in an expensive call. llvm::json::Object createDocument(); + static std::string fileNameToURI(llvm::StringRef Filename); + private: /// Source Manager to use for the current SARIF document. const SourceManager &SourceMgr; diff --git a/clang/lib/Analysis/PathDiagnostic.cpp b/clang/lib/Analysis/PathDiagnostic.cpp index ef24efd3c4bd0..e42731b93bfb2 100644 --- a/clang/lib/Analysis/PathDiagnostic.cpp +++ b/clang/lib/Analysis/PathDiagnostic.cpp @@ -24,6 +24,7 @@ #include "clang/AST/Type.h" #include "clang/Analysis/AnalysisDeclContext.h" #include "clang/Analysis/CFG.h" +#include "clang/Analysis/IssueHash.h" #include "clang/Analysis/ProgramPoint.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/SourceLocation.h" @@ -1075,6 +1076,19 @@ unsigned PathDiagnostic::full_size() { return size; } +SmallString<32> +PathDiagnostic::getIssueHash(const SourceManager &SrcMgr, + const LangOptions &LangOpts) const { + PathDiagnosticLocation UPDLoc = getUniqueingLoc(); + FullSourceLoc FullLoc( + SrcMgr.getExpansionLoc(UPDLoc.isValid() ? UPDLoc.asLocation() + : getLocation().asLocation()), + SrcMgr); + + return clang::getIssueHash(FullLoc, getCheckerName(), getBugType(), + getDeclWithIssue(), LangOpts); +} + //===----------------------------------------------------------------------===// // FoldingSet profiling methods. //===----------------------------------------------------------------------===// diff --git a/clang/lib/Basic/Sarif.cpp b/clang/lib/Basic/Sarif.cpp index 69862b73febd7..b3fb9a21249e9 100644 --- a/clang/lib/Basic/Sarif.cpp +++ b/clang/lib/Basic/Sarif.cpp @@ -67,7 +67,7 @@ static std::string percentEncodeURICharacter(char C) { /// \param Filename The filename to be represented as URI. /// /// \return RFC3986 URI representing the input file name. -static std::string fileNameToURI(StringRef Filename) { +std::string SarifDocumentWriter::fileNameToURI(StringRef Filename) { SmallString<32> Ret = StringRef("file://"); // Get the root name to see if it has a URI authority. @@ -391,6 +391,11 @@ void SarifDocumentWriter::appendResult(const SarifResult &Result) { json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)}, {"ruleIndex", static_cast(RuleIdx)}, {"ruleId", Rule.Id}}; + + if (!Result.HostedViewerURI.empty()) { + Ret["hostedViewerUri"] = Result.HostedViewerURI; + } + if (!Result.Locations.empty()) { json::Array Locs; for (auto &Range : Result.Locations) { @@ -398,6 +403,15 @@ void SarifDocumentWriter::appendResult(const SarifResult &Result) { } Ret["locations"] = std::move(Locs); } + + if (!Result.PartialFingerprints.empty()) { + json::Object fingerprints = {}; + for (auto &pair : Result.PartialFingerprints) { + fingerprints[pair.first] = pair.second; + } + Ret["partialFingerprints"] = std::move(fingerprints); + } + if (!Result.ThreadFlows.empty()) Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)}; diff --git a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp index 4c9c619f2487a..217b853305ed1 100644 --- a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "HTMLDiagnostics.h" #include "PlistDiagnostics.h" #include "SarifDiagnostics.h" #include "clang/AST/Decl.h" @@ -82,7 +83,7 @@ class HTMLDiagnostics : public PathDiagnosticConsumer { void FlushDiagnosticsImpl(std::vector &Diags, FilesMade *filesMade) override; - StringRef getName() const override { return "HTMLDiagnostics"; } + StringRef getName() const override { return HTML_DIAGNOSTICS_NAME; } bool supportsCrossFileDiagnostics() const override { return SupportsCrossFileDiagnostics; @@ -254,18 +255,6 @@ void HTMLDiagnostics::FlushDiagnosticsImpl( ReportDiag(*Diag, filesMade); } -static llvm::SmallString<32> getIssueHash(const PathDiagnostic &D, - const Preprocessor &PP) { - SourceManager &SMgr = PP.getSourceManager(); - PathDiagnosticLocation UPDLoc = D.getUniqueingLoc(); - FullSourceLoc L(SMgr.getExpansionLoc(UPDLoc.isValid() - ? UPDLoc.asLocation() - : D.getLocation().asLocation()), - SMgr); - return getIssueHash(L, D.getCheckerName(), D.getBugType(), - D.getDeclWithIssue(), PP.getLangOpts()); -} - void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D, FilesMade *filesMade) { // Create the HTML directory if it is missing. @@ -310,7 +299,8 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D, } } - SmallString<32> IssueHash = getIssueHash(D, PP); + SmallString<32> IssueHash = + D.getIssueHash(PP.getSourceManager(), PP.getLangOpts()); auto [It, IsNew] = EmittedHashes.insert(IssueHash); if (!IsNew) { // We've already emitted a duplicate issue. It'll get overwritten anyway. @@ -369,6 +359,12 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D, if (EC != llvm::errc::file_exists) { llvm::errs() << "warning: could not create file in '" << Directory << "': " << EC.message() << '\n'; + } else if (filesMade) { + // Record that we created the file so that it gets referenced in the + // plist and SARIF reports for every translation unit that found the + // issue. + filesMade->addDiagnostic(D, getName(), + llvm::sys::path::filename(ResultPath)); } return; } @@ -679,8 +675,8 @@ void HTMLDiagnostics::FinalizeHTML(const PathDiagnostic &D, Rewriter &R, os << "\n\n"; - os << "\n\n"; + os << "\n\n"; os << "\n