diff --git a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp index 0bb5739db4b75..e55d064253b84 100644 --- a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp @@ -207,6 +207,14 @@ std::vector taint::getTaintedSymbolsImpl(ProgramStateRef State, return getTaintedSymbolsImpl(State, Sym, Kind, returnFirstOnly); if (const MemRegion *Reg = V.getAsRegion()) return getTaintedSymbolsImpl(State, Reg, Kind, returnFirstOnly); + + if (auto LCV = V.getAs()) { + StoreManager &StoreMgr = State->getStateManager().getStoreManager(); + if (auto DefaultVal = StoreMgr.getDefaultBinding(*LCV)) { + return getTaintedSymbolsImpl(State, *DefaultVal, Kind, returnFirstOnly); + } + } + return {}; } diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp index ccc3097e8d2f9..17ee1f7c945ed 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp @@ -68,23 +68,15 @@ void ExprEngine::performTrivialCopy(NodeBuilder &Bldr, ExplodedNode *Pred, Bldr.takeNodes(Pred); assert(ThisRD); - if (!ThisRD->isEmpty()) { - // Load the source value only for non-empty classes. - // Otherwise it'd retrieve an UnknownVal - // and bind it and RegionStore would think that the actual value - // in this region at this offset is unknown. - SVal V = Call.getArgSVal(0); - - // If the value being copied is not unknown, load from its location to get - // an aggregate rvalue. - if (std::optional L = V.getAs()) - V = Pred->getState()->getSVal(*L); - else - assert(V.isUnknownOrUndef()); - evalBind(Dst, CallExpr, Pred, ThisVal, V, true); - } else { - Dst.Add(Pred); - } + SVal V = Call.getArgSVal(0); + + // If the value being copied is not unknown, load from its location to get + // an aggregate rvalue. + if (std::optional L = V.getAs()) + V = Pred->getState()->getSVal(*L); + else + assert(V.isUnknownOrUndef()); + evalBind(Dst, CallExpr, Pred, ThisVal, V, true); PostStmt PS(CallExpr, LCtx); for (ExplodedNode *N : Dst) { diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index 674099dd7e1f0..b54ba43ff4241 100644 --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -67,9 +67,10 @@ class BindingKey { isa(r)) && "Not a base"); } -public: +public: bool isDirect() const { return P.getInt() & Direct; } + bool isDefault() const { return !isDirect(); } bool hasSymbolicOffset() const { return P.getInt() & Symbolic; } const MemRegion *getRegion() const { return P.getPointer(); } @@ -232,27 +233,86 @@ class RegionBindingsRef : public llvm::ImmutableMapRef StringifyCache; + auto ToString = [&StringifyCache](const MemRegion *R) { + auto [Place, Inserted] = StringifyCache.try_emplace(R); + if (!Inserted) + return Place->second; + std::string Res; + raw_string_ostream OS(Res); + OS << R; + Place->second = Res; + return Res; + }; + + using Cluster = + std::pair>; + using Binding = std::pair; + + const auto MemSpaceBeforeRegionName = [&ToString](const Cluster *L, + const Cluster *R) { + if (isa(L->first) && !isa(R->first)) + return true; + if (!isa(L->first) && isa(R->first)) + return false; + return ToString(L->first) < ToString(R->first); + }; + + const auto SymbolicBeforeOffset = [&ToString](const BindingKey &L, + const BindingKey &R) { + if (L.hasSymbolicOffset() && !R.hasSymbolicOffset()) + return true; + if (!L.hasSymbolicOffset() && R.hasSymbolicOffset()) + return false; + if (L.hasSymbolicOffset() && R.hasSymbolicOffset()) + return ToString(L.getRegion()) < ToString(R.getRegion()); + return L.getOffset() < R.getOffset(); + }; + + const auto DefaultBindingBeforeDirectBindings = + [&SymbolicBeforeOffset](const Binding *LPtr, const Binding *RPtr) { + const BindingKey &L = LPtr->first; + const BindingKey &R = RPtr->first; + if (L.isDefault() && !R.isDefault()) + return true; + if (!L.isDefault() && R.isDefault()) + return false; + assert(L.isDefault() == R.isDefault()); + return SymbolicBeforeOffset(L, R); + }; + + const auto AddrOf = [](const auto &Item) { return &Item; }; + + std::vector SortedClusters; + SortedClusters.reserve(std::distance(begin(), end())); + append_range(SortedClusters, map_range(*this, AddrOf)); + llvm::sort(SortedClusters, MemSpaceBeforeRegionName); + + for (auto [Idx, C] : llvm::enumerate(SortedClusters)) { + const auto &[BaseRegion, Bindings] = *C; Indent(Out, Space, IsDot) - << "{ \"cluster\": \"" << I.getKey() << "\", \"pointer\": \"" - << (const void *)I.getKey() << "\", \"items\": [" << NL; + << "{ \"cluster\": \"" << BaseRegion << "\", \"pointer\": \"" + << (const void *)BaseRegion << "\", \"items\": [" << NL; + + std::vector SortedBindings; + SortedBindings.reserve(std::distance(Bindings.begin(), Bindings.end())); + append_range(SortedBindings, map_range(Bindings, AddrOf)); + llvm::sort(SortedBindings, DefaultBindingBeforeDirectBindings); ++Space; - const ClusterBindings &CB = I.getData(); - for (ClusterBindings::iterator CI = CB.begin(), CE = CB.end(); CI != CE; - ++CI) { - Indent(Out, Space, IsDot) << "{ " << CI.getKey() << ", \"value\": "; - CI.getData().printJson(Out, /*AddQuotes=*/true); + for (auto [Idx, B] : llvm::enumerate(SortedBindings)) { + const auto &[Key, Value] = *B; + Indent(Out, Space, IsDot) << "{ " << Key << ", \"value\": "; + Value.printJson(Out, /*AddQuotes=*/true); Out << " }"; - if (std::next(CI) != CE) + if (Idx != SortedBindings.size() - 1) Out << ','; Out << NL; } - --Space; Indent(Out, Space, IsDot) << "]}"; - if (std::next(I) != E) + if (Idx != SortedClusters.size() - 1) Out << ','; Out << NL; } @@ -548,6 +608,12 @@ class RegionStoreManager : public StoreManager { return getBinding(getRegionBindings(S), L, T); } + /// Returns the value of the default binding of region \p BaseR + /// if and only if that is the unique binding in the cluster of \p BaseR. + /// \p BaseR must be a base region. + std::optional getUniqueDefaultBinding(Store S, + const MemRegion *BaseR) const; + std::optional getDefaultBinding(Store S, const MemRegion *R) override { RegionBindingsRef B = getRegionBindings(S); // Default bindings are always applied over a base region so look up the @@ -2276,20 +2342,16 @@ NonLoc RegionStoreManager::createLazyBinding(RegionBindingsConstRef B, return svalBuilder.makeLazyCompoundVal(StoreRef(B.asStore(), *this), R); } -static bool isRecordEmpty(const RecordDecl *RD) { - if (!RD->field_empty()) - return false; - if (const CXXRecordDecl *CRD = dyn_cast(RD)) - return CRD->getNumBases() == 0; - return true; -} - SVal RegionStoreManager::getBindingForStruct(RegionBindingsConstRef B, const TypedValueRegion *R) { const RecordDecl *RD = R->getValueType()->castAs()->getDecl(); - if (!RD->getDefinition() || isRecordEmpty(RD)) + if (!RD->getDefinition()) return UnknownVal(); + // We also create a LCV for copying empty structs because then the store + // behavior doesn't depend on the struct layout. + // This way even an empty struct can carry taint, no matter if creduce drops + // the last field member or not. return createLazyBinding(B, R); } @@ -2549,9 +2611,42 @@ RegionBindingsRef RegionStoreManager::bindVector(RegionBindingsConstRef B, return NewB; } +std::optional +RegionStoreManager::getUniqueDefaultBinding(Store S, + const MemRegion *BaseR) const { + assert(BaseR == BaseR->getBaseRegion() && "Expecting a base region"); + const auto *Cluster = getRegionBindings(S).lookup(BaseR); + if (!Cluster || !llvm::hasSingleElement(*Cluster)) + return std::nullopt; + + const auto [Key, Value] = *Cluster->begin(); + return Key.isDirect() ? std::optional{} : Value; +} + std::optional RegionStoreManager::tryBindSmallStruct( RegionBindingsConstRef B, const TypedValueRegion *R, const RecordDecl *RD, nonloc::LazyCompoundVal LCV) { + // If we try to copy a Conjured value representing the value of the whole + // struct, don't try to element-wise copy each field. + // That would unnecessarily bind Derived symbols slicing off the subregion for + // the field from the whole Conjured symbol. + // + // struct Window { int width; int height; }; + // Window getWindow(); <-- opaque fn. + // Window w = getWindow(); <-- conjures a new Window. + // Window w2 = w; <-- trivial copy "w", calling "tryBindSmallStruct" + // + // We should not end up with a new Store for "w2" like this: + // Direct [ 0..31]: Derived{Conj{}, w.width} + // Direct [32..63]: Derived{Conj{}, w.height} + // Instead, we should just bind that Conjured value instead. + if (LCV.getRegion()->getBaseRegion() == LCV.getRegion()) { + if (auto Val = getUniqueDefaultBinding(LCV.getStore(), LCV.getRegion())) { + return B.addBinding(BindingKey::Make(R, BindingKey::Default), + Val.value()); + } + } + FieldVector Fields; if (const CXXRecordDecl *Class = dyn_cast(RD)) diff --git a/clang/test/Analysis/ctor-trivial-copy.cpp b/clang/test/Analysis/ctor-trivial-copy.cpp index 5ed188aa8f1ea..41d0d97161bba 100644 --- a/clang/test/Analysis/ctor-trivial-copy.cpp +++ b/clang/test/Analysis/ctor-trivial-copy.cpp @@ -1,8 +1,12 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config c++-inlining=constructors -verify %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config c++-inlining=constructors -verify %s \ +// RUN: 2>&1 | FileCheck %s -template -void clang_analyzer_dump(T&); +void clang_analyzer_printState(); +template void clang_analyzer_dump_lref(T&); +template void clang_analyzer_dump_val(T); +template T conjure(); +template void nop(const Ts &... args) {} struct aggr { int x; @@ -15,20 +19,104 @@ struct empty { void test_copy_return() { aggr s1 = {1, 2}; aggr const& cr1 = aggr(s1); - clang_analyzer_dump(cr1); // expected-warning-re {{&lifetime_extended_object{aggr, cr1, S{{[0-9]+}}} }} + clang_analyzer_dump_lref(cr1); // expected-warning-re {{&lifetime_extended_object{aggr, cr1, S{{[0-9]+}}} }} empty s2; empty const& cr2 = empty{s2}; - clang_analyzer_dump(cr2); // expected-warning-re {{&lifetime_extended_object{empty, cr2, S{{[0-9]+}}} }} + clang_analyzer_dump_lref(cr2); // expected-warning-re {{&lifetime_extended_object{empty, cr2, S{{[0-9]+}}} }} } void test_assign_return() { aggr s1 = {1, 2}; aggr d1; - clang_analyzer_dump(d1 = s1); // expected-warning {{&d1 }} + clang_analyzer_dump_lref(d1 = s1); // expected-warning {{&d1 }} empty s2; empty d2; - clang_analyzer_dump(d2 = s2); // expected-warning {{&d2 }} was Unknown + clang_analyzer_dump_lref(d2 = s2); // expected-warning {{&d2 }} was Unknown } + +namespace trivial_struct_copy { + +void _01_empty_structs() { + clang_analyzer_dump_val(conjure()); // expected-warning {{lazyCompoundVal}} + empty Empty = conjure(); + empty Empty2 = Empty; + empty Empty3 = Empty2; + // All of these should refer to the exact same LCV, because all of + // these trivial copies refer to the original conjured value. + // There were Unknown before: + clang_analyzer_dump_val(Empty); // expected-warning {{lazyCompoundVal}} + clang_analyzer_dump_val(Empty2); // expected-warning {{lazyCompoundVal}} + clang_analyzer_dump_val(Empty3); // expected-warning {{lazyCompoundVal}} + + // We should have the same Conjured symbol for "Empty", "Empty2" and "Empty3". + clang_analyzer_printState(); + // CHECK: "store": { "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "cluster": "GlobalInternalSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$ + // CHECK-NEXT: ]}, + // CHECK-NEXT: { "cluster": "GlobalSystemSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$ + // CHECK-NEXT: ]}, + // CHECK-NEXT: { "cluster": "Empty", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ:conj_\$[0-9]+{int, LC[0-9]+, S[0-9]+, #[0-9]+}]]" } + // CHECK-NEXT: ]}, + // CHECK-NEXT: { "cluster": "Empty2", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ]]" } + // CHECK-NEXT: ]}, + // CHECK-NEXT: { "cluster": "Empty3", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ]]" } + // CHECK-NEXT: ]} + // CHECK-NEXT: ]}, + + nop(Empty, Empty2, Empty3); +} + +void _02_structs_with_members() { + clang_analyzer_dump_val(conjure()); // expected-warning {{lazyCompoundVal}} + aggr Aggr = conjure(); + aggr Aggr2 = Aggr; + aggr Aggr3 = Aggr2; + // All of these should refer to the exact same LCV, because all of + // these trivial copies refer to the original conjured value. + clang_analyzer_dump_val(Aggr); // expected-warning {{lazyCompoundVal}} + clang_analyzer_dump_val(Aggr2); // expected-warning {{lazyCompoundVal}} + clang_analyzer_dump_val(Aggr3); // expected-warning {{lazyCompoundVal}} + + // We should have the same Conjured symbol for "Aggr", "Aggr2" and "Aggr3". + // We used to have Derived symbols for the individual fields that were + // copied as part of copying the whole struct. + clang_analyzer_printState(); + // CHECK: "store": { "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "cluster": "GlobalInternalSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$ + // CHECK-NEXT: ]}, + // CHECK-NEXT: { "cluster": "GlobalSystemSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$ + // CHECK-NEXT: ]}, + // CHECK-NEXT: { "cluster": "Aggr", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[AGGR_CONJ:conj_\$[0-9]+{int, LC[0-9]+, S[0-9]+, #[0-9]+}]]" } + // CHECK-NEXT: ]}, + // CHECK-NEXT: { "cluster": "Aggr2", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[AGGR_CONJ]]" } + // CHECK-NEXT: ]}, + // CHECK-NEXT: { "cluster": "Aggr3", "pointer": "0x{{[0-9a-f]+}}", "items": [ + // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[AGGR_CONJ]]" } + // CHECK-NEXT: ]} + // CHECK-NEXT: ]}, + + nop(Aggr, Aggr2, Aggr3); +} + +// Tests that use `clang_analyzer_printState()` must share the analysis entry +// point, and have a strict ordering between. This is to meet the different +// `clang_analyzer_printState()` calls in a fixed relative ordering, thus +// FileCheck could check the stdouts. +void entrypoint() { + _01_empty_structs(); + _02_structs_with_members(); +} + +} // namespace trivial_struct_copy diff --git a/clang/test/Analysis/taint-generic.cpp b/clang/test/Analysis/taint-generic.cpp index 8092ac6f270b2..881c5baf889f6 100644 --- a/clang/test/Analysis/taint-generic.cpp +++ b/clang/test/Analysis/taint-generic.cpp @@ -1,10 +1,15 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,alpha.security.ArrayBoundV2 -analyzer-config optin.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml -Wno-format-security -verify -std=c++11 %s +// RUN: %clang_analyze_cc1 -std=c++11 -Wno-format-security \ +// RUN: -analyzer-checker=core,optin.taint,alpha.security.ArrayBoundV2,debug.ExprInspection \ +// RUN: -analyzer-config optin.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml \ +// RUN: -verify %s + +template void clang_analyzer_isTainted(T); #define BUFSIZE 10 int Buffer[BUFSIZE]; int scanf(const char*, ...); -int mySource1(); +template T mySource1(); int mySource3(); typedef struct _FILE FILE; @@ -136,3 +141,23 @@ void testReadingFromStdin(char **p) { fscanf(stdin, "%d", &n); Buffer[n] = 1; // expected-warning {{Potential out of bound access }} } + +namespace gh114270 { +class Empty {}; +class Aggr { +public: + int data; +}; + +void top() { + int Int = mySource1(); + clang_analyzer_isTainted(Int); // expected-warning {{YES}} + + Empty E = mySource1(); + clang_analyzer_isTainted(E); // expected-warning {{YES}} + + Aggr A = mySource1(); + clang_analyzer_isTainted(A); // expected-warning {{YES}} + clang_analyzer_isTainted(A.data); // expected-warning {{YES}} +} +} // namespace gh114270