From 5d1bb1db037ed9caa32bddfdb0d8db2796c749ab Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Fri, 2 Aug 2024 16:12:27 -0700 Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20change?= =?UTF-8?q?s=20to=20main=20this=20commit=20is=20based=20on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.4 [skip ci] --- compiler-rt/lib/asan/asan_globals.cpp | 130 ++++++++++++++---- .../llvm/Transforms/Utils/ModuleUtils.h | 9 ++ llvm/lib/Transforms/Utils/ModuleUtils.cpp | 44 ++++++ .../instrument_late_initializer.ll | 65 +++++++++ .../Transforms/Utils/ModuleUtilsTest.cpp | 125 +++++++++++++++-- 5 files changed, 332 insertions(+), 41 deletions(-) create mode 100644 llvm/test/Instrumentation/AddressSanitizer/instrument_late_initializer.ll diff --git a/compiler-rt/lib/asan/asan_globals.cpp b/compiler-rt/lib/asan/asan_globals.cpp index cc5308a24fe89..80030af934cc6 100644 --- a/compiler-rt/lib/asan/asan_globals.cpp +++ b/compiler-rt/lib/asan/asan_globals.cpp @@ -47,8 +47,6 @@ struct DynInitGlobal { bool initialized = false; DynInitGlobal *next = nullptr; }; -typedef IntrusiveList DynInitGlobals; -static DynInitGlobals dynamic_init_globals SANITIZER_GUARDED_BY(mu_for_globals); // We want to remember where a certain range of globals was registered. struct GlobalRegistrationSite { @@ -72,6 +70,25 @@ static ListOfGlobals &GlobalsByIndicator(uptr odr_indicator) return (*globals_by_indicator)[odr_indicator]; } +static const char *current_dynamic_init_module_name + SANITIZER_GUARDED_BY(mu_for_globals) = nullptr; + +using DynInitGlobalsByModule = + DenseMap>; + +// TODO: Add a NoDestroy helper, this pattern is very common in sanitizers. 
+static DynInitGlobalsByModule &DynInitGlobals() + SANITIZER_REQUIRES(mu_for_globals) { + static DynInitGlobalsByModule *globals_by_module = nullptr; + if (!globals_by_module) { + alignas(alignof(DynInitGlobalsByModule)) static char + placeholder[sizeof(DynInitGlobalsByModule)]; + globals_by_module = new (placeholder) DynInitGlobalsByModule(); + } + + return *globals_by_module; +} + ALWAYS_INLINE void PoisonShadowForGlobal(const Global *g, u8 value) { FastPoisonShadow(g->beg, g->size_with_redzone, value); } @@ -257,8 +274,8 @@ static void RegisterGlobal(const Global *g) SANITIZER_REQUIRES(mu_for_globals) { AddGlobalToList(list_of_all_globals, g); if (g->has_dynamic_init) { - dynamic_init_globals.push_back(new (GetGlobalLowLevelAllocator()) - DynInitGlobal{*g, false}); + DynInitGlobals()[g->module_name].push_back( + new (GetGlobalLowLevelAllocator()) DynInitGlobal{*g, false}); } } @@ -284,20 +301,42 @@ static void UnregisterGlobal(const Global *g) } } -void StopInitOrderChecking() { - if (!flags()->check_initialization_order) - return; - Lock lock(&mu_for_globals); - flags()->check_initialization_order = false; - for (const DynInitGlobal &dyn_g : dynamic_init_globals) { +static void UnpoisonDynamicGlobals(IntrusiveList &dyn_globals, + bool mark_initialized) { + for (auto &dyn_g : dyn_globals) { const Global *g = &dyn_g.g; + if (dyn_g.initialized) + continue; // Unpoison the whole global. PoisonShadowForGlobal(g, 0); // Poison redzones back. 
PoisonRedZones(*g); + if (mark_initialized) + dyn_g.initialized = true; } } +static void PoisonDynamicGlobals( + const IntrusiveList &dyn_globals) { + for (auto &dyn_g : dyn_globals) { + const Global *g = &dyn_g.g; + if (dyn_g.initialized) + continue; + PoisonShadowForGlobal(g, kAsanInitializationOrderMagic); + } +} + +void StopInitOrderChecking() { + if (!flags()->check_initialization_order) + return; + Lock lock(&mu_for_globals); + flags()->check_initialization_order = false; + DynInitGlobals().forEach([&](auto &kv) { + UnpoisonDynamicGlobals(kv.second, /*mark_initialized=*/false); + return true; + }); +} + static bool IsASCII(unsigned char c) { return /*0x00 <= c &&*/ c <= 0x7F; } const char *MaybeDemangleGlobalName(const char *name) { @@ -456,36 +495,73 @@ void __asan_before_dynamic_init(const char *module_name) { CHECK(module_name); CHECK(AsanInited()); Lock lock(&mu_for_globals); + if (current_dynamic_init_module_name == module_name) + return; if (flags()->report_globals >= 3) Printf("DynInitPoison module: %s\n", module_name); - for (DynInitGlobal &dyn_g : dynamic_init_globals) { - const Global *g = &dyn_g.g; - if (dyn_g.initialized) - continue; - if (g->module_name != module_name) - PoisonShadowForGlobal(g, kAsanInitializationOrderMagic); - else if (!strict_init_order) - dyn_g.initialized = true; + + if (current_dynamic_init_module_name == nullptr) { + // First call, poison all globals from other modules. + DynInitGlobals().forEach([&](auto &kv) { + if (kv.first != module_name) { + PoisonDynamicGlobals(kv.second); + } else { + UnpoisonDynamicGlobals(kv.second, + /*mark_initialized=*/!strict_init_order); + } + return true; + }); + } else { + // Module changed. 
+ PoisonDynamicGlobals(DynInitGlobals()[current_dynamic_init_module_name]); + UnpoisonDynamicGlobals(DynInitGlobals()[module_name], + /*mark_initialized=*/!strict_init_order); } + current_dynamic_init_module_name = module_name; } +#if SANITIZER_CAN_USE_PREINIT_ARRAY +static bool allow_after_dynamic_init = false; + +static void __attribute__((used)) AfterDynamicInit(void) { + if (flags()->report_globals >= 3) + Printf("AfterDynamicInit\n"); + if (allow_after_dynamic_init) + return; + allow_after_dynamic_init = true; + __asan_after_dynamic_init(); +} + +// Maybe SANITIZER_CAN_USE_PREINIT_ARRAY is too conservative for `.init_array`. +__attribute__((section(".init_array"), constructor(10000), + used)) static void (*__init)(void) = AfterDynamicInit; +#endif // SANITIZER_CAN_USE_PREINIT_ARRAY + // This method runs immediately after dynamic initialization in each TU, when // all dynamically initialized globals except for those defined in the current // TU are poisoned. It simply unpoisons all dynamically initialized globals. void __asan_after_dynamic_init() { +#if SANITIZER_CAN_USE_PREINIT_ARRAY + // Ignore all callbacks until the first one from .init_array, which should + // happen after all C++ global constructors. + if (!allow_after_dynamic_init) + return; +#endif + if (!flags()->check_initialization_order || !CanPoisonMemory()) return; CHECK(AsanInited()); Lock lock(&mu_for_globals); + if (!current_dynamic_init_module_name) + return; + if (flags()->report_globals >= 3) Printf("DynInitUnpoison\n"); - for (const DynInitGlobal &dyn_g : dynamic_init_globals) { - const Global *g = &dyn_g.g; - if (!dyn_g.initialized) { - // Unpoison the whole global. - PoisonShadowForGlobal(g, 0); - // Poison redzones back. 
- PoisonRedZones(*g); - } - } + + DynInitGlobals().forEach([&](auto &kv) { + UnpoisonDynamicGlobals(kv.second, /*mark_initialized=*/false); + return true; + }); + + current_dynamic_init_module_name = nullptr; } diff --git a/llvm/include/llvm/Transforms/Utils/ModuleUtils.h b/llvm/include/llvm/Transforms/Utils/ModuleUtils.h index 1ec87505544f8..37d6a3e33315a 100644 --- a/llvm/include/llvm/Transforms/Utils/ModuleUtils.h +++ b/llvm/include/llvm/Transforms/Utils/ModuleUtils.h @@ -30,6 +30,7 @@ class FunctionCallee; class GlobalIFunc; class GlobalValue; class Constant; +class ConstantStruct; class Value; class Type; @@ -44,6 +45,14 @@ void appendToGlobalCtors(Module &M, Function *F, int Priority, void appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data = nullptr); +/// Apply 'Fn' to the list of global ctors of module M and replace constructor +/// record with the one returned by `Fn`. If `nullptr` was returned, the +/// corresponding constructor will be removed from the array. For details see +/// https://llvm.org/docs/LangRef.html#the-llvm-global-ctors-global-variable +using GlobalCtorUpdateFn = llvm::function_ref; +void updateGlobalCtors(Module &M, const GlobalCtorUpdateFn &Fn); +void updateGlobalDtors(Module &M, const GlobalCtorUpdateFn &Fn); + /// Sets the KCFI type for the function. Used for compiler-generated functions /// that are indirectly called in instrumented code. 
void setKCFIType(Module &M, Function &F, StringRef MangledType); diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp index 122279160cc7e..e443d820a2256 100644 --- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -79,6 +79,50 @@ void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *D appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data); } +static void updateGlobalArray(StringRef ArrayName, Module &M, + const GlobalCtorUpdateFn &Fn) { + GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName); + if (!GVCtor) + return; + + IRBuilder<> IRB(M.getContext()); + SmallVector CurrentCtors; + bool Changed = false; + StructType *EltTy = + cast(GVCtor->getValueType()->getArrayElementType()); + if (Constant *Init = GVCtor->getInitializer()) { + CurrentCtors.reserve(Init->getNumOperands()); + for (Value *OP : Init->operands()) { + Constant *C = cast(OP); + Constant *NewC = Fn(C); + Changed |= (!NewC || NewC != C); + if (NewC) + CurrentCtors.push_back(NewC); + } + } + if (!Changed) + return; + + GVCtor->eraseFromParent(); + + // Create a new initializer. + ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size()); + Constant *NewInit = ConstantArray::get(AT, CurrentCtors); + + // Create the new global variable and replace all uses of + // the old global variable with the new one. 
+ (void)new GlobalVariable(M, NewInit->getType(), false, + GlobalValue::AppendingLinkage, NewInit, ArrayName); +} + +void llvm::updateGlobalCtors(Module &M, const GlobalCtorUpdateFn &Fn) { + updateGlobalArray("llvm.global_ctors", M, Fn); +} + +void llvm::updateGlobalDtors(Module &M, const GlobalCtorUpdateFn &Fn) { + updateGlobalArray("llvm.global_dtors", M, Fn); +} + static void collectUsedGlobals(GlobalVariable *GV, SmallSetVector &Init) { if (!GV || !GV->hasInitializer()) diff --git a/llvm/test/Instrumentation/AddressSanitizer/instrument_late_initializer.ll b/llvm/test/Instrumentation/AddressSanitizer/instrument_late_initializer.ll new file mode 100644 index 0000000000000..45d526a42c9f7 --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/instrument_late_initializer.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --global-value-regex ".*global_ctors.*" --version 5 +; RUN: opt < %s -passes=asan -S | FileCheck %s +; RUN: opt < %s -passes=asan -S -asan-initialization-order=0 | FileCheck %s --check-prefixes=NOINIT + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__late_ctor, ptr null }] +@g = internal global i32 0, align 4, sanitize_address_dyninit ; With dynamic initializer. + +;. +; CHECK: @llvm.global_ctors = appending global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__late_ctor, ptr null }, { i32, ptr, ptr } { i32 1, ptr @asan.module_ctor, ptr @asan.module_ctor }] +;. +; NOINIT: @llvm.global_ctors = appending global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__late_ctor, ptr null }, { i32, ptr, ptr } { i32 1, ptr @asan.module_ctor, ptr @asan.module_ctor }] +;. 
+define i32 @initializer() uwtable { +; CHECK-LABEL: define i32 @initializer( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret i32 42 +; +; NOINIT-LABEL: define i32 @initializer( +; NOINIT-SAME: ) #[[ATTR0:[0-9]+]] { +; NOINIT-NEXT: [[ENTRY:.*:]] +; NOINIT-NEXT: ret i32 42 +; +entry: + ret i32 42 +} + +define internal void @__cxx_global_var_init() section ".text.startup" { +; CHECK-LABEL: define internal void @__cxx_global_var_init() section ".text.startup" { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 @initializer() +; CHECK-NEXT: store i32 [[CALL]], ptr @g, align 4 +; CHECK-NEXT: ret void +; +; NOINIT-LABEL: define internal void @__cxx_global_var_init() section ".text.startup" { +; NOINIT-NEXT: [[ENTRY:.*:]] +; NOINIT-NEXT: [[CALL:%.*]] = call i32 @initializer() +; NOINIT-NEXT: store i32 [[CALL]], ptr @g, align 4 +; NOINIT-NEXT: ret void +; +entry: + %call = call i32 @initializer() + store i32 %call, ptr @g, align 4 + ret void +} + +define internal void @__late_ctor() sanitize_address section ".text.startup" { +; CHECK-LABEL: define internal void @__late_ctor( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] section ".text.startup" { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @__asan_before_dynamic_init(i64 ptrtoint (ptr @___asan_gen_ to i64)) +; CHECK-NEXT: call void @__asan_after_dynamic_init() +; CHECK-NEXT: ret void +; +; NOINIT-LABEL: define internal void @__late_ctor( +; NOINIT-SAME: ) #[[ATTR1:[0-9]+]] section ".text.startup" { +; NOINIT-NEXT: [[ENTRY:.*:]] +; NOINIT-NEXT: ret void +; +entry: + ret void +} diff --git a/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp b/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp index e1bc58fda0e38..582448a14ba8a 100644 --- a/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp +++ b/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp @@ -9,6 +9,7 @@ #include "llvm/Transforms/Utils/ModuleUtils.h" #include "llvm/ADT/StringRef.h" #include 
"llvm/AsmParser/Parser.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/SourceMgr.h" @@ -16,7 +17,7 @@ using namespace llvm; -static std::unique_ptr parseIR(LLVMContext &C, const char *IR) { +static std::unique_ptr parseIR(LLVMContext &C, StringRef IR) { SMDiagnostic Err; std::unique_ptr Mod = parseAssemblyString(IR, Err, C); if (!Mod) @@ -24,12 +25,12 @@ static std::unique_ptr parseIR(LLVMContext &C, const char *IR) { return Mod; } -static int getUsedListSize(Module &M, StringRef Name) { - auto *UsedList = M.getGlobalVariable(Name); - if (!UsedList) +static int getListSize(Module &M, StringRef Name) { + auto *List = M.getGlobalVariable(Name); + if (!List) return 0; - auto *UsedListBaseArrayType = cast(UsedList->getValueType()); - return UsedListBaseArrayType->getNumElements(); + auto *T = cast(List->getValueType()); + return T->getNumElements(); } TEST(ModuleUtils, AppendToUsedList1) { @@ -41,13 +42,13 @@ TEST(ModuleUtils, AppendToUsedList1) { for (auto &G : M->globals()) { Globals.push_back(&G); } - EXPECT_EQ(0, getUsedListSize(*M, "llvm.compiler.used")); + EXPECT_EQ(0, getListSize(*M, "llvm.compiler.used")); appendToCompilerUsed(*M, Globals); - EXPECT_EQ(1, getUsedListSize(*M, "llvm.compiler.used")); + EXPECT_EQ(1, getListSize(*M, "llvm.compiler.used")); - EXPECT_EQ(0, getUsedListSize(*M, "llvm.used")); + EXPECT_EQ(0, getListSize(*M, "llvm.used")); appendToUsed(*M, Globals); - EXPECT_EQ(1, getUsedListSize(*M, "llvm.used")); + EXPECT_EQ(1, getListSize(*M, "llvm.used")); } TEST(ModuleUtils, AppendToUsedList2) { @@ -59,11 +60,107 @@ TEST(ModuleUtils, AppendToUsedList2) { for (auto &G : M->globals()) { Globals.push_back(&G); } - EXPECT_EQ(0, getUsedListSize(*M, "llvm.compiler.used")); + EXPECT_EQ(0, getListSize(*M, "llvm.compiler.used")); appendToCompilerUsed(*M, Globals); - EXPECT_EQ(1, getUsedListSize(*M, "llvm.compiler.used")); + EXPECT_EQ(1, getListSize(*M, "llvm.compiler.used")); - 
EXPECT_EQ(0, getUsedListSize(*M, "llvm.used")); + EXPECT_EQ(0, getListSize(*M, "llvm.used")); appendToUsed(*M, Globals); - EXPECT_EQ(1, getUsedListSize(*M, "llvm.used")); + EXPECT_EQ(1, getListSize(*M, "llvm.used")); +} + +using AppendFnType = decltype(&appendToGlobalCtors); +using UpdateFnType = decltype(&updateGlobalCtors); +using ParamType = std::tuple; +class ModuleUtilsTest : public testing::TestWithParam { +public: + StringRef arrayName() const { return std::get<0>(GetParam()); } + AppendFnType appendFn() const { return std::get(GetParam()); } + UpdateFnType updateFn() const { return std::get(GetParam()); } +}; + +INSTANTIATE_TEST_SUITE_P( + ModuleUtilsTestCtors, ModuleUtilsTest, + ::testing::Values(ParamType{"llvm.global_ctors", &appendToGlobalCtors, + &updateGlobalCtors}, + ParamType{"llvm.global_dtors", &appendToGlobalDtors, + &updateGlobalDtors})); + +TEST_P(ModuleUtilsTest, AppendToMissingArray) { + LLVMContext C; + + std::unique_ptr M = parseIR(C, ""); + + EXPECT_EQ(0, getListSize(*M, arrayName())); + Function *F = cast( + M->getOrInsertFunction("ctor", Type::getVoidTy(C)).getCallee()); + appendFn()(*M, F, 11, F); + ASSERT_EQ(1, getListSize(*M, arrayName())); + + ConstantArray *CA = dyn_cast( + M->getGlobalVariable(arrayName())->getInitializer()); + ASSERT_NE(nullptr, CA); + ConstantStruct *CS = dyn_cast(CA->getOperand(0)); + ASSERT_NE(nullptr, CS); + ConstantInt *Pri = dyn_cast(CS->getOperand(0)); + ASSERT_NE(nullptr, Pri); + EXPECT_EQ(11u, Pri->getLimitedValue()); + EXPECT_EQ(F, dyn_cast(CS->getOperand(1))); + EXPECT_EQ(F, CS->getOperand(2)); +} + +TEST_P(ModuleUtilsTest, AppendToArray) { + LLVMContext C; + + std::unique_ptr M = + parseIR(C, (R"(@)" + arrayName() + + R"( = appending global [2 x { i32, ptr, ptr }] [ + { i32, ptr, ptr } { i32 65535, ptr null, ptr null }, + { i32, ptr, ptr } { i32 0, ptr null, ptr null }] + )") + .str()); + + EXPECT_EQ(2, getListSize(*M, arrayName())); + appendFn()( + *M, + cast( + M->getOrInsertFunction("ctor", 
Type::getVoidTy(C)).getCallee()), + 11, nullptr); + EXPECT_EQ(3, getListSize(*M, arrayName())); +} + +TEST_P(ModuleUtilsTest, UpdateArray) { + LLVMContext C; + + std::unique_ptr M = + parseIR(C, (R"(@)" + arrayName() + + R"( = appending global [2 x { i32, ptr, ptr }] [ + { i32, ptr, ptr } { i32 65535, ptr null, ptr null }, + { i32, ptr, ptr } { i32 0, ptr null, ptr null }] + )") + .str()); + + EXPECT_EQ(2, getListSize(*M, arrayName())); + updateFn()(*M, [](Constant *C) -> Constant * { + ConstantStruct *CS = dyn_cast(C); + if (!CS) + return nullptr; + StructType *EltTy = cast(C->getType()); + Constant *CSVals[3] = { + ConstantInt::getSigned(CS->getOperand(0)->getType(), 12), + CS->getOperand(1), + CS->getOperand(2), + }; + return ConstantStruct::get(EltTy, + ArrayRef(CSVals, EltTy->getNumElements())); + }); + EXPECT_EQ(1, getListSize(*M, arrayName())); + ConstantArray *CA = dyn_cast( + M->getGlobalVariable(arrayName())->getInitializer()); + ASSERT_NE(nullptr, CA); + ConstantStruct *CS = dyn_cast(CA->getOperand(0)); + ASSERT_NE(nullptr, CS); + ConstantInt *Pri = dyn_cast(CS->getOperand(0)); + ASSERT_NE(nullptr, Pri); + EXPECT_EQ(12u, Pri->getLimitedValue()); }