Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
//===- Transforms/Instrumentation/OffloadSanitizer.h ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Pass to instrument offload code in order to detect errors and communicate
// them to the LLVM/Offload runtimes.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSANITIZER_H
#define LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSANITIZER_H

#include "llvm/IR/PassManager.h"

namespace llvm {

/// Module pass that instruments offload code in order to detect errors and
/// communicate them to the LLVM/Offload runtimes.
class OffloadSanitizerPass : public PassInfoMixin<OffloadSanitizerPass> {
public:
  /// Instrument the module \p M.
  ///
  /// \returns PreservedAnalyses::all() if the module was left untouched and
  /// PreservedAnalyses::none() otherwise.
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};

} // end namespace llvm

#endif // LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSANITIZER_H
1 change: 1 addition & 0 deletions llvm/lib/Passes/PassBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@
#include "llvm/Transforms/Instrumentation/MemProfiler.h"
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
#include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"
#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h"
#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h"
#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
#include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Passes/PassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ MODULE_PASS("tsan-module", ModuleThreadSanitizerPass())
MODULE_PASS("verify", VerifierPass())
MODULE_PASS("view-callgraph", CallGraphViewerPass())
MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass())
MODULE_PASS("offload-sanitizer", OffloadSanitizerPass())
#undef MODULE_PASS

#ifndef MODULE_PASS_WITH_PARAMS
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/Transforms/Scalar/FlattenCFG.h"
Expand Down Expand Up @@ -444,6 +445,10 @@ static cl::opt<bool> EnableHipStdPar(
cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false),
cl::Hidden);

// Hidden, off-by-default switch for the offload sanitizer. When set, the
// full-LTO "last" extension point callback registered in
// registerPassBuilderCallbacks adds OffloadSanitizerPass to the pipeline.
static cl::opt<bool>
EnableOffloadSanitizer("amdgpu-enable-offload-sanitizer",
cl::desc("Enable the offload sanitizer"),
cl::init(false), cl::Hidden);
static cl::opt<bool>
EnableAMDGPUAttributor("amdgpu-attributor-enable",
cl::desc("Enable AMDGPUAttributorPass"),
Expand Down Expand Up @@ -823,6 +828,9 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {

PB.registerFullLinkTimeOptimizationLastEPCallback(
[this](ModulePassManager &PM, OptimizationLevel Level) {
if (EnableOffloadSanitizer)
PM.addPass(OffloadSanitizerPass());

// We want to support the -lto-partitions=N option as "best effort".
// For that, we need to lower LDS earlier in the pipeline before the
// module is partitioned for codegen.
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ add_llvm_target(AMDGPUCodeGen
Core
GlobalISel
HipStdPar
Instrumentation
IPO
IRPrinter
Instrumentation
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Instrumentation/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ add_llvm_component_library(LLVMInstrumentation
MemProfiler.cpp
MemorySanitizer.cpp
NumericalStabilitySanitizer.cpp
OffloadSanitizer.cpp
IndirectCallPromotion.cpp
InstrOrderFile.cpp
InstrProfiling.cpp
Expand Down
160 changes: 160 additions & 0 deletions llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
//===-- OffloadSanitizer.cpp - Offload sanitizer --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
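//
// This file implements the offload sanitizer instrumentation pass. For every
// function selected for instrumentation, each call to `llvm.trap` is preceded
// by a call to `__offload_san_trap_info` that receives the current program
// counter, so the trap location can be communicated to the LLVM/Offload
// runtimes.
//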
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h"

#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

using namespace llvm;

#define DEBUG_TYPE "offload-sanitizer"

namespace {

/// Module-level worker behind OffloadSanitizerPass.
///
/// One instance is created per module. It caches the types and runtime
/// function declarations needed by the instrumentation and remembers every
/// call it emits through createCall().
class OffloadSanitizerImpl final {
public:
  OffloadSanitizerImpl(Module &M, FunctionAnalysisManager &FAM)
      : M(M), FAM(FAM), Ctx(M.getContext()) {}

  /// Instrument all functions of the module.
  /// \returns true if the module was modified.
  bool instrument();

private:
  /// \returns true if \p Fn is eligible for instrumentation.
  bool shouldInstrumentFunction(Function &Fn);

  /// Instrument a single function. \returns true if \p Fn was modified.
  bool instrumentFunction(Function &Fn);

  /// Insert the trap-info runtime call in front of every entry of
  /// \p TrapCalls.
  bool instrumentTrapInstructions(SmallVectorImpl<IntrinsicInst *> &TrapCalls);

  /// Return the callee cached in \p FC. On first use, declare (or look up) a
  /// non-variadic function \p Name with signature `RetTy(ArgTys...)` in the
  /// module and store it in \p FC.
  FunctionCallee getOrCreateFn(FunctionCallee &FC, StringRef Name, Type *RetTy,
                               ArrayRef<Type *> ArgTys) {
    if (FC)
      return FC;

    FC = M.getOrInsertFunction(
        Name, FunctionType::get(RetTy, ArgTys, /*isVarArg=*/false));
    return FC;
  }

  /// void __offload_san_trap_info(Int64Ty);
  FunctionCallee TrapInfoFn;
  FunctionCallee getTrapInfoFn() {
    return getOrCreateFn(TrapInfoFn, "__offload_san_trap_info", VoidTy,
                         {/*PC*/ Int64Ty});
  }

  /// Emit a call to \p Callee at the insertion point of \p IRB and record it
  /// in Calls.
  CallInst *createCall(IRBuilder<> &IRB, FunctionCallee Callee,
                       ArrayRef<Value *> Args = std::nullopt,
                       const Twine &Name = "") {
    CallInst *NewCall = IRB.CreateCall(Callee, Args, Name);
    Calls.push_back(NewCall);
    return NewCall;
  }

  /// All calls created via createCall(), in creation order.
  SmallVector<CallInst *> Calls;

  /// Emit a call to the `llvm.amdgcn.s.getpc` intrinsic at the insertion point
  /// of \p IRB and return its i64 result (named "PC").
  Value *getPC(IRBuilder<> &IRB) {
    Value *ProgramCounter = IRB.CreateIntrinsic(
        Int64Ty, Intrinsic::amdgcn_s_getpc, {}, nullptr, "PC");
    return ProgramCounter;
  }

  Module &M;
  FunctionAnalysisManager &FAM;
  LLVMContext &Ctx;

  // Frequently used types, created once per module. The default member
  // initializers rely on M and Ctx being declared (and thus initialized)
  // before them.
  Type *VoidTy = Type::getVoidTy(Ctx);
  Type *IntptrTy = M.getDataLayout().getIntPtrType(Ctx);
  PointerType *PtrTy = PointerType::getUnqual(Ctx);
  IntegerType *Int8Ty = Type::getInt8Ty(Ctx);
  IntegerType *Int32Ty = Type::getInt32Ty(Ctx);
  IntegerType *Int64Ty = Type::getInt64Ty(Ctx);

  const DataLayout &DL = M.getDataLayout();
};

} // end anonymous namespace

/// Decide whether \p Fn is instrumented.
///
/// A function is skipped if it is only a declaration, if its name matches one
/// of the excluded patterns, or if it carries the
/// `disable_sanitizer_instrumentation` attribute.
bool OffloadSanitizerImpl::shouldInstrumentFunction(Function &Fn) {
  // Without a body there is nothing to instrument.
  if (Fn.isDeclaration())
    return false;

  // Name-based exclusions (presumably offload runtime internals -- confirm
  // with the runtime's naming scheme).
  const StringRef Name = Fn.getName();
  const bool ExcludedByName = Name.contains("ompx") ||
                              Name.contains("__kmpc") ||
                              Name.starts_with("rpc_");
  if (ExcludedByName)
    return false;

  // Respect an explicit opt-out on the function.
  if (Fn.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
    return false;

  return true;
}

/// Insert `__offload_san_trap_info(PC)` directly in front of every trap in
/// \p TrapCalls.
///
/// \param TrapCalls the `llvm.trap` calls to instrument.
/// \returns true if at least one call was inserted, i.e. the IR was modified.
bool OffloadSanitizerImpl::instrumentTrapInstructions(
    SmallVectorImpl<IntrinsicInst *> &TrapCalls) {
  for (IntrinsicInst *II : TrapCalls) {
    // The builder inserts before the trap, so the PC query and the runtime
    // call are placed in front of it.
    IRBuilder<> IRB(II);
    createCall(IRB, getTrapInfoFn(), {getPC(IRB)});
  }

  // Every visited trap gets new instructions in front of it. Report the
  // modification so the pass does not claim that all analyses are preserved.
  return !TrapCalls.empty();
}

/// Instrument \p Fn if it is eligible.
///
/// Collects all calls to the `llvm.trap` intrinsic in \p Fn and hands them to
/// instrumentTrapInstructions().
///
/// \returns the result of instrumentTrapInstructions(), or false if \p Fn is
/// not instrumented at all.
bool OffloadSanitizerImpl::instrumentFunction(Function &Fn) {
  if (!shouldInstrumentFunction(Fn))
    return false;

  // Collect first and instrument afterwards, so that no instruction is
  // inserted while the function is being traversed.
  SmallVector<IntrinsicInst *> TrapCalls;
  for (Instruction &Inst : instructions(Fn)) {
    // Only plain call instructions to intrinsics are IntrinsicInsts.
    auto *II = dyn_cast<IntrinsicInst>(&Inst);
    if (II && II->getIntrinsicID() == Intrinsic::trap)
      TrapCalls.push_back(II);
  }

  return instrumentTrapInstructions(TrapCalls);
}

bool OffloadSanitizerImpl::instrument() {
bool Changed = false;

for (Function &Fn : M)
Changed |= instrumentFunction(Fn);

removeFromUsedLists(M, [&](Constant *C) {
if (!C->getName().starts_with("__offload_san"))
return false;
return Changed = true;
});

return Changed;
}

/// New pass manager entry point: run the offload sanitizer on \p M.
///
/// \returns PreservedAnalyses::all() if the module was not changed, otherwise
/// PreservedAnalyses::none().
PreservedAnalyses OffloadSanitizerPass::run(Module &M,
                                            ModuleAnalysisManager &AM) {
  auto &FAMProxy = AM.getResult<FunctionAnalysisManagerModuleProxy>(M);
  FunctionAnalysisManager &FAM = FAMProxy.getManager();

  OffloadSanitizerImpl Impl(M, FAM);
  const bool Changed = Impl.instrument();
  if (Changed) {
    // Dump the instrumented module when debugging this pass.
    LLVM_DEBUG(M.dump());
    return PreservedAnalyses::none();
  }

  return PreservedAnalyses::all();
}
56 changes: 56 additions & 0 deletions llvm/test/Instrumentation/OffloadSanitizer/basic.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
target triple = "amdgcn-amd-amdhsa"

; Test basic offload sanitizer trap instrumentation.

; RUN: opt < %s -passes=offload-sanitizer -S | FileCheck --check-prefixes=CHECK %s

; A trap followed by a regular return: the PC query and the call to
; __offload_san_trap_info must be inserted directly before the trap.
define void @test_trap1() {
; CHECK-LABEL: define void @test_trap1() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc()
; CHECK-NEXT: call void @__offload_san_trap_info(i64 [[PC]])
; CHECK-NEXT: call void @llvm.trap()
; CHECK-NEXT: ret void
;
entry:
call void @llvm.trap()
ret void
}

; Same as test_trap1, but the trap is followed by `unreachable` instead of a
; return; the instrumentation is still placed directly before the trap.
define void @test_trap2() {
; CHECK-LABEL: define void @test_trap2() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc()
; CHECK-NEXT: call void @__offload_san_trap_info(i64 [[PC]])
; CHECK-NEXT: call void @llvm.trap()
; CHECK-NEXT: unreachable
;
entry:
call void @llvm.trap()
unreachable
}

; A trap on only one side of a conditional branch: the instrumentation is
; added in the trapping block, and the other block stays unchanged.
define void @test_trap3(i1 %c) {
; CHECK-LABEL: define void @test_trap3(
; CHECK-SAME: i1 [[C:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br i1 [[C]], label %[[T:.*]], label %[[F:.*]]
; CHECK: [[T]]:
; CHECK-NEXT: [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc()
; CHECK-NEXT: call void @__offload_san_trap_info(i64 [[PC]])
; CHECK-NEXT: call void @llvm.trap()
; CHECK-NEXT: unreachable
; CHECK: [[F]]:
; CHECK-NEXT: ret void
;
entry:
br i1 %c, label %t ,label %f
t:
call void @llvm.trap()
unreachable
f:
ret void
}
1 change: 1 addition & 0 deletions offload/DeviceRTL/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ set(src_files
${source_directory}/Parallelism.cpp
${source_directory}/Profiling.cpp
${source_directory}/Reduction.cpp
${source_directory}/Sanitizer.cpp
${source_directory}/State.cpp
${source_directory}/Synchronization.cpp
${source_directory}/Tasking.cpp
Expand Down
3 changes: 3 additions & 0 deletions offload/DeviceRTL/include/DeviceUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ int64_t shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta, int32_t Width);

uint64_t ballotSync(uint64_t Mask, int32_t Pred);

/// Terminate the execution of this warp.
void terminateWarp();

/// Return \p LowBits and \p HighBits packed into a single 64 bit value.
uint64_t pack(uint32_t LowBits, uint32_t HighBits);

Expand Down
7 changes: 7 additions & 0 deletions offload/DeviceRTL/src/DeviceUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
int32_t Width);

uint64_t ballotSync(uint64_t Mask, int32_t Pred);
void terminateWarp();

/// AMDGCN Implementation
///
Expand All @@ -62,6 +63,8 @@ uint64_t ballotSync(uint64_t Mask, int32_t Pred) {
return Mask & __builtin_amdgcn_ballot_w64(Pred);
}

/// AMDGCN variant of terminateWarp(): terminate the execution of this warp
/// via the `endpgm` builtin.
void terminateWarp() { __builtin_amdgcn_endpgm(); }

bool isSharedMemPtr(const void *Ptr) {
return __builtin_amdgcn_is_shared(
(const __attribute__((address_space(0))) void *)Ptr);
Expand Down Expand Up @@ -89,6 +92,8 @@ uint64_t ballotSync(uint64_t Mask, int32_t Pred) {
return __nvvm_vote_ballot_sync(static_cast<uint32_t>(Mask), Pred);
}

/// NVVM variant of terminateWarp(): terminate the execution of this warp via
/// the `__nvvm_exit` builtin.
void terminateWarp() { __nvvm_exit(); }

bool isSharedMemPtr(const void *Ptr) { return __nvvm_isspacep_shared(Ptr); }

#pragma omp end declare variant
Expand Down Expand Up @@ -126,6 +131,8 @@ uint64_t utils::ballotSync(uint64_t Mask, int32_t Pred) {
return impl::ballotSync(Mask, Pred);
}

/// Terminate the execution of this warp by forwarding to the target-specific
/// implementation.
void utils::terminateWarp() {
  impl::terminateWarp();
}

bool utils::isSharedMemPtr(void *Ptr) { return impl::isSharedMemPtr(Ptr); }

extern "C" {
Expand Down
Loading