Skip to content

Commit cbf8353

Browse files
authored
Merge pull request #9 from EthanLuisMcDonough/gpusan_shared
GPU Sanitizer support for shared memory
2 parents acf4219 + 8703c6d commit cbf8353

File tree

16 files changed

+805
-167
lines changed

16 files changed

+805
-167
lines changed

llvm/lib/Transforms/Instrumentation/GPUSan.cpp

Lines changed: 320 additions & 113 deletions
Large diffs are not rendered by default.

offload/DeviceRTL/src/Sanitizer.cpp

Lines changed: 131 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ struct AllocationInfoGlobalTy {
3333
uint64_t Length;
3434
uint32_t Tag;
3535
};
36+
struct AllocationInfoSharedTy {
37+
_AS_PTR(void, AllocationKind::SHARED) Start;
38+
uint64_t Length;
39+
uint32_t Tag;
40+
};
3641

3742
template <AllocationKind AK> struct AllocationInfoTy {};
3843
template <> struct AllocationInfoTy<AllocationKind::GLOBAL> {
@@ -41,6 +46,9 @@ template <> struct AllocationInfoTy<AllocationKind::GLOBAL> {
4146
template <> struct AllocationInfoTy<AllocationKind::LOCAL> {
4247
using ASVoidPtrTy = AllocationInfoLocalTy;
4348
};
49+
template <> struct AllocationInfoTy<AllocationKind::SHARED> {
50+
using ASVoidPtrTy = AllocationInfoSharedTy;
51+
};
4452

4553
template <>
4654
AllocationPtrTy<AllocationKind::LOCAL>
@@ -88,7 +96,7 @@ template <AllocationKind AK> struct AllocationTracker {
8896
// Reserve the 0 element for the null pointer in global space.
8997
auto &AllocArr = getAllocationArray<AK>();
9098
auto &Cnt = AllocArr.Cnt;
91-
if constexpr (AK == AllocationKind::LOCAL)
99+
if constexpr (AK == AllocationKind::LOCAL || AK == AllocationKind::SHARED)
92100
Slot = ++Cnt;
93101
if (Slot == -1)
94102
Slot = ++Cnt;
@@ -155,10 +163,14 @@ template <AllocationKind AK> struct AllocationTracker {
155163
if constexpr (AK == AllocationKind::LOCAL)
156164
if (Length == 0)
157165
Length = getAllocation<AK>(AP, AccessId, PC).Length;
158-
if constexpr (AK == AllocationKind::GLOBAL)
159-
if (AP.Magic != SanitizerConfig<AllocationKind::GLOBAL>::MAGIC)
166+
if constexpr (AK == AllocationKind::GLOBAL ||
167+
AK == AllocationKind::SHARED) {
168+
if (AP.Magic != SanitizerConfig<AllocationKind::GLOBAL>::MAGIC) {
160169
__sanitizer_trap_info_ptr->garbagePointer<AK>(AP, (void *)P, SourceId,
161170
PC);
171+
}
172+
}
173+
162174
int64_t Offset = AP.Offset;
163175
if (OMP_UNLIKELY(
164176
Offset > Length - Size ||
@@ -212,29 +224,40 @@ template <AllocationKind AK> struct AllocationTracker {
212224
__sanitizer_trap_info_ptr->memoryLeak<AK>(A, Slot);
213225
}
214226
}
227+
228+
[[clang::disable_sanitizer_instrumentation]] static bool
229+
checkPtr(void *P, int64_t SourceId, uint64_t PC) {
230+
auto AP = AllocationPtrTy<AK>::get(P);
231+
if ((AllocationKind)AP.Kind != AK)
232+
return false;
233+
if (AP.Magic != SanitizerConfig<AK>::MAGIC)
234+
__sanitizer_trap_info_ptr->garbagePointer<AK>(AP, P, SourceId, PC);
235+
return true;
236+
}
215237
};
216238

217239
template <AllocationKind AK>
218240
AllocationArrayTy<AK>
219241
Allocations<AK>::Arr[SanitizerConfig<AK>::NUM_ALLOCATION_ARRAYS];
220242

221-
static void checkForMagic(bool IsGlobal, void *P, int64_t SourceId,
222-
uint64_t PC) {
223-
if (IsGlobal) {
224-
auto AP = AllocationPtrTy<AllocationKind::GLOBAL>::get(P);
225-
if (AP.Magic != SanitizerConfig<AllocationKind::GLOBAL>::MAGIC)
226-
__sanitizer_trap_info_ptr->garbagePointer<AllocationKind::GLOBAL>(
227-
AP, P, SourceId, PC);
228-
} else {
229-
auto AP = AllocationPtrTy<AllocationKind::LOCAL>::get(P);
230-
if (AP.Magic != SanitizerConfig<AllocationKind::LOCAL>::MAGIC)
231-
__sanitizer_trap_info_ptr->garbagePointer<AllocationKind::LOCAL>(
232-
AP, P, SourceId, PC);
233-
}
243+
[[clang::disable_sanitizer_instrumentation,
244+
gnu::always_inline]] static AllocationKind
245+
getFakePtrType(void *P, int64_t SourceId, uint64_t PC) {
246+
if (AllocationTracker<AllocationKind::SHARED>::checkPtr(P, SourceId, PC))
247+
return AllocationKind::SHARED;
248+
if (AllocationTracker<AllocationKind::GLOBAL>::checkPtr(P, SourceId, PC))
249+
return AllocationKind::GLOBAL;
250+
if (AllocationTracker<AllocationKind::LOCAL>::checkPtr(P, SourceId, PC))
251+
return AllocationKind::LOCAL;
252+
253+
// Couldn't determine type
254+
__sanitizer_trap_info_ptr->garbagePointer<AllocationKind::LOCAL>(
255+
AllocationPtrTy<AllocationKind::LOCAL>::get(P), P, SourceId, PC);
234256
}
235257

236258
extern "C" {
237259

260+
#define REAL_PTR_IS_SHARED(PTR) (isSharedMemPtr(PTR))
238261
#define REAL_PTR_IS_LOCAL(PTR) (isThreadLocalMemPtr(PTR))
239262
#define IS_GLOBAL(PTR) ((uintptr_t)PTR & (1UL << 63))
240263

@@ -253,6 +276,14 @@ extern "C" {
253276
return AllocationTracker<AllocationKind::GLOBAL>::create(
254277
Start, Length, AllocationId, -1, SourceId, PC);
255278
}
279+
[[clang::disable_sanitizer_instrumentation, gnu::flatten, gnu::always_inline,
280+
gnu::used, gnu::retain]] _AS_PTR(void, AllocationKind::SHARED)
281+
ompx_new_shared(_AS_PTR(void, AllocationKind::SHARED) Start,
282+
uint64_t Length, int64_t AllocationId, int64_t SourceId,
283+
uint64_t PC) {
284+
return AllocationTracker<AllocationKind::SHARED>::create(
285+
Start, Length, AllocationId, 0, SourceId, PC);
286+
}
256287
[[clang::disable_sanitizer_instrumentation, gnu::flatten, gnu::always_inline,
257288
gnu::used, gnu::retain]] void
258289
__sanitizer_register_host(_AS_PTR(void, AllocationKind::GLOBAL) Start,
@@ -264,6 +295,9 @@ __sanitizer_register_host(_AS_PTR(void, AllocationKind::GLOBAL) Start,
264295
gnu::used, gnu::retain]] void *
265296
ompx_new(void *Start, uint64_t Length, int64_t AllocationId, int64_t SourceId,
266297
uint64_t PC) {
298+
if (REAL_PTR_IS_SHARED(Start))
299+
return (void *)ompx_new_shared((_AS_PTR(void, AllocationKind::SHARED))Start,
300+
Length, AllocationId, SourceId, PC);
267301
if (REAL_PTR_IS_LOCAL(Start))
268302
return (void *)ompx_new_local((_AS_PTR(void, AllocationKind::LOCAL))Start,
269303
Length, AllocationId, SourceId, PC);
@@ -290,14 +324,23 @@ ompx_free_local(_AS_PTR(void, AllocationKind::LOCAL) P, int64_t SourceId) {
290324
ompx_free_global(_AS_PTR(void, AllocationKind::GLOBAL) P, int64_t SourceId) {
291325
return AllocationTracker<AllocationKind::GLOBAL>::remove(P, SourceId);
292326
}
327+
[[clang::disable_sanitizer_instrumentation, gnu::flatten, gnu::always_inline,
328+
gnu::used, gnu::retain]] void
329+
ompx_free_shared(_AS_PTR(void, AllocationKind::SHARED) P, int64_t SourceId) {
330+
return AllocationTracker<AllocationKind::SHARED>::remove(P, SourceId);
331+
}
293332
[[clang::disable_sanitizer_instrumentation, gnu::flatten, gnu::always_inline,
294333
gnu::used, gnu::retain]] void
295334
ompx_free(void *P, int64_t SourceId, uint64_t PC) {
296-
bool IsGlobal = IS_GLOBAL(P);
297-
checkForMagic(IsGlobal, P, SourceId, PC);
298-
if (IsGlobal)
335+
auto PtrKind = getFakePtrType(P, SourceId, PC);
336+
switch (PtrKind) {
337+
case AllocationKind::GLOBAL:
299338
return ompx_free_global((_AS_PTR(void, AllocationKind::GLOBAL))P, SourceId);
300-
return ompx_free_local((_AS_PTR(void, AllocationKind::LOCAL))P, SourceId);
339+
case AllocationKind::LOCAL:
340+
return ompx_free_local((_AS_PTR(void, AllocationKind::LOCAL))P, SourceId);
341+
case AllocationKind::SHARED:
342+
return ompx_free_shared((_AS_PTR(void, AllocationKind::SHARED))P, SourceId);
343+
}
301344
}
302345

303346
[[clang::disable_sanitizer_instrumentation, gnu::flatten, gnu::always_inline,
@@ -313,16 +356,28 @@ ompx_free(void *P, int64_t SourceId, uint64_t PC) {
313356
return AllocationTracker<AllocationKind::GLOBAL>::advance(P, Offset,
314357
SourceId);
315358
}
359+
[[clang::disable_sanitizer_instrumentation, gnu::flatten, gnu::always_inline,
360+
gnu::used, gnu::retain]] _AS_PTR(void, AllocationKind::SHARED)
361+
ompx_gep_shared(_AS_PTR(void, AllocationKind::SHARED) P, uint64_t Offset,
362+
int64_t SourceId) {
363+
return AllocationTracker<AllocationKind::SHARED>::advance(P, Offset,
364+
SourceId);
365+
}
316366
[[clang::disable_sanitizer_instrumentation, gnu::flatten, gnu::always_inline,
317367
gnu::used, gnu::retain]] void *
318368
ompx_gep(void *P, uint64_t Offset, int64_t SourceId) {
319-
bool IsGlobal = IS_GLOBAL(P);
320-
checkForMagic(IsGlobal, P, SourceId, /*PC=*/0);
321-
if (IsGlobal)
369+
auto PtrKind = getFakePtrType(P, SourceId, 0);
370+
switch (PtrKind) {
371+
case AllocationKind::GLOBAL:
322372
return (void *)ompx_gep_global((_AS_PTR(void, AllocationKind::GLOBAL))P,
323373
Offset, SourceId);
324-
return (void *)ompx_gep_local((_AS_PTR(void, AllocationKind::LOCAL))P, Offset,
325-
SourceId);
374+
case AllocationKind::LOCAL:
375+
return (void *)ompx_gep_local((_AS_PTR(void, AllocationKind::LOCAL))P,
376+
Offset, SourceId);
377+
case AllocationKind::SHARED:
378+
return (void *)ompx_gep_shared((_AS_PTR(void, AllocationKind::SHARED))P,
379+
Offset, SourceId);
380+
}
326381
}
327382

328383
[[clang::disable_sanitizer_instrumentation, gnu::flatten, gnu::always_inline,
@@ -339,17 +394,29 @@ ompx_gep(void *P, uint64_t Offset, int64_t SourceId) {
339394
return AllocationTracker<AllocationKind::GLOBAL>::check(P, Size, AccessId,
340395
SourceId, PC);
341396
}
397+
[[clang::disable_sanitizer_instrumentation, gnu::flatten, gnu::always_inline,
398+
gnu::used, gnu::retain]] _AS_PTR(void, AllocationKind::SHARED)
399+
ompx_check_shared(_AS_PTR(void, AllocationKind::SHARED) P, uint64_t Size,
400+
uint64_t AccessId, int64_t SourceId, uint64_t PC) {
401+
return AllocationTracker<AllocationKind::SHARED>::check(P, Size, AccessId,
402+
SourceId, PC);
403+
}
342404
[[clang::disable_sanitizer_instrumentation, gnu::flatten, gnu::always_inline,
343405
gnu::used, gnu::retain]] void *
344406
ompx_check(void *P, uint64_t Size, uint64_t AccessId, int64_t SourceId,
345407
uint64_t PC) {
346-
bool IsGlobal = IS_GLOBAL(P);
347-
checkForMagic(IsGlobal, P, SourceId, PC);
348-
if (IsGlobal)
408+
auto PtrKind = getFakePtrType(P, SourceId, PC);
409+
switch (PtrKind) {
410+
case AllocationKind::GLOBAL:
349411
return (void *)ompx_check_global((_AS_PTR(void, AllocationKind::GLOBAL))P,
350412
Size, AccessId, SourceId, PC);
351-
return (void *)ompx_check_local((_AS_PTR(void, AllocationKind::LOCAL))P, Size,
352-
AccessId, SourceId, PC);
413+
case AllocationKind::LOCAL:
414+
return (void *)ompx_check_local((_AS_PTR(void, AllocationKind::LOCAL))P,
415+
Size, AccessId, SourceId, PC);
416+
case AllocationKind::SHARED:
417+
return (void *)ompx_check_shared((_AS_PTR(void, AllocationKind::SHARED))P,
418+
Size, AccessId, SourceId, PC);
419+
}
353420
}
354421

355422
[[clang::disable_sanitizer_instrumentation, gnu::flatten, gnu::always_inline,
@@ -374,6 +441,17 @@ ompx_check(void *P, uint64_t Size, uint64_t AccessId, int64_t SourceId,
374441
P, Start, Length, Tag, Size, AccessId, SourceId, PC);
375442
}
376443

444+
[[clang::disable_sanitizer_instrumentation, gnu::flatten, gnu::always_inline,
445+
gnu::used, gnu::retain]] _AS_PTR(void, AllocationKind::SHARED)
446+
ompx_check_with_base_shared(_AS_PTR(void, AllocationKind::SHARED) P,
447+
_AS_PTR(void, AllocationKind::SHARED) Start,
448+
uint64_t Length, uint32_t Tag, uint64_t Size,
449+
uint64_t AccessId, int64_t SourceId,
450+
uint64_t PC) {
451+
return AllocationTracker<AllocationKind::SHARED>::checkWithBase(
452+
P, Start, Length, Tag, Size, AccessId, SourceId, PC);
453+
}
454+
377455
[[clang::disable_sanitizer_instrumentation, gnu::flatten, gnu::always_inline,
378456
gnu::used, gnu::retain]] _AS_PTR(void, AllocationKind::LOCAL)
379457
ompx_unpack_local(_AS_PTR(void, AllocationKind::LOCAL) P,
@@ -388,16 +466,29 @@ ompx_check(void *P, uint64_t Size, uint64_t AccessId, int64_t SourceId,
388466
return AllocationTracker<AllocationKind::GLOBAL>::unpack(P, SourceId,
389467
/*PC=*/0);
390468
}
469+
[[clang::disable_sanitizer_instrumentation, gnu::flatten, gnu::always_inline,
470+
gnu::used, gnu::retain]] _AS_PTR(void, AllocationKind::SHARED)
471+
ompx_unpack_shared(_AS_PTR(void, AllocationKind::SHARED) P,
472+
int64_t SourceId) {
473+
return AllocationTracker<AllocationKind::SHARED>::unpack(P, SourceId,
474+
/*PC=*/0);
475+
}
391476
[[clang::disable_sanitizer_instrumentation, gnu::flatten, gnu::always_inline,
392477
gnu::used, gnu::retain]] void *
393478
ompx_unpack(void *P, int64_t SourceId) {
394-
bool IsGlobal = IS_GLOBAL(P);
395-
checkForMagic(IsGlobal, P, SourceId, /*PC=*/0);
396-
if (IsGlobal)
479+
printf("UNPACK GENERIC %p\n", P);
480+
auto PtrKind = getFakePtrType(P, SourceId, 0);
481+
switch (PtrKind) {
482+
case AllocationKind::GLOBAL:
397483
return (void *)ompx_unpack_global((_AS_PTR(void, AllocationKind::GLOBAL))P,
398484
SourceId);
399-
return (void *)ompx_unpack_local((_AS_PTR(void, AllocationKind::LOCAL))P,
400-
SourceId);
485+
case AllocationKind::LOCAL:
486+
return (void *)ompx_unpack_local((_AS_PTR(void, AllocationKind::LOCAL))P,
487+
SourceId);
488+
case AllocationKind::SHARED:
489+
return (void *)ompx_unpack_shared((_AS_PTR(void, AllocationKind::SHARED))P,
490+
SourceId);
491+
}
401492
}
402493

403494
[[clang::disable_sanitizer_instrumentation, gnu::flatten, gnu::always_inline,
@@ -421,6 +512,11 @@ ompx_get_allocation_info_local(_AS_PTR(void, AllocationKind::LOCAL) P) {
421512
ompx_get_allocation_info_global(_AS_PTR(void, AllocationKind::GLOBAL) P) {
422513
return AllocationTracker<AllocationKind::GLOBAL>::getAllocationInfo(P);
423514
}
515+
[[clang::disable_sanitizer_instrumentation, gnu::flatten, gnu::always_inline,
516+
gnu::used, gnu::retain]] struct AllocationInfoSharedTy
517+
ompx_get_allocation_info_shared(_AS_PTR(void, AllocationKind::SHARED) P) {
518+
return AllocationTracker<AllocationKind::SHARED>::getAllocationInfo(P);
519+
}
424520

425521
[[clang::disable_sanitizer_instrumentation, gnu::flatten, gnu::always_inline,
426522
gnu::used, gnu::retain]] void

offload/include/Shared/Sanitizer.h

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ int64_t __san_get_location_value();
2424
#define INLINE gnu::always_inline
2525
#define NOINLINE gnu::noinline
2626

27-
enum class AllocationKind { LOCAL, GLOBAL, LAST = GLOBAL };
27+
enum class AllocationKind { LOCAL, GLOBAL, SHARED, LAST = SHARED };
28+
constexpr uint32_t FAKE_PTR_KIND_BITS = 2;
2829

2930
template <AllocationKind AK> struct ASTypes {
3031
using INT_TY = uint64_t;
@@ -33,22 +34,36 @@ template <AllocationKind AK> struct ASTypes {
3334
template <> struct ASTypes<AllocationKind::LOCAL> {
3435
using INT_TY = uint32_t;
3536
};
37+
/// SHARED pointers use a 32-bit integer representation, like the LOCAL
/// specialization; SanitizerConfig derives ADDR_SPACE_PTR_SIZE from this type.
template <> struct ASTypes<AllocationKind::SHARED> {
  using INT_TY = uint32_t;
};
3640
#pragma omp end declare variant
3741

42+
template <AllocationKind AK> struct ASAddrSpace {};
43+
template <> struct ASAddrSpace<AllocationKind::GLOBAL> {
44+
static constexpr uint32_t ADDR_SPACE = 0;
45+
};
46+
template <> struct ASAddrSpace<AllocationKind::LOCAL> {
47+
static constexpr uint32_t ADDR_SPACE = 5;
48+
};
49+
template <> struct ASAddrSpace<AllocationKind::SHARED> {
50+
static constexpr uint32_t ADDR_SPACE = 3;
51+
};
52+
3853
template <AllocationKind AK> struct SanitizerConfig {
39-
static constexpr uint32_t ADDR_SPACE = AK == AllocationKind::GLOBAL ? 0 : 5;
54+
static constexpr uint32_t ADDR_SPACE = ASAddrSpace<AK>::ADDR_SPACE;
4055
static constexpr uint32_t ADDR_SPACE_PTR_SIZE =
4156
sizeof(typename ASTypes<AK>::INT_TY) * 8;
4257

4358
static constexpr uint32_t NUM_ALLOCATION_ARRAYS =
44-
AK == AllocationKind::GLOBAL ? 1 : (1024 * 1024 * 2);
45-
static constexpr uint32_t TAG_BITS = AK == AllocationKind::GLOBAL ? 1 : 8;
59+
AK == AllocationKind::LOCAL ? (1024 * 1024 * 2) : 1;
60+
static constexpr uint32_t TAG_BITS = AK == AllocationKind::LOCAL ? 8 : 1;
4661
static constexpr uint32_t MAGIC_BITS = 3;
4762
static constexpr uint32_t MAGIC = 0b101;
4863

4964
static constexpr uint32_t OBJECT_BITS = AK == AllocationKind::GLOBAL ? 10 : 7;
5065
static constexpr uint32_t SLOTS = (1 << (OBJECT_BITS));
51-
static constexpr uint32_t KIND_BITS = 1;
66+
static constexpr uint32_t KIND_BITS = FAKE_PTR_KIND_BITS;
5267
static constexpr uint32_t Id_BITS = 9 - KIND_BITS;
5368

5469
static constexpr uint32_t LENGTH_BITS =
@@ -104,6 +119,7 @@ template <AllocationKind AK> struct AllocationPtrTy {
104119
};
105120
#pragma omp begin declare variant match(device = {arch(amdgcn)})
106121
static_assert(sizeof(AllocationPtrTy<AllocationKind::LOCAL>) * 8 == 32);
122+
static_assert(sizeof(AllocationPtrTy<AllocationKind::SHARED>) * 8 == 32);
107123
#pragma omp end declare variant
108124

109125
union TypePunUnion {

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -678,8 +678,20 @@ struct GPUSanTy {
678678
void addGPUSanNewFn(GenericKernelTy &GK) { NewFns.push_back(&GK); }
679679
void addGPUSanFreeFn(GenericKernelTy &GK) { FreeFns.push_back(&GK); }
680680
void checkAndReportError();
681+
682+
bool hasSharedShadow(const char *GlobalName,
683+
SmallVector<DeviceImageTy *> &Images);
684+
681685
Error transferFakePtrToDevice(const char *GlobalName, void *FakeHstPtr,
682686
SmallVector<DeviceImageTy *> &Images);
687+
Error readFakePtrFromDevice(const char *GlobalName, void *&FakeHstPtr,
688+
SmallVector<DeviceImageTy *> &Images);
689+
690+
/// Builds the name of the GPUSan shadow global that corresponds to
/// \p GlobalName by prefixing it with "__san.global.".
///
/// \param GlobalName NUL-terminated name of the original global. A null
///                   pointer is treated like an empty name, because
///                   appending a null C string to std::string is undefined
///                   behavior.
/// \returns "__san.global." followed by \p GlobalName.
static std::string getShadowName(const char *GlobalName) {
  std::string ShadowName("__san.global.");
  if (GlobalName)
    ShadowName.append(GlobalName);
  return ShadowName;
}
683695

684696
private:
685697
uint32_t SlotCnt = SanitizerConfig<AllocationKind::GLOBAL>::SLOTS - 1;
@@ -825,13 +837,20 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
825837
return PinnedAllocs.unlockUnmappedHostBuffer(HstPtr);
826838
}
827839

828-
/// Transfers a fake pointer to its respective shadow variable to prevent
829-
/// double initializing GPUSan shadow constants. Only runs if GPUSan is
830-
/// enabled
840+
/// Transfers a fake pointer to its respective shadow variable. Only run
841+
/// if the shadow variable was not read from the device already. Only runs
842+
/// if GPUSan is enabled
831843
Error transferFakePtrToDevice(const char *GlobalName, void *FakeHstPtr) {
  // Delegate to the GPUSan helper, handing it this device's loaded images.
  return GPUSan.transferFakePtrToDevice(GlobalName, FakeHstPtr,
                                        LoadedImages);
}
834846

847+
/// Attempts to read a fake pointer from its respective shadow variable to
848+
/// prevent double initializing GPUSan shadow constants. FakeHstPtr will be
849+
/// null if no corresponding shadow global is found
850+
Error readFakePtrFromDevice(const char *GlobalName, void *&FakeHstPtr) {
  // Delegate to the GPUSan helper, handing it this device's loaded images;
  // the result is written back through the FakeHstPtr reference.
  return GPUSan.readFakePtrFromDevice(GlobalName, FakeHstPtr,
                                      LoadedImages);
}
853+
835854
/// Check whether the host buffer with address \p HstPtr is pinned by the
836855
/// underlying vendor-specific runtime (if any). Retrieve the host pointer,
837856
/// the device accessible pointer and the size of the original pinned buffer.

0 commit comments

Comments
 (0)