Skip to content

Commit 3ebaccb

Browse files
committed
Add intrinsic for dynamic shared memory
Shared memory is a memory region that is shared between all threads in a thread block/workgroup on GPUs. Dynamic shared memory is in that memory region, though the allocated size is specified late, when launching a kernel. Shared memory in amdgpu and nvptx lives in address space 3. Dynamic shared memory is implemented by creating an external global variable in address space 3. The global is declared with size 0, as the actual size is only known at runtime. It is defined behavior in LLVM to access an external global outside the defined size. As far as I know, there is no similar way to get the allocated size of dynamic shared memory on amdgpu and nvptx, so users have to pass this out-of-band or rely on target-specific ways.
1 parent 94722ca commit 3ebaccb

File tree

13 files changed

+127
-6
lines changed

13 files changed

+127
-6
lines changed

compiler/rustc_abi/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1716,6 +1716,8 @@ pub struct AddressSpace(pub u32);
17161716
impl AddressSpace {
17171717
/// LLVM's `0` address space.
17181718
pub const ZERO: Self = AddressSpace(0);
1719+
/// The address space for shared memory on nvptx and amdgpu.
1720+
pub const SHARED: Self = AddressSpace(3);
17191721
}
17201722

17211723
/// The way we represent values to the backend

compiler/rustc_codegen_llvm/src/declare.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
use std::borrow::Borrow;
1515

1616
use itertools::Itertools;
17+
use rustc_abi::AddressSpace;
1718
use rustc_codegen_ssa::traits::TypeMembershipCodegenMethods;
1819
use rustc_data_structures::fx::FxIndexSet;
1920
use rustc_middle::ty::{Instance, Ty};
@@ -99,6 +100,28 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
99100
)
100101
}
101102
}
103+
104+
/// Declare a global value in a specific address space.
105+
///
106+
/// If there’s a value with the same name already declared, the function will
107+
/// return its Value instead.
108+
pub(crate) fn declare_global_in_addrspace(
109+
&self,
110+
name: &str,
111+
ty: &'ll Type,
112+
addr_space: AddressSpace,
113+
) -> &'ll Value {
114+
debug!("declare_global(name={name:?}, addrspace={addr_space:?})");
115+
unsafe {
116+
llvm::LLVMRustGetOrInsertGlobalInAddrspace(
117+
(**self).borrow().llmod,
118+
name.as_c_char_ptr(),
119+
name.len(),
120+
ty,
121+
addr_space.0,
122+
)
123+
}
124+
}
102125
}
103126

104127
impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {

compiler/rustc_codegen_llvm/src/intrinsic.rs

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
use std::assert_matches::assert_matches;
22
use std::cmp::Ordering;
33

4-
use rustc_abi::{Align, BackendRepr, ExternAbi, Float, HasDataLayout, Primitive, Size};
4+
use rustc_abi::{
5+
AddressSpace, Align, BackendRepr, ExternAbi, Float, HasDataLayout, Primitive, Size,
6+
};
57
use rustc_codegen_ssa::base::{compare_simd_types, wants_msvc_seh, wants_wasm_eh};
68
use rustc_codegen_ssa::codegen_attrs::autodiff_attrs;
79
use rustc_codegen_ssa::common::{IntPredicate, TypeKind};
@@ -532,6 +534,22 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
532534
return Ok(());
533535
}
534536

537+
sym::dynamic_shared_memory => {
538+
let global = self.declare_global_in_addrspace(
539+
"dynamic_shared_memory",
540+
self.type_array(self.type_i8(), 0),
541+
AddressSpace::SHARED,
542+
);
543+
let ty::RawPtr(inner_ty, _) = result.layout.ty.kind() else { unreachable!() };
544+
let alignment = self.align_of(*inner_ty).bytes() as u32;
545+
unsafe {
546+
if alignment > llvm::LLVMGetAlignment(global) {
547+
llvm::LLVMSetAlignment(global, alignment);
548+
}
549+
}
550+
self.cx().const_pointercast(global, self.type_ptr())
551+
}
552+
535553
_ if name.as_str().starts_with("simd_") => {
536554
// Unpack non-power-of-2 #[repr(packed, simd)] arguments.
537555
// This gives them the expected layout of a regular #[repr(simd)] vector.

compiler/rustc_codegen_llvm/src/llvm/ffi.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1907,6 +1907,13 @@ unsafe extern "C" {
19071907
NameLen: size_t,
19081908
T: &'a Type,
19091909
) -> &'a Value;
1910+
pub(crate) fn LLVMRustGetOrInsertGlobalInAddrspace<'a>(
1911+
M: &'a Module,
1912+
Name: *const c_char,
1913+
NameLen: size_t,
1914+
T: &'a Type,
1915+
AddressSpace: c_uint,
1916+
) -> &'a Value;
19101917
pub(crate) fn LLVMRustInsertPrivateGlobal<'a>(M: &'a Module, T: &'a Type) -> &'a Value;
19111918
pub(crate) fn LLVMRustGetNamedValue(
19121919
M: &Module,

compiler/rustc_codegen_ssa/src/mir/intrinsic.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
110110
sym::abort
111111
| sym::unreachable
112112
| sym::cold_path
113+
| sym::dynamic_shared_memory
113114
| sym::breakpoint
114115
| sym::assert_zero_valid
115116
| sym::assert_mem_uninitialized_valid

compiler/rustc_hir_analysis/src/check/intrinsic.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ fn intrinsic_operation_unsafety(tcx: TyCtxt<'_>, intrinsic_id: LocalDefId) -> hi
7474
| sym::align_of
7575
| sym::needs_drop
7676
| sym::caller_location
77+
| sym::dynamic_shared_memory
7778
| sym::add_with_overflow
7879
| sym::sub_with_overflow
7980
| sym::mul_with_overflow
@@ -213,6 +214,7 @@ pub(crate) fn check_intrinsic_type(
213214
}
214215
sym::rustc_peek => (1, 0, vec![param(0)], param(0)),
215216
sym::caller_location => (0, 0, vec![], tcx.caller_location_ty()),
217+
sym::dynamic_shared_memory => (1, 0, vec![], Ty::new_mut_ptr(tcx, param(0))),
216218
sym::assert_inhabited | sym::assert_zero_valid | sym::assert_mem_uninitialized_valid => {
217219
(1, 0, vec![], tcx.types.unit)
218220
}

compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -209,10 +209,12 @@ extern "C" LLVMValueRef LLVMRustGetOrInsertFunction(LLVMModuleRef M,
209209
.getCallee());
210210
}
211211

212-
extern "C" LLVMValueRef LLVMRustGetOrInsertGlobal(LLVMModuleRef M,
213-
const char *Name,
214-
size_t NameLen,
215-
LLVMTypeRef Ty) {
212+
extern "C" LLVMValueRef
213+
LLVMRustGetOrInsertGlobalInAddrspace(LLVMModuleRef M,
214+
const char *Name,
215+
size_t NameLen,
216+
LLVMTypeRef Ty,
217+
unsigned AddressSpace) {
216218
Module *Mod = unwrap(M);
217219
auto NameRef = StringRef(Name, NameLen);
218220

@@ -223,10 +225,21 @@ extern "C" LLVMValueRef LLVMRustGetOrInsertGlobal(LLVMModuleRef M,
223225
GlobalVariable *GV = Mod->getGlobalVariable(NameRef, true);
224226
if (!GV)
225227
GV = new GlobalVariable(*Mod, unwrap(Ty), false,
226-
GlobalValue::ExternalLinkage, nullptr, NameRef);
228+
GlobalValue::ExternalLinkage, nullptr, NameRef,
229+
nullptr, GlobalValue::NotThreadLocal, AddressSpace);
227230
return wrap(GV);
228231
}
229232

233+
extern "C" LLVMValueRef LLVMRustGetOrInsertGlobal(LLVMModuleRef M,
234+
const char *Name,
235+
size_t NameLen,
236+
LLVMTypeRef Ty) {
237+
Module *Mod = unwrap(M);
238+
unsigned AddressSpace = Mod->getDataLayout().getDefaultGlobalsAddressSpace();
239+
return LLVMRustGetOrInsertGlobalInAddrspace(M, Name, NameLen, Ty,
240+
AddressSpace);
241+
}
242+
230243
extern "C" LLVMValueRef LLVMRustInsertPrivateGlobal(LLVMModuleRef M,
231244
LLVMTypeRef Ty) {
232245
return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false,

compiler/rustc_span/src/symbol.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -903,6 +903,7 @@ symbols! {
903903
dyn_star,
904904
dyn_trait,
905905
dynamic_no_pic: "dynamic-no-pic",
906+
dynamic_shared_memory,
906907
e,
907908
edition_panic,
908909
effective_target_features,

library/core/src/intrinsics/mod.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3238,6 +3238,23 @@ pub(crate) const fn miri_promise_symbolic_alignment(ptr: *const (), align: usize
32383238
)
32393239
}
32403240

3241+
/// Returns a pointer to dynamic shared memory.
3242+
///
3243+
/// The returned pointer is the start of the dynamic shared memory region.
3244+
/// All pointers returned by `dynamic_shared_memory` point to the same address,
3245+
/// so alias the same memory.
3246+
/// The returned pointer is aligned to at least the alignment of `T`.
3247+
///
3248+
/// # Other APIs
3249+
///
3250+
/// CUDA and HIP call this shared memory.
3251+
/// OpenCL and SYCL call this local memory.
3252+
#[rustc_intrinsic]
3253+
#[rustc_nounwind]
3254+
#[unstable(feature = "dynamic_shared_memory", issue = "135513")]
3255+
#[cfg(any(target_arch = "amdgpu", target_arch = "nvptx64"))]
3256+
pub fn dynamic_shared_memory<T: ?Sized>() -> *mut T;
3257+
32413258
/// Copies the current location of arglist `src` to the arglist `dst`.
32423259
///
32433260
/// FIXME: document safety requirements

src/bootstrap/src/core/build_steps/compile.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -668,6 +668,14 @@ pub fn std_cargo(builder: &Builder<'_>, target: TargetSelection, cargo: &mut Car
668668
cargo.rustflag("-Cforce-unwind-tables=yes");
669669
}
670670

671+
// amdgcn must have a cpu specified, otherwise it refuses to compile.
672+
// We want to be able to run tests for amdgcn that depend on core, therefore
673+
// we need to be able to compile core.
674+
// The cpu used here must match in tests that use the standard library.
675+
if target.contains("amdgcn") && target.file.is_none() {
676+
cargo.rustflag("-Ctarget-cpu=gfx900");
677+
}
678+
671679
// Enable frame pointers by default for the library. Note that they are still controlled by a
672680
// separate setting for the compiler.
673681
cargo.rustflag("-Zunstable-options");

0 commit comments

Comments
 (0)