Skip to content

Commit 64dc21c

Browse files
authored
Unrolled build for #145768
Rollup merge of #145768 - ZuseZ4:offload-device, r=oli-obk Offload device LLVM's offload functionality usually expects an extra dyn_ptr argument. We could avoid it, but we will likely need it very soon in one of the follow-up PRs (e.g. to request shared memory). So we might as well add it already. This PR adds a %dyn_ptr ptr to GPUKernel ABI functions, if the offload feature is enabled. WIP r? ```@ghost```
2 parents c90bcb9 + 360b38c commit 64dc21c

File tree

12 files changed

+129
-2
lines changed

12 files changed

+129
-2
lines changed

compiler/rustc_codegen_llvm/src/back/lto.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -616,7 +616,8 @@ pub(crate) fn run_pass_manager(
616616
write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage);
617617
}
618618

619-
if enable_gpu && !thin {
619+
// Here we only handle the GPU host (=cpu) code.
620+
if enable_gpu && !thin && !cgcx.target_is_like_gpu {
620621
let cx =
621622
SimpleCx::new(module.module_llvm.llmod(), &module.module_llvm.llcx, cgcx.pointer_size);
622623
crate::builder::gpu_offload::handle_gpu_code(cgcx, &cx);

compiler/rustc_codegen_llvm/src/back/write.rs

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ use crate::errors::{
4343
use crate::llvm::diagnostic::OptimizationDiagnosticKind::*;
4444
use crate::llvm::{self, DiagnosticInfo};
4545
use crate::type_::llvm_type_ptr;
46-
use crate::{LlvmCodegenBackend, ModuleLlvm, base, common, llvm_util};
46+
use crate::{LlvmCodegenBackend, ModuleLlvm, SimpleCx, base, common, llvm_util};
4747

4848
pub(crate) fn llvm_err<'a>(dcx: DiagCtxtHandle<'_>, err: LlvmError<'a>) -> ! {
4949
match llvm::last_error() {
@@ -645,6 +645,74 @@ pub(crate) unsafe fn llvm_optimize(
645645
None
646646
};
647647

648+
fn handle_offload<'ll>(cx: &'ll SimpleCx<'_>, old_fn: &llvm::Value) {
649+
let old_fn_ty = cx.get_type_of_global(old_fn);
650+
let old_param_types = cx.func_params_types(old_fn_ty);
651+
let old_param_count = old_param_types.len();
652+
if old_param_count == 0 {
653+
return;
654+
}
655+
656+
let first_param = llvm::get_param(old_fn, 0);
657+
let c_name = llvm::get_value_name(first_param);
658+
let first_arg_name = str::from_utf8(&c_name).unwrap();
659+
// We might call llvm_optimize (and thus this code) multiple times on the same IR,
660+
// but we shouldn't add this helper ptr multiple times.
661+
// FIXME(offload): This could break if the user names their own first argument `dyn_ptr`.
662+
if first_arg_name == "dyn_ptr" {
663+
return;
664+
}
665+
666+
// Create the new parameter list, with ptr as the first argument
667+
let mut new_param_types = Vec::with_capacity(old_param_count as usize + 1);
668+
new_param_types.push(cx.type_ptr());
669+
new_param_types.extend(old_param_types);
670+
671+
// Create the new function type
672+
let ret_ty = unsafe { llvm::LLVMGetReturnType(old_fn_ty) };
673+
let new_fn_ty = cx.type_func(&new_param_types, ret_ty);
674+
675+
// Create the new function, with a temporary .offload name to avoid a name collision.
676+
let old_fn_name = String::from_utf8(llvm::get_value_name(old_fn)).unwrap();
677+
let new_fn_name = format!("{}.offload", &old_fn_name);
678+
let new_fn = cx.add_func(&new_fn_name, new_fn_ty);
679+
let a0 = llvm::get_param(new_fn, 0);
680+
llvm::set_value_name(a0, CString::new("dyn_ptr").unwrap().as_bytes());
681+
682+
// Here we map the old arguments to the new arguments, with an offset of 1 to make sure
683+
// that we don't use the newly added `%dyn_ptr`.
684+
unsafe {
685+
llvm::LLVMRustOffloadMapper(cx.llmod(), old_fn, new_fn);
686+
}
687+
688+
llvm::set_linkage(new_fn, llvm::get_linkage(old_fn));
689+
llvm::set_visibility(new_fn, llvm::get_visibility(old_fn));
690+
691+
// Replace all uses of old_fn with new_fn (RAUW)
692+
unsafe {
693+
llvm::LLVMReplaceAllUsesWith(old_fn, new_fn);
694+
}
695+
let name = llvm::get_value_name(old_fn);
696+
unsafe {
697+
llvm::LLVMDeleteFunction(old_fn);
698+
}
699+
// Now we can re-use the old name, without name collision.
700+
llvm::set_value_name(new_fn, &name);
701+
}
702+
703+
if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Enable) {
704+
let cx =
705+
SimpleCx::new(module.module_llvm.llmod(), module.module_llvm.llcx, cgcx.pointer_size);
706+
// For now we only support kernels named kernel_0 ... kernel_8 (FIXME: the `0..9` range below is exclusive and never visits kernel_9; use `0..=9` to cover ten kernels), a follow-up PR is
707+
// introducing a proper offload intrinsic to solve this limitation.
708+
for num in 0..9 {
709+
let name = format!("kernel_{num}");
710+
if let Some(kernel) = cx.get_function(&name) {
711+
handle_offload(&cx, kernel);
712+
}
713+
}
714+
}
715+
648716
let mut llvm_profiler = cgcx
649717
.prof
650718
.llvm_recording_enabled()

compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ pub(crate) fn handle_gpu_code<'ll>(
1919
let mut memtransfer_types = vec![];
2020
let mut region_ids = vec![];
2121
let offload_entry_ty = TgtOffloadEntry::new_decl(&cx);
22+
// This is a temporary hack, we only search for kernel_0 to kernel_8 functions (FIXME: `0..9` is exclusive, so kernel_9 is skipped).
23+
// There is a draft PR in progress which will introduce a proper offload intrinsic to remove
24+
// this limitation.
2225
for num in 0..9 {
2326
let kernel = cx.get_function(&format!("kernel_{num}"));
2427
if let Some(kernel) = kernel {

compiler/rustc_codegen_llvm/src/llvm/ffi.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1127,6 +1127,11 @@ unsafe extern "C" {
11271127

11281128
// Operations on functions
11291129
pub(crate) fn LLVMSetFunctionCallConv(Fn: &Value, CC: c_uint);
1130+
pub(crate) fn LLVMAddFunction<'a>(
1131+
Mod: &'a Module,
1132+
Name: *const c_char,
1133+
FunctionTy: &'a Type,
1134+
) -> &'a Value;
11301135
pub(crate) fn LLVMDeleteFunction(Fn: &Value);
11311136

11321137
// Operations about llvm intrinsics
@@ -2017,6 +2022,7 @@ unsafe extern "C" {
20172022
) -> &Attribute;
20182023

20192024
// Operations on functions
2025+
pub(crate) fn LLVMRustOffloadMapper<'a>(M: &'a Module, Fn: &'a Value, Fn: &'a Value);
20202026
pub(crate) fn LLVMRustGetOrInsertFunction<'a>(
20212027
M: &'a Module,
20222028
Name: *const c_char,

compiler/rustc_codegen_llvm/src/type_.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,11 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
6868
unsafe { llvm::LLVMVectorType(ty, len as c_uint) }
6969
}
7070

71+
pub(crate) fn add_func(&self, name: &str, ty: &'ll Type) -> &'ll Value {
72+
let name = SmallCStr::new(name);
73+
unsafe { llvm::LLVMAddFunction(self.llmod(), name.as_ptr(), ty) }
74+
}
75+
7176
pub(crate) fn func_params_types(&self, ty: &'ll Type) -> Vec<&'ll Type> {
7277
unsafe {
7378
let n_args = llvm::LLVMCountParamTypes(ty) as usize;

compiler/rustc_codegen_ssa/src/back/write.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,7 @@ pub struct CodegenContext<B: WriteBackendMethods> {
342342
pub target_arch: String,
343343
pub target_is_like_darwin: bool,
344344
pub target_is_like_aix: bool,
345+
pub target_is_like_gpu: bool,
345346
pub split_debuginfo: rustc_target::spec::SplitDebuginfo,
346347
pub split_dwarf_kind: rustc_session::config::SplitDwarfKind,
347348
pub pointer_size: Size,
@@ -1309,6 +1310,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
13091310
target_arch: tcx.sess.target.arch.to_string(),
13101311
target_is_like_darwin: tcx.sess.target.is_like_darwin,
13111312
target_is_like_aix: tcx.sess.target.is_like_aix,
1313+
target_is_like_gpu: tcx.sess.target.is_like_gpu,
13121314
split_debuginfo: tcx.sess.split_debuginfo(),
13131315
split_dwarf_kind: tcx.sess.opts.unstable_opts.split_dwarf_kind,
13141316
parallel: backend.supports_parallel() && !sess.opts.unstable_opts.no_parallel_backend,

compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535
#include "llvm/Support/Signals.h"
3636
#include "llvm/Support/Timer.h"
3737
#include "llvm/Support/ToolOutputFile.h"
38+
#include "llvm/Transforms/Utils/Cloning.h"
39+
#include "llvm/Transforms/Utils/ValueMapper.h"
3840
#include <iostream>
3941

4042
// for raw `write` in the bad-alloc handler
@@ -142,6 +144,28 @@ extern "C" void LLVMRustPrintStatistics(RustStringRef OutBuf) {
142144
llvm::PrintStatistics(OS);
143145
}
144146

147+
extern "C" void LLVMRustOffloadMapper(LLVMModuleRef M, LLVMValueRef OldFn,
148+
LLVMValueRef NewFn) {
149+
llvm::Module *module = llvm::unwrap(M);
150+
llvm::Function *oldFn = llvm::unwrap<llvm::Function>(OldFn);
151+
llvm::Function *newFn = llvm::unwrap<llvm::Function>(NewFn);
152+
153+
// Map old arguments to new arguments. We skip the first dyn_ptr argument,
154+
// since it can't be used directly by user code.
155+
llvm::ValueToValueMapTy vmap;
156+
auto newArgIt = newFn->arg_begin();
157+
newArgIt->setName("dyn_ptr");
158+
++newArgIt; // skip %dyn_ptr
159+
for (auto &oldArg : oldFn->args()) {
160+
vmap[&oldArg] = &*newArgIt++;
161+
}
162+
163+
llvm::SmallVector<llvm::ReturnInst *, 8> returns;
164+
llvm::CloneFunctionInto(newFn, oldFn, vmap,
165+
llvm::CloneFunctionChangeType::LocalChangesOnly,
166+
returns);
167+
}
168+
145169
extern "C" LLVMValueRef LLVMRustGetNamedValue(LLVMModuleRef M, const char *Name,
146170
size_t NameLen) {
147171
return wrap(unwrap(M)->getNamedValue(StringRef(Name, NameLen)));

compiler/rustc_target/src/callconv/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,7 @@ impl RiscvInterruptKind {
578578
///
579579
/// The signature represented by this type may not match the MIR function signature.
580580
/// Certain attributes, like `#[track_caller]` can introduce additional arguments, which are present in [`FnAbi`], but not in `FnSig`.
581+
/// The std::offload module also adds an additional `dyn_ptr` argument to the GpuKernel ABI.
581582
/// While this difference is rarely relevant, it should still be kept in mind.
582583
///
583584
/// I will do my best to describe this structure, but these

compiler/rustc_target/src/spec/json.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ impl Target {
147147
forward!(is_like_darwin);
148148
forward!(is_like_solaris);
149149
forward!(is_like_windows);
150+
forward!(is_like_gpu);
150151
forward!(is_like_msvc);
151152
forward!(is_like_wasm);
152153
forward!(is_like_android);
@@ -342,6 +343,7 @@ impl ToJson for Target {
342343
target_option_val!(is_like_darwin);
343344
target_option_val!(is_like_solaris);
344345
target_option_val!(is_like_windows);
346+
target_option_val!(is_like_gpu);
345347
target_option_val!(is_like_msvc);
346348
target_option_val!(is_like_wasm);
347349
target_option_val!(is_like_android);
@@ -562,6 +564,7 @@ struct TargetSpecJson {
562564
is_like_darwin: Option<bool>,
563565
is_like_solaris: Option<bool>,
564566
is_like_windows: Option<bool>,
567+
is_like_gpu: Option<bool>,
565568
is_like_msvc: Option<bool>,
566569
is_like_wasm: Option<bool>,
567570
is_like_android: Option<bool>,

compiler/rustc_target/src/spec/mod.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2180,6 +2180,8 @@ pub struct TargetOptions {
21802180
/// Also indicates whether to use Apple-specific ABI changes, such as extending function
21812181
/// parameters to 32-bits.
21822182
pub is_like_darwin: bool,
2183+
/// Whether the target is a GPU (e.g. NVIDIA, AMD, Intel).
2184+
pub is_like_gpu: bool,
21832185
/// Whether the target toolchain is like Solaris's.
21842186
/// Only useful for compiling against Illumos/Solaris,
21852187
/// as they have a different set of linker flags. Defaults to false.
@@ -2590,6 +2592,7 @@ impl Default for TargetOptions {
25902592
abi_return_struct_as_int: false,
25912593
is_like_aix: false,
25922594
is_like_darwin: false,
2595+
is_like_gpu: false,
25932596
is_like_solaris: false,
25942597
is_like_windows: false,
25952598
is_like_msvc: false,
@@ -2756,6 +2759,11 @@ impl Target {
27562759
self.os == "solaris" || self.os == "illumos",
27572760
"`is_like_solaris` must be set if and only if `os` is `solaris` or `illumos`"
27582761
);
2762+
check_eq!(
2763+
self.is_like_gpu,
2764+
self.arch == Arch::Nvptx64 || self.arch == Arch::AmdGpu,
2765+
"`is_like_gpu` must be set if and only if `target` is `nvptx64` or `amdgcn`"
2766+
);
27592767
check_eq!(
27602768
self.is_like_windows,
27612769
self.os == "windows" || self.os == "uefi" || self.os == "cygwin",

0 commit comments

Comments
 (0)