-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[clang][CodeGen] cast addr space of ReturnValue if needed #154380
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[clang][CodeGen] cast addr space of ReturnValue if needed #154380
Conversation
|
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-clang-codegen Author: None (macurtis-amd) ChangesFixes a bug on AMDGPU targets where a pointer was stored as address space 5, but then loaded as address space 0. Issue found as part of Kokkos testing, specifically Issue was introduced by commit 39ec9de7c230 - [clang][CodeGen] sret args should always point to the alloca AS, so use that (#114062). Full diff: https://github.com/llvm/llvm-project/pull/154380.diff 3 Files Affected:
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index d229d81d6b934..c02e84eb753e9 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2209,6 +2209,18 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
}
}
+ // When storing a pointer, perform address space cast if needed.
+ if (auto *ValueTy = dyn_cast<llvm::PointerType>(Value->getType())) {
+ if (auto *MemTy = dyn_cast<llvm::PointerType>(Addr.getElementType())) {
+ LangAS ValueAS = getLangASFromTargetAS(ValueTy->getAddressSpace());
+ LangAS MemAS = getLangASFromTargetAS(MemTy->getAddressSpace());
+ if (ValueAS != MemAS) {
+ Value =
+ getTargetHooks().performAddrSpaceCast(*this, Value, ValueAS, MemTy);
+ }
+ }
+ }
+
Value = EmitToMemory(Value, Ty);
LValue AtomicLValue =
diff --git a/clang/test/CodeGenCXX/amdgcn-func-arg.cpp b/clang/test/CodeGenCXX/amdgcn-func-arg.cpp
index a5f83dc91b038..21945bfc36677 100644
--- a/clang/test/CodeGenCXX/amdgcn-func-arg.cpp
+++ b/clang/test/CodeGenCXX/amdgcn-func-arg.cpp
@@ -24,9 +24,10 @@ void func_with_ref_arg(B &b);
// CHECK-NEXT: [[P:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[A_INDIRECT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_INDIRECT_ADDR]] to ptr
// CHECK-NEXT: [[P_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P]] to ptr
-// CHECK-NEXT: store ptr addrspace(5) [[A:%.*]], ptr [[A_INDIRECT_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
-// CHECK-NEXT: store ptr [[A_ASCAST]], ptr [[P_ASCAST]], align 8
+// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A:%.*]] to ptr
+// CHECK-NEXT: store ptr [[A_ASCAST]], ptr [[A_INDIRECT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[A_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK-NEXT: store ptr [[A_ASCAST1]], ptr [[P_ASCAST]], align 8
// CHECK-NEXT: ret void
//
void func_with_indirect_arg(A a) {
diff --git a/clang/test/CodeGenHIP/store-addr-space.hip b/clang/test/CodeGenHIP/store-addr-space.hip
new file mode 100644
index 0000000000000..46ab1157d0704
--- /dev/null
+++ b/clang/test/CodeGenHIP/store-addr-space.hip
@@ -0,0 +1,47 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --functions "bar" --version 5
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -fcuda-is-device \
+// RUN: -o - %s | FileCheck --check-prefix=AMDGCN --enable-var-scope %s
+
+struct Foo {
+ unsigned long long val;
+//
+ __attribute__((device)) inline Foo() { val = 0; }
+ __attribute__((device)) inline Foo(const Foo &src) { val = src.val; }
+ __attribute__((device)) inline Foo(const volatile Foo &src) { val = src.val; }
+};
+
+// AMDGCN-LABEL: define dso_local void @_Z3barPK3Foo(
+// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[SRC_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[RESULT_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// AMDGCN-NEXT: [[SRC_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN-NEXT: [[DST:%.*]] = alloca [[UNION_ANON:%.*]], align 8, addrspace(5)
+// AMDGCN-NEXT: [[RESULT_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_PTR]] to ptr
+// AMDGCN-NEXT: [[SRC_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_PTR_ADDR]] to ptr
+// AMDGCN-NEXT: [[DST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DST]] to ptr
+// AMDGCN-NEXT: store ptr addrspace(5) [[AGG_RESULT]], ptr [[RESULT_PTR_ASCAST]], align 4
+// AMDGCN-NEXT: store ptr [[SRC_PTR]], ptr [[SRC_PTR_ADDR_ASCAST]], align 8
+// AMDGCN-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
+// AMDGCN-NEXT: call void @_ZN3FooC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[AGG_RESULT_ASCAST]]) #[[ATTR1:[0-9]+]]
+// AMDGCN-NEXT: [[AGG_RESULT_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
+// AMDGCN-NEXT: store ptr [[AGG_RESULT_ASCAST1]], ptr [[DST_ASCAST]], align 8
+// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SRC_PTR_ADDR_ASCAST]], align 8
+// AMDGCN-NEXT: [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[TMP0]], i32 0, i32 0
+// AMDGCN-NEXT: [[TMP1:%.*]] = load i64, ptr [[VAL]], align 8
+// AMDGCN-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DST_ASCAST]], align 8
+// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 0
+// AMDGCN-NEXT: store i64 [[TMP1]], ptr [[ARRAYIDX]], align 8
+// AMDGCN-NEXT: ret void
+//
+__attribute__((device)) Foo bar(const Foo *const src_ptr) {
+ Foo result;
+
+ union {
+ Foo* const ptr;
+ unsigned long long * const ptr64;
+ } dst = {&result};
+
+ dst.ptr64[0] = src_ptr->val;
+ return result;
+}
|
clang/lib/CodeGen/CGExpr.cpp
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems like the wrong place to do this: if the input Value has the wrong type, something has already gone wrong.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@efriedma-quic Thanks for looking at this. Did not realize that the types are expected to match at this point.
Would just before to the call to EmitStoreThroughLValue in CodeGenFunction::EmitScalarInit be an appropriate place to do the cast if needed?
void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
LValue lvalue, bool capturedByInit) {
Qualifiers::ObjCLifetime lifetime = lvalue.getObjCLifetime();
if (!lifetime) {
llvm::Value *Value;
if (PointerAuthQualifier PtrAuth = lvalue.getQuals().getPointerAuth()) {
Value = EmitPointerAuthQualify(PtrAuth, init, lvalue.getAddress());
lvalue.getQuals().removePointerAuth();
} else {
Value = EmitScalarExpr(init);
}
if (capturedByInit)
drillIntoBlockVariable(*this, lvalue, cast<VarDecl>(D));
EmitNullabilityCheck(lvalue, Value, init->getExprLoc());
EmitStoreThroughLValue(RValue::get(Value), lvalue, true);
return;
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Types in IRGen should match types in the AST. So if an lvalue is in address-space zero, the generated code for the "LValue" should be in address-space zero. If the "&" operator in the AST returns a pointer in address-space zero, then codegen for that operator should also be in address-space zero. By the time we get to EmitScalarInit, it's way too late to try to fix the type.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the explanation.
I actually did some more investigation and determined that the mismatched address space originates in the the NVRO branch of EmitAutoVarAlloca.
While AllocaAddr is ok in addrspace(5), address is really expected to be in the default address space and the NVRO branch needs something like the code here.
Does this sound right to you?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, that sounds right.
1af9c61 to
d82e5d4
Compare
efriedma-quic
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with one minor comment.
Fixes a bug on AMDGPU targets where a pointer was stored as address space 5, but then loaded as address space 0.
Issue found as part of Kokkos testing, specifically
hip.atomics(see core/unit_test/TestAtomics.hpp).Issue was introduced by commit 39ec9de7c230 - [clang][CodeGen] sret args should always point to the alloca AS, so use that (#114062).