Skip to content

Commit 85f5383

Browse files
committed
AMDGPU: Do not infer implicit inputs for !nocallback intrinsics
This isn't really the right check: what we want to know is that the intrinsic does not perform a true function call to any code (whether in the module or not). However, nocallback appears to be the closest thing to this property that we have right now. Also do the same when inferring no-AGPR usage.
1 parent 1646797 commit 85f5383

File tree

3 files changed

+101
-9
lines changed

3 files changed

+101
-9
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,10 @@ enum ImplicitArgumentPositions {
3838
#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
3939

4040
enum ImplicitArgumentMask {
41-
NOT_IMPLICIT_INPUT = 0,
41+
UNKNOWN_INTRINSIC = 0,
4242
#include "AMDGPUAttributes.def"
43-
ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
43+
ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1,
44+
NOT_IMPLICIT_INPUT
4445
};
4546

4647
#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
@@ -115,7 +116,7 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
115116
NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
116117
return QUEUE_PTR;
117118
default:
118-
return NOT_IMPLICIT_INPUT;
119+
return UNKNOWN_INTRINSIC;
119120
}
120121
}
121122

@@ -534,6 +535,21 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
534535
ImplicitArgumentMask AttrMask =
535536
intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
536537
HasApertureRegs, SupportsGetDoorbellID, COV);
538+
539+
if (AttrMask == UNKNOWN_INTRINSIC) {
540+
// Assume not-nocallback intrinsics may invoke a function which accesses
541+
// implicit arguments.
542+
//
543+
// FIXME: This isn't really the correct check. We want to ensure it
544+
// isn't calling any function that may use implicit arguments regardless
545+
// of whether it's internal to the module or not.
546+
//
547+
// TODO: Ignoring callsite attributes.
548+
if (!Callee->hasFnAttribute(Attribute::NoCallback))
549+
return indicatePessimisticFixpoint();
550+
continue;
551+
}
552+
537553
if (AttrMask != NOT_IMPLICIT_INPUT) {
538554
if ((IsNonEntryFunc || !NonKernelOnly))
539555
removeAssumedBits(AttrMask);
@@ -1357,7 +1373,10 @@ struct AAAMDGPUMinAGPRAlloc
13571373
default:
13581374
// Some intrinsics may use AGPRs, but if we have a choice, we are not
13591375
// required to use AGPRs.
1360-
return true;
1376+
1377+
// Assume !nocallback intrinsics may call a function which requires
1378+
// AGPRs.
1379+
return CB.hasFnAttr(Attribute::NoCallback);
13611380
}
13621381

13631382
// TODO: Handle callsite attributes

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ declare zeroext i32 @return_i32()
88

99
define i32 @test_i32_return() gc "statepoint-example" {
1010
; CHECK-LABEL: define i32 @test_i32_return(
11-
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] gc "statepoint-example" {
11+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] gc "statepoint-example" {
1212
; CHECK-NEXT: [[ENTRY:.*:]]
1313
; CHECK-NEXT: [[SAFEPOINT_TOKEN:%.*]] = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0)
1414
; CHECK-NEXT: [[CALL1:%.*]] = call zeroext i32 @llvm.experimental.gc.result.i32(token [[SAFEPOINT_TOKEN]])
@@ -25,8 +25,7 @@ declare i32 @llvm.experimental.gc.result.i32(token) #0
2525

2626
attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
2727
;.
28-
; CHECK: attributes #[[ATTR0:[0-9]+]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
29-
; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
30-
; CHECK: attributes #[[ATTR2:[0-9]+]] = { "target-cpu"="gfx90a" }
31-
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
28+
; CHECK: attributes #[[ATTR0]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
29+
; CHECK: attributes #[[ATTR1:[0-9]+]] = { "target-cpu"="gfx90a" }
30+
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
3231
;.
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
2+
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-attributor -mcpu=gfx90a %s | FileCheck %s
3+
4+
; Make sure we infer that no implicit inputs are used by functions that only call certain intrinsics
5+
6+
define void @use_fake_use(i32 %arg) {
7+
; CHECK-LABEL: define void @use_fake_use(
8+
; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
9+
; CHECK-NEXT: call void (...) @llvm.fake.use(i32 [[ARG]])
10+
; CHECK-NEXT: ret void
11+
;
12+
call void (...) @llvm.fake.use(i32 %arg)
13+
ret void
14+
}
15+
16+
define void @use_donothing() {
17+
; CHECK-LABEL: define void @use_donothing(
18+
; CHECK-SAME: ) #[[ATTR0]] {
19+
; CHECK-NEXT: call void @llvm.donothing()
20+
; CHECK-NEXT: ret void
21+
;
22+
call void @llvm.donothing()
23+
ret void
24+
}
25+
26+
define void @use_assume(i1 %arg) {
27+
; CHECK-LABEL: define void @use_assume(
28+
; CHECK-SAME: i1 [[ARG:%.*]]) #[[ATTR0]] {
29+
; CHECK-NEXT: call void @llvm.assume(i1 [[ARG]])
30+
; CHECK-NEXT: ret void
31+
;
32+
call void @llvm.assume(i1 %arg)
33+
ret void
34+
}
35+
36+
define void @use_trap() {
37+
; CHECK-LABEL: define void @use_trap(
38+
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
39+
; CHECK-NEXT: call void @llvm.trap()
40+
; CHECK-NEXT: ret void
41+
;
42+
call void @llvm.trap()
43+
ret void
44+
}
45+
46+
define void @use_debugtrap() {
47+
; CHECK-LABEL: define void @use_debugtrap(
48+
; CHECK-SAME: ) #[[ATTR1]] {
49+
; CHECK-NEXT: call void @llvm.debugtrap()
50+
; CHECK-NEXT: ret void
51+
;
52+
call void @llvm.debugtrap()
53+
ret void
54+
}
55+
56+
define void @use_ubsantrap() {
57+
; CHECK-LABEL: define void @use_ubsantrap(
58+
; CHECK-SAME: ) #[[ATTR1]] {
59+
; CHECK-NEXT: call void @llvm.ubsantrap(i8 0)
60+
; CHECK-NEXT: ret void
61+
;
62+
call void @llvm.ubsantrap(i8 0)
63+
ret void
64+
}
65+
66+
;.
67+
; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
68+
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
69+
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
70+
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
71+
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
72+
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) "target-cpu"="gfx90a" }
73+
; CHECK: attributes #[[ATTR6:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
74+
;.

0 commit comments

Comments
 (0)