Skip to content

Commit 76cd2ea

Browse files
committed
[AMDGPU] Introduce GFX9/10.1/10.3/11 Generic Targets
These generic targets include multiple GPUs and will, in the future, provide a way to build once and run on multiple GPUs, at the cost of fewer optimization opportunities. Note that this is just doing the compiler side of things; device libs and runtimes/loader/etc. don't know about these targets yet, so none of them actually work in practice right now. This is just the initial commit to make LLVM aware of them. No docs in this patch either, as I plan to do it all in a follow-up patch.
1 parent 6368d82 commit 76cd2ea

File tree

27 files changed

+384
-115
lines changed

27 files changed

+384
-115
lines changed

clang/lib/Basic/Targets/AMDGPU.cpp

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -275,13 +275,25 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
275275
Builder.defineMacro("__R600__");
276276

277277
if (GPUKind != llvm::AMDGPU::GK_NONE) {
278-
StringRef CanonName = isAMDGCN(getTriple()) ?
279-
getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
278+
std::string CanonName = (isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
279+
: getArchNameR600(GPUKind))
280+
.str();
281+
282+
// Sanitize the name of generic targets.
283+
// e.g. gfx10.1-generic -> gfx10_1_generic
284+
if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST &&
285+
GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) {
286+
std::replace(CanonName.begin(), CanonName.end(), '.', '_');
287+
std::replace(CanonName.begin(), CanonName.end(), '-', '_');
288+
}
289+
280290
Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
281291
// Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10__
282292
if (isAMDGCN(getTriple())) {
283-
assert(CanonName.starts_with("gfx") && "Invalid amdgcn canonical name");
284-
Builder.defineMacro(Twine("__") + Twine(CanonName.drop_back(2).upper()) +
293+
assert(StringRef(CanonName).starts_with("gfx") &&
294+
"Invalid amdgcn canonical name");
295+
StringRef CanonFamilyName = getArchFamilyNameAMDGCN(GPUKind);
296+
Builder.defineMacro(Twine("__") + Twine(CanonFamilyName.upper()) +
285297
Twine("__"));
286298
}
287299
if (isAMDGCN(getTriple())) {

clang/test/Driver/amdgpu-macros.cl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,11 @@
131131
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1200 -DFAMILY=GFX12
132132
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1201 -DFAMILY=GFX12
133133

134+
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx9_generic -DFAMILY=GFX9
135+
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10.1-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_1_generic -DFAMILY=GFX10
136+
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10.3-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_3_generic -DFAMILY=GFX10
137+
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx11_generic -DFAMILY=GFX11
138+
134139
// ARCH-GCN-DAG: #define FP_FAST_FMA 1
135140

136141
// FAST_FMAF-DAG: #define FP_FAST_FMAF 1

clang/test/Driver/amdgpu-mcpu.cl

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,11 @@
115115
// RUN: %clang -### -target amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX1200 %s
116116
// RUN: %clang -### -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefix=GFX1201 %s
117117

118+
// RUN: %clang -### -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefix=GFX9_GENERIC %s
119+
// RUN: %clang -### -target amdgcn -mcpu=gfx10.1-generic %s 2>&1 | FileCheck --check-prefix=GFX10_1_GENERIC %s
120+
// RUN: %clang -### -target amdgcn -mcpu=gfx10.3-generic %s 2>&1 | FileCheck --check-prefix=GFX10_3_GENERIC %s
121+
// RUN: %clang -### -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefix=GFX11_GENERIC %s
122+
118123
// GCNDEFAULT-NOT: -target-cpu
119124
// GFX600: "-target-cpu" "gfx600"
120125
// GFX601: "-target-cpu" "gfx601"
@@ -160,3 +165,8 @@
160165
// GFX1151: "-target-cpu" "gfx1151"
161166
// GFX1200: "-target-cpu" "gfx1200"
162167
// GFX1201: "-target-cpu" "gfx1201"
168+
169+
// GFX9_GENERIC: "-target-cpu" "gfx9-generic"
170+
// GFX10_1_GENERIC: "-target-cpu" "gfx10.1-generic"
171+
// GFX10_3_GENERIC: "-target-cpu" "gfx10.3-generic"
172+
// GFX11_GENERIC: "-target-cpu" "gfx11-generic"

llvm/include/llvm/BinaryFormat/ELF.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -788,11 +788,15 @@ enum : unsigned {
788788
EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c,
789789
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d,
790790
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
791+
EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x04f,
792+
EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x050,
793+
EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC = 0x051,
794+
EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC = 0x052,
791795
// clang-format on
792796

793797
// First/last AMDGCN-based processors.
794798
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
795-
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1201,
799+
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC,
796800

797801
// Indicates if the "xnack" target feature is enabled for all code contained
798802
// in the object.

llvm/include/llvm/TargetParser/TargetParser.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,14 @@ enum GPUKind : uint32_t {
111111

112112
GK_AMDGCN_FIRST = GK_GFX600,
113113
GK_AMDGCN_LAST = GK_GFX1201,
114+
115+
GK_GFX9_GENERIC = 192,
116+
GK_GFX10_1_GENERIC = 193,
117+
GK_GFX10_3_GENERIC = 194,
118+
GK_GFX11_GENERIC = 195,
119+
120+
GK_AMDGCN_GENERIC_FIRST = GK_GFX9_GENERIC,
121+
GK_AMDGCN_GENERIC_LAST = GK_GFX11_GENERIC,
114122
};
115123

116124
/// Instruction set architecture version.
@@ -147,6 +155,8 @@ enum ArchFeatureKind : uint32_t {
147155
FEATURE_WGP = 1 << 9,
148156
};
149157

158+
StringRef getArchFamilyNameAMDGCN(GPUKind AK);
159+
150160
StringRef getArchNameAMDGCN(GPUKind AK);
151161
StringRef getArchNameR600(GPUKind AK);
152162
StringRef getCanonicalArchName(const Triple &T, StringRef Arch);

llvm/lib/Object/ELFObjectFile.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,16 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
514514
return "gfx1200";
515515
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201:
516516
return "gfx1201";
517+
518+
// Generic AMDGCN targets
519+
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC:
520+
return "gfx9-generic";
521+
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC:
522+
return "gfx10.1-generic";
523+
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC:
524+
return "gfx10.3-generic";
525+
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC:
526+
return "gfx11-generic";
517527
default:
518528
llvm_unreachable("Unknown EF_AMDGPU_MACH value");
519529
}

llvm/lib/ObjectYAML/ELFYAML.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -612,6 +612,10 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
612612
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1151, EF_AMDGPU_MACH);
613613
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1200, EF_AMDGPU_MACH);
614614
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1201, EF_AMDGPU_MACH);
615+
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, EF_AMDGPU_MACH);
616+
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, EF_AMDGPU_MACH);
617+
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, EF_AMDGPU_MACH);
618+
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, EF_AMDGPU_MACH);
615619
switch (Object->Header.ABIVersion) {
616620
default:
617621
// ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags.

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 43 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -984,6 +984,12 @@ def FeatureGWS : SubtargetFeature<"gws",
984984
"Has Global Wave Sync"
985985
>;
986986

987+
def FeatureRequiresCOV6 : SubtargetFeature<"requires-cov6",
988+
"RequiresCOV6",
989+
"true",
990+
"Target Requires Code Object V6"
991+
>;
992+
987993
// Dummy feature used to disable assembler instructions.
988994
def FeatureDisable : SubtargetFeature<"",
989995
"FeatureDisable","true",
@@ -1193,6 +1199,17 @@ def FeatureISAVersion9_0_Common : FeatureSet<
11931199
FeatureImageInsts,
11941200
FeatureMadMacF32Insts]>;
11951201

1202+
def FeatureISAVersion9_0_Consumer_Common : FeatureSet<
1203+
!listconcat(FeatureISAVersion9_0_Common.Features,
1204+
[FeatureImageGather4D16Bug,
1205+
FeatureDsSrc2Insts,
1206+
FeatureExtendedImageInsts,
1207+
FeatureGDS])>;
1208+
1209+
def FeatureISAVersion9_Generic : FeatureSet<
1210+
!listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
1211+
[FeatureRequiresCOV6])>;
1212+
11961213
def FeatureISAVersion9_0_MI_Common : FeatureSet<
11971214
!listconcat(FeatureISAVersion9_0_Common.Features,
11981215
[FeatureFmaMixInsts,
@@ -1211,43 +1228,27 @@ def FeatureISAVersion9_0_MI_Common : FeatureSet<
12111228
FeatureSupportsSRAMECC])>;
12121229

12131230
def FeatureISAVersion9_0_0 : FeatureSet<
1214-
!listconcat(FeatureISAVersion9_0_Common.Features,
1215-
[FeatureGDS,
1216-
FeatureMadMixInsts,
1217-
FeatureDsSrc2Insts,
1218-
FeatureExtendedImageInsts,
1219-
FeatureImageGather4D16Bug])>;
1231+
!listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
1232+
[FeatureMadMixInsts])>;
12201233

12211234
def FeatureISAVersion9_0_2 : FeatureSet<
1222-
!listconcat(FeatureISAVersion9_0_Common.Features,
1223-
[FeatureGDS,
1224-
FeatureMadMixInsts,
1225-
FeatureDsSrc2Insts,
1226-
FeatureExtendedImageInsts,
1227-
FeatureImageGather4D16Bug])>;
1235+
!listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
1236+
[FeatureMadMixInsts])>;
12281237

12291238
def FeatureISAVersion9_0_4 : FeatureSet<
1230-
!listconcat(FeatureISAVersion9_0_Common.Features,
1231-
[FeatureGDS,
1232-
FeatureDsSrc2Insts,
1233-
FeatureExtendedImageInsts,
1234-
FeatureFmaMixInsts,
1235-
FeatureImageGather4D16Bug])>;
1239+
!listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
1240+
[FeatureFmaMixInsts])>;
12361241

12371242
def FeatureISAVersion9_0_6 : FeatureSet<
1238-
!listconcat(FeatureISAVersion9_0_Common.Features,
1239-
[FeatureGDS,
1240-
HalfRate64Ops,
1243+
!listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
1244+
[HalfRate64Ops,
12411245
FeatureFmaMixInsts,
1242-
FeatureDsSrc2Insts,
1243-
FeatureExtendedImageInsts,
12441246
FeatureDLInsts,
12451247
FeatureDot1Insts,
12461248
FeatureDot2Insts,
12471249
FeatureDot7Insts,
12481250
FeatureDot10Insts,
1249-
FeatureSupportsSRAMECC,
1250-
FeatureImageGather4D16Bug])>;
1251+
FeatureSupportsSRAMECC])>;
12511252

12521253
def FeatureISAVersion9_0_8 : FeatureSet<
12531254
!listconcat(FeatureISAVersion9_0_MI_Common.Features,
@@ -1260,13 +1261,9 @@ def FeatureISAVersion9_0_8 : FeatureSet<
12601261
FeatureImageGather4D16Bug])>;
12611262

12621263
def FeatureISAVersion9_0_9 : FeatureSet<
1263-
!listconcat(FeatureISAVersion9_0_Common.Features,
1264-
[FeatureGDS,
1265-
FeatureMadMixInsts,
1266-
FeatureDsSrc2Insts,
1267-
FeatureExtendedImageInsts,
1268-
FeatureImageInsts,
1269-
FeatureImageGather4D16Bug])>;
1264+
!listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
1265+
[FeatureMadMixInsts,
1266+
FeatureImageInsts])>;
12701267

12711268
def FeatureISAVersion9_0_A : FeatureSet<
12721269
!listconcat(FeatureISAVersion9_0_MI_Common.Features,
@@ -1282,12 +1279,8 @@ def FeatureISAVersion9_0_A : FeatureSet<
12821279
FeatureKernargPreload])>;
12831280

12841281
def FeatureISAVersion9_0_C : FeatureSet<
1285-
!listconcat(FeatureISAVersion9_0_Common.Features,
1286-
[FeatureGDS,
1287-
FeatureMadMixInsts,
1288-
FeatureDsSrc2Insts,
1289-
FeatureExtendedImageInsts,
1290-
FeatureImageGather4D16Bug])>;
1282+
!listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
1283+
[FeatureMadMixInsts])>;
12911284

12921285
def FeatureISAVersion9_4_Common : FeatureSet<
12931286
[FeatureGFX9,
@@ -1367,6 +1360,10 @@ def FeatureISAVersion10_1_Common : FeatureSet<
13671360
FeatureFlatSegmentOffsetBug,
13681361
FeatureNegativeUnalignedScratchOffsetBug])>;
13691362

1363+
def FeatureISAVersion10_1_Generic : FeatureSet<
1364+
!listconcat(FeatureISAVersion10_1_Common.Features,
1365+
[FeatureRequiresCOV6])>;
1366+
13701367
def FeatureISAVersion10_1_0 : FeatureSet<
13711368
!listconcat(FeatureISAVersion10_1_Common.Features,
13721369
[])>;
@@ -1406,6 +1403,10 @@ def FeatureISAVersion10_3_0 : FeatureSet<
14061403
FeatureDot10Insts,
14071404
FeatureShaderCyclesRegister])>;
14081405

1406+
def FeatureISAVersion10_3_Generic: FeatureSet<
1407+
!listconcat(FeatureISAVersion10_3_0.Features,
1408+
[FeatureRequiresCOV6])>;
1409+
14091410
def FeatureISAVersion11_Common : FeatureSet<
14101411
[FeatureGFX11,
14111412
FeatureLDSBankCount32,
@@ -1428,6 +1429,10 @@ def FeatureISAVersion11_Common : FeatureSet<
14281429
FeatureVcmpxPermlaneHazard,
14291430
FeatureMADIntraFwdBug]>;
14301431

1432+
def FeatureISAVersion11_Generic: FeatureSet<
1433+
!listconcat(FeatureISAVersion11_Common.Features,
1434+
[FeatureRequiresCOV6])>;
1435+
14311436
def FeatureISAVersion11_0_Common : FeatureSet<
14321437
!listconcat(FeatureISAVersion11_Common.Features,
14331438
[FeatureMSAALoadDstSelBug,

llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,10 +138,10 @@ bool AMDGPURemoveIncompatibleFunctions::checkFunction(Function &F) {
138138
const GCNSubtarget *ST =
139139
static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F));
140140

141-
// Check the GPU isn't generic. Generic is used for testing only
142-
// and we don't want this pass to interfere with it.
141+
// Check the GPU isn't generic or generic-hsa. Generic is used for testing
142+
// only and we don't want this pass to interfere with it.
143143
StringRef GPUName = ST->getCPU();
144-
if (GPUName.empty() || GPUName.contains("generic"))
144+
if (GPUName.empty() || GPUName.starts_with("generic"))
145145
return false;
146146

147147
// Try to fetch the GPU's info. If we can't, it's likely an unknown processor

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,13 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
162162
LLVM_DEBUG(dbgs() << "sramecc setting for subtarget: "
163163
<< TargetID.getSramEccSetting() << '\n');
164164

165+
// FIXME(?): It's very ugly to crash, it'd be better to print a diagnostic.
166+
if (RequiresCOV6 &&
167+
AMDGPU::getAmdhsaCodeObjectVersion() < AMDGPU::AMDHSA_COV6)
168+
report_fatal_error(
169+
GPU + " is only available on code object version 6 or better",
170+
/*gen_crash_diag*/ false);
171+
165172
return *this;
166173
}
167174

0 commit comments

Comments
 (0)