llvm
diff --git a/‎clang/lib/Basic/Targets/AMDGPU.cpp‎
Lines changed: 16 additions & 4 deletions b/‎clang/lib/Basic/Targets/AMDGPU.cpp‎
Lines changed: 16 additions & 4 deletions
diff --git a/‎clang/test/Driver/amdgpu-macros.cl‎
Lines changed: 5 additions & 0 deletions b/‎clang/test/Driver/amdgpu-macros.cl‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎clang/test/Driver/amdgpu-mcpu.cl‎
Lines changed: 10 additions & 0 deletions b/‎clang/test/Driver/amdgpu-mcpu.cl‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎llvm/include/llvm/BinaryFormat/ELF.h‎
Lines changed: 5 additions & 1 deletion b/‎llvm/include/llvm/BinaryFormat/ELF.h‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎llvm/include/llvm/TargetParser/TargetParser.h‎
Lines changed: 10 additions & 0 deletions b/‎llvm/include/llvm/TargetParser/TargetParser.h‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎llvm/lib/Object/ELFObjectFile.cpp‎
Lines changed: 10 additions & 0 deletions b/‎llvm/lib/Object/ELFObjectFile.cpp‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎llvm/lib/ObjectYAML/ELFYAML.cpp‎
Lines changed: 4 additions & 0 deletions b/‎llvm/lib/ObjectYAML/ELFYAML.cpp‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎llvm/lib/Target/AMDGPU/AMDGPU.td‎
Lines changed: 43 additions & 38 deletions b/‎llvm/lib/Target/AMDGPU/AMDGPU.td‎
Lines changed: 43 additions & 38 deletions
diff --git a/‎llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp‎
Lines changed: 3 additions & 3 deletions b/‎llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp‎
Lines changed: 7 additions & 0 deletions b/‎llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp‎
Lines changed: 7 additions & 0 deletions
@@ -275,13 +275,25 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__R600__");
 
   if (GPUKind != llvm::AMDGPU::GK_NONE) {
-    StringRef CanonName = isAMDGCN(getTriple()) ?
-      getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
+    std::string CanonName = (isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
+                                                   : getArchNameR600(GPUKind))
+                                .str();
+
+    // Sanitize the name of generic targets.
+    // e.g. gfx10.1-generic -> gfx10_1_generic
+    if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST &&
+        GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) {
+      std::replace(CanonName.begin(), CanonName.end(), '.', '_');
+      std::replace(CanonName.begin(), CanonName.end(), '-', '_');
+    }
+
     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
     // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
     if (isAMDGCN(getTriple())) {
-      assert(CanonName.starts_with("gfx") && "Invalid amdgcn canonical name");
-      Builder.defineMacro(Twine("__") + Twine(CanonName.drop_back(2).upper()) +
+      assert(StringRef(CanonName).starts_with("gfx") &&
+             "Invalid amdgcn canonical name");
+      StringRef CanonFamilyName = getArchFamilyNameAMDGCN(GPUKind);
+      Builder.defineMacro(Twine("__") + Twine(CanonFamilyName.upper()) +
                           Twine("__"));
     }
     if (isAMDGCN(getTriple())) {
 
@@ -131,6 +131,11 @@
 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1200 -DFAMILY=GFX12
 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1201 -DFAMILY=GFX12
 
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx9_generic -DFAMILY=GFX9
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10.1-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_1_generic -DFAMILY=GFX10
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10.3-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_3_generic -DFAMILY=GFX10
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx11_generic -DFAMILY=GFX11
+
 // ARCH-GCN-DAG: #define FP_FAST_FMA 1
 
 // FAST_FMAF-DAG: #define FP_FAST_FMAF 1
 
@@ -115,6 +115,11 @@
 // RUN: %clang -### -target amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX1200 %s
 // RUN: %clang -### -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefix=GFX1201 %s
 
+// RUN: %clang -### -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefix=GFX9_GENERIC %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx10.1-generic %s 2>&1 | FileCheck --check-prefix=GFX10_1_GENERIC %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx10.3-generic %s 2>&1 | FileCheck --check-prefix=GFX10_3_GENERIC %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefix=GFX11_GENERIC %s
+
 // GCNDEFAULT-NOT: -target-cpu
 // GFX600:    "-target-cpu" "gfx600"
 // GFX601:    "-target-cpu" "gfx601"
@@ -160,3 +165,8 @@
 // GFX1151:   "-target-cpu" "gfx1151"
 // GFX1200:   "-target-cpu" "gfx1200"
 // GFX1201:   "-target-cpu" "gfx1201"
+
+// GFX9_GENERIC:      "-target-cpu" "gfx9-generic"
+// GFX10_1_GENERIC:   "-target-cpu" "gfx10.1-generic"
+// GFX10_3_GENERIC:   "-target-cpu" "gfx10.3-generic"
+// GFX11_GENERIC:     "-target-cpu" "gfx11-generic"
@@ -788,11 +788,15 @@ enum : unsigned {
   EF_AMDGPU_MACH_AMDGCN_GFX942        = 0x04c,
   EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d,
   EF_AMDGPU_MACH_AMDGCN_GFX1201       = 0x04e,
+  EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC      = 0x04f,
+  EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC   = 0x050,
+  EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC   = 0x051,
+  EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC     = 0x052,
   // clang-format on
 
   // First/last AMDGCN-based processors.
   EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
-  EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1201,
+  EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC,
 
   // Indicates if the "xnack" target feature is enabled for all code contained
   // in the object.
 
@@ -111,6 +111,14 @@ enum GPUKind : uint32_t {
 
   GK_AMDGCN_FIRST = GK_GFX600,
   GK_AMDGCN_LAST = GK_GFX1201,
+
+  GK_GFX9_GENERIC = 192,
+  GK_GFX10_1_GENERIC = 193,
+  GK_GFX10_3_GENERIC = 194,
+  GK_GFX11_GENERIC = 195,
+
+  GK_AMDGCN_GENERIC_FIRST = GK_GFX9_GENERIC,
+  GK_AMDGCN_GENERIC_LAST = GK_GFX11_GENERIC,
 };
 
 /// Instruction set architecture version.
@@ -147,6 +155,8 @@ enum ArchFeatureKind : uint32_t {
   FEATURE_WGP = 1 << 9,
 };
 
+StringRef getArchFamilyNameAMDGCN(GPUKind AK);
+
 StringRef getArchNameAMDGCN(GPUKind AK);
 StringRef getArchNameR600(GPUKind AK);
 StringRef getCanonicalArchName(const Triple &T, StringRef Arch);
 
@@ -514,6 +514,16 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
     return "gfx1200";
   case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201:
     return "gfx1201";
+
+  // Generic AMDGCN targets
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC:
+    return "gfx9-generic";
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC:
+    return "gfx10.1-generic";
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC:
+    return "gfx10.3-generic";
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC:
+    return "gfx11-generic";
   default:
     llvm_unreachable("Unknown EF_AMDGPU_MACH value");
   }
 
@@ -612,6 +612,10 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
     BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1151, EF_AMDGPU_MACH);
     BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1200, EF_AMDGPU_MACH);
     BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1201, EF_AMDGPU_MACH);
+    BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, EF_AMDGPU_MACH);
+    BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, EF_AMDGPU_MACH);
+    BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, EF_AMDGPU_MACH);
+    BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, EF_AMDGPU_MACH);
     switch (Object->Header.ABIVersion) {
     default:
       // ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags.
 
@@ -984,6 +984,12 @@ def FeatureGWS : SubtargetFeature<"gws",
   "Has Global Wave Sync"
 >;
 
+def FeatureRequiresCOV6 : SubtargetFeature<"requires-cov6",
+  "RequiresCOV6",
+  "true",
+  "Target Requires Code Object V6"
+>;
+
 // Dummy feature used to disable assembler instructions.
 def FeatureDisable : SubtargetFeature<"",
   "FeatureDisable","true",
@@ -1193,6 +1199,17 @@ def FeatureISAVersion9_0_Common : FeatureSet<
    FeatureImageInsts,
    FeatureMadMacF32Insts]>;
 
+def FeatureISAVersion9_0_Consumer_Common : FeatureSet<
+  !listconcat(FeatureISAVersion9_0_Common.Features,
+    [FeatureImageGather4D16Bug,
+     FeatureDsSrc2Insts,
+     FeatureExtendedImageInsts,
+     FeatureGDS])>;
+
+def FeatureISAVersion9_Generic : FeatureSet<
+  !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+    [FeatureRequiresCOV6])>;
+
 def FeatureISAVersion9_0_MI_Common : FeatureSet<
   !listconcat(FeatureISAVersion9_0_Common.Features,
     [FeatureFmaMixInsts,
@@ -1211,43 +1228,27 @@ def FeatureISAVersion9_0_MI_Common : FeatureSet<
      FeatureSupportsSRAMECC])>;
 
 def FeatureISAVersion9_0_0 : FeatureSet<
-  !listconcat(FeatureISAVersion9_0_Common.Features,
-    [FeatureGDS,
-     FeatureMadMixInsts,
-     FeatureDsSrc2Insts,
-     FeatureExtendedImageInsts,
-     FeatureImageGather4D16Bug])>;
+  !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+    [FeatureMadMixInsts])>;
 
 def FeatureISAVersion9_0_2 : FeatureSet<
-  !listconcat(FeatureISAVersion9_0_Common.Features,
-    [FeatureGDS,
-     FeatureMadMixInsts,
-     FeatureDsSrc2Insts,
-     FeatureExtendedImageInsts,
-     FeatureImageGather4D16Bug])>;
+  !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+    [FeatureMadMixInsts])>;
 
 def FeatureISAVersion9_0_4 : FeatureSet<
-  !listconcat(FeatureISAVersion9_0_Common.Features,
-    [FeatureGDS,
-     FeatureDsSrc2Insts,
-     FeatureExtendedImageInsts,
-     FeatureFmaMixInsts,
-     FeatureImageGather4D16Bug])>;
+  !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+    [FeatureFmaMixInsts])>;
 
 def FeatureISAVersion9_0_6 : FeatureSet<
-  !listconcat(FeatureISAVersion9_0_Common.Features,
-    [FeatureGDS,
-     HalfRate64Ops,
+  !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+    [HalfRate64Ops,
      FeatureFmaMixInsts,
-     FeatureDsSrc2Insts,
-     FeatureExtendedImageInsts,
      FeatureDLInsts,
      FeatureDot1Insts,
      FeatureDot2Insts,
      FeatureDot7Insts,
      FeatureDot10Insts,
-     FeatureSupportsSRAMECC,
-     FeatureImageGather4D16Bug])>;
+     FeatureSupportsSRAMECC])>;
 
 def FeatureISAVersion9_0_8 : FeatureSet<
   !listconcat(FeatureISAVersion9_0_MI_Common.Features,
@@ -1260,13 +1261,9 @@ def FeatureISAVersion9_0_8 : FeatureSet<
      FeatureImageGather4D16Bug])>;
 
 def FeatureISAVersion9_0_9 : FeatureSet<
-  !listconcat(FeatureISAVersion9_0_Common.Features,
-    [FeatureGDS,
-     FeatureMadMixInsts,
-     FeatureDsSrc2Insts,
-     FeatureExtendedImageInsts,
-     FeatureImageInsts,
-     FeatureImageGather4D16Bug])>;
+  !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+    [FeatureMadMixInsts,
+     FeatureImageInsts])>;
 
 def FeatureISAVersion9_0_A : FeatureSet<
   !listconcat(FeatureISAVersion9_0_MI_Common.Features,
@@ -1282,12 +1279,8 @@ def FeatureISAVersion9_0_A : FeatureSet<
      FeatureKernargPreload])>;
 
 def FeatureISAVersion9_0_C : FeatureSet<
-  !listconcat(FeatureISAVersion9_0_Common.Features,
-    [FeatureGDS,
-     FeatureMadMixInsts,
-     FeatureDsSrc2Insts,
-     FeatureExtendedImageInsts,
-     FeatureImageGather4D16Bug])>;
+  !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+    [FeatureMadMixInsts])>;
 
 def FeatureISAVersion9_4_Common : FeatureSet<
   [FeatureGFX9,
@@ -1367,6 +1360,10 @@ def FeatureISAVersion10_1_Common : FeatureSet<
      FeatureFlatSegmentOffsetBug,
      FeatureNegativeUnalignedScratchOffsetBug])>;
 
+def FeatureISAVersion10_1_Generic : FeatureSet<
+  !listconcat(FeatureISAVersion10_1_Common.Features,
+    [FeatureRequiresCOV6])>;
+
 def FeatureISAVersion10_1_0 : FeatureSet<
   !listconcat(FeatureISAVersion10_1_Common.Features,
     [])>;
@@ -1406,6 +1403,10 @@ def FeatureISAVersion10_3_0 : FeatureSet<
      FeatureDot10Insts,
      FeatureShaderCyclesRegister])>;
 
+def FeatureISAVersion10_3_Generic: FeatureSet<
+  !listconcat(FeatureISAVersion10_3_0.Features,
+    [FeatureRequiresCOV6])>;
+
 def FeatureISAVersion11_Common : FeatureSet<
   [FeatureGFX11,
    FeatureLDSBankCount32,
@@ -1428,6 +1429,10 @@ def FeatureISAVersion11_Common : FeatureSet<
    FeatureVcmpxPermlaneHazard,
    FeatureMADIntraFwdBug]>;
 
+def FeatureISAVersion11_Generic: FeatureSet<
+  !listconcat(FeatureISAVersion11_Common.Features,
+    [FeatureRequiresCOV6])>;
+
 def FeatureISAVersion11_0_Common : FeatureSet<
   !listconcat(FeatureISAVersion11_Common.Features,
     [FeatureMSAALoadDstSelBug,
 
@@ -138,10 +138,10 @@ bool AMDGPURemoveIncompatibleFunctions::checkFunction(Function &F) {
   const GCNSubtarget *ST =
       static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F));
 
-  // Check the GPU isn't generic. Generic is used for testing only
-  // and we don't want this pass to interfere with it.
+  // Check the GPU isn't generic or generic-hsa. Generic is used for testing
+  // only and we don't want this pass to interfere with it.
   StringRef GPUName = ST->getCPU();
-  if (GPUName.empty() || GPUName.contains("generic"))
+  if (GPUName.empty() || GPUName.starts_with("generic"))
     return false;
 
   // Try to fetch the GPU's info. If we can't, it's likely an unknown processor
 
@@ -162,6 +162,13 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
   LLVM_DEBUG(dbgs() << "sramecc setting for subtarget: "
                     << TargetID.getSramEccSetting() << '\n');
 
+  // FIXME(?): It's very ugly to crash, it'd be better to print a diagnostic.
+  if (RequiresCOV6 &&
+      AMDGPU::getAmdhsaCodeObjectVersion() < AMDGPU::AMDHSA_COV6)
+    report_fatal_error(
+        GPU + " is only available on code object version 6 or better",
+        /*gen_crash_diag*/ false);
+
   return *this;
 }