Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 25 additions & 3 deletions llvm/include/llvm/BinaryFormat/ELF.h
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,7 @@ enum {
ELFOSABI_FENIXOS = 16, // FenixOS
ELFOSABI_CLOUDABI = 17, // Nuxi CloudABI
ELFOSABI_CUDA = 51, // NVIDIA CUDA architecture.
ELFOSABI_CUDA_V2 = 41, // NVIDIA CUDA architecture.
ELFOSABI_FIRST_ARCH = 64, // First architecture-specific OS ABI
ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime
ELFOSABI_AMDGPU_PAL = 65, // AMD PAL runtime
Expand All @@ -385,6 +386,12 @@ enum {
ELFABIVERSION_AMDGPU_HSA_V6 = 4,
};

// CUDA OS ABI version identification, stored in e_ident[EI_ABIVERSION].
enum {
  // With this version the SM value is read from the EF_CUDA_SM field of
  // e_flags.
  ELFABIVERSION_CUDA_V1 = 7,
  // With this version the SM value is read from the EF_CUDA_SM_MASK field of
  // e_flags, shifted down by EF_CUDA_SM_OFFSET.
  ELFABIVERSION_CUDA_V2 = 8,
};

#define ELF_RELOC(name, value) name = value,

// X86_64 relocations.
Expand Down Expand Up @@ -921,9 +928,15 @@ enum {

// NVPTX specific e_flags.
enum : unsigned {
// Processor selection mask for EF_CUDA_SM* values.
// Processor selection mask for EF_CUDA_SM* values prior to Blackwell.
EF_CUDA_SM = 0xff,

// Processor selection mask for EF_CUDA_SM* values following Blackwell.
EF_CUDA_SM_MASK = 0xff00,

// Bit offset (shift amount) of the EF_CUDA_SM_MASK processor selection field.
EF_CUDA_SM_OFFSET = 8,

// SM based processor values.
EF_CUDA_SM20 = 0x14,
EF_CUDA_SM21 = 0x15,
Expand All @@ -943,9 +956,15 @@ enum : unsigned {
EF_CUDA_SM80 = 0x50,
EF_CUDA_SM86 = 0x56,
EF_CUDA_SM87 = 0x57,
EF_CUDA_SM88 = 0x58,
EF_CUDA_SM89 = 0x59,
// The sm_90a variant uses the same machine flag.
EF_CUDA_SM90 = 0x5a,
EF_CUDA_SM100 = 0x64,
EF_CUDA_SM101 = 0x65,
EF_CUDA_SM103 = 0x67,
EF_CUDA_SM110 = 0x6e,
EF_CUDA_SM120 = 0x78,
EF_CUDA_SM121 = 0x79,

// Unified texture binding is enabled.
EF_CUDA_TEXMODE_UNIFIED = 0x100,
Expand All @@ -954,12 +973,15 @@ enum : unsigned {
// The target is using 64-bit addressing.
EF_CUDA_64BIT_ADDRESS = 0x400,
// Set when using the sm_90a processor.
EF_CUDA_ACCELERATORS = 0x800,
EF_CUDA_ACCELERATORS_V1 = 0x800,
// Undocumented software feature.
EF_CUDA_SW_FLAG_V2 = 0x1000,

// Virtual processor selection mask for EF_CUDA_VIRTUAL_SM* values.
EF_CUDA_VIRTUAL_SM = 0xff0000,

// Set when using an accelerator variant like sm_100a.
EF_CUDA_ACCELERATORS = 0x8,
};

// ELF Relocation types for BPF
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/Object/ELFObjectFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -1479,6 +1479,7 @@ template <class ELFT> Triple::OSType ELFObjectFile<ELFT>::getOS() const {
case ELF::ELFOSABI_OPENBSD:
return Triple::OpenBSD;
case ELF::ELFOSABI_CUDA:
case ELF::ELFOSABI_CUDA_V2:
return Triple::CUDA;
case ELF::ELFOSABI_AMDGPU_HSA:
return Triple::AMDHSA;
Expand Down
32 changes: 30 additions & 2 deletions llvm/lib/Object/ELFObjectFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,10 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {

StringRef ELFObjectFileBase::getNVPTXCPUName() const {
assert(getEMachine() == ELF::EM_CUDA);
unsigned SM = getPlatformFlags() & ELF::EF_CUDA_SM;
unsigned SM = getEIdentABIVersion() == ELF::ELFABIVERSION_CUDA_V1
? getPlatformFlags() & ELF::EF_CUDA_SM
: (getPlatformFlags() & ELF::EF_CUDA_SM_MASK) >>
ELF::EF_CUDA_SM_OFFSET;

switch (SM) {
// Fermi architecture.
Expand Down Expand Up @@ -672,14 +675,39 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const {
return "sm_86";
case ELF::EF_CUDA_SM87:
return "sm_87";
case ELF::EF_CUDA_SM88:
return "sm_88";

// Ada architecture.
case ELF::EF_CUDA_SM89:
return "sm_89";

// Hopper architecture.
case ELF::EF_CUDA_SM90:
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_90a" : "sm_90";
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS_V1 ? "sm_90a"
: "sm_90";

// Blackwell architecture.
case ELF::EF_CUDA_SM100:
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_100a"
: "sm_100";
case ELF::EF_CUDA_SM101:
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_101a"
: "sm_101";
case ELF::EF_CUDA_SM103:
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_103a"
: "sm_103";
case ELF::EF_CUDA_SM110:
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_110a"
: "sm_110";

// Blackwell architecture.
case ELF::EF_CUDA_SM120:
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_120a"
: "sm_120";
case ELF::EF_CUDA_SM121:
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_121a"
: "sm_121";
default:
llvm_unreachable("Unknown EF_CUDA_SM value");
}
Expand Down
114 changes: 82 additions & 32 deletions llvm/tools/llvm-readobj/ELFDumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1083,26 +1083,26 @@ const EnumEntry<unsigned> ElfObjectFileType[] = {
};

const EnumEntry<unsigned> ElfOSABI[] = {
{"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE},
{"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX},
{"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD},
{"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX},
{"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD},
{"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS},
{"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX},
{"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX},
{"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD},
{"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64},
{"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO},
{"OpenBSD", "UNIX - OpenBSD", ELF::ELFOSABI_OPENBSD},
{"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS},
{"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK},
{"AROS", "AROS", ELF::ELFOSABI_AROS},
{"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS},
{"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI},
{"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA},
{"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}
};
{"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE},
{"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX},
{"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD},
{"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX},
{"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD},
{"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS},
{"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX},
{"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX},
{"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD},
{"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64},
{"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO},
{"OpenBSD", "UNIX - OpenBSD", ELF::ELFOSABI_OPENBSD},
{"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS},
{"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK},
{"AROS", "AROS", ELF::ELFOSABI_AROS},
{"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS},
{"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI},
{"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA},
{"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA_V2},
{"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}};

const EnumEntry<unsigned> AMDGPUElfOSABI[] = {
{"AMDGPU_HSA", "AMDGPU - HSA", ELF::ELFOSABI_AMDGPU_HSA},
Expand Down Expand Up @@ -1666,16 +1666,60 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
};

const EnumEntry<unsigned> ElfHeaderNVPTXFlags[] = {
ENUM_ENT(EF_CUDA_SM20, "sm_20"), ENUM_ENT(EF_CUDA_SM21, "sm_21"),
ENUM_ENT(EF_CUDA_SM30, "sm_30"), ENUM_ENT(EF_CUDA_SM32, "sm_32"),
ENUM_ENT(EF_CUDA_SM35, "sm_35"), ENUM_ENT(EF_CUDA_SM37, "sm_37"),
ENUM_ENT(EF_CUDA_SM50, "sm_50"), ENUM_ENT(EF_CUDA_SM52, "sm_52"),
ENUM_ENT(EF_CUDA_SM53, "sm_53"), ENUM_ENT(EF_CUDA_SM60, "sm_60"),
ENUM_ENT(EF_CUDA_SM61, "sm_61"), ENUM_ENT(EF_CUDA_SM62, "sm_62"),
ENUM_ENT(EF_CUDA_SM70, "sm_70"), ENUM_ENT(EF_CUDA_SM72, "sm_72"),
ENUM_ENT(EF_CUDA_SM75, "sm_75"), ENUM_ENT(EF_CUDA_SM80, "sm_80"),
ENUM_ENT(EF_CUDA_SM86, "sm_86"), ENUM_ENT(EF_CUDA_SM87, "sm_87"),
ENUM_ENT(EF_CUDA_SM89, "sm_89"), ENUM_ENT(EF_CUDA_SM90, "sm_90"),
ENUM_ENT(EF_CUDA_SM20, "sm_20"),
ENUM_ENT(EF_CUDA_SM21, "sm_21"),
ENUM_ENT(EF_CUDA_SM30, "sm_30"),
ENUM_ENT(EF_CUDA_SM32, "sm_32"),
ENUM_ENT(EF_CUDA_SM35, "sm_35"),
ENUM_ENT(EF_CUDA_SM37, "sm_37"),
ENUM_ENT(EF_CUDA_SM50, "sm_50"),
ENUM_ENT(EF_CUDA_SM52, "sm_52"),
ENUM_ENT(EF_CUDA_SM53, "sm_53"),
ENUM_ENT(EF_CUDA_SM60, "sm_60"),
ENUM_ENT(EF_CUDA_SM61, "sm_61"),
ENUM_ENT(EF_CUDA_SM62, "sm_62"),
ENUM_ENT(EF_CUDA_SM70, "sm_70"),
ENUM_ENT(EF_CUDA_SM72, "sm_72"),
ENUM_ENT(EF_CUDA_SM75, "sm_75"),
ENUM_ENT(EF_CUDA_SM80, "sm_80"),
ENUM_ENT(EF_CUDA_SM86, "sm_86"),
ENUM_ENT(EF_CUDA_SM87, "sm_87"),
ENUM_ENT(EF_CUDA_SM88, "sm_88"),
ENUM_ENT(EF_CUDA_SM89, "sm_89"),
ENUM_ENT(EF_CUDA_SM90, "sm_90"),
ENUM_ENT(EF_CUDA_SM100, "sm_100"),
ENUM_ENT(EF_CUDA_SM101, "sm_101"),
ENUM_ENT(EF_CUDA_SM103, "sm_103"),
ENUM_ENT(EF_CUDA_SM110, "sm_110"),
ENUM_ENT(EF_CUDA_SM120, "sm_120"),
ENUM_ENT(EF_CUDA_SM121, "sm_121"),
ENUM_ENT(EF_CUDA_SM20 << EF_CUDA_SM_OFFSET, "sm_20"),
ENUM_ENT(EF_CUDA_SM21 << EF_CUDA_SM_OFFSET, "sm_21"),
ENUM_ENT(EF_CUDA_SM30 << EF_CUDA_SM_OFFSET, "sm_30"),
ENUM_ENT(EF_CUDA_SM32 << EF_CUDA_SM_OFFSET, "sm_32"),
ENUM_ENT(EF_CUDA_SM35 << EF_CUDA_SM_OFFSET, "sm_35"),
ENUM_ENT(EF_CUDA_SM37 << EF_CUDA_SM_OFFSET, "sm_37"),
ENUM_ENT(EF_CUDA_SM50 << EF_CUDA_SM_OFFSET, "sm_50"),
ENUM_ENT(EF_CUDA_SM52 << EF_CUDA_SM_OFFSET, "sm_52"),
ENUM_ENT(EF_CUDA_SM53 << EF_CUDA_SM_OFFSET, "sm_53"),
ENUM_ENT(EF_CUDA_SM60 << EF_CUDA_SM_OFFSET, "sm_60"),
ENUM_ENT(EF_CUDA_SM61 << EF_CUDA_SM_OFFSET, "sm_61"),
ENUM_ENT(EF_CUDA_SM62 << EF_CUDA_SM_OFFSET, "sm_62"),
ENUM_ENT(EF_CUDA_SM70 << EF_CUDA_SM_OFFSET, "sm_70"),
ENUM_ENT(EF_CUDA_SM72 << EF_CUDA_SM_OFFSET, "sm_72"),
ENUM_ENT(EF_CUDA_SM75 << EF_CUDA_SM_OFFSET, "sm_75"),
ENUM_ENT(EF_CUDA_SM80 << EF_CUDA_SM_OFFSET, "sm_80"),
ENUM_ENT(EF_CUDA_SM86 << EF_CUDA_SM_OFFSET, "sm_86"),
ENUM_ENT(EF_CUDA_SM87 << EF_CUDA_SM_OFFSET, "sm_87"),
ENUM_ENT(EF_CUDA_SM88 << EF_CUDA_SM_OFFSET, "sm_88"),
ENUM_ENT(EF_CUDA_SM89 << EF_CUDA_SM_OFFSET, "sm_89"),
ENUM_ENT(EF_CUDA_SM90 << EF_CUDA_SM_OFFSET, "sm_90"),
ENUM_ENT(EF_CUDA_SM100 << EF_CUDA_SM_OFFSET, "sm_100"),
ENUM_ENT(EF_CUDA_SM101 << EF_CUDA_SM_OFFSET, "sm_101"),
ENUM_ENT(EF_CUDA_SM103 << EF_CUDA_SM_OFFSET, "sm_103"),
ENUM_ENT(EF_CUDA_SM110 << EF_CUDA_SM_OFFSET, "sm_110"),
ENUM_ENT(EF_CUDA_SM120 << EF_CUDA_SM_OFFSET, "sm_120"),
ENUM_ENT(EF_CUDA_SM121 << EF_CUDA_SM_OFFSET, "sm_121"),
};

const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
Expand Down Expand Up @@ -3650,10 +3694,16 @@ template <class ELFT> void GNUELFDumper<ELFT>::printFileHeaders() {
else if (e.e_machine == EM_XTENSA)
ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderXtensaFlags),
unsigned(ELF::EF_XTENSA_MACH));
else if (e.e_machine == EM_CUDA)
else if (e.e_machine == EM_CUDA) {
ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderNVPTXFlags),
unsigned(ELF::EF_CUDA_SM));
else if (e.e_machine == EM_AMDGPU) {
if (e.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1 &&
(e.e_flags & ELF::EF_CUDA_ACCELERATORS_V1))
ElfFlags += "a";
else if (e.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V2 &&
(e.e_flags & ELF::EF_CUDA_ACCELERATORS))
ElfFlags += "a";
} else if (e.e_machine == EM_AMDGPU) {
switch (e.e_ident[ELF::EI_ABIVERSION]) {
default:
break;
Expand Down
23 changes: 15 additions & 8 deletions offload/plugins-nextgen/common/src/Utils/ELF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,23 +60,30 @@ static Expected<bool>
checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
const auto Header = ELFObj.getELFFile().getHeader();
if (Header.e_type != ET_EXEC && Header.e_type != ET_DYN)
return createError("Only executable ELF files are supported");
return createError("only executable ELF files are supported");

if (Header.e_machine == EM_AMDGPU) {
if (Header.e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA)
return createError("Invalid AMD OS/ABI, must be AMDGPU_HSA");
return createError("invalid AMD OS/ABI, must be AMDGPU_HSA");
if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 &&
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
return createError("Invalid AMD ABI version, must be version 5 or above");
return createError("invalid AMD ABI version, must be version 5 or above");
if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
(Header.e_flags & EF_AMDGPU_MACH) >
EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)
return createError("Unsupported AMDGPU architecture");
return createError("unsupported AMDGPU architecture");
} else if (Header.e_machine == EM_CUDA) {
if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)
return createError("Invalid CUDA addressing mode");
if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35)
return createError("Unsupported NVPTX architecture");
if (Header.e_ident[EI_ABIVERSION] == ELFABIVERSION_CUDA_V1) {
if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)
return createError("invalid CUDA addressing mode");
if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35)
return createError("unsupported NVPTX architecture");
} else if (Header.e_ident[EI_ABIVERSION] == ELFABIVERSION_CUDA_V2) {
if ((Header.e_flags & EF_CUDA_SM_MASK) < EF_CUDA_SM100)
return createError("unsupported NVPTX architecture");
} else {
return createError("invalid CUDA ABI version");
}
}

return Header.e_machine == EMachine;
Expand Down
6 changes: 5 additions & 1 deletion offload/plugins-nextgen/cuda/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1442,7 +1442,11 @@ struct CUDAPluginTy final : public GenericPluginTy {
return ElfOrErr.takeError();

// Get the numeric value for the image's `sm_` value.
auto SM = ElfOrErr->getPlatformFlags() & ELF::EF_CUDA_SM;
const auto Header = ElfOrErr->getELFFile().getHeader();
unsigned SM =
Header.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1
? Header.e_flags & ELF::EF_CUDA_SM
: (Header.e_flags & ELF::EF_CUDA_SM_MASK) >> ELF::EF_CUDA_SM_OFFSET;

CUdevice Device;
CUresult Res = cuDeviceGet(&Device, DeviceId);
Expand Down
Loading