2525
2626#include < algorithm>
2727#include < cassert>
28+ #include < cstdint>
2829#include < cstdlib>
2930#include < cstring>
3031#include < fstream>
@@ -397,6 +398,10 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M,
397398 Img.getLinkOptions (), PiDevices,
398399 ContextImpl->getCachedLibPrograms (), DeviceLibReqMask);
399400
401+ {
402+ std::lock_guard<std::mutex> Lock (MNativeProgramsMutex);
403+ NativePrograms[BuiltProgram.get ()] = &Img;
404+ }
400405 return BuiltProgram.release ();
401406 };
402407
@@ -851,6 +856,23 @@ ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context,
851856 return Program;
852857}
853858
859+ static ProgramManager::KernelArgMask
860+ createKernelArgMask (const pi::ByteArray &Bytes) {
861+ const int NBytesForSize = 8 ;
862+ const int NBitsInElement = 8 ;
863+ std::uint64_t SizeInBits = 0 ;
864+ for (int I = 0 ; I < NBytesForSize; ++I)
865+ SizeInBits |= static_cast <std::uint64_t >(Bytes[I]) << I * NBitsInElement;
866+
867+ ProgramManager::KernelArgMask Result;
868+ for (std::uint64_t I = 0 ; I < SizeInBits; ++I) {
869+ std::uint8_t Byte = Bytes[NBytesForSize + (I / NBitsInElement)];
870+ Result.push_back (Byte & (1 << (I % NBitsInElement)));
871+ }
872+
873+ return Result;
874+ }
875+
854876void ProgramManager::addImages (pi_device_binaries DeviceBinary) {
855877 std::lock_guard<std::mutex> Guard (Sync::getGlobalLock ());
856878
@@ -860,6 +882,17 @@ void ProgramManager::addImages(pi_device_binaries DeviceBinary) {
860882 const _pi_offload_entry EntriesB = RawImg->EntriesBegin ;
861883 const _pi_offload_entry EntriesE = RawImg->EntriesEnd ;
862884 auto Img = make_unique_ptr<RTDeviceBinaryImage>(RawImg, M);
885+
886+ // Fill the kernel argument mask map
887+ const pi::DeviceBinaryImage::PropertyRange &KPOIRange =
888+ Img->getKernelParamOptInfo ();
889+ if (KPOIRange.isAvailable ()) {
890+ KernelNameToArgMaskMap &ArgMaskMap =
891+ m_EliminatedKernelArgMasks[Img.get ()];
892+ for (const auto &Info : KPOIRange)
893+ ArgMaskMap[Info->Name ] =
894+ createKernelArgMask (pi::DeviceBinaryProperty (Info).asByteArray ());
895+ }
863896 // Use the entry information if it's available
864897 if (EntriesB != EntriesE) {
865898 // The kernel sets for any pair of images are either disjoint or
@@ -1018,6 +1051,55 @@ uint32_t ProgramManager::getDeviceLibReqMask(const RTDeviceBinaryImage &Img) {
10181051 return 0xFFFFFFFF ;
10191052}
10201053
1054+ // TODO consider another approach with storing the masks in the integration
1055+ // header instead.
1056+ ProgramManager::KernelArgMask ProgramManager::getEliminatedKernelArgMask (
1057+ OSModuleHandle M, const context &Context, pi::PiProgram NativePrg,
1058+ const string_class &KernelName, bool KnownProgram) {
1059+ // If instructed to use a spv file, assume no eliminated arguments.
1060+ if (m_UseSpvFile && M == OSUtil::ExeModuleHandle)
1061+ return {};
1062+
1063+ {
1064+ std::lock_guard<std::mutex> Lock (MNativeProgramsMutex);
1065+ auto ImgIt = NativePrograms.find (NativePrg);
1066+ if (ImgIt != NativePrograms.end ()) {
1067+ auto MapIt = m_EliminatedKernelArgMasks.find (ImgIt->second );
1068+ if (MapIt != m_EliminatedKernelArgMasks.end ())
1069+ return MapIt->second [KernelName];
1070+ return {};
1071+ }
1072+ }
1073+
1074+ if (KnownProgram)
1075+ throw runtime_error (" Program is not associated with a binary image" ,
1076+ PI_INVALID_VALUE);
1077+
1078+ // If not sure whether the program was built with one of the images, try
1079+ // finding the binary.
1080+ // TODO this can backfire in some extreme edge cases where there's a kernel
1081+ // name collision between our binaries and user-created native programs.
1082+ KernelSetId KSId;
1083+ try {
1084+ KSId = getKernelSetId (M, KernelName);
1085+ } catch (sycl::runtime_error &e) {
1086+ // If the kernel name wasn't found, assume that the program wasn't created
1087+ // from one of our device binary images.
1088+ if (e.get_cl_code () == PI_INVALID_KERNEL_NAME)
1089+ return {};
1090+ std::rethrow_exception (std::current_exception ());
1091+ }
1092+ RTDeviceBinaryImage &Img = getDeviceImage (M, KSId, Context);
1093+ {
1094+ std::lock_guard<std::mutex> Lock (MNativeProgramsMutex);
1095+ NativePrograms[NativePrg] = &Img;
1096+ }
1097+ auto MapIt = m_EliminatedKernelArgMasks.find (&Img);
1098+ if (MapIt != m_EliminatedKernelArgMasks.end ())
1099+ return MapIt->second [KernelName];
1100+ return {};
1101+ }
1102+
10211103} // namespace detail
10221104} // namespace sycl
10231105} // __SYCL_INLINE_NAMESPACE(cl)
0 commit comments