@@ -630,61 +630,8 @@ sycl::detail::pi::PiProgram ProgramManager::getBuiltPIProgram(
630630 getDeviceImage (KSId, Context, Device, JITCompilationIsRequired);
631631
632632 // Check that device supports all aspects used by the kernel
633- const RTDeviceBinaryImage::PropertyRange &ARange =
634- Img.getDeviceRequirements ();
635-
636- #define __SYCL_ASPECT (ASPECT, ID ) \
637- case aspect::ASPECT: \
638- return #ASPECT;
639- #define __SYCL_ASPECT_DEPRECATED (ASPECT, ID, MESSAGE ) __SYCL_ASPECT(ASPECT, ID)
640- // We don't need "case aspect::usm_allocator" here because it will duplicate
641- // "case aspect::usm_system_allocations", therefore leave this macro empty
642- #define __SYCL_ASPECT_DEPRECATED_ALIAS (ASPECT, ID, MESSAGE )
643- auto getAspectNameStr = [](aspect AspectNum) -> std::string {
644- switch (AspectNum) {
645- #include < sycl/info/aspects.def>
646- #include < sycl/info/aspects_deprecated.def>
647- }
648- throw sycl::exception (errc::kernel_not_supported,
649- " Unknown aspect " +
650- std::to_string (static_cast <unsigned >(AspectNum)));
651- };
652- #undef __SYCL_ASPECT_DEPRECATED_ALIAS
653- #undef __SYCL_ASPECT_DEPRECATED
654- #undef __SYCL_ASPECT
655-
656- for (RTDeviceBinaryImage::PropertyRange::ConstIterator It : ARange) {
657- using namespace std ::literals;
658- if ((*It)->Name == " aspects" sv) {
659- ByteArray Aspects = DeviceBinaryProperty (*It).asByteArray ();
660- // 8 because we need to skip 64-bits of size of the byte array
661- Aspects.dropBytes (8 );
662- while (!Aspects.empty ()) {
663- auto Aspect = static_cast <aspect>(Aspects.consume <int >());
664- if (!Dev->has (Aspect))
665- throw sycl::exception (errc::kernel_not_supported,
666- " Required aspect " + getAspectNameStr (Aspect) +
667- " is not supported on the device" );
668- }
669- } else if ((*It)->Name == " reqd_sub_group_size" sv) {
670- auto ReqdSubGroupSize = DeviceBinaryProperty (*It).asUint32 ();
671- auto SupportedSubGroupSizes =
672- Device.get_info <info::device::sub_group_sizes>();
673-
674- // !getUint32PropAsBool(Img, "isEsimdImage") is a WA for ESIMD,
675- // as ESIMD images have a reqd-sub-group-size of 1, but currently
676- // no backend currently includes 1 as a valid sub-group size.
677- // This can be removed if backends add 1 as a valid sub-group size.
678- if (!getUint32PropAsBool (Img, " isEsimdImage" ) &&
679- std::none_of (SupportedSubGroupSizes.cbegin (),
680- SupportedSubGroupSizes.cend (),
681- [=](auto s) { return s == ReqdSubGroupSize; }))
682- throw sycl::exception (errc::kernel_not_supported,
683- " Sub-group size " +
684- std::to_string (ReqdSubGroupSize) +
685- " is not supported on the device" );
686- }
687- }
633+ if (auto exception = checkDevSupportDeviceRequirements (Device, Img))
634+ throw *exception;
688635
689636 auto BuildF = [this , &Img, &Context, &ContextImpl, &Device, Prg, &CompileOpts,
690637 &LinkOpts, SpecConsts] {
@@ -2456,6 +2403,44 @@ ProgramManager::getOrCreateKernel(const context &Context,
24562403
24572404bool doesDevSupportDeviceRequirements (const device &Dev,
24582405 const RTDeviceBinaryImage &Img) {
2406+ return !checkDevSupportDeviceRequirements (Dev, Img).has_value ();
2407+ }
2408+
2409+ static std::string getAspectNameStr (sycl::aspect AspectNum) {
2410+ #define __SYCL_ASPECT (ASPECT, ID ) \
2411+ case aspect::ASPECT: \
2412+ return #ASPECT;
2413+ #define __SYCL_ASPECT_DEPRECATED (ASPECT, ID, MESSAGE ) __SYCL_ASPECT(ASPECT, ID)
2414+ // We don't need "case aspect::usm_allocator" here because it will duplicate
2415+ // "case aspect::usm_system_allocations", therefore leave this macro empty
2416+ #define __SYCL_ASPECT_DEPRECATED_ALIAS (ASPECT, ID, MESSAGE )
2417+ switch (AspectNum) {
2418+ #include < sycl/info/aspects.def>
2419+ #include < sycl/info/aspects_deprecated.def>
2420+ }
2421+ throw sycl::exception (errc::kernel_not_supported,
2422+ " Unknown aspect " +
2423+ std::to_string (static_cast <unsigned >(AspectNum)));
2424+ #undef __SYCL_ASPECT_DEPRECATED_ALIAS
2425+ #undef __SYCL_ASPECT_DEPRECATED
2426+ #undef __SYCL_ASPECT
2427+ }
2428+
2429+ // Check if the multiplication over unsigned integers overflows
2430+ template <typename T>
2431+ static std::enable_if_t <std::is_unsigned_v<T>, std::optional<T>>
2432+ multiply_with_overflow_check (T x, T y) {
2433+ if (y == 0 )
2434+ return 0 ;
2435+ if (x > std::numeric_limits<T>::max () / y)
2436+ return {};
2437+ else
2438+ return x * y;
2439+ }
2440+
2441+ std::optional<sycl::exception>
2442+ checkDevSupportDeviceRequirements (const device &Dev,
2443+ const RTDeviceBinaryImage &Img) {
24592444 auto getPropIt = [&Img](const std::string &PropName) {
24602445 const RTDeviceBinaryImage::PropertyRange &PropRange =
24612446 Img.getDeviceRequirements ();
@@ -2471,7 +2456,8 @@ bool doesDevSupportDeviceRequirements(const device &Dev,
24712456 };
24722457
24732458 auto AspectsPropIt = getPropIt (" aspects" );
2474- auto ReqdWGSizePropIt = getPropIt (" reqd_work_group_size" );
2459+ auto ReqdWGSizeUint32TPropIt = getPropIt (" reqd_work_group_size" );
2460+ auto ReqdWGSizeUint64TPropIt = getPropIt (" reqd_work_group_size_uint64_t" );
24752461 auto ReqdSubGroupSizePropIt = getPropIt (" reqd_sub_group_size" );
24762462
24772463 // Checking if device supports defined aspects
@@ -2483,28 +2469,54 @@ bool doesDevSupportDeviceRequirements(const device &Dev,
24832469 while (!Aspects.empty ()) {
24842470 aspect Aspect = Aspects.consume <aspect>();
24852471 if (!Dev.has (Aspect))
2486- return false ;
2472+ return sycl::exception (errc::kernel_not_supported,
2473+ " Required aspect " + getAspectNameStr (Aspect) +
2474+ " is not supported on the device" );
24872475 }
24882476 }
24892477
24902478 // Checking if device supports defined required work group size
2491- if (ReqdWGSizePropIt) {
2492- ByteArray ReqdWGSize =
2493- DeviceBinaryProperty (*(ReqdWGSizePropIt.value ())).asByteArray ();
2479+ if (ReqdWGSizeUint32TPropIt || ReqdWGSizeUint64TPropIt) {
2480+ // / TODO: Before intel/llvm#10620, the reqd_work_group_size attribute
2481+ // stores its values as uint32_t, but this needed to be expanded to
2482+ // uint64_t. However, this change did not happen in ABI-breaking
2483+ // window, so we attach the required work-group size as the
2484+ // reqd_work_group_size_uint64_t attribute. At the next ABI-breaking
2485+ // window, we can remove the logic for the 32 bit property.
2486+ bool usingUint64_t = ReqdWGSizeUint64TPropIt.has_value ();
2487+ auto it = usingUint64_t ? ReqdWGSizeUint64TPropIt : ReqdWGSizeUint32TPropIt;
2488+
2489+ ByteArray ReqdWGSize = DeviceBinaryProperty (*(it.value ())).asByteArray ();
24942490 // Drop 8 bytes describing the size of the byte array.
24952491 ReqdWGSize.dropBytes (8 );
2496- int ReqdWGSizeAllDimsTotal = 1 ;
2497- std::vector<int > ReqdWGSizeVec;
2492+ uint64_t ReqdWGSizeAllDimsTotal = 1 ;
2493+ std::vector<uint64_t > ReqdWGSizeVec;
24982494 int Dims = 0 ;
24992495 while (!ReqdWGSize.empty ()) {
2500- int SingleDimSize = ReqdWGSize.consume <int >();
2501- ReqdWGSizeAllDimsTotal *= SingleDimSize;
2496+ uint64_t SingleDimSize = usingUint64_t ? ReqdWGSize.consume <uint64_t >()
2497+ : ReqdWGSize.consume <uint32_t >();
2498+ if (auto res = multiply_with_overflow_check (ReqdWGSizeAllDimsTotal,
2499+ SingleDimSize))
2500+ ReqdWGSizeAllDimsTotal = *res;
2501+ else
2502+ return sycl::exception (
2503+ sycl::errc::kernel_not_supported,
2504+ " Required work-group size is not supported"
2505+ " (total number of work-items requested can't fit into size_t)" );
25022506 ReqdWGSizeVec.push_back (SingleDimSize);
25032507 Dims++;
25042508 }
2505- if (static_cast <size_t >(ReqdWGSizeAllDimsTotal) >
2506- Dev.get_info <info::device::max_work_group_size>())
2507- return false ;
2509+
2510+ // The SingleDimSize was computed in an uint64_t; size_t does not
2511+ // necessarily have to be the same uint64_t (but should fit in an
2512+ // uint64_t).
2513+ if (ReqdWGSizeAllDimsTotal >
2514+ Dev.get_info <info::device::max_work_group_size>() ||
2515+ ReqdWGSizeAllDimsTotal > std::numeric_limits<size_t >::max ())
2516+ return sycl::exception (sycl::errc::kernel_not_supported,
2517+ " Required work-group size " +
2518+ std::to_string (ReqdWGSizeAllDimsTotal) +
2519+ " is not supported on the device" );
25082520 // Creating std::variant to call max_work_item_sizes one time to avoid
25092521 // performance drop
25102522 std::variant<id<1 >, id<2 >, id<3 >> MaxWorkItemSizesVariant;
@@ -2522,17 +2534,26 @@ bool doesDevSupportDeviceRequirements(const device &Dev,
25222534 // issues after that
25232535 if (Dims == 1 ) {
25242536 // ReqdWGSizeVec is in reverse order compared to MaxWorkItemSizes
2525- if (static_cast < size_t >( ReqdWGSizeVec[i]) >
2537+ if (ReqdWGSizeVec[i] >
25262538 std::get<id<1 >>(MaxWorkItemSizesVariant)[Dims - i - 1 ])
2527- return false ;
2539+ return sycl::exception (sycl::errc::kernel_not_supported,
2540+ " Required work-group size " +
2541+ std::to_string (ReqdWGSizeVec[i]) +
2542+ " is not supported" );
25282543 } else if (Dims == 2 ) {
2529- if (static_cast < size_t >( ReqdWGSizeVec[i]) >
2544+ if (ReqdWGSizeVec[i] >
25302545 std::get<id<2 >>(MaxWorkItemSizesVariant)[Dims - i - 1 ])
2531- return false ;
2546+ return sycl::exception (sycl::errc::kernel_not_supported,
2547+ " Required work-group size " +
2548+ std::to_string (ReqdWGSizeVec[i]) +
2549+ " is not supported" );
25322550 } else // (Dims == 3)
2533- if (static_cast < size_t >( ReqdWGSizeVec[i]) >
2551+ if (ReqdWGSizeVec[i] >
25342552 std::get<id<3 >>(MaxWorkItemSizesVariant)[Dims - i - 1 ])
2535- return false ;
2553+ return sycl::exception (sycl::errc::kernel_not_supported,
2554+ " Required work-group size " +
2555+ std::to_string (ReqdWGSizeVec[i]) +
2556+ " is not supported" );
25362557 }
25372558 }
25382559
@@ -2541,14 +2562,21 @@ bool doesDevSupportDeviceRequirements(const device &Dev,
25412562 auto ReqdSubGroupSize =
25422563 DeviceBinaryProperty (*(ReqdSubGroupSizePropIt.value ())).asUint32 ();
25432564 auto SupportedSubGroupSizes = Dev.get_info <info::device::sub_group_sizes>();
2565+ // !getUint32PropAsBool(Img, "isEsimdImage") is a WA for ESIMD,
2566+ // as ESIMD images have a reqd-sub-group-size of 1, but currently
2567+ // no backend currently includes 1 as a valid sub-group size.
2568+ // This can be removed if backends add 1 as a valid sub-group size.
25442569 if (!getUint32PropAsBool (Img, " isEsimdImage" ) &&
25452570 std::none_of (SupportedSubGroupSizes.cbegin (),
25462571 SupportedSubGroupSizes.cend (),
25472572 [=](auto s) { return s == ReqdSubGroupSize; }))
2548- return false ;
2573+ return sycl::exception (sycl::errc::kernel_not_supported,
2574+ " Sub-group size " +
2575+ std::to_string (ReqdSubGroupSize) +
2576+ " is not supported on the device" );
25492577 }
25502578
2551- return true ;
2579+ return {} ;
25522580}
25532581
25542582} // namespace detail
0 commit comments