2020#include < string>
2121#include < thread>
2222#include < utility>
23- #include < vector>
2423
2524#include < level_zero/zet_api.h>
2625
@@ -487,6 +486,13 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
487486 return PI_INVALID_VALUE;
488487 }
489488
489+ // Cache pi_platforms for reuse in the future
490+ // It solves two problems;
491+ // 1. sycl::device equality issue; we always return the same pi_device.
492+ // 2. performance; we can save time by immediately return from cache.
493+ static std::vector<pi_platform> PiPlatformsCache;
494+ static std::mutex PiPlatformsCacheMutex;
495+
490496 // This is a good time to initialize Level Zero.
491497 // TODO: We can still safely recover if something goes wrong during the init.
492498 // Implement handling segfault using sigaction.
@@ -521,6 +527,18 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
521527 assert (ZeDriverCount == 1 );
522528 ZE_CALL (zeDriverGet (&ZeDriverCount, &ZeDriver));
523529
530+ std::lock_guard<std::mutex> Lock (PiPlatformsCacheMutex);
531+ for (const pi_platform CachedPlatform : PiPlatformsCache) {
532+ if (CachedPlatform->ZeDriver == ZeDriver) {
533+ Platforms[0 ] = CachedPlatform;
534+ // if the caller sent a valid NumPlatforms pointer, set it here
535+ if (NumPlatforms)
536+ *NumPlatforms = 1 ;
537+
538+ return PI_SUCCESS;
539+ }
540+ }
541+
524542 try {
525543 // TODO: figure out how/when to release this memory
526544 *Platforms = new _pi_platform (ZeDriver);
@@ -546,6 +564,9 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
546564 Platforms[0 ]->ZeDriverApiVersion =
547565 std::to_string (ZE_MAJOR_VERSION (ZeApiVersion)) + std::string (" ." ) +
548566 std::to_string (ZE_MINOR_VERSION (ZeApiVersion));
567+
568+ // save a copy in the cache for future uses
569+ PiPlatformsCache.push_back (Platforms[0 ]);
549570 } catch (const std::bad_alloc &) {
550571 return PI_OUT_OF_HOST_MEMORY;
551572 } catch (...) {
@@ -639,9 +660,16 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType,
639660
640661 // Get number of devices supporting Level Zero
641662 uint32_t ZeDeviceCount = 0 ;
663+ std::lock_guard<std::mutex> Lock (Platform->PiDevicesCacheMutex );
664+ ZeDeviceCount = Platform->PiDevicesCache .size ();
665+
642666 const bool AskingForGPU = (DeviceType & PI_DEVICE_TYPE_GPU);
643667 const bool AskingForDefault = (DeviceType == PI_DEVICE_TYPE_DEFAULT);
644- ZE_CALL (zeDeviceGet (ZeDriver, &ZeDeviceCount, nullptr ));
668+
669+ if (ZeDeviceCount == 0 ) {
670+ ZE_CALL (zeDeviceGet (ZeDriver, &ZeDeviceCount, nullptr ));
671+ }
672+
645673 if (ZeDeviceCount == 0 || !(AskingForGPU || AskingForDefault)) {
646674 if (NumDevices)
647675 *NumDevices = 0 ;
@@ -657,6 +685,14 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType,
657685 return PI_SUCCESS;
658686 }
659687
688+ // if devices are already captured in cache, return them from the cache.
689+ for (const pi_device CachedDevice : Platform->PiDevicesCache ) {
690+ *Devices++ = CachedDevice;
691+ }
692+ if (!Platform->PiDevicesCache .empty ()) {
693+ return PI_SUCCESS;
694+ }
695+
660696 try {
661697 std::vector<ze_device_handle_t > ZeDevices (ZeDeviceCount);
662698 ZE_CALL (zeDeviceGet (ZeDriver, &ZeDeviceCount, ZeDevices.data ()));
@@ -668,6 +704,8 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType,
668704 if (Result != PI_SUCCESS) {
669705 return Result;
670706 }
707+ // save a copy in the cache for future uses.
708+ Platform->PiDevicesCache .push_back (Devices[I]);
671709 }
672710 }
673711 } catch (const std::bad_alloc &) {
@@ -680,7 +718,6 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType,
680718
681719pi_result piDeviceRetain (pi_device Device) {
682720 assert (Device);
683-
684721 // The root-device ref-count remains unchanged (always 1).
685722 if (Device->IsSubDevice ) {
686723 ++(Device->RefCount );
@@ -690,14 +727,16 @@ pi_result piDeviceRetain(pi_device Device) {
690727
691728pi_result piDeviceRelease (pi_device Device) {
692729 assert (Device);
693-
730+ assert (Device-> RefCount > 0 && " Device is already released. " );
694731 // TODO: OpenCL says root-device ref-count remains unchanged (1),
695732 // but when would we free the device's data?
696- if (--(Device->RefCount ) == 0 ) {
697- // Destroy the command list used for initializations
698- ZE_CALL (zeCommandListDestroy (Device->ZeCommandListInit ));
699- delete Device;
700- }
733+ if (Device->IsSubDevice )
734+ --(Device->RefCount );
735+ // TODO: All cached pi_devices live until the program ends.
736+ // If L0 RT does not do its own cleanup for Ze_Device_Handle upon tear-down,
737+ // we need to figure out a way to call here
738+ // ZE_CALL(zeCommandListDestroy(Device->ZeCommandListInit)); and,
739+ // in piDevicesGet(), we need to call initialize for each cached pi_device.
701740
702741 return PI_SUCCESS;
703742}
0 commit comments