@@ -4832,16 +4832,13 @@ pi_result cuda_piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr,
                                        pi_uint32 num_events_in_waitlist,
                                        const pi_event *events_waitlist,
                                        pi_event *event) {
+  pi_device device = queue->get_context()->get_device();
 
   // Certain cuda devices and Windows do not have support for some Unified
   // Memory features. cuMemPrefetchAsync requires concurrent memory access
   // for managed memory. Therefore, ignore prefetch hint if concurrent managed
   // memory access is not available.
-  int isConcurrentManagedAccessAvailable = 0;
-  cuDeviceGetAttribute(&isConcurrentManagedAccessAvailable,
-                       CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS,
-                       queue->get_context()->get_device()->get());
-  if (!isConcurrentManagedAccessAvailable) {
+  if (!getAttribute(device, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) {
     setErrorMessage("Prefetch hint ignored as device does not support "
                     "concurrent managed access",
                     PI_SUCCESS);
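The refactor above relies on a getAttribute helper whose definition is not part of this hunk. As a rough sketch of what such a helper is assumed to do, here is a standalone equivalent written against the raw CUDA driver API, using CUdevice directly rather than the plugin's pi_device:

```cpp
#include <cuda.h>

#include <cassert>

// Hypothetical standalone counterpart of the getAttribute helper used above:
// query a single CUDA device attribute and return its integer value.
static int getAttribute(CUdevice device, CUdevice_attribute attribute) {
  int value = 0;
  CUresult err = cuDeviceGetAttribute(&value, attribute, device);
  assert(err == CUDA_SUCCESS); // treat driver errors as fatal in this sketch
  (void)err;                   // avoid unused-variable warning in NDEBUG builds
  return value;
}
```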
@@ -4875,9 +4872,8 @@ pi_result cuda_piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr,
          PI_COMMAND_TYPE_MEM_BUFFER_COPY, queue, cuStream));
      event_ptr->start();
    }
-    result = PI_CHECK_ERROR(cuMemPrefetchAsync(
-        (CUdeviceptr)ptr, size, queue->get_context()->get_device()->get(),
-        cuStream));
+    result = PI_CHECK_ERROR(
+        cuMemPrefetchAsync((CUdeviceptr)ptr, size, device->get(), cuStream));
     if (event) {
       result = event_ptr->record();
       *event = event_ptr.release();
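For context on the call this hunk reshuffles, a minimal driver-API sketch (independent of the PI plugin; the helper name prefetchIfSupported is illustrative) of a prefetch guarded the same way as cuda_piextUSMEnqueuePrefetch:

```cpp
#include <cuda.h>

#include <cstddef>

// Illustrative sketch only: prefetch a managed allocation to `device` on
// `stream`, skipping the hint when the device lacks concurrent managed
// access, mirroring the guard in cuda_piextUSMEnqueuePrefetch.
static CUresult prefetchIfSupported(CUdeviceptr ptr, size_t size,
                                    CUdevice device, CUstream stream) {
  int concurrent = 0;
  cuDeviceGetAttribute(&concurrent,
                       CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS, device);
  if (!concurrent)
    return CUDA_SUCCESS; // hint silently dropped, as the plugin does
  return cuMemPrefetchAsync(ptr, size, device, stream);
}
```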
@@ -4895,6 +4891,29 @@ pi_result cuda_piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr,
   assert(queue != nullptr);
   assert(ptr != nullptr);
 
+  // Certain cuda devices and Windows do not have support for some Unified
+  // Memory features. Passing CU_MEM_ADVISE_[UN]SET_PREFERRED_LOCATION and
+  // CU_MEM_ADVISE_[UN]SET_ACCESSED_BY to cuMemAdvise on a GPU device requires
+  // the GPU device to report a non-zero value for
+  // CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. Therefore, ignore memory
+  // advise if concurrent managed memory access is not available.
+  if (advice == PI_MEM_ADVICE_CUDA_SET_PREFERRED_LOCATION ||
+      advice == PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION ||
+      advice == PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY ||
+      advice == PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY) {
+    pi_device device = queue->get_context()->get_device();
+    if (!getAttribute(device, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) {
+      setErrorMessage("Mem advise ignored as device does not support "
+                      "concurrent managed access",
+                      PI_SUCCESS);
+      return PI_ERROR_PLUGIN_SPECIFIC_ERROR;
+    }
+
+    // TODO: If ptr points to valid system-allocated pageable memory we should
+    // check that the device also has the
+    // CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS property.
+  }
+
   pi_result result = PI_SUCCESS;
   std::unique_ptr<_pi_event> event_ptr{nullptr};
 
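The guard added above applies only to the four preferred-location/accessed-by advice values. As a rough driver-API sketch of the equivalent check outside the plugin (the helper name setPreferredLocationIfSupported is illustrative):

```cpp
#include <cuda.h>

#include <cstddef>

// Illustrative sketch: apply CU_MEM_ADVISE_SET_PREFERRED_LOCATION only when
// the device reports concurrent managed access, mirroring the new guard in
// cuda_piextUSMEnqueueMemAdvise; other advice values would be passed through.
static CUresult setPreferredLocationIfSupported(CUdeviceptr ptr, size_t size,
                                                CUdevice device) {
  int concurrent = 0;
  cuDeviceGetAttribute(&concurrent,
                       CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS, device);
  if (!concurrent)
    return CUDA_SUCCESS; // advice ignored, matching the plugin's behaviour
  return cuMemAdvise(ptr, size, CU_MEM_ADVISE_SET_PREFERRED_LOCATION, device);
}
```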