Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/intel-llvm-mirror-base-commit
Original file line number Diff line number Diff line change
@@ -1 +1 @@
004f38eaec3db5b5c72fabd1e7f5b82a405eecff
25323c85d7091f92bea2c057202612ff941a36d2
4 changes: 4 additions & 0 deletions include/ur_api.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions include/ur_ddi.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions include/ur_print.hpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions scripts/core/virtual_memory.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ params:
[in][optional] is the device to get the granularity from, if the
device is null then the granularity is suitable for all devices in
context.
- type: size_t
name: allocationSize
desc: "[in] allocation size in bytes for which the alignment is being queried."
- type: $x_virtual_mem_granularity_info_t
name: propName
desc: "[in] type of the info to query."
Expand Down
6 changes: 2 additions & 4 deletions source/adapters/cuda/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1347,14 +1347,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
UR_CHECK_ERROR(validateCommandDesc(hCommandBuffer, pUpdateKernelLaunch[i]));
}

// Store changes in config struct in command handle object
// Store changes in config struct in command handle object and propagate
// changes to CUDA graph
for (uint32_t i = 0; i < numKernelUpdates; i++) {
UR_CHECK_ERROR(updateCommand(pUpdateKernelLaunch[i]));
UR_CHECK_ERROR(updateKernelArguments(pUpdateKernelLaunch[i]));
}

// Propagate changes to CUDA driver API
for (uint32_t i = 0; i < numKernelUpdates; i++) {
const auto &UpdateCommandDesc = pUpdateKernelLaunch[i];

// If no work-size is provided make sure we pass nullptr to setKernelParams
Expand Down
1 change: 1 addition & 0 deletions source/adapters/cuda/virtual_mem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo(
ur_context_handle_t, ur_device_handle_t hDevice,
[[maybe_unused]] size_t allocationSize,
ur_virtual_mem_granularity_info_t propName, size_t propSize,
void *pPropValue, size_t *pPropSizeRet) {
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
Expand Down
6 changes: 2 additions & 4 deletions source/adapters/hip/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -984,14 +984,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
UR_CHECK_ERROR(validateCommandDesc(hCommandBuffer, pUpdateKernelLaunch[i]));
}

// Store changes in config struct in command handle object
// Store changes in config struct in command handle object and propagate
// changes to HIP Graph.
for (uint32_t i = 0; i < numKernelUpdates; i++) {
UR_CHECK_ERROR(updateCommand(pUpdateKernelLaunch[i]));
UR_CHECK_ERROR(updateKernelArguments(pUpdateKernelLaunch[i]));
}

// Propagate changes to HIP driver API
for (uint32_t i = 0; i < numKernelUpdates; i++) {
const auto &UpdateCommandDesc = pUpdateKernelLaunch[i];

// If no worksize is provided make sure we pass nullptr to setKernelParams
Expand Down
4 changes: 2 additions & 2 deletions source/adapters/hip/virtual_mem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
#include "physical_mem.hpp"

UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo(
ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t,
size_t, void *, size_t *) {
ur_context_handle_t, ur_device_handle_t, size_t,
ur_virtual_mem_granularity_info_t, size_t, void *, size_t *) {
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

Expand Down
2 changes: 2 additions & 0 deletions source/adapters/level_zero/adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
#include <umf.h>
#endif

// Global ZeUSMImportExtension object for the Level Zero adapter.
ZeUSMImportExtension ZeUSMImport;

// Due to multiple DLLMain definitions with SYCL, Global Adapter is init at
// variable creation.
#if defined(_WIN32)
Expand Down
2 changes: 0 additions & 2 deletions source/adapters/level_zero/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,6 @@ bool setEnvVar(const char *name, const char *value) {
return true;
}

ZeUSMImportExtension ZeUSMImport;

void zeParseError(ze_result_t ZeError, const char *&ErrorString) {
switch (ZeError) {
#define ZE_ERRCASE(ERR) \
Expand Down
4 changes: 2 additions & 2 deletions source/adapters/level_zero/ur_interface_loader.hpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 20 additions & 2 deletions source/adapters/level_zero/usm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -607,8 +607,26 @@ ur_result_t UR_APICALL urUSMPoolDestroyExp(ur_context_handle_t /*Context*/,
return UR_RESULT_SUCCESS;
}

ur_result_t UR_APICALL urUSMPoolSetInfoExp(ur_usm_pool_handle_t,
ur_usm_pool_info_t, void *, size_t) {
// Validates a request to set a USM pool property.
//
// No property is actually applied yet: the pool handle and the property value
// are ignored. Recognized property names are accepted and reported as
// successful; anything else is rejected.
//
// Returns:
//   UR_RESULT_ERROR_INVALID_SIZE            if PropSize < sizeof(size_t)
//   UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION if PropName is not recognized
//   UR_RESULT_SUCCESS                       otherwise
ur_result_t UR_APICALL urUSMPoolSetInfoExp(ur_usm_pool_handle_t /*Pool*/,
                                           ur_usm_pool_info_t PropName,
                                           void * /*PropValue*/,
                                           size_t PropSize) {
  // The size check comes first, so an undersized buffer is reported even when
  // the property name itself would be rejected.
  const bool PayloadTooSmall = PropSize < sizeof(size_t);
  if (PayloadTooSmall) {
    return UR_RESULT_ERROR_INVALID_SIZE;
  }

  // TODO: Support for pool release threshold and maximum size hints.
  const bool IsSizeHint = PropName == UR_USM_POOL_INFO_RELEASE_THRESHOLD_EXP ||
                          PropName == UR_USM_POOL_INFO_MAXIMUM_SIZE_EXP;

  // TODO: Allow user to overwrite pool peak statistics.
  const bool IsPeakStatistic = PropName == UR_USM_POOL_INFO_RESERVED_HIGH_EXP ||
                               PropName == UR_USM_POOL_INFO_USED_HIGH_EXP;

  if (!IsSizeHint && !IsPeakStatistic) {
    return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
  }
  return UR_RESULT_SUCCESS;
}

Expand Down
22 changes: 0 additions & 22 deletions source/adapters/level_zero/v2/api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,28 +41,6 @@ ur_result_t urEventSetCallback(ur_event_handle_t hEvent,
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder entry point: logs that the function is not implemented and
// reports the feature as unsupported. All arguments are ignored.
ur_result_t UR_APICALL urUSMPoolCreateExp(ur_context_handle_t /*hContext*/,
                                          ur_device_handle_t /*hDevice*/,
                                          ur_usm_pool_desc_t * /*PoolDesc*/,
                                          ur_usm_pool_handle_t * /*pPool*/) {
  const ur_result_t NotImplemented = UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
  UR_LOG(ERR, "{} function not implemented!", __FUNCTION__);
  return NotImplemented;
}

// Placeholder entry point: logs that the function is not implemented and
// reports the feature as unsupported. All arguments are ignored.
ur_result_t UR_APICALL urUSMPoolDestroyExp(ur_context_handle_t /*hContext*/,
                                           ur_device_handle_t /*hDevice*/,
                                           ur_usm_pool_handle_t /*hPool*/) {
  const ur_result_t NotImplemented = UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
  UR_LOG(ERR, "{} function not implemented!", __FUNCTION__);
  return NotImplemented;
}

// Placeholder entry point: logs that the function is not implemented and
// reports the feature as unsupported. All arguments are ignored.
ur_result_t UR_APICALL urUSMPoolSetInfoExp(ur_usm_pool_handle_t /*hPool*/,
                                           ur_usm_pool_info_t /*propName*/,
                                           void * /*pPropValue*/,
                                           size_t /*propSize*/) {
  const ur_result_t NotImplemented = UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
  UR_LOG(ERR, "{} function not implemented!", __FUNCTION__);
  return NotImplemented;
}

ur_result_t UR_APICALL urUSMPoolGetDevicePoolExp(ur_context_handle_t hContext,
ur_device_handle_t hDevice,
ur_usm_pool_handle_t *pPool) {
Expand Down
5 changes: 4 additions & 1 deletion source/adapters/level_zero/v2/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ ur_result_t ur_exp_command_buffer_handle_t_::registerExecutionEventUnlocked(
return UR_RESULT_SUCCESS;
}

ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() {
ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() try {
UR_CALL_NOCHECK(commandListManager.lock()->releaseSubmittedKernels());

if (currentExecution) {
Expand All @@ -175,6 +175,9 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() {
for (auto &event : syncPoints) {
event->release();
}
} catch (...) {
UR_LOG(DEBUG, "ur_exp_command_buffer_handle_t_ destructor failed with: {}",
exceptionToResult(std::current_exception()));
}

ur_result_t ur_exp_command_buffer_handle_t_::applyUpdateCommands(
Expand Down
44 changes: 28 additions & 16 deletions source/adapters/level_zero/v2/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,11 @@ void ur_usm_handle_t::unmapHostPtr(void * /*pMappedPtr*/,

ur_integrated_buffer_handle_t::ur_integrated_buffer_handle_t(
ur_context_handle_t hContext, void *hostPtr, size_t size,
host_ptr_action_t hostPtrAction, device_access_mode_t accessMode)
device_access_mode_t accessMode)
: ur_mem_buffer_t(hContext, size, accessMode) {
bool hostPtrImported = false;
if (hostPtrAction == host_ptr_action_t::import) {
hostPtrImported =
maybeImportUSM(hContext->getPlatform()->ZeDriverHandleExpTranslated,
hContext->getZeHandle(), hostPtr, size);
}
bool hostPtrImported =
maybeImportUSM(hContext->getPlatform()->ZeDriverHandleExpTranslated,
hContext->getZeHandle(), hostPtr, size);

if (hostPtrImported) {
this->ptr = usm_unique_ptr_t(hostPtr, [hContext](void *ptr) {
Expand Down Expand Up @@ -201,8 +198,23 @@ ur_discrete_buffer_handle_t::ur_discrete_buffer_handle_t(
device_access_mode_t accessMode)
: ur_mem_buffer_t(hContext, size, accessMode),
deviceAllocations(hContext->getPlatform()->getNumDevices()),
activeAllocationDevice(nullptr), mapToPtr(hostPtr), hostAllocations() {
activeAllocationDevice(nullptr), mapToPtr(nullptr, nullptr),
hostAllocations() {
if (hostPtr) {
// Try importing the pointer to speed up memory copies for map/unmap
bool hostPtrImported =
maybeImportUSM(hContext->getPlatform()->ZeDriverHandleExpTranslated,
hContext->getZeHandle(), hostPtr, size);

if (hostPtrImported) {
mapToPtr = usm_unique_ptr_t(hostPtr, [hContext](void *ptr) {
ZeUSMImport.doZeUSMRelease(
hContext->getPlatform()->ZeDriverHandleExpTranslated, ptr);
});
} else {
mapToPtr = usm_unique_ptr_t(hostPtr, [](void *) {});
}

auto initialDevice = hContext->getDevices()[0];
UR_CALL_THROWS(migrateBufferTo(initialDevice, hostPtr, size));
}
Expand Down Expand Up @@ -305,18 +317,18 @@ void *ur_discrete_buffer_handle_t::mapHostPtr(ur_map_flags_t flags,
TRACK_SCOPE_LATENCY("ur_discrete_buffer_handle_t::mapHostPtr");
// TODO: use async alloc?

void *ptr = mapToPtr;
void *ptr = mapToPtr.get();
if (!ptr) {
UR_CALL_THROWS(hContext->getDefaultUSMPool()->allocate(
hContext, nullptr, nullptr, UR_USM_TYPE_HOST, size, &ptr));
}

usm_unique_ptr_t mappedPtr =
usm_unique_ptr_t(ptr, [ownsAlloc = bool(mapToPtr), this](void *p) {
usm_unique_ptr_t(ptr, [ownsAlloc = !bool(mapToPtr), this](void *p) {
if (ownsAlloc) {
auto ret = hContext->getDefaultUSMPool()->free(p);
if (ret != UR_RESULT_SUCCESS) {
UR_LOG(ERR, "Failed to mapped memory: {}", ret);
UR_LOG(ERR, "Failed to free mapped memory: {}", ret);
}
}
});
Expand Down Expand Up @@ -541,16 +553,16 @@ ur_result_t urMemBufferCreate(ur_context_handle_t hContext,
// ignore the flag for now.
}

if (flags & UR_MEM_FLAG_USE_HOST_POINTER) {
// To speed up copies, we always import the host ptr to USM memory
}

void *hostPtr = pProperties ? pProperties->pHost : nullptr;
auto accessMode = ur_mem_buffer_t::getDeviceAccessMode(flags);

if (useHostBuffer(hContext)) {
auto hostPtrAction =
flags & UR_MEM_FLAG_USE_HOST_POINTER
? ur_integrated_buffer_handle_t::host_ptr_action_t::import
: ur_integrated_buffer_handle_t::host_ptr_action_t::copy;
*phBuffer = ur_mem_handle_t_::create<ur_integrated_buffer_handle_t>(
hContext, hostPtr, size, hostPtrAction, accessMode);
hContext, hostPtr, size, accessMode);
} else {
*phBuffer = ur_mem_handle_t_::create<ur_discrete_buffer_handle_t>(
hContext, hostPtr, size, accessMode);
Expand Down
19 changes: 8 additions & 11 deletions source/adapters/level_zero/v2/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ struct ur_mem_buffer_t : ur_object {
enum class device_access_mode_t { read_write, read_only, write_only };

ur_mem_buffer_t(ur_context_handle_t hContext, size_t size,
device_access_mode_t accesMode);
device_access_mode_t accessMode);
virtual ~ur_mem_buffer_t() = default;

virtual ur_shared_mutex &getMutex();
Expand Down Expand Up @@ -90,14 +90,11 @@ struct ur_usm_handle_t : ur_mem_buffer_t {
// For integrated devices the buffer has been allocated in host memory
// and can be accessed by the device without copying.
struct ur_integrated_buffer_handle_t : ur_mem_buffer_t {
enum class host_ptr_action_t { import, copy };

ur_integrated_buffer_handle_t(ur_context_handle_t hContext, void *hostPtr,
size_t size, host_ptr_action_t useHostPtr,
device_access_mode_t accesMode);
size_t size, device_access_mode_t accessMode);

ur_integrated_buffer_handle_t(ur_context_handle_t hContext, void *hostPtr,
size_t size, device_access_mode_t accesMode,
size_t size, device_access_mode_t accessMode,
bool ownHostPtr);

~ur_integrated_buffer_handle_t();
Expand Down Expand Up @@ -134,13 +131,13 @@ struct ur_discrete_buffer_handle_t : ur_mem_buffer_t {
// first device in the context. Otherwise, the buffer is allocated on
first getDevicePtr call.
ur_discrete_buffer_handle_t(ur_context_handle_t hContext, void *hostPtr,
size_t size, device_access_mode_t accesMode);
size_t size, device_access_mode_t accessMode);
~ur_discrete_buffer_handle_t();

// Create buffer on top of existing device memory.
ur_discrete_buffer_handle_t(ur_context_handle_t hContext,
ur_device_handle_t hDevice, void *devicePtr,
size_t size, device_access_mode_t accesMode,
size_t size, device_access_mode_t accessMode,
void *writeBackMemory, bool ownDevicePtr);

void *getDevicePtr(ur_device_handle_t, device_access_mode_t, size_t offset,
Expand All @@ -166,7 +163,7 @@ struct ur_discrete_buffer_handle_t : ur_mem_buffer_t {
void *writeBackPtr = nullptr;

// If not null, mapHostPtr should map memory to this ptr
void *mapToPtr = nullptr;
usm_unique_ptr_t mapToPtr;

std::vector<host_allocation_desc_t> hostAllocations;

Expand All @@ -178,7 +175,7 @@ struct ur_discrete_buffer_handle_t : ur_mem_buffer_t {

struct ur_shared_buffer_handle_t : ur_mem_buffer_t {
ur_shared_buffer_handle_t(ur_context_handle_t hContext, void *devicePtr,
size_t size, device_access_mode_t accesMode,
size_t size, device_access_mode_t accessMode,
bool ownDevicePtr);

void *getDevicePtr(ur_device_handle_t, device_access_mode_t, size_t offset,
Expand All @@ -196,7 +193,7 @@ struct ur_shared_buffer_handle_t : ur_mem_buffer_t {

struct ur_mem_sub_buffer_t : ur_mem_buffer_t {
ur_mem_sub_buffer_t(ur_mem_handle_t hParent, size_t offset, size_t size,
device_access_mode_t accesMode);
device_access_mode_t accessMode);
~ur_mem_sub_buffer_t();

void *getDevicePtr(ur_device_handle_t, device_access_mode_t, size_t offset,
Expand Down
Loading