diff --git a/.github/intel-llvm-mirror-base-commit b/.github/intel-llvm-mirror-base-commit index 7090223e16..948fb08ab2 100644 --- a/.github/intel-llvm-mirror-base-commit +++ b/.github/intel-llvm-mirror-base-commit @@ -1 +1 @@ -004f38eaec3db5b5c72fabd1e7f5b82a405eecff +25323c85d7091f92bea2c057202612ff941a36d2 diff --git a/include/ur_api.h b/include/ur_api.h index 577bb4d5b2..8baf407095 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -4993,6 +4993,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( /// device is null then the granularity is suitable for all devices in /// context. ur_device_handle_t hDevice, + /// [in] allocation size in bytes for which the alignment is being + /// queried. + size_t allocationSize, /// [in] type of the info to query. ur_virtual_mem_granularity_info_t propName, /// [in] size in bytes of the memory pointed to by pPropValue. @@ -15324,6 +15327,7 @@ typedef struct ur_loader_init_params_t { typedef struct ur_virtual_mem_granularity_get_info_params_t { ur_context_handle_t *phContext; ur_device_handle_t *phDevice; + size_t *pallocationSize; ur_virtual_mem_granularity_info_t *ppropName; size_t *ppropSize; void **ppPropValue; diff --git a/include/ur_ddi.h b/include/ur_ddi.h index cb944b6c39..5f58d4c560 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -1834,8 +1834,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetUsmP2PExpProcAddrTable_t)( /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urVirtualMemGranularityGetInfo typedef ur_result_t(UR_APICALL *ur_pfnVirtualMemGranularityGetInfo_t)( - ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t, - size_t, void *, size_t *); + ur_context_handle_t, ur_device_handle_t, size_t, + ur_virtual_mem_granularity_info_t, size_t, void *, size_t *); /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urVirtualMemReserve diff --git 
a/include/ur_print.hpp b/include/ur_print.hpp index 7fc43237a2..c7dc701db3 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -20319,6 +20319,11 @@ inline std::ostream &operator<<( ur::details::printPtr(os, *(params->phDevice)); + os << ", "; + os << ".allocationSize = "; + + os << *(params->pallocationSize); + os << ", "; os << ".propName = "; diff --git a/scripts/core/virtual_memory.yml b/scripts/core/virtual_memory.yml index 61fca47d1b..ec34ca4895 100644 --- a/scripts/core/virtual_memory.yml +++ b/scripts/core/virtual_memory.yml @@ -41,6 +41,9 @@ params: [in][optional] is the device to get the granularity from, if the device is null then the granularity is suitable for all devices in context. + - type: size_t + name: allocationSize + desc: "[in] allocation size in bytes for which the alignment is being queried." - type: $x_virtual_mem_granularity_info_t name: propName desc: "[in] type of the info to query." diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 3078605d42..19082b8947 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -1347,14 +1347,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( UR_CHECK_ERROR(validateCommandDesc(hCommandBuffer, pUpdateKernelLaunch[i])); } - // Store changes in config struct in command handle object + // Store changes in config struct in command handle object and propagate + // changes to CUDA graph for (uint32_t i = 0; i < numKernelUpdates; i++) { UR_CHECK_ERROR(updateCommand(pUpdateKernelLaunch[i])); UR_CHECK_ERROR(updateKernelArguments(pUpdateKernelLaunch[i])); - } - // Propagate changes to CUDA driver API - for (uint32_t i = 0; i < numKernelUpdates; i++) { const auto &UpdateCommandDesc = pUpdateKernelLaunch[i]; // If no work-size is provided make sure we pass nullptr to setKernelParams diff --git a/source/adapters/cuda/virtual_mem.cpp b/source/adapters/cuda/virtual_mem.cpp index 
29908ad1d4..38f70e031d 100644 --- a/source/adapters/cuda/virtual_mem.cpp +++ b/source/adapters/cuda/virtual_mem.cpp @@ -18,6 +18,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( ur_context_handle_t, ur_device_handle_t hDevice, + [[maybe_unused]] size_t allocationSize, ur_virtual_mem_granularity_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index c8aac5b772..abac82900f 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -984,14 +984,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( UR_CHECK_ERROR(validateCommandDesc(hCommandBuffer, pUpdateKernelLaunch[i])); } - // Store changes in config struct in command handle object + // Store changes in config struct in command handle object and propagate + // changes to HIP Graph. 
for (uint32_t i = 0; i < numKernelUpdates; i++) { UR_CHECK_ERROR(updateCommand(pUpdateKernelLaunch[i])); UR_CHECK_ERROR(updateKernelArguments(pUpdateKernelLaunch[i])); - } - // Propagate changes to HIP driver API - for (uint32_t i = 0; i < numKernelUpdates; i++) { const auto &UpdateCommandDesc = pUpdateKernelLaunch[i]; // If no worksize is provided make sure we pass nullptr to setKernelParams diff --git a/source/adapters/hip/virtual_mem.cpp b/source/adapters/hip/virtual_mem.cpp index 12cf9f838e..1effbbfa06 100644 --- a/source/adapters/hip/virtual_mem.cpp +++ b/source/adapters/hip/virtual_mem.cpp @@ -14,8 +14,8 @@ #include "physical_mem.hpp" UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( - ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t, - size_t, void *, size_t *) { + ur_context_handle_t, ur_device_handle_t, size_t, + ur_virtual_mem_granularity_info_t, size_t, void *, size_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index e338a4c439..388af44695 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -21,6 +21,8 @@ #include #endif +ZeUSMImportExtension ZeUSMImport; + // Due to multiple DLLMain definitions with SYCL, Global Adapter is init at // variable creation. 
#if defined(_WIN32) diff --git a/source/adapters/level_zero/common.cpp b/source/adapters/level_zero/common.cpp index 8ed6d7e579..0433a2d52d 100644 --- a/source/adapters/level_zero/common.cpp +++ b/source/adapters/level_zero/common.cpp @@ -84,8 +84,6 @@ bool setEnvVar(const char *name, const char *value) { return true; } -ZeUSMImportExtension ZeUSMImport; - void zeParseError(ze_result_t ZeError, const char *&ErrorString) { switch (ZeError) { #define ZE_ERRCASE(ERR) \ diff --git a/source/adapters/level_zero/ur_interface_loader.hpp b/source/adapters/level_zero/ur_interface_loader.hpp index 5e9fad25cb..bbbe1fce96 100644 --- a/source/adapters/level_zero/ur_interface_loader.hpp +++ b/source/adapters/level_zero/ur_interface_loader.hpp @@ -165,8 +165,8 @@ ur_result_t urUSMPoolGetInfo(ur_usm_pool_handle_t hPool, void *pPropValue, size_t *pPropSizeRet); ur_result_t urVirtualMemGranularityGetInfo( ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_virtual_mem_granularity_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet); + size_t allocationSize, ur_virtual_mem_granularity_info_t propName, + size_t propSize, void *pPropValue, size_t *pPropSizeRet); ur_result_t urVirtualMemReserve(ur_context_handle_t hContext, const void *pStart, size_t size, void **ppStart); diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index 41024be998..ca2b462067 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -607,8 +607,26 @@ ur_result_t UR_APICALL urUSMPoolDestroyExp(ur_context_handle_t /*Context*/, return UR_RESULT_SUCCESS; } -ur_result_t UR_APICALL urUSMPoolSetInfoExp(ur_usm_pool_handle_t, - ur_usm_pool_info_t, void *, size_t) { +ur_result_t UR_APICALL urUSMPoolSetInfoExp(ur_usm_pool_handle_t /*Pool*/, + ur_usm_pool_info_t PropName, + void * /*PropValue*/, + size_t PropSize) { + if (PropSize < sizeof(size_t)) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + switch (PropName) { + // 
TODO: Support for pool release threshold and maximum size hints. + case UR_USM_POOL_INFO_RELEASE_THRESHOLD_EXP: + case UR_USM_POOL_INFO_MAXIMUM_SIZE_EXP: + // TODO: Allow user to overwrite pool peak statistics. + case UR_USM_POOL_INFO_RESERVED_HIGH_EXP: + case UR_USM_POOL_INFO_USED_HIGH_EXP: + break; + default: + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/level_zero/v2/api.cpp b/source/adapters/level_zero/v2/api.cpp index 4d43e249a6..7835b3e93d 100644 --- a/source/adapters/level_zero/v2/api.cpp +++ b/source/adapters/level_zero/v2/api.cpp @@ -41,28 +41,6 @@ ur_result_t urEventSetCallback(ur_event_handle_t hEvent, return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urUSMPoolCreateExp(ur_context_handle_t hContext, - ur_device_handle_t hDevice, - ur_usm_pool_desc_t *PoolDesc, - ur_usm_pool_handle_t *pPool) { - UR_LOG(ERR, "{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t UR_APICALL urUSMPoolDestroyExp(ur_context_handle_t hContext, - ur_device_handle_t hDevice, - ur_usm_pool_handle_t hPool) { - UR_LOG(ERR, "{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t UR_APICALL urUSMPoolSetInfoExp(ur_usm_pool_handle_t hPool, - ur_usm_pool_info_t propName, - void *pPropValue, size_t propSize) { - UR_LOG(ERR, "{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - ur_result_t UR_APICALL urUSMPoolGetDevicePoolExp(ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_usm_pool_handle_t *pPool) { diff --git a/source/adapters/level_zero/v2/command_buffer.cpp b/source/adapters/level_zero/v2/command_buffer.cpp index 92118587d4..b4c2674bd3 100644 --- a/source/adapters/level_zero/v2/command_buffer.cpp +++ b/source/adapters/level_zero/v2/command_buffer.cpp @@ -166,7 +166,7 @@ ur_result_t 
ur_exp_command_buffer_handle_t_::registerExecutionEventUnlocked( return UR_RESULT_SUCCESS; } -ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { +ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() try { UR_CALL_NOCHECK(commandListManager.lock()->releaseSubmittedKernels()); if (currentExecution) { @@ -175,6 +175,9 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { for (auto &event : syncPoints) { event->release(); } +} catch (...) { + UR_LOG(DEBUG, "ur_exp_command_buffer_handle_t_ destructor failed with: {}", + exceptionToResult(std::current_exception())); } ur_result_t ur_exp_command_buffer_handle_t_::applyUpdateCommands( diff --git a/source/adapters/level_zero/v2/memory.cpp b/source/adapters/level_zero/v2/memory.cpp index 9c39a97d16..1b6855e630 100644 --- a/source/adapters/level_zero/v2/memory.cpp +++ b/source/adapters/level_zero/v2/memory.cpp @@ -55,14 +55,11 @@ void ur_usm_handle_t::unmapHostPtr(void * /*pMappedPtr*/, ur_integrated_buffer_handle_t::ur_integrated_buffer_handle_t( ur_context_handle_t hContext, void *hostPtr, size_t size, - host_ptr_action_t hostPtrAction, device_access_mode_t accessMode) + device_access_mode_t accessMode) : ur_mem_buffer_t(hContext, size, accessMode) { - bool hostPtrImported = false; - if (hostPtrAction == host_ptr_action_t::import) { - hostPtrImported = - maybeImportUSM(hContext->getPlatform()->ZeDriverHandleExpTranslated, - hContext->getZeHandle(), hostPtr, size); - } + bool hostPtrImported = + maybeImportUSM(hContext->getPlatform()->ZeDriverHandleExpTranslated, + hContext->getZeHandle(), hostPtr, size); if (hostPtrImported) { this->ptr = usm_unique_ptr_t(hostPtr, [hContext](void *ptr) { @@ -201,8 +198,23 @@ ur_discrete_buffer_handle_t::ur_discrete_buffer_handle_t( device_access_mode_t accessMode) : ur_mem_buffer_t(hContext, size, accessMode), deviceAllocations(hContext->getPlatform()->getNumDevices()), - activeAllocationDevice(nullptr), mapToPtr(hostPtr), 
hostAllocations() { + activeAllocationDevice(nullptr), mapToPtr(nullptr, nullptr), + hostAllocations() { if (hostPtr) { + // Try importing the pointer to speed up memory copies for map/unmap + bool hostPtrImported = + maybeImportUSM(hContext->getPlatform()->ZeDriverHandleExpTranslated, + hContext->getZeHandle(), hostPtr, size); + + if (hostPtrImported) { + mapToPtr = usm_unique_ptr_t(hostPtr, [hContext](void *ptr) { + ZeUSMImport.doZeUSMRelease( + hContext->getPlatform()->ZeDriverHandleExpTranslated, ptr); + }); + } else { + mapToPtr = usm_unique_ptr_t(hostPtr, [](void *) {}); + } + auto initialDevice = hContext->getDevices()[0]; UR_CALL_THROWS(migrateBufferTo(initialDevice, hostPtr, size)); } @@ -305,18 +317,18 @@ void *ur_discrete_buffer_handle_t::mapHostPtr(ur_map_flags_t flags, TRACK_SCOPE_LATENCY("ur_discrete_buffer_handle_t::mapHostPtr"); // TODO: use async alloc? - void *ptr = mapToPtr; + void *ptr = mapToPtr.get(); if (!ptr) { UR_CALL_THROWS(hContext->getDefaultUSMPool()->allocate( hContext, nullptr, nullptr, UR_USM_TYPE_HOST, size, &ptr)); } usm_unique_ptr_t mappedPtr = - usm_unique_ptr_t(ptr, [ownsAlloc = bool(mapToPtr), this](void *p) { + usm_unique_ptr_t(ptr, [ownsAlloc = !bool(mapToPtr), this](void *p) { if (ownsAlloc) { auto ret = hContext->getDefaultUSMPool()->free(p); if (ret != UR_RESULT_SUCCESS) { - UR_LOG(ERR, "Failed to mapped memory: {}", ret); + UR_LOG(ERR, "Failed to free mapped memory: {}", ret); } } }); @@ -541,16 +553,16 @@ ur_result_t urMemBufferCreate(ur_context_handle_t hContext, // ignore the flag for now. } + if (flags & UR_MEM_FLAG_USE_HOST_POINTER) { + // To speed up copies, we always import the host ptr to USM memory + } + void *hostPtr = pProperties ? pProperties->pHost : nullptr; auto accessMode = ur_mem_buffer_t::getDeviceAccessMode(flags); if (useHostBuffer(hContext)) { - auto hostPtrAction = - flags & UR_MEM_FLAG_USE_HOST_POINTER - ? 
ur_integrated_buffer_handle_t::host_ptr_action_t::import - : ur_integrated_buffer_handle_t::host_ptr_action_t::copy; *phBuffer = ur_mem_handle_t_::create( - hContext, hostPtr, size, hostPtrAction, accessMode); + hContext, hostPtr, size, accessMode); } else { *phBuffer = ur_mem_handle_t_::create( hContext, hostPtr, size, accessMode); diff --git a/source/adapters/level_zero/v2/memory.hpp b/source/adapters/level_zero/v2/memory.hpp index 7201df57c9..61b0a00f40 100644 --- a/source/adapters/level_zero/v2/memory.hpp +++ b/source/adapters/level_zero/v2/memory.hpp @@ -28,7 +28,7 @@ struct ur_mem_buffer_t : ur_object { enum class device_access_mode_t { read_write, read_only, write_only }; ur_mem_buffer_t(ur_context_handle_t hContext, size_t size, - device_access_mode_t accesMode); + device_access_mode_t accessMode); virtual ~ur_mem_buffer_t() = default; virtual ur_shared_mutex &getMutex(); @@ -90,14 +90,11 @@ struct ur_usm_handle_t : ur_mem_buffer_t { // For integrated devices the buffer has been allocated in host memory // and can be accessed by the device without copying. struct ur_integrated_buffer_handle_t : ur_mem_buffer_t { - enum class host_ptr_action_t { import, copy }; - ur_integrated_buffer_handle_t(ur_context_handle_t hContext, void *hostPtr, - size_t size, host_ptr_action_t useHostPtr, - device_access_mode_t accesMode); + size_t size, device_access_mode_t accessMode); ur_integrated_buffer_handle_t(ur_context_handle_t hContext, void *hostPtr, - size_t size, device_access_mode_t accesMode, + size_t size, device_access_mode_t accessMode, bool ownHostPtr); ~ur_integrated_buffer_handle_t(); @@ -134,13 +131,13 @@ struct ur_discrete_buffer_handle_t : ur_mem_buffer_t { // first device in the context. Otherwise, the buffer is allocated on // firt getDevicePtr call. 
ur_discrete_buffer_handle_t(ur_context_handle_t hContext, void *hostPtr, - size_t size, device_access_mode_t accesMode); + size_t size, device_access_mode_t accessMode); ~ur_discrete_buffer_handle_t(); // Create buffer on top of existing device memory. ur_discrete_buffer_handle_t(ur_context_handle_t hContext, ur_device_handle_t hDevice, void *devicePtr, - size_t size, device_access_mode_t accesMode, + size_t size, device_access_mode_t accessMode, void *writeBackMemory, bool ownDevicePtr); void *getDevicePtr(ur_device_handle_t, device_access_mode_t, size_t offset, @@ -166,7 +163,7 @@ struct ur_discrete_buffer_handle_t : ur_mem_buffer_t { void *writeBackPtr = nullptr; // If not null, mapHostPtr should map memory to this ptr - void *mapToPtr = nullptr; + usm_unique_ptr_t mapToPtr; std::vector hostAllocations; @@ -178,7 +175,7 @@ struct ur_discrete_buffer_handle_t : ur_mem_buffer_t { struct ur_shared_buffer_handle_t : ur_mem_buffer_t { ur_shared_buffer_handle_t(ur_context_handle_t hContext, void *devicePtr, - size_t size, device_access_mode_t accesMode, + size_t size, device_access_mode_t accessMode, bool ownDevicePtr); void *getDevicePtr(ur_device_handle_t, device_access_mode_t, size_t offset, @@ -196,7 +193,7 @@ struct ur_shared_buffer_handle_t : ur_mem_buffer_t { struct ur_mem_sub_buffer_t : ur_mem_buffer_t { ur_mem_sub_buffer_t(ur_mem_handle_t hParent, size_t offset, size_t size, - device_access_mode_t accesMode); + device_access_mode_t accessMode); ~ur_mem_sub_buffer_t(); void *getDevicePtr(ur_device_handle_t, device_access_mode_t, size_t offset, diff --git a/source/adapters/level_zero/v2/usm.cpp b/source/adapters/level_zero/v2/usm.cpp index 216e0b0e25..0d49a8ad0a 100644 --- a/source/adapters/level_zero/v2/usm.cpp +++ b/source/adapters/level_zero/v2/usm.cpp @@ -188,6 +188,68 @@ ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t hContext, } } +ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + 
ur_usm_pool_desc_t *pPoolDesc) + : hContext(hContext) { + // TODO: handle UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK from pPoolDesc + auto disjointPoolConfigs = initializeDisjointPoolConfig(); + + if (disjointPoolConfigs.has_value()) { + if (auto limits = find_stype_node(pPoolDesc)) { + for (auto &config : disjointPoolConfigs.value().Configs) { + config.MaxPoolableSize = limits->maxPoolableSize; + config.SlabMinSize = limits->minDriverAllocSize; + } + } + } else { + // If pooling is disabled, do nothing. + UR_LOG(INFO, "USM pooling is disabled. Skiping pool limits adjustment."); + } + + // Create pool descriptor for single device provided + std::vector descriptors; + { + auto &desc = descriptors.emplace_back(); + desc.poolHandle = this; + desc.hContext = hContext; + desc.hDevice = hDevice; + desc.type = UR_USM_TYPE_DEVICE; + } + { + auto &desc = descriptors.emplace_back(); + desc.poolHandle = this; + desc.hContext = hContext; + desc.hDevice = hDevice; + desc.type = UR_USM_TYPE_SHARED; + desc.deviceReadOnly = false; + } + { + auto &desc = descriptors.emplace_back(); + desc.poolHandle = this; + desc.hContext = hContext; + desc.hDevice = hDevice; + desc.type = UR_USM_TYPE_SHARED; + desc.deviceReadOnly = true; + } + + for (auto &desc : descriptors) { + std::unique_ptr usmPool; + if (disjointPoolConfigs.has_value()) { + auto &poolConfig = + disjointPoolConfigs.value().Configs[descToDisjoinPoolMemType(desc)]; + auto pool = usm::makeDisjointPool(makeProvider(desc), poolConfig); + usmPool = std::make_unique(this, std::move(pool)); + } else { + auto pool = usm::makeProxyPool(makeProvider(desc)); + usmPool = std::make_unique(this, std::move(pool)); + } + UMF_CALL_THROWS( + umfPoolSetTag(usmPool->umfPool.get(), usmPool.get(), nullptr)); + poolManager.addPool(desc, std::move(usmPool)); + } +} + ur_context_handle_t ur_usm_pool_handle_t_::getContextHandle() const { return hContext; } @@ -358,27 +420,27 @@ size_t ur_usm_pool_handle_t_::getTotalReservedSize() { } size_t 
ur_usm_pool_handle_t_::getPeakReservedSize() { - size_t totalAllocatedSize = 0; + size_t maxPeakSize = 0; umf_result_t umfRet = UMF_RESULT_SUCCESS; poolManager.forEachPool([&](UsmPool *p) { umf_memory_provider_handle_t hProvider = nullptr; - size_t allocatedSize = 0; + size_t peakSize = 0; umfRet = umfPoolGetMemoryProvider(p->umfPool.get(), &hProvider); if (umfRet != UMF_RESULT_SUCCESS) { return false; } - umfRet = umfCtlGet("umf.provider.by_handle.{}.stats.peak_memory", - &allocatedSize, sizeof(allocatedSize), hProvider); + umfRet = umfCtlGet("umf.provider.by_handle.{}.stats.peak_memory", &peakSize, + sizeof(peakSize), hProvider); if (umfRet != UMF_RESULT_SUCCESS) { return false; } - totalAllocatedSize += allocatedSize; + maxPeakSize = std::max(maxPeakSize, peakSize); return true; }); - return umfRet == UMF_RESULT_SUCCESS ? totalAllocatedSize : 0; + return umfRet == UMF_RESULT_SUCCESS ? maxPeakSize : 0; } size_t ur_usm_pool_handle_t_::getTotalUsedSize() { @@ -460,6 +522,32 @@ ur_result_t urUSMPoolGetInfo( return exceptionToResult(std::current_exception()); } +ur_result_t urUSMPoolCreateExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_usm_pool_desc_t *pPoolDesc, + ur_usm_pool_handle_t *pPool) try { + *pPool = new ur_usm_pool_handle_t_(hContext, hDevice, pPoolDesc); + hContext->addUsmPool(*pPool); + return UR_RESULT_SUCCESS; +} catch (umf_result_t e) { + return umf::umf2urResult(e); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +ur_result_t urUSMPoolDestroyExp(ur_context_handle_t, ur_device_handle_t, + ur_usm_pool_handle_t hPool) try { + if (hPool->RefCount.release()) { + hPool->getContextHandle()->removeUsmPool(hPool); + delete hPool; + } + return UR_RESULT_SUCCESS; +} catch (umf_result_t e) { + return umf::umf2urResult(e); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + ur_result_t urUSMPoolGetInfoExp(ur_usm_pool_handle_t hPool, ur_usm_pool_info_t propName, void *pPropValue, size_t *pPropSizeRet) { @@ -497,6 +585,28 @@ ur_result_t urUSMPoolGetInfoExp(ur_usm_pool_handle_t hPool, return UR_RESULT_SUCCESS; } +ur_result_t urUSMPoolSetInfoExp(ur_usm_pool_handle_t /*hPool*/, + ur_usm_pool_info_t propName, + void * /*pPropValue*/, size_t propSize) { + if (propSize < sizeof(size_t)) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + switch (propName) { + // TODO: Support for pool release threshold and maximum size hints. + case UR_USM_POOL_INFO_RELEASE_THRESHOLD_EXP: + case UR_USM_POOL_INFO_MAXIMUM_SIZE_EXP: + // TODO: Allow user to overwrite pool peak statistics. + case UR_USM_POOL_INFO_RESERVED_HIGH_EXP: + case UR_USM_POOL_INFO_USED_HIGH_EXP: + break; + default: + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } + + return UR_RESULT_SUCCESS; +} + ur_result_t urUSMPoolGetDefaultDevicePoolExp(ur_context_handle_t hContext, ur_device_handle_t, ur_usm_pool_handle_t *pPool) { diff --git a/source/adapters/level_zero/v2/usm.hpp b/source/adapters/level_zero/v2/usm.hpp index 5b498b361c..825ecb5fcd 100644 --- a/source/adapters/level_zero/v2/usm.hpp +++ b/source/adapters/level_zero/v2/usm.hpp @@ -55,6 +55,9 @@ struct AllocationStats { struct ur_usm_pool_handle_t_ : ur_object { ur_usm_pool_handle_t_(ur_context_handle_t hContext, ur_usm_pool_desc_t *pPoolDes); + ur_usm_pool_handle_t_(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_usm_pool_desc_t *pPoolDes); ur_context_handle_t getContextHandle() const; diff --git a/source/adapters/level_zero/virtual_mem.cpp b/source/adapters/level_zero/virtual_mem.cpp index f61c8fd43f..0488d21023 100644 --- a/source/adapters/level_zero/virtual_mem.cpp +++ b/source/adapters/level_zero/virtual_mem.cpp @@ -23,8 +23,8 @@ namespace ur::level_zero { ur_result_t urVirtualMemGranularityGetInfo( ur_context_handle_t hContext, ur_device_handle_t 
hDevice, - ur_virtual_mem_granularity_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet) { + size_t allocationSize, ur_virtual_mem_granularity_info_t propName, + size_t propSize, void *pPropValue, size_t *pPropSizeRet) { UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM: @@ -34,7 +34,8 @@ ur_result_t urVirtualMemGranularityGetInfo( // aligned size. size_t PageSize; ZE2UR_CALL(zeVirtualMemQueryPageSize, - (hContext->getZeHandle(), hDevice->ZeDevice, 1, &PageSize)); + (hContext->getZeHandle(), hDevice->ZeDevice, allocationSize, + &PageSize)); return ReturnValue(PageSize); } default: diff --git a/source/adapters/mock/ur_mockddi.cpp b/source/adapters/mock/ur_mockddi.cpp index fb5529f95b..3ab79444f3 100644 --- a/source/adapters/mock/ur_mockddi.cpp +++ b/source/adapters/mock/ur_mockddi.cpp @@ -2729,6 +2729,9 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( /// device is null then the granularity is suitable for all devices in /// context. ur_device_handle_t hDevice, + /// [in] allocation size in bytes for which the alignment is being + /// queried. + size_t allocationSize, /// [in] type of the info to query. ur_virtual_mem_granularity_info_t propName, /// [in] size in bytes of the memory pointed to by pPropValue. 
@@ -2744,7 +2747,8 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( ur_result_t result = UR_RESULT_SUCCESS; ur_virtual_mem_granularity_get_info_params_t params = { - &hContext, &hDevice, &propName, &propSize, &pPropValue, &pPropSizeRet}; + &hContext, &hDevice, &allocationSize, &propName, + &propSize, &pPropValue, &pPropSizeRet}; auto beforeCallback = reinterpret_cast( mock::getCallbacks().get_before_callback( diff --git a/source/adapters/native_cpu/virtual_mem.cpp b/source/adapters/native_cpu/virtual_mem.cpp index 131b480ac1..6697902564 100644 --- a/source/adapters/native_cpu/virtual_mem.cpp +++ b/source/adapters/native_cpu/virtual_mem.cpp @@ -13,8 +13,8 @@ #include "physical_mem.hpp" UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( - ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t, - size_t, void *, size_t *) { + ur_context_handle_t, ur_device_handle_t, size_t, + ur_virtual_mem_granularity_info_t, size_t, void *, size_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/adapters/offload/device.cpp b/source/adapters/offload/device.cpp index ebe0405b89..76ea3e6c4f 100644 --- a/source/adapters/offload/device.cpp +++ b/source/adapters/offload/device.cpp @@ -67,6 +67,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_PLATFORM: return ReturnValue(hDevice->Platform); break; + case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: + case UR_DEVICE_INFO_USM_HOST_SUPPORT: case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: return ReturnValue(UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS); case UR_DEVICE_INFO_BUILD_ON_SUBDEVICE: @@ -76,6 +78,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(uint32_t{1}); case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: return ReturnValue(uint32_t{3}); + case UR_DEVICE_INFO_COMPILER_AVAILABLE: + return ReturnValue(true); + case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: + // 
TODO: Implement subgroups in Offload + return ReturnValue(1); // Unimplemented features case UR_DEVICE_INFO_PROGRAM_SET_SPECIALIZATION_CONSTANTS: case UR_DEVICE_INFO_GLOBAL_VARIABLE_SUPPORT: @@ -83,12 +90,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: case UR_DEVICE_INFO_IMAGE_SUPPORT: case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT: + case UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT: + // TODO: Atomic queries in Offload + case UR_DEVICE_INFO_ATOMIC_64: + case UR_DEVICE_INFO_IMAGE_SRGB: + case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY: + case UR_DEVICE_INFO_LINKER_AVAILABLE: return ReturnValue(false); case UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: - case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: - case UR_DEVICE_INFO_USM_HOST_SUPPORT: case UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: return ReturnValue(uint32_t{0}); + case UR_DEVICE_INFO_QUEUE_PROPERTIES: + case UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES: + return ReturnValue(0); + case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: { + if (pPropSizeRet) { + *pPropSizeRet = 0; + } + return UR_RESULT_SUCCESS; + } default: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } diff --git a/source/adapters/offload/program.cpp b/source/adapters/offload/program.cpp index cf497c571f..e889f59ef8 100644 --- a/source/adapters/offload/program.cpp +++ b/source/adapters/offload/program.cpp @@ -125,6 +125,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL +urProgramCreateWithIL(ur_context_handle_t hContext, const void *pIL, + size_t length, const ur_program_properties_t *pProperties, + ur_program_handle_t *phProgram) { + // Liboffload consumes both IR and binaries through the same entrypoint + return urProgramCreateWithBinary(hContext, 1, &hContext->Device, &length, + reinterpret_cast(&pIL), + pProperties, phProgram); +} + UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(ur_context_handle_t, 
ur_program_handle_t, const char *) { @@ -147,12 +157,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile(ur_context_handle_t, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urProgramCreateWithIL(ur_context_handle_t, const void *, size_t, - const ur_program_properties_t *, ur_program_handle_t *) { - return UR_RESULT_ERROR_COMPILER_NOT_AVAILABLE; -} - UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { diff --git a/source/adapters/opencl/virtual_mem.cpp b/source/adapters/opencl/virtual_mem.cpp index 7c411d9b7b..c7db068eca 100644 --- a/source/adapters/opencl/virtual_mem.cpp +++ b/source/adapters/opencl/virtual_mem.cpp @@ -13,8 +13,8 @@ #include "physical_mem.hpp" UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( - ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t, - size_t, void *, size_t *) { + ur_context_handle_t, ur_device_handle_t, size_t, + ur_virtual_mem_granularity_info_t, size_t, void *, size_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/common/cuda-hip/stream_queue.hpp b/source/common/cuda-hip/stream_queue.hpp index 2547070fa9..be52421437 100644 --- a/source/common/cuda-hip/stream_queue.hpp +++ b/source/common/cuda-hip/stream_queue.hpp @@ -30,47 +30,90 @@ struct stream_queue_t { static constexpr int DefaultNumComputeStreams = CS; static constexpr int DefaultNumTransferStreams = TS; + // Mutex to guard modifications to the ComputeStreams vector, and + // NumComputeStreams. + std::mutex ComputeStreamMutex; std::vector ComputeStreams; + // Number of compute streams that have been created + unsigned int NumComputeStreams{0}; + + // Mutex to guard modifications to the TransferStreams vector, and + // NumTransferStreams. 
+ std::mutex TransferStreamMutex; std::vector TransferStreams; + // Number of transfer streams that have been created + unsigned int NumTransferStreams{0}; + + // The stream indices are incremented every time we return a stream. This + // means that they encode both the index of the next stream in the round + // robin, as well as which iteration of the round robin we're on. Dividing + // the stream index by the size of the associated stream vector will give the + // number of round robins we've done as quotient, and the index of the next + // stream to use as remainder. + std::atomic_uint32_t ComputeStreamIndex{0}; + std::atomic_uint32_t TransferStreamIndex{0}; + + // The LastSync indices keep track of the index based on ComputeStreamIndex + // or TransferStreamIndex of the last stream that was synchronized during a + // syncStreams operation. + unsigned int LastSyncComputeStreams{0}; + unsigned int LastSyncTransferStreams{0}; + // Stream used for recording EvQueue, which holds information about when the // command in question is enqueued on host, as opposed to started. It is // created only if profiling is enabled - either for queue or per event. native_type HostSubmitTimeStream{0}; + // Flag to keep track of the creation og HostSubmitTimeStream, it is created + // either in the queue constructor when profiling is enabled or whenever it + // is requested for the first time through timestamp entry points. std::once_flag HostSubmitTimeStreamFlag; - // delay_compute_ keeps track of which streams have been recently reused and + + // DelayCompute keeps track of which streams have been recently reused and // their next use should be delayed. If a stream has been recently reused it // will be skipped the next time it would be selected round-robin style. When // skipped, its delay flag is cleared. std::vector DelayCompute; - // keep track of which streams have applied barrier + + // ComputeStreamSyncMutex is used to guard compute streams when they are + // being re-used. 
+ // + // When ComputeStreamSyncMutex and ComputeStreamMutex both need to be + // locked at the same time, ComputeStreamSyncMutex should be locked first + // to avoid deadlocks. + std::mutex ComputeStreamSyncMutex; + + // Guards barrier insertion in urEnqueueEventsWaitWithBarrier. + std::mutex BarrierMutex; + BarrierEventT BarrierEvent = nullptr; + BarrierEventT BarrierTmpEvent = nullptr; + + // Keep track of which streams have applied barrier. std::vector ComputeAppliedBarrier; std::vector TransferAppliedBarrier; - ur_context_handle_t_ *Context; - ur_device_handle_t_ *Device; + + ur_context_handle_t Context; + ur_device_handle_t Device; + + // Reference count for the queue object. ur::RefCount RefCount; + + // Event count used to give events an ordering used in the event class + // forLatestEvents. std::atomic_uint32_t EventCount{0}; - std::atomic_uint32_t ComputeStreamIndex{0}; - std::atomic_uint32_t TransferStreamIndex{0}; - unsigned int NumComputeStreams{0}; - unsigned int NumTransferStreams{0}; - unsigned int LastSyncComputeStreams{0}; - unsigned int LastSyncTransferStreams{0}; + + // Queue flags in the native API format as well as UR format. unsigned int Flags; ur_queue_flags_t URFlags; + + // Priority of this queue, matches underlying API priority. int Priority; - // When ComputeStreamSyncMutex and ComputeStreamMutex both need to be - // locked at the same time, ComputeStreamSyncMutex should be locked first - // to avoid deadlocks - std::mutex ComputeStreamSyncMutex; - std::mutex ComputeStreamMutex; - std::mutex TransferStreamMutex; - std::mutex BarrierMutex; + + // Tracks if the queue owns the underlying native streams, this may happen + // for queues created from interop. 
bool HasOwnership; - BarrierEventT BarrierEvent = nullptr; - BarrierEventT BarrierTmpEvent = nullptr; - stream_queue_t(bool IsOutOfOrder, ur_context_handle_t_ *Context, - ur_device_handle_t_ *Device, unsigned int Flags, + stream_queue_t(bool IsOutOfOrder, ur_context_handle_t Context, + ur_device_handle_t Device, unsigned int Flags, ur_queue_flags_t URFlags, int Priority) : ComputeStreams(IsOutOfOrder ? DefaultNumComputeStreams : 1), TransferStreams(IsOutOfOrder ? DefaultNumTransferStreams : 0), @@ -87,16 +130,16 @@ struct stream_queue_t { } } - // Create a queue from a native handle - stream_queue_t(native_type stream, ur_context_handle_t_ *Context, - ur_device_handle_t_ *Device, unsigned int Flags, + // Create a queue from a native handle. + stream_queue_t(native_type stream, ur_context_handle_t Context, + ur_device_handle_t Device, unsigned int Flags, ur_queue_flags_t URFlags, bool BackendOwns) - : ComputeStreams(1, stream), TransferStreams(0), + : ComputeStreams(1, stream), NumComputeStreams{1}, TransferStreams(0), DelayCompute(this->ComputeStreams.size(), false), ComputeAppliedBarrier(this->ComputeStreams.size()), TransferAppliedBarrier(this->TransferStreams.size()), Context{Context}, - Device{Device}, NumComputeStreams{1}, Flags(Flags), URFlags(URFlags), - Priority(0), HasOwnership{BackendOwns} { + Device{Device}, Flags(Flags), URFlags(URFlags), Priority(0), + HasOwnership{BackendOwns} { urContextRetain(Context); // Create timing stream if profiling is enabled. @@ -107,6 +150,7 @@ struct stream_queue_t { ~stream_queue_t() { urContextRelease(Context); } + // Methods defined by the specific adapters. 
void computeStreamWaitForBarrierIfNeeded(native_type Strean, uint32_t StreamI); void transferStreamWaitForBarrierIfNeeded(native_type Stream, @@ -206,9 +250,6 @@ struct stream_queue_t { return Result; } - native_type get() { return getNextComputeStream(); }; - ur_device_handle_t getDevice() const noexcept { return Device; }; - native_type getHostSubmitTimeStream() { return HostSubmitTimeStream; } bool hasBeenSynchronized(uint32_t StreamToken) { @@ -345,7 +386,8 @@ struct stream_queue_t { } } - ur_context_handle_t_ *getContext() const { return Context; }; + ur_device_handle_t getDevice() const noexcept { return Device; }; + ur_context_handle_t getContext() const noexcept { return Context; }; uint32_t getNextEventId() noexcept { return ++EventCount; } diff --git a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp index 3539a2d2a5..f8f7c58bf5 100644 --- a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp @@ -272,10 +272,13 @@ size_t GetKernelPrivateMemorySize(ur_kernel_handle_t Kernel, size_t GetVirtualMemGranularity(ur_context_handle_t Context, ur_device_handle_t Device) { size_t Size; + const size_t allocationSize = + 1; // TODO: pass the caller's actual allocation size once it is plumbed here [[maybe_unused]] auto Result = getContext()->urDdiTable.VirtualMem.pfnGranularityGetInfo( - Context, Device, UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED, - sizeof(Size), &Size, nullptr); + Context, Device, allocationSize, + UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED, sizeof(Size), &Size, + nullptr); assert(Result == UR_RESULT_SUCCESS); return Size; } diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index e0d57228e4..0abbb7604c 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -2236,6 +2236,9 @@ __urdlllocal 
ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( /// device is null then the granularity is suitable for all devices in /// context. ur_device_handle_t hDevice, + /// [in] allocation size in bytes for which the alignment is being + /// queried. + size_t allocationSize, /// [in] type of the info to query. ur_virtual_mem_granularity_info_t propName, /// [in] size in bytes of the memory pointed to by pPropValue. @@ -2255,7 +2258,8 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; ur_virtual_mem_granularity_get_info_params_t params = { - &hContext, &hDevice, &propName, &propSize, &pPropValue, &pPropSizeRet}; + &hContext, &hDevice, &allocationSize, &propName, + &propSize, &pPropValue, &pPropSizeRet}; uint64_t instance = getContext()->notify_begin(UR_FUNCTION_VIRTUAL_MEM_GRANULARITY_GET_INFO, "urVirtualMemGranularityGetInfo", ¶ms); @@ -2263,8 +2267,9 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( auto &logger = getContext()->logger; UR_LOG_L(logger, INFO, " ---> urVirtualMemGranularityGetInfo\n"); - ur_result_t result = pfnGranularityGetInfo( - hContext, hDevice, propName, propSize, pPropValue, pPropSizeRet); + ur_result_t result = + pfnGranularityGetInfo(hContext, hDevice, allocationSize, propName, + propSize, pPropValue, pPropSizeRet); getContext()->notify_end(UR_FUNCTION_VIRTUAL_MEM_GRANULARITY_GET_INFO, "urVirtualMemGranularityGetInfo", ¶ms, &result, diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 979eb3ef22..b61356afd2 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -2182,6 +2182,9 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( /// device is null then the granularity is suitable for all devices in /// context. ur_device_handle_t hDevice, + /// [in] allocation size in bytes for which the alignment is being + /// queried. 
+ size_t allocationSize, /// [in] type of the info to query. ur_virtual_mem_granularity_info_t propName, /// [in] size in bytes of the memory pointed to by pPropValue. @@ -2228,8 +2231,9 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( URLOG_CTX_INVALID_REFERENCE(hDevice); } - ur_result_t result = pfnGranularityGetInfo( - hContext, hDevice, propName, propSize, pPropValue, pPropSizeRet); + ur_result_t result = + pfnGranularityGetInfo(hContext, hDevice, allocationSize, propName, + propSize, pPropValue, pPropSizeRet); return result; } diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 1cf13837bd..74712c5c4d 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -1238,6 +1238,9 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( /// device is null then the granularity is suitable for all devices in /// context. ur_device_handle_t hDevice, + /// [in] allocation size in bytes for which the alignment is being + /// queried. + size_t allocationSize, /// [in] type of the info to query. ur_virtual_mem_granularity_info_t propName, /// [in] size in bytes of the memory pointed to by pPropValue. @@ -1258,8 +1261,8 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( return UR_RESULT_ERROR_UNINITIALIZED; // forward to device-platform - return pfnGranularityGetInfo(hContext, hDevice, propName, propSize, - pPropValue, pPropSizeRet); + return pfnGranularityGetInfo(hContext, hDevice, allocationSize, propName, + propSize, pPropValue, pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 1261145424..cad6de4dd9 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -2725,6 +2725,9 @@ ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( /// device is null then the granularity is suitable for all devices in /// context. 
ur_device_handle_t hDevice, + /// [in] allocation size in bytes for which the alignment is being + /// queried. + size_t allocationSize, /// [in] type of the info to query. ur_virtual_mem_granularity_info_t propName, /// [in] size in bytes of the memory pointed to by pPropValue. @@ -2742,8 +2745,8 @@ ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( if (nullptr == pfnGranularityGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - return pfnGranularityGetInfo(hContext, hDevice, propName, propSize, - pPropValue, pPropSizeRet); + return pfnGranularityGetInfo(hContext, hDevice, allocationSize, propName, + propSize, pPropValue, pPropSizeRet); } catch (...) { return exceptionToResult(std::current_exception()); } diff --git a/source/ur_api.cpp b/source/ur_api.cpp index cc69811f57..426ca95027 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -2410,6 +2410,9 @@ ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( /// device is null then the granularity is suitable for all devices in /// context. ur_device_handle_t hDevice, + /// [in] allocation size in bytes for which the alignment is being + /// queried. + size_t allocationSize, /// [in] type of the info to query. ur_virtual_mem_granularity_info_t propName, /// [in] size in bytes of the memory pointed to by pPropValue. 
diff --git a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp index 327728bb5a..fa3eb3f4b5 100644 --- a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp +++ b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp @@ -491,11 +491,11 @@ struct urEnqueueKernelLaunchWithVirtualMemory : uur::urKernelExecutionTest { GTEST_SKIP() << "Virtual memory is not supported."; } + alloc_size = 1024; ASSERT_SUCCESS(urVirtualMemGranularityGetInfo( - context, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, + context, device, alloc_size, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, sizeof(granularity), &granularity, nullptr)); - alloc_size = 1024; virtual_page_size = uur::RoundUpToNearestFactor(alloc_size, granularity); ASSERT_SUCCESS(urPhysicalMemCreate(context, device, virtual_page_size, diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h index b67eddd8f8..fff0be4a01 100644 --- a/test/conformance/testing/include/uur/fixtures.h +++ b/test/conformance/testing/include/uur/fixtures.h @@ -976,9 +976,12 @@ struct urVirtualMemGranularityTest : urContextTest { GTEST_SKIP() << "Virtual memory is not supported."; } + const size_t allocationSize = + 1; // assuming allocations in test are small enough and minimal granularity is used ASSERT_SUCCESS(urVirtualMemGranularityGetInfo( - context, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, - sizeof(granularity), &granularity, nullptr)); + context, device, allocationSize, + UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, sizeof(granularity), + &granularity, nullptr)); } size_t granularity; }; @@ -995,10 +998,12 @@ struct urVirtualMemGranularityTestWithParam : urContextTestWithParam { if (!virtual_memory_support) { GTEST_SKIP() << "Virtual memory is not supported."; } - + const size_t allocationSize = + 1; // assuming allocations in test are small and use smallest granularity ASSERT_SUCCESS(urVirtualMemGranularityGetInfo( - this->context, 
this->device, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, - sizeof(granularity), &granularity, nullptr)); + this->context, this->device, allocationSize, + UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, sizeof(granularity), + &granularity, nullptr)); ASSERT_NE(granularity, 0); } diff --git a/test/conformance/virtual_memory/urVirtualMemGranularityGetInfo.cpp b/test/conformance/virtual_memory/urVirtualMemGranularityGetInfo.cpp index 0507b8903a..cd4e3ed076 100644 --- a/test/conformance/virtual_memory/urVirtualMemGranularityGetInfo.cpp +++ b/test/conformance/virtual_memory/urVirtualMemGranularityGetInfo.cpp @@ -20,89 +20,96 @@ struct urVirtualMemGranularityGetInfoTest : uur::urContextTest { UUR_INSTANTIATE_DEVICE_TEST_SUITE(urVirtualMemGranularityGetInfoTest); -TEST_P(urVirtualMemGranularityGetInfoTest, SuccessMinimum) { +void urVirtualMemGranularityGetInfoTest_successCase( + ur_context_handle_t context, ur_device_handle_t device, + const ur_virtual_mem_granularity_info_t property_name, + const size_t allocation_size) { size_t property_size = 0; - const ur_virtual_mem_granularity_info_t property_name = - UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM; ASSERT_SUCCESS_OR_OPTIONAL_QUERY( - urVirtualMemGranularityGetInfo(context, device, property_name, 0, nullptr, - &property_size), + urVirtualMemGranularityGetInfo(context, device, allocation_size, + property_name, 0, nullptr, &property_size), property_name); ASSERT_EQ(sizeof(size_t), property_size); size_t property_value = 0; ASSERT_QUERY_RETURNS_VALUE( - urVirtualMemGranularityGetInfo(context, device, property_name, - property_size, &property_value, nullptr), + urVirtualMemGranularityGetInfo(context, device, allocation_size, + property_name, property_size, + &property_value, nullptr), property_value); ASSERT_GT(property_value, 0); } -TEST_P(urVirtualMemGranularityGetInfoTest, SuccessRecommended) { - size_t property_size = 0; - const ur_virtual_mem_granularity_info_t property_name = - UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED; 
+TEST_P(urVirtualMemGranularityGetInfoTest, SuccessMinimum_smallAllocation) { + urVirtualMemGranularityGetInfoTest_successCase( + context, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, 1); +} - ASSERT_SUCCESS_OR_OPTIONAL_QUERY( - urVirtualMemGranularityGetInfo(context, device, property_name, 0, nullptr, - &property_size), - property_name); - ASSERT_EQ(sizeof(size_t), property_size); +TEST_P(urVirtualMemGranularityGetInfoTest, SuccessMinimum_largeAllocation) { + urVirtualMemGranularityGetInfoTest_successCase( + context, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, 191439360); +} - size_t property_value = 0; - ASSERT_QUERY_RETURNS_VALUE( - urVirtualMemGranularityGetInfo(context, device, property_name, - property_size, &property_value, nullptr), - property_value); +TEST_P(urVirtualMemGranularityGetInfoTest, SuccessRecommended_smallAllocation) { + urVirtualMemGranularityGetInfoTest_successCase( + context, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED, 19); +} - ASSERT_GT(property_value, 0); +TEST_P(urVirtualMemGranularityGetInfoTest, SuccessRecommended_largeAllocation) { + urVirtualMemGranularityGetInfoTest_successCase( + context, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED, 211739367); } TEST_P(urVirtualMemGranularityGetInfoTest, InvalidNullHandleContext) { size_t property_size = 0; - ASSERT_EQ_RESULT(urVirtualMemGranularityGetInfo( - nullptr, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, - 0, nullptr, &property_size), - UR_RESULT_ERROR_INVALID_NULL_HANDLE); + ASSERT_EQ_RESULT( + urVirtualMemGranularityGetInfo(nullptr, device, 1, + UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, 0, + nullptr, &property_size), + UR_RESULT_ERROR_INVALID_NULL_HANDLE); } TEST_P(urVirtualMemGranularityGetInfoTest, InvalidEnumeration) { size_t property_size = 0; ASSERT_EQ_RESULT(urVirtualMemGranularityGetInfo( - context, device, + context, device, 1, UR_VIRTUAL_MEM_GRANULARITY_INFO_FORCE_UINT32, 0, nullptr, &property_size), UR_RESULT_ERROR_INVALID_ENUMERATION); } 
TEST_P(urVirtualMemGranularityGetInfoTest, InvalidNullPointerPropSizeRet) { - ASSERT_EQ_RESULT(urVirtualMemGranularityGetInfo( - context, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, - 0, nullptr, nullptr), - UR_RESULT_ERROR_INVALID_NULL_POINTER); + ASSERT_EQ_RESULT( + urVirtualMemGranularityGetInfo(context, device, 1, + UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, 0, + nullptr, nullptr), + UR_RESULT_ERROR_INVALID_NULL_POINTER); } TEST_P(urVirtualMemGranularityGetInfoTest, InvalidNullPointerPropValue) { - ASSERT_EQ_RESULT(urVirtualMemGranularityGetInfo( - context, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, - sizeof(size_t), nullptr, nullptr), - UR_RESULT_ERROR_INVALID_NULL_POINTER); + ASSERT_EQ_RESULT( + urVirtualMemGranularityGetInfo(context, device, 1, + UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, + sizeof(size_t), nullptr, nullptr), + UR_RESULT_ERROR_INVALID_NULL_POINTER); } TEST_P(urVirtualMemGranularityGetInfoTest, InvalidPropSizeZero) { size_t minimum = 0; - ASSERT_EQ_RESULT(urVirtualMemGranularityGetInfo( - context, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, - 0, &minimum, nullptr), - UR_RESULT_ERROR_INVALID_SIZE); + ASSERT_EQ_RESULT( + urVirtualMemGranularityGetInfo(context, device, 1, + UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, 0, + &minimum, nullptr), + UR_RESULT_ERROR_INVALID_SIZE); } TEST_P(urVirtualMemGranularityGetInfoTest, InvalidSizePropSizeSmall) { size_t minimum = 0; - ASSERT_EQ_RESULT(urVirtualMemGranularityGetInfo( - context, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, - sizeof(size_t) - 1, &minimum, nullptr), - UR_RESULT_ERROR_INVALID_SIZE); + ASSERT_EQ_RESULT( + urVirtualMemGranularityGetInfo(context, device, 1, + UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, + sizeof(size_t) - 1, &minimum, nullptr), + UR_RESULT_ERROR_INVALID_SIZE); }