Skip to content
5 changes: 1 addition & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

cmake_minimum_required(VERSION 3.14.0 FATAL_ERROR)
project(unified-runtime VERSION 0.8.2)
project(unified-runtime VERSION 0.8.3)

include(GNUInstallDirs)
include(CheckCXXSourceCompiles)
Expand Down Expand Up @@ -111,9 +111,6 @@ if(UR_ENABLE_TRACING)
)
if (MSVC)
set(TARGET_XPTI $<IF:$<CONFIG:Release>,xpti,xptid>)

# disable warning C4267: The compiler detected a conversion from size_t to a smaller type.
target_compile_options(xptifw PRIVATE /wd4267)
else()
set(TARGET_XPTI xpti)
endif()
Expand Down
8 changes: 7 additions & 1 deletion cmake/helpers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,16 @@ function(add_ur_target_compile_options name)
/W3
/MD$<$<CONFIG:Debug>:d>
/GS
/DWIN32_LEAN_AND_MEAN
/DNOMINMAX
)

if(UR_DEVELOPER_MODE)
target_compile_options(${name} PRIVATE /WX /GS)
# _CRT_SECURE_NO_WARNINGS used mainly because of getenv
# C4267: The compiler detected a conversion from size_t to a smaller type.
target_compile_options(${name} PRIVATE
/WX /GS /D_CRT_SECURE_NO_WARNINGS /wd4267
)
endif()
endif()
endfunction()
Expand Down
8 changes: 6 additions & 2 deletions source/adapters/cuda/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "context.hpp"
#include "device.hpp"
#include "platform.hpp"
#include "ur_util.hpp"

int getAttribute(ur_device_handle_t device, CUdevice_attribute attribute) {
int value;
Expand All @@ -40,7 +41,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
ur_device_info_t propName,
size_t propSize,
void *pPropValue,
size_t *pPropSizeRet) {
size_t *pPropSizeRet) try {
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);

static constexpr uint32_t MaxWorkItemDimensions = 3u;
Expand Down Expand Up @@ -1033,6 +1034,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
break;
}
return UR_RESULT_ERROR_INVALID_ENUMERATION;
} catch (...) {
return exceptionToResult(std::current_exception());
}

/// \return PI_SUCCESS if the function is executed successfully
Expand Down Expand Up @@ -1097,7 +1100,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform,

/// Exposes the native handle wrapped by a UR device.
///
/// \param hDevice device whose native handle is requested.
/// \param[out] phNativeHandle receives the value returned by hDevice->get(),
///             converted to ur_native_handle_t.
/// \return UR_RESULT_SUCCESS.
UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle(
    ur_device_handle_t hDevice, ur_native_handle_t *phNativeHandle) {
  // Widen to std::uintptr_t before the reinterpret_cast so the conversion to
  // the handle type is done from a pointer-sized integer. The handle is
  // written exactly once.
  *phNativeHandle = reinterpret_cast<ur_native_handle_t>(
      static_cast<std::uintptr_t>(hDevice->get()));
  return UR_RESULT_SUCCESS;
}

Expand Down
6 changes: 5 additions & 1 deletion source/adapters/cuda/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
#include "context.hpp"
#include "device.hpp"
#include "queue.hpp"
#include "ur_api.h"
#include "ur_util.hpp"

#include <cassert>
#include <cuda.h>
Expand Down Expand Up @@ -65,7 +67,7 @@ ur_result_t ur_event_handle_t_::start() {
return Result;
}

bool ur_event_handle_t_::isCompleted() const noexcept {
bool ur_event_handle_t_::isCompleted() const noexcept try {
if (!IsRecorded) {
return false;
}
Expand All @@ -80,6 +82,8 @@ bool ur_event_handle_t_::isCompleted() const noexcept {
}
}
return true;
} catch (...) {
return exceptionToResult(std::current_exception()) == UR_RESULT_SUCCESS;
}

uint64_t ur_event_handle_t_::getQueuedTime() const {
Expand Down
4 changes: 2 additions & 2 deletions source/adapters/cuda/image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ urToCudaImageChannelFormat(ur_image_channel_type_t image_channel_type,
std::make_pair(image_channel_type, num_channels));
cuda_format = cuda_format_and_size.first;
pixel_size_bytes = cuda_format_and_size.second;
} catch (std::out_of_range &e) {
} catch (const std::out_of_range &) {
return UR_RESULT_ERROR_IMAGE_FORMAT_NOT_SUPPORTED;
}
}
Expand Down Expand Up @@ -276,7 +276,7 @@ ur_result_t urTextureCreate(ur_sampler_handle_t hSampler,
ImageTexDesc.mipmapFilterMode = MipFilterMode;
ImageTexDesc.maxMipmapLevelClamp = hSampler->MaxMipmapLevelClamp;
ImageTexDesc.minMipmapLevelClamp = hSampler->MinMipmapLevelClamp;
ImageTexDesc.maxAnisotropy = hSampler->MaxAnisotropy;
ImageTexDesc.maxAnisotropy = static_cast<unsigned>(hSampler->MaxAnisotropy);

// The address modes can interfere with other dimensions,
// e.g. 1D texture sampling can be interfered with when setting other
Expand Down
3 changes: 2 additions & 1 deletion source/adapters/cuda/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,8 @@ ur_result_t ur_program_handle_t_::buildProgram(const char *BuildOptions) {
getMaxRegistersJitOptionValue(this->BuildOptions, MaxRegs);
if (Valid) {
Options.push_back(CU_JIT_MAX_REGISTERS);
OptionVals.push_back(reinterpret_cast<void *>(MaxRegs));
OptionVals.push_back(
reinterpret_cast<void *>(static_cast<std::uintptr_t>(MaxRegs)));
}
}

Expand Down
3 changes: 1 addition & 2 deletions source/adapters/cuda/sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ urSamplerCreate(ur_context_handle_t hContext, const ur_sampler_desc_t *pDesc,
new ur_sampler_handle_t_(hContext)};

if (pDesc->stype == UR_STRUCTURE_TYPE_SAMPLER_DESC) {
Sampler->Props |= pDesc->normalizedCoords;
Sampler->Props |= static_cast<uint32_t>(pDesc->normalizedCoords);
Sampler->Props |= pDesc->filterMode << 1;
Sampler->Props |= pDesc->addressingMode << 2;
} else {
Expand Down Expand Up @@ -71,7 +71,6 @@ urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName,
default:
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
}
return {};
}

UR_APIEXPORT ur_result_t UR_APICALL
Expand Down
19 changes: 17 additions & 2 deletions source/adapters/hip/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -98,15 +98,30 @@ if("${UR_HIP_PLATFORM}" STREQUAL "AMD")
)

if(UR_ENABLE_COMGR)
# The COMGR header changed location between ROCm versions 4 and 5.
set(UR_COMGR_VERSION5_HEADER "${UR_HIP_INCLUDE_DIR}/amd_comgr/amd_comgr.h")
set(UR_COMGR_VERSION4_HEADER "${UR_HIP_INCLUDE_DIR}/amd_comgr.h")
# Prefer the version 5 location. If only the version 4 header exists, define
# UR_COMGR_VERSION4_INCLUDE on the target; if neither exists, stop configuring.
if(NOT EXISTS "${UR_COMGR_VERSION5_HEADER}")
  if(EXISTS "${UR_COMGR_VERSION4_HEADER}")
    target_compile_definitions(${TARGET_NAME} PRIVATE UR_COMGR_VERSION4_INCLUDE)
  else()
    # message() concatenates its arguments without a separator, so every
    # fragment must carry its own trailing space.
    message(FATAL_ERROR "Could not find AMD COMGR header at "
                        "${UR_COMGR_VERSION5_HEADER} or "
                        "${UR_COMGR_VERSION4_HEADER}, "
                        "check ROCm installation")
  endif()
endif()

add_library(amd_comgr SHARED IMPORTED GLOBAL)
set_target_properties(
amd_comgr PROPERTIES
IMPORTED_LOCATION "${UR_HIP_LIB_DIR}/libamd_comgr.so"
INTERFACE_INCLUDE_DIRECTORIES "${HIP_HEADERS}"
INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_HEADERS}"
)
target_link_libraries(pi_hip PUBLIC amd_comgr)
target_compile_definitions(pi_hip PRIVATE SYCL_ENABLE_KERNEL_FUSION)
target_link_libraries(${TARGET_NAME} PUBLIC amd_comgr)
target_compile_definitions(${TARGET_NAME} PRIVATE SYCL_ENABLE_KERNEL_FUSION)
endif(UR_ENABLE_COMGR)

target_link_libraries(${TARGET_NAME} PRIVATE
Expand Down
39 changes: 29 additions & 10 deletions source/adapters/hip/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,29 +10,48 @@
#pragma once

#ifdef SYCL_ENABLE_KERNEL_FUSION
#ifdef UR_COMGR_VERSION4_INCLUDE
#include <amd_comgr.h>
#else
#include <amd_comgr/amd_comgr.h>
#endif
#endif
#include <hip/hip_runtime.h>
#include <ur/ur.hpp>

// Hipify doesn't support cuArrayGetDescriptor, on AMD the hipArray can just be
// indexed, but on NVidia it is an opaque type and needs to go through
// cuArrayGetDescriptor so implement a utility function to get the array
// properties
inline void getArrayDesc(hipArray *Array, hipArray_Format &Format,
size_t &Channels) {
// Before ROCm 6, hipify doesn't support cuArrayGetDescriptor, on AMD the
// hipArray can just be indexed, but on NVidia it is an opaque type and needs to
// go through cuArrayGetDescriptor so implement a utility function to get the
// array properties
inline static hipError_t getArrayDesc(hipArray *Array, hipArray_Format &Format,
size_t &Channels) {
#if HIP_VERSION_MAJOR >= 6
HIP_ARRAY_DESCRIPTOR ArrayDesc;
hipError_t err = hipArrayGetDescriptor(&ArrayDesc, Array);
if (err == hipSuccess) {
Format = ArrayDesc.Format;
Channels = ArrayDesc.NumChannels;
}
return err;
#else
#if defined(__HIP_PLATFORM_AMD__)
Format = Array->Format;
Channels = Array->NumChannels;
return hipSuccess;
#elif defined(__HIP_PLATFORM_NVIDIA__)
CUDA_ARRAY_DESCRIPTOR ArrayDesc;
cuArrayGetDescriptor(&ArrayDesc, (CUarray)Array);

Format = ArrayDesc.Format;
Channels = ArrayDesc.NumChannels;
CUresult err = cuArrayGetDescriptor(&ArrayDesc, (CUarray)Array);
if (err == CUDA_SUCCESS) {
Format = ArrayDesc.Format;
Channels = ArrayDesc.NumChannels;
return hipSuccess;
} else {
return hipErrorUnknown; // No easy way to map CUerror to hipError
}
#else
#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
#endif
#endif
}

// HIP on NVIDIA headers guard hipArray3DCreate behind __CUDACC__, this does not
Expand Down
29 changes: 6 additions & 23 deletions source/adapters/hip/enqueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,9 @@
#include "memory.hpp"
#include "queue.hpp"

namespace {
extern size_t imageElementByteSize(hipArray_Format ArrayFormat);

// Maps a HIP array format to its element size in bytes: 1 for the 8-bit
// integer formats, 2 for the 16-bit integer and half formats, 4 for the 32-bit
// integer and float formats. Any other format is reported through
// detail::ur::die; the 0 result is only reached if that call returns.
static size_t imageElementByteSize(hipArray_Format ArrayFormat) {
  size_t ElementBytes = 0;
  switch (ArrayFormat) {
  case HIP_AD_FORMAT_UNSIGNED_INT8:
  case HIP_AD_FORMAT_SIGNED_INT8:
    ElementBytes = 1;
    break;
  case HIP_AD_FORMAT_UNSIGNED_INT16:
  case HIP_AD_FORMAT_SIGNED_INT16:
  case HIP_AD_FORMAT_HALF:
    ElementBytes = 2;
    break;
  case HIP_AD_FORMAT_UNSIGNED_INT32:
  case HIP_AD_FORMAT_SIGNED_INT32:
  case HIP_AD_FORMAT_FLOAT:
    ElementBytes = 4;
    break;
  default:
    detail::ur::die("Invalid image format.");
  }
  return ElementBytes;
}
namespace {

ur_result_t enqueueEventsWait(ur_queue_handle_t CommandQueue,
hipStream_t Stream, uint32_t NumEventsInWaitList,
Expand Down Expand Up @@ -898,7 +881,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead(

hipArray_Format Format;
size_t NumChannels;
getArrayDesc(Array, Format, NumChannels);
UR_CHECK_ERROR(getArrayDesc(Array, Format, NumChannels));

int ElementByteSize = imageElementByteSize(Format);

Expand Down Expand Up @@ -959,7 +942,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite(

hipArray_Format Format;
size_t NumChannels;
getArrayDesc(Array, Format, NumChannels);
UR_CHECK_ERROR(getArrayDesc(Array, Format, NumChannels));

int ElementByteSize = imageElementByteSize(Format);

Expand Down Expand Up @@ -1023,12 +1006,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy(
hipArray *SrcArray = std::get<SurfaceMem>(hImageSrc->Mem).getArray();
hipArray_Format SrcFormat;
size_t SrcNumChannels;
getArrayDesc(SrcArray, SrcFormat, SrcNumChannels);
UR_CHECK_ERROR(getArrayDesc(SrcArray, SrcFormat, SrcNumChannels));

hipArray *DstArray = std::get<SurfaceMem>(hImageDst->Mem).getArray();
hipArray_Format DstFormat;
size_t DstNumChannels;
getArrayDesc(DstArray, DstFormat, DstNumChannels);
UR_CHECK_ERROR(getArrayDesc(DstArray, DstFormat, DstNumChannels));

UR_ASSERT(SrcFormat == DstFormat,
UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR);
Expand Down
2 changes: 1 addition & 1 deletion source/adapters/hip/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj(
auto array = std::get<SurfaceMem>(hArgValue->Mem).getArray();
hipArray_Format Format;
size_t NumChannels;
getArrayDesc(array, Format, NumChannels);
UR_CHECK_ERROR(getArrayDesc(array, Format, NumChannels));
if (Format != HIP_AD_FORMAT_UNSIGNED_INT32 &&
Format != HIP_AD_FORMAT_SIGNED_INT32 &&
Format != HIP_AD_FORMAT_HALF && Format != HIP_AD_FORMAT_FLOAT) {
Expand Down
32 changes: 5 additions & 27 deletions source/adapters/hip/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,8 @@
#include <cassert>
#include <ur_util.hpp>

namespace {

size_t GetHipFormatPixelSize(hipArray_Format Format) {
switch (Format) {
size_t imageElementByteSize(hipArray_Format ArrayFormat) {
switch (ArrayFormat) {
case HIP_AD_FORMAT_UNSIGNED_INT8:
case HIP_AD_FORMAT_SIGNED_INT8:
return 1;
Expand All @@ -31,10 +29,9 @@ size_t GetHipFormatPixelSize(hipArray_Format Format) {
default:
detail::ur::die("Invalid HIP format specifier");
}
return 0;
}

} // namespace

/// Decreases the reference count of the Mem object.
/// If this is zero, calls the relevant HIP Free function
/// \return UR_RESULT_SUCCESS unless deallocation error
Expand Down Expand Up @@ -280,7 +277,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory,
UR_CHECK_ERROR(
hipArray3DGetDescriptor(&ArrayDescriptor, Mem.getArray()));
const auto PixelSizeBytes =
GetHipFormatPixelSize(ArrayDescriptor.Format) *
imageElementByteSize(ArrayDescriptor.Format) *
ArrayDescriptor.NumChannels;
const auto ImageSizeBytes =
PixelSizeBytes *
Expand Down Expand Up @@ -573,25 +570,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory,
}
};

const auto hipFormatToElementSize =
[](hipArray_Format HipFormat) -> size_t {
switch (HipFormat) {
case HIP_AD_FORMAT_UNSIGNED_INT8:
case HIP_AD_FORMAT_SIGNED_INT8:
return 1;
case HIP_AD_FORMAT_UNSIGNED_INT16:
case HIP_AD_FORMAT_SIGNED_INT16:
case HIP_AD_FORMAT_HALF:
return 2;
case HIP_AD_FORMAT_UNSIGNED_INT32:
case HIP_AD_FORMAT_SIGNED_INT32:
case HIP_AD_FORMAT_FLOAT:
return 4;
default:
detail::ur::die("Invalid Hip format specified.");
}
};

switch (propName) {
case UR_IMAGE_INFO_FORMAT:
return ReturnValue(ur_image_format_t{UR_IMAGE_CHANNEL_ORDER_RGBA,
Expand All @@ -603,7 +581,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory,
case UR_IMAGE_INFO_DEPTH:
return ReturnValue(ArrayInfo.Depth);
case UR_IMAGE_INFO_ELEMENT_SIZE:
return ReturnValue(hipFormatToElementSize(ArrayInfo.Format));
return ReturnValue(imageElementByteSize(ArrayInfo.Format));
case UR_IMAGE_INFO_ROW_PITCH:
case UR_IMAGE_INFO_SLICE_PITCH:
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
Expand Down
4 changes: 4 additions & 0 deletions source/adapters/hip/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@
#include "program.hpp"

#ifdef SYCL_ENABLE_KERNEL_FUSION
#ifdef UR_COMGR_VERSION4_INCLUDE
#include <amd_comgr.h>
#else
#include <amd_comgr/amd_comgr.h>
#endif
namespace {
template <typename ReleaseType, ReleaseType Release, typename T>
struct COMgrObjCleanUp {
Expand Down
Loading