From 69cdc2360021f017872f93bdc22bc845e286fa51 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Fri, 27 Jun 2025 13:40:40 +0800 Subject: [PATCH 1/7] [SYCL] Disable imf libdevice for NV and AMD backend. imf libdevice shouldn't be used by NV and AMD backend, this PR stops building it for these 2 backends. We also move all imf e2e test cases in a separate directory following exp tests. Signed-off-by: jinge90 --- libdevice/cmake/modules/SYCLLibdevice.cmake | 33 +++++-------------- .../bfloat16_integeral_convesions.cpp} | 3 +- .../double2bfloat16.cpp} | 1 + .../double2half.cpp} | 1 + .../float2bfloat16.cpp} | 1 + .../fp16_trivial_test.cpp} | 2 ++ .../fp32_rounding_test.cpp} | 1 + .../{imf_fp32_test.cpp => imf/fp32_test.cpp} | 3 +- .../fp64_rounding_test.cpp} | 1 + .../{imf_fp64_test.cpp => imf/fp64_test.cpp} | 2 ++ .../fp64_test2.cpp} | 1 + .../half_type_cast.cpp} | 3 +- .../DeviceLib/{ => imf}/imf_utils.hpp | 0 .../simd_emulate_test.cpp} | 1 + .../no-unsupported-without-info.cpp | 18 +--------- 15 files changed, 27 insertions(+), 44 deletions(-) rename sycl/test-e2e/DeviceLib/{imf_bfloat16_integeral_convesions.cpp => imf/bfloat16_integeral_convesions.cpp} (99%) rename sycl/test-e2e/DeviceLib/{imf_double2bfloat16.cpp => imf/double2bfloat16.cpp} (97%) rename sycl/test-e2e/DeviceLib/{imf_double2half.cpp => imf/double2half.cpp} (95%) rename sycl/test-e2e/DeviceLib/{imf_float2bfloat16.cpp => imf/float2bfloat16.cpp} (97%) rename sycl/test-e2e/DeviceLib/{imf_fp16_trivial_test.cpp => imf/fp16_trivial_test.cpp} (99%) rename sycl/test-e2e/DeviceLib/{imf_fp32_rounding_test.cpp => imf/fp32_rounding_test.cpp} (99%) rename sycl/test-e2e/DeviceLib/{imf_fp32_test.cpp => imf/fp32_test.cpp} (99%) rename sycl/test-e2e/DeviceLib/{imf_fp64_rounding_test.cpp => imf/fp64_rounding_test.cpp} (99%) rename sycl/test-e2e/DeviceLib/{imf_fp64_test.cpp => imf/fp64_test.cpp} (99%) rename sycl/test-e2e/DeviceLib/{imf_fp64_test2.cpp => imf/fp64_test2.cpp} (90%) rename sycl/test-e2e/DeviceLib/{imf_half_type_cast.cpp => imf/half_type_cast.cpp} (99%) rename sycl/test-e2e/DeviceLib/{ => imf}/imf_utils.hpp (100%) rename sycl/test-e2e/DeviceLib/{imf_simd_emulate_test.cpp => imf/simd_emulate_test.cpp} (99%) diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 9a56ffbb35c2..6f7ab38f2232 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -1,4 +1,3 @@ -include(CheckCXXCompilerFlag) set(obj_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") set(obj-new-offload_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") if (MSVC) @@ -377,13 +376,19 @@ add_devicelibs(libsycl-cmath-fp64 DEPENDENCIES ${cmath_obj_deps} ) add_devicelibs(libsycl-imf SRC imf_wrapper.cpp - DEPENDENCIES ${imf_obj_deps}) + DEPENDENCIES ${imf_obj_deps} + SKIP_ARCHS nvptx64-nvidia-cuda + amdgcn-amd-amdhsa) add_devicelibs(libsycl-imf-fp64 SRC imf_wrapper_fp64.cpp - DEPENDENCIES ${imf_obj_deps}) + DEPENDENCIES ${imf_obj_deps} + SKIP_ARCHS nvptx64-nvidia-cuda + amdgcn-amd-amdhsa) add_devicelibs(libsycl-imf-bf16 SRC imf_wrapper_bf16.cpp - DEPENDENCIES ${imf_obj_deps}) + DEPENDENCIES ${imf_obj_deps} + SKIP_ARCHS nvptx64-nvidia-cuda + amdgcn-amd-amdhsa) add_devicelibs(libsycl-bfloat16 SRC bfloat16_wrapper.cpp DEPENDENCIES ${cmath_obj_deps}) @@ -651,26 +656,6 @@ foreach(dtype IN ITEMS bf16 fp32 fp64) endforeach() endforeach() -# Add device fallback imf libraries for the NVPTX and AMD targets. -# The output files are bitcode. -foreach(arch IN LISTS devicelib_arch) - foreach(dtype IN ITEMS bf16 fp32 fp64) - set(tgt_name imf_fallback_${dtype}_bc_${arch}) - - add_lib_imf(libsycl-fallback-imf-${arch}-${dtype} - ARCH ${arch} - DIR ${bc_binary_dir} - FTYPE bc - DTYPE ${dtype} - EXTRA_OPTS ${bc_device_compile_opts} ${compile_opts_${arch}} - TGT_NAME ${tgt_name}) - - append_to_property( - ${bc_binary_dir}/libsycl-fallback-imf-${arch}-${dtype}.${bc-suffix} - PROPERTY_NAME BC_DEVICE_LIBS_${arch}) - endforeach() -endforeach() - # Create one large bitcode file for the NVPTX and AMD targets. # Use all the files collected in the respective global properties. foreach(arch IN LISTS devicelib_arch) diff --git a/sycl/test-e2e/DeviceLib/imf_bfloat16_integeral_convesions.cpp b/sycl/test-e2e/DeviceLib/imf/bfloat16_integeral_convesions.cpp similarity index 99% rename from sycl/test-e2e/DeviceLib/imf_bfloat16_integeral_convesions.cpp rename to sycl/test-e2e/DeviceLib/imf/bfloat16_integeral_convesions.cpp index 35d158744c07..8356d73439f7 100644 --- a/sycl/test-e2e/DeviceLib/imf_bfloat16_integeral_convesions.cpp +++ b/sycl/test-e2e/DeviceLib/imf/bfloat16_integeral_convesions.cpp @@ -5,9 +5,10 @@ // RUN: %{run} %t2.out // // UNSUPPORTED: target-nvidia || target-amd +// UNSUPPORTED-INTENDED: imf device library is not used by NV and AMD backend. -// Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows +// UNSUPPORTED-INTENDED: Windows doesn't yet have full shutdown(). #include "imf_utils.hpp" #include diff --git a/sycl/test-e2e/DeviceLib/imf_double2bfloat16.cpp b/sycl/test-e2e/DeviceLib/imf/double2bfloat16.cpp similarity index 97% rename from sycl/test-e2e/DeviceLib/imf_double2bfloat16.cpp rename to sycl/test-e2e/DeviceLib/imf/double2bfloat16.cpp index 8a98c9e5cdaa..767b77749493 100644 --- a/sycl/test-e2e/DeviceLib/imf_double2bfloat16.cpp +++ b/sycl/test-e2e/DeviceLib/imf/double2bfloat16.cpp @@ -7,6 +7,7 @@ // RUN: %{run} %t1.out // // UNSUPPORTED: target-nvidia || target-amd +// UNSUPPORTED-INTENDED: imf device library is not used by NV and AMD backend. #include "imf_utils.hpp" #include diff --git a/sycl/test-e2e/DeviceLib/imf_double2half.cpp b/sycl/test-e2e/DeviceLib/imf/double2half.cpp similarity index 95% rename from sycl/test-e2e/DeviceLib/imf_double2half.cpp rename to sycl/test-e2e/DeviceLib/imf/double2half.cpp index b65c1519594e..747ccfda95c7 100644 --- a/sycl/test-e2e/DeviceLib/imf_double2half.cpp +++ b/sycl/test-e2e/DeviceLib/imf/double2half.cpp @@ -8,6 +8,7 @@ // RUN: %{run} %t2.out // UNSUPPORTED: target-nvidia, target-amd +// UNSUPPORTED-INTENDED: imf device library is not used by NV and AMD backend. #include "imf_utils.hpp" #include diff --git a/sycl/test-e2e/DeviceLib/imf_float2bfloat16.cpp b/sycl/test-e2e/DeviceLib/imf/float2bfloat16.cpp similarity index 97% rename from sycl/test-e2e/DeviceLib/imf_float2bfloat16.cpp rename to sycl/test-e2e/DeviceLib/imf/float2bfloat16.cpp index 56356294ab4d..06bc76f2f214 100644 --- a/sycl/test-e2e/DeviceLib/imf_float2bfloat16.cpp +++ b/sycl/test-e2e/DeviceLib/imf/float2bfloat16.cpp @@ -5,6 +5,7 @@ // RUN: %{run} %t2.out // // UNSUPPORTED: target-nvidia || target-amd +// UNSUPPORTED-INTENDED: imf device library is not used by NV and AMD backend. // All __imf_* bf16 functions are implemented via fp32 emulation, so we don't // need to check whether underlying device supports bf16 or not. diff --git a/sycl/test-e2e/DeviceLib/imf_fp16_trivial_test.cpp b/sycl/test-e2e/DeviceLib/imf/fp16_trivial_test.cpp similarity index 99% rename from sycl/test-e2e/DeviceLib/imf_fp16_trivial_test.cpp rename to sycl/test-e2e/DeviceLib/imf/fp16_trivial_test.cpp index 1fa8627c4c7e..22dd2597e717 100644 --- a/sycl/test-e2e/DeviceLib/imf_fp16_trivial_test.cpp +++ b/sycl/test-e2e/DeviceLib/imf/fp16_trivial_test.cpp @@ -9,9 +9,11 @@ // RUN: %{run} %t.out // UNSUPPORTED: target-nvidia, target-amd +// UNSUPPORTED-INTENDED: imf device library is not used by NV and AMD backend. // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows +// UNSUPPORTED-INTENDED: Windows doesn't yet have full shutdown(). #include "imf_utils.hpp" #include diff --git a/sycl/test-e2e/DeviceLib/imf_fp32_rounding_test.cpp b/sycl/test-e2e/DeviceLib/imf/fp32_rounding_test.cpp similarity index 99% rename from sycl/test-e2e/DeviceLib/imf_fp32_rounding_test.cpp rename to sycl/test-e2e/DeviceLib/imf/fp32_rounding_test.cpp index 461399c1c50e..9ade7331d838 100644 --- a/sycl/test-e2e/DeviceLib/imf_fp32_rounding_test.cpp +++ b/sycl/test-e2e/DeviceLib/imf/fp32_rounding_test.cpp @@ -5,6 +5,7 @@ // RUN: %{run} %t2.out // // UNSUPPORTED: target-nvidia || target-amd +// UNSUPPORTED-INTENDED: imf device library is not used by NV and AMD backend. #include "imf_utils.hpp" #include diff --git a/sycl/test-e2e/DeviceLib/imf_fp32_test.cpp b/sycl/test-e2e/DeviceLib/imf/fp32_test.cpp similarity index 99% rename from sycl/test-e2e/DeviceLib/imf_fp32_test.cpp rename to sycl/test-e2e/DeviceLib/imf/fp32_test.cpp index 71c73cbdbec4..cf509133ed71 100644 --- a/sycl/test-e2e/DeviceLib/imf_fp32_test.cpp +++ b/sycl/test-e2e/DeviceLib/imf/fp32_test.cpp @@ -5,9 +5,10 @@ // RUN: %{run} %t2.out // // UNSUPPORTED: target-nvidia || target-amd +// UNSUPPORTED-INTENDED: imf device library is not used by NV and AMD backend. -// Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows +// UNSUPPORTED-INTENDED: Windows doesn't yet have full shutdown(). #include "imf_utils.hpp" #include diff --git a/sycl/test-e2e/DeviceLib/imf_fp64_rounding_test.cpp b/sycl/test-e2e/DeviceLib/imf/fp64_rounding_test.cpp similarity index 99% rename from sycl/test-e2e/DeviceLib/imf_fp64_rounding_test.cpp rename to sycl/test-e2e/DeviceLib/imf/fp64_rounding_test.cpp index 3b7fe2f5c677..0a93b06ac898 100644 --- a/sycl/test-e2e/DeviceLib/imf_fp64_rounding_test.cpp +++ b/sycl/test-e2e/DeviceLib/imf/fp64_rounding_test.cpp @@ -6,6 +6,7 @@ // RUN: %{run} %t2.out // // UNSUPPORTED: target-nvidia || target-amd +// UNSUPPORTED-INTENDED: imf device library is not used by NV and AMD backend. // Depends on SPIR-V Backend & run-time drivers version. // XFAIL: spirv-backend && run-mode diff --git a/sycl/test-e2e/DeviceLib/imf_fp64_test.cpp b/sycl/test-e2e/DeviceLib/imf/fp64_test.cpp similarity index 99% rename from sycl/test-e2e/DeviceLib/imf_fp64_test.cpp rename to sycl/test-e2e/DeviceLib/imf/fp64_test.cpp index ec94aa634e68..617dd252b1bf 100644 --- a/sycl/test-e2e/DeviceLib/imf_fp64_test.cpp +++ b/sycl/test-e2e/DeviceLib/imf/fp64_test.cpp @@ -6,6 +6,8 @@ // RUN: %{run} %t2.out // // UNSUPPORTED: target-nvidia || target-amd +// UNSUPPORTED-INTENDED: imf device library is not used by NV and AMD backend. + #include "imf_utils.hpp" #include diff --git a/sycl/test-e2e/DeviceLib/imf_fp64_test2.cpp b/sycl/test-e2e/DeviceLib/imf/fp64_test2.cpp similarity index 90% rename from sycl/test-e2e/DeviceLib/imf_fp64_test2.cpp rename to sycl/test-e2e/DeviceLib/imf/fp64_test2.cpp index 7e06c74d8379..58d861abb862 100644 --- a/sycl/test-e2e/DeviceLib/imf_fp64_test2.cpp +++ b/sycl/test-e2e/DeviceLib/imf/fp64_test2.cpp @@ -6,6 +6,7 @@ // RUN: %{run} %t2.out // // UNSUPPORTED: target-nvidia || target-amd +// UNSUPPORTED-INTENDED: imf device library is not used by NV and AMD backend. #include "imf_utils.hpp" #include diff --git a/sycl/test-e2e/DeviceLib/imf_half_type_cast.cpp b/sycl/test-e2e/DeviceLib/imf/half_type_cast.cpp similarity index 99% rename from sycl/test-e2e/DeviceLib/imf_half_type_cast.cpp rename to sycl/test-e2e/DeviceLib/imf/half_type_cast.cpp index c9d2c721664a..b7385b00b4f4 100644 --- a/sycl/test-e2e/DeviceLib/imf_half_type_cast.cpp +++ b/sycl/test-e2e/DeviceLib/imf/half_type_cast.cpp @@ -8,9 +8,10 @@ // RUN: %{run} %t2.out // UNSUPPORTED: target-nvidia, target-amd +// UNSUPPORTED-INTENDED: imf device library is not used by NV and AMD backend. -// Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows +// UNSUPPORTED-INTENDED: Windows doesn't yet have full shutdown(). // Depends on SPIR-V Backend & run-time drivers version. // XFAIL: spirv-backend && run-mode diff --git a/sycl/test-e2e/DeviceLib/imf_utils.hpp b/sycl/test-e2e/DeviceLib/imf/imf_utils.hpp similarity index 100% rename from sycl/test-e2e/DeviceLib/imf_utils.hpp rename to sycl/test-e2e/DeviceLib/imf/imf_utils.hpp diff --git a/sycl/test-e2e/DeviceLib/imf_simd_emulate_test.cpp b/sycl/test-e2e/DeviceLib/imf/simd_emulate_test.cpp similarity index 99% rename from sycl/test-e2e/DeviceLib/imf_simd_emulate_test.cpp rename to sycl/test-e2e/DeviceLib/imf/simd_emulate_test.cpp index ea5e0a82def3..348619e4d3e6 100644 --- a/sycl/test-e2e/DeviceLib/imf_simd_emulate_test.cpp +++ b/sycl/test-e2e/DeviceLib/imf/simd_emulate_test.cpp @@ -5,6 +5,7 @@ // RUN: %{run} %t2.out // // UNSUPPORTED: target-nvidia || target-amd +// UNSUPPORTED-INTENDED: imf device library is not used by NV and AMD backend. // UNSUPPORTED: igc-dev // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/17008 diff --git a/sycl/test/e2e_test_requirements/no-unsupported-without-info.cpp b/sycl/test/e2e_test_requirements/no-unsupported-without-info.cpp index 087da2de5f85..b6044de38087 100644 --- a/sycl/test/e2e_test_requirements/no-unsupported-without-info.cpp +++ b/sycl/test/e2e_test_requirements/no-unsupported-without-info.cpp @@ -54,7 +54,7 @@ // tests to match the required format and in that case you should just update // (i.e. reduce) the number and the list below. // -// NUMBER-OF-UNSUPPORTED-WITHOUT-INFO: 248 +// NUMBER-OF-UNSUPPORTED-WITHOUT-INFO: 232 // // List of improperly UNSUPPORTED tests. // Remove the CHECK once the test has been properly UNSUPPORTED. @@ -94,22 +94,6 @@ // CHECK-NEXT: DeprecatedFeatures/DiscardEvents/discard_events_using_assert.cpp // CHECK-NEXT: DeviceLib/built-ins/printf.cpp // CHECK-NEXT: DeviceLib/cmath-aot.cpp -// CHECK-NEXT: DeviceLib/imf_bfloat16_integeral_convesions.cpp -// CHECK-NEXT: DeviceLib/imf_bfloat16_integeral_convesions.cpp -// CHECK-NEXT: DeviceLib/imf_double2bfloat16.cpp -// CHECK-NEXT: DeviceLib/imf_double2half.cpp -// CHECK-NEXT: DeviceLib/imf_float2bfloat16.cpp -// CHECK-NEXT: DeviceLib/imf_fp16_trivial_test.cpp -// CHECK-NEXT: DeviceLib/imf_fp16_trivial_test.cpp -// CHECK-NEXT: DeviceLib/imf_fp32_rounding_test.cpp -// CHECK-NEXT: DeviceLib/imf_fp32_test.cpp -// CHECK-NEXT: DeviceLib/imf_fp32_test.cpp -// CHECK-NEXT: DeviceLib/imf_fp64_rounding_test.cpp -// CHECK-NEXT: DeviceLib/imf_fp64_test.cpp -// CHECK-NEXT: DeviceLib/imf_fp64_test2.cpp -// CHECK-NEXT: DeviceLib/imf_half_type_cast.cpp -// CHECK-NEXT: DeviceLib/imf_half_type_cast.cpp -// CHECK-NEXT: DeviceLib/imf_simd_emulate_test.cpp // CHECK-NEXT: DeviceLib/separate_compile_test.cpp // CHECK-NEXT: ESIMD/PerformanceTests/BitonicSortK.cpp // CHECK-NEXT: ESIMD/PerformanceTests/BitonicSortKv2.cpp From acaa6c4fa6ff40107638b2a8721663969dafc41d Mon Sep 17 00:00:00 2001 From: jinge90 Date: Fri, 27 Jun 2025 14:34:19 +0800 Subject: [PATCH 2/7] revert unneeded removal Signed-off-by: jinge90 --- libdevice/cmake/modules/SYCLLibdevice.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 6f7ab38f2232..53dc5b77343a 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -1,3 +1,4 @@ +include(CheckCXXCompilerFlag) set(obj_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") set(obj-new-offload_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") if (MSVC) From 0c270cee32b46188b2725346fc4b05c7322968ff Mon Sep 17 00:00:00 2001 From: jinge90 Date: Mon, 30 Jun 2025 16:55:00 +0800 Subject: [PATCH 3/7] get rid of skip archs Signed-off-by: jinge90 --- libdevice/cmake/modules/SYCLLibdevice.cmake | 52 +++++++++++++-------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 53dc5b77343a..74943c015fb4 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -78,15 +78,15 @@ endforeach() # file and all files created this way are linked into one large bitcode # library. # Additional compilation options are needed for compiling each device library. -set(devicelib_arch) +set(common_build_archs) if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) - list(APPEND devicelib_arch nvptx64-nvidia-cuda) + list(APPEND common_build_archs nvptx64-nvidia-cuda) set(compile_opts_nvptx64-nvidia-cuda "-fsycl-targets=nvptx64-nvidia-cuda" "-Xsycl-target-backend" "--cuda-gpu-arch=sm_50" "-nocudalib") set(opt_flags_nvptx64-nvidia-cuda "-O3" "--nvvm-reflect-enable=false") endif() if("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD) - list(APPEND devicelib_arch amdgcn-amd-amdhsa) + list(APPEND common_build_archs amdgcn-amd-amdhsa) set(compile_opts_amdgcn-amd-amdhsa "-nogpulib" "-fsycl-targets=amdgcn-amd-amdhsa" "-Xsycl-target-backend" "--offload-arch=gfx942") set(opt_flags_amdgcn-amd-amdhsa "-O3" "--amdgpu-oclc-reflect-enable=false") @@ -195,13 +195,14 @@ function(add_devicelibs filename) cmake_parse_arguments(ARG "" "" - "SRC;EXTRA_OPTS;DEPENDENCIES;SKIP_ARCHS;FILETYPES" + "SRC;EXTRA_OPTS;DEPENDENCIES;BUILD_ARCHS;FILETYPES" ${ARGN}) if(ARG_FILETYPES) set(devicelib_filetypes "${ARG_FILETYPES}") else() set(devicelib_filetypes "${filetypes}") endif() + set(devicelib_buildarchs "${ARG_BUILD_ARCHS}") foreach(filetype IN LISTS devicelib_filetypes) compile_lib(${filename} FILETYPE ${filetype} @@ -210,10 +211,7 @@ function(add_devicelibs filename) EXTRA_OPTS ${ARG_EXTRA_OPTS} ${${filetype}_device_compile_opts}) endforeach() - foreach(arch IN LISTS devicelib_arch) - if(arch IN_LIST ARG_SKIP_ARCHS) - continue() - endif() + foreach(arch IN LISTS devicelib_buildarchs) compile_lib(${filename}-${arch} FILETYPE bc SRC ${ARG_SRC} @@ -350,61 +348,69 @@ check_cxx_compiler_flag(-Wno-invalid-noreturn HAS_NO_INVALID_NORETURN_WARN_FLAG) # ones. add_devicelibs(libsycl-itt-stubs SRC itt_stubs.cpp + BUILD_ARCHS ${common_build_archs} DEPENDENCIES ${itt_obj_deps}) add_devicelibs(libsycl-itt-compiler-wrappers SRC itt_compiler_wrappers.cpp + BUILD_ARCHS ${common_build_archs} DEPENDENCIES ${itt_obj_deps}) add_devicelibs(libsycl-itt-user-wrappers SRC itt_user_wrappers.cpp + BUILD_ARCHS ${common_build_archs} DEPENDENCIES ${itt_obj_deps}) add_devicelibs(libsycl-crt SRC crt_wrapper.cpp + BUILD_ARCHS ${common_build_archs} DEPENDENCIES ${crt_obj_deps} EXTRA_OPTS $<$:-Wno-invalid-noreturn>) add_devicelibs(libsycl-complex SRC complex_wrapper.cpp + BUILD_ARCHS ${common_build_archs} DEPENDENCIES ${complex_obj_deps}) add_devicelibs(libsycl-complex-fp64 SRC complex_wrapper_fp64.cpp + BUILD_ARCHS ${common_build_archs} DEPENDENCIES ${complex_obj_deps} ) add_devicelibs(libsycl-cmath SRC cmath_wrapper.cpp + BUILD_ARCHS ${common_build_archs} DEPENDENCIES ${cmath_obj_deps}) add_devicelibs(libsycl-cmath-fp64 SRC cmath_wrapper_fp64.cpp + BUILD_ARCHS ${common_build_archs} DEPENDENCIES ${cmath_obj_deps} ) +set(imf_build_archs) add_devicelibs(libsycl-imf SRC imf_wrapper.cpp DEPENDENCIES ${imf_obj_deps} - SKIP_ARCHS nvptx64-nvidia-cuda - amdgcn-amd-amdhsa) + BUILD_ARCHS ${imf_build_archs}) add_devicelibs(libsycl-imf-fp64 SRC imf_wrapper_fp64.cpp DEPENDENCIES ${imf_obj_deps} - SKIP_ARCHS nvptx64-nvidia-cuda - amdgcn-amd-amdhsa) + BUILD_ARCHS ${imf_build_archs}) add_devicelibs(libsycl-imf-bf16 SRC imf_wrapper_bf16.cpp DEPENDENCIES ${imf_obj_deps} - SKIP_ARCHS nvptx64-nvidia-cuda - amdgcn-amd-amdhsa) + BUILD_ARCHS ${imf_build_archs}) add_devicelibs(libsycl-bfloat16 SRC bfloat16_wrapper.cpp + BUILD_ARCHS ${common_build_archs} DEPENDENCIES ${cmath_obj_deps}) if(MSVC) add_devicelibs(libsycl-msvc-math SRC msvc_math.cpp + BUILD_ARCHS ${common_build_archs} DEPENDENCIES ${cmath_obj_deps}) else() if(UR_SANITIZER_INCLUDE_DIR) + set(sanitizer_build_archs "") # asan jit add_devicelibs(libsycl-asan SRC sanitizer/asan_rtl.cpp DEPENDENCIES ${asan_obj_deps} - SKIP_ARCHS nvptx64-nvidia-cuda - amdgcn-amd-amdhsa + BUILD_ARCHS ${sanitizer_build_archs} FILETYPES "${filetypes_no_spv}" EXTRA_OPTS -fno-sycl-instrument-device-code -I${UR_SANITIZER_INCLUDE_DIR} @@ -427,8 +433,7 @@ else() add_devicelibs(libsycl-msan SRC sanitizer/msan_rtl.cpp DEPENDENCIES ${msan_obj_deps} - SKIP_ARCHS nvptx64-nvidia-cuda - amdgcn-amd-amdhsa + BUILD_ARCHS ${sanitizer_build_archs} FILETYPES "${filetypes_no_spv}" EXTRA_OPTS -fno-sycl-instrument-device-code -I${UR_SANITIZER_INCLUDE_DIR} @@ -451,8 +456,7 @@ else() add_devicelibs(libsycl-tsan SRC sanitizer/tsan_rtl.cpp DEPENDENCIES ${tsan_obj_deps} - SKIP_ARCHS nvptx64-nvidia-cuda - amdgcn-amd-amdhsa + BUILD_ARCHS ${sanitizer_build_archs} FILETYPES "${filetypes_no_spv}" EXTRA_OPTS -fno-sycl-instrument-device-code -I${UR_SANITIZER_INCLUDE_DIR} @@ -475,30 +479,38 @@ endif() add_devicelibs(libsycl-fallback-cassert SRC fallback-cassert.cpp + BUILD_ARCHS ${common_build_archs} DEPENDENCIES ${crt_obj_deps} EXTRA_OPTS -fno-sycl-instrument-device-code) add_devicelibs(libsycl-fallback-cstring SRC fallback-cstring.cpp + BUILD_ARCHS ${common_build_archs} DEPENDENCIES ${crt_obj_deps}) add_devicelibs(libsycl-fallback-complex SRC fallback-complex.cpp + BUILD_ARCHS ${common_build_archs} DEPENDENCIES ${complex_obj_deps}) add_devicelibs(libsycl-fallback-complex-fp64 SRC fallback-complex-fp64.cpp + BUILD_ARCHS ${common_build_archs} DEPENDENCIES ${complex_obj_deps}) add_devicelibs(libsycl-fallback-cmath SRC fallback-cmath.cpp + BUILD_ARCHS ${common_build_archs} DEPENDENCIES ${cmath_obj_deps}) add_devicelibs(libsycl-fallback-cmath-fp64 SRC fallback-cmath-fp64.cpp + BUILD_ARCHS ${common_build_archs} DEPENDENCIES ${cmath_obj_deps}) add_devicelibs(libsycl-fallback-bfloat16 SRC fallback-bfloat16.cpp FILETYPES "${filetypes_no_spv}" + BUILD_ARCHS ${common_build_archs} DEPENDENCIES ${bfloat16_obj_deps}) add_devicelibs(libsycl-native-bfloat16 SRC bfloat16_wrapper.cpp FILETYPES "${filetypes_no_spv}" + BUILD_ARCHS ${common_build_archs} DEPENDENCIES ${bfloat16_obj_deps}) # Create dependency and source lists for Intel math function libraries. From 82cf5a3d27a99e4f01164c3d97cb7b37d50c8743 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Tue, 1 Jul 2025 09:17:19 +0800 Subject: [PATCH 4/7] fix building failure for devicelib-cuda Signed-off-by: jinge90 --- libdevice/cmake/modules/SYCLLibdevice.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 74943c015fb4..4d7092074b0d 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -178,7 +178,7 @@ function(append_to_property list) endfunction() # Creates device libaries for all filetypes. -# Adds bitcode library files additionally for each devicelib_arch target and +# Adds bitcode library files additionally for each devicelib build arch and # adds the created file to an arch specific global property. # # Arguments: @@ -671,7 +671,7 @@ endforeach() # Create one large bitcode file for the NVPTX and AMD targets. # Use all the files collected in the respective global properties. -foreach(arch IN LISTS devicelib_arch) +foreach(arch IN LISTS common_build_archs) get_property(BC_DEVICE_LIBS_${arch} GLOBAL PROPERTY BC_DEVICE_LIBS_${arch}) # Link the bitcode files together. link_bc(TARGET device_lib_device_${arch} From 905960afef84baa41fa3cf267c56f251d7652265 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Tue, 1 Jul 2025 16:24:24 +0800 Subject: [PATCH 5/7] rename common_build_archs Signed-off-by: jinge90 --- libdevice/cmake/modules/SYCLLibdevice.cmake | 44 ++++++++++----------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 4d7092074b0d..705ed20d8da2 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -78,15 +78,15 @@ endforeach() # file and all files created this way are linked into one large bitcode # library. # Additional compilation options are needed for compiling each device library. -set(common_build_archs) +set(full_build_archs) if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) - list(APPEND common_build_archs nvptx64-nvidia-cuda) + list(APPEND full_build_archs nvptx64-nvidia-cuda) set(compile_opts_nvptx64-nvidia-cuda "-fsycl-targets=nvptx64-nvidia-cuda" "-Xsycl-target-backend" "--cuda-gpu-arch=sm_50" "-nocudalib") set(opt_flags_nvptx64-nvidia-cuda "-O3" "--nvvm-reflect-enable=false") endif() if("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD) - list(APPEND common_build_archs amdgcn-amd-amdhsa) + list(APPEND full_build_archs amdgcn-amd-amdhsa) set(compile_opts_amdgcn-amd-amdhsa "-nogpulib" "-fsycl-targets=amdgcn-amd-amdhsa" "-Xsycl-target-backend" "--offload-arch=gfx942") set(opt_flags_amdgcn-amd-amdhsa "-O3" "--amdgpu-oclc-reflect-enable=false") @@ -348,38 +348,38 @@ check_cxx_compiler_flag(-Wno-invalid-noreturn HAS_NO_INVALID_NORETURN_WARN_FLAG) # ones. add_devicelibs(libsycl-itt-stubs SRC itt_stubs.cpp - BUILD_ARCHS ${common_build_archs} + BUILD_ARCHS ${full_build_archs} DEPENDENCIES ${itt_obj_deps}) add_devicelibs(libsycl-itt-compiler-wrappers SRC itt_compiler_wrappers.cpp - BUILD_ARCHS ${common_build_archs} + BUILD_ARCHS ${full_build_archs} DEPENDENCIES ${itt_obj_deps}) add_devicelibs(libsycl-itt-user-wrappers SRC itt_user_wrappers.cpp - BUILD_ARCHS ${common_build_archs} + BUILD_ARCHS ${full_build_archs} DEPENDENCIES ${itt_obj_deps}) add_devicelibs(libsycl-crt SRC crt_wrapper.cpp - BUILD_ARCHS ${common_build_archs} + BUILD_ARCHS ${full_build_archs} DEPENDENCIES ${crt_obj_deps} EXTRA_OPTS $<$:-Wno-invalid-noreturn>) add_devicelibs(libsycl-complex SRC complex_wrapper.cpp - BUILD_ARCHS ${common_build_archs} + BUILD_ARCHS ${full_build_archs} DEPENDENCIES ${complex_obj_deps}) add_devicelibs(libsycl-complex-fp64 SRC complex_wrapper_fp64.cpp - BUILD_ARCHS ${common_build_archs} + BUILD_ARCHS ${full_build_archs} DEPENDENCIES ${complex_obj_deps} ) add_devicelibs(libsycl-cmath SRC cmath_wrapper.cpp - BUILD_ARCHS ${common_build_archs} + BUILD_ARCHS ${full_build_archs} DEPENDENCIES ${cmath_obj_deps}) add_devicelibs(libsycl-cmath-fp64 SRC cmath_wrapper_fp64.cpp - BUILD_ARCHS ${common_build_archs} + BUILD_ARCHS ${full_build_archs} DEPENDENCIES ${cmath_obj_deps} ) set(imf_build_archs) add_devicelibs(libsycl-imf @@ -396,12 +396,12 @@ add_devicelibs(libsycl-imf-bf16 BUILD_ARCHS ${imf_build_archs}) add_devicelibs(libsycl-bfloat16 SRC bfloat16_wrapper.cpp - BUILD_ARCHS ${common_build_archs} + BUILD_ARCHS ${full_build_archs} DEPENDENCIES ${cmath_obj_deps}) if(MSVC) add_devicelibs(libsycl-msvc-math SRC msvc_math.cpp - BUILD_ARCHS ${common_build_archs} + BUILD_ARCHS ${full_build_archs} DEPENDENCIES ${cmath_obj_deps}) else() if(UR_SANITIZER_INCLUDE_DIR) @@ -479,38 +479,38 @@ endif() add_devicelibs(libsycl-fallback-cassert SRC fallback-cassert.cpp - BUILD_ARCHS ${common_build_archs} + BUILD_ARCHS ${full_build_archs} DEPENDENCIES ${crt_obj_deps} EXTRA_OPTS -fno-sycl-instrument-device-code) add_devicelibs(libsycl-fallback-cstring SRC fallback-cstring.cpp - BUILD_ARCHS ${common_build_archs} + BUILD_ARCHS ${full_build_archs} DEPENDENCIES ${crt_obj_deps}) add_devicelibs(libsycl-fallback-complex SRC fallback-complex.cpp - BUILD_ARCHS ${common_build_archs} + BUILD_ARCHS ${full_build_archs} DEPENDENCIES ${complex_obj_deps}) add_devicelibs(libsycl-fallback-complex-fp64 SRC fallback-complex-fp64.cpp - BUILD_ARCHS ${common_build_archs} + BUILD_ARCHS ${full_build_archs} DEPENDENCIES ${complex_obj_deps}) add_devicelibs(libsycl-fallback-cmath SRC fallback-cmath.cpp - BUILD_ARCHS ${common_build_archs} + BUILD_ARCHS ${full_build_archs} DEPENDENCIES ${cmath_obj_deps}) add_devicelibs(libsycl-fallback-cmath-fp64 SRC fallback-cmath-fp64.cpp - BUILD_ARCHS ${common_build_archs} + BUILD_ARCHS ${full_build_archs} DEPENDENCIES ${cmath_obj_deps}) add_devicelibs(libsycl-fallback-bfloat16 SRC fallback-bfloat16.cpp FILETYPES "${filetypes_no_spv}" - BUILD_ARCHS ${common_build_archs} + BUILD_ARCHS ${full_build_archs} DEPENDENCIES ${bfloat16_obj_deps}) add_devicelibs(libsycl-native-bfloat16 SRC bfloat16_wrapper.cpp FILETYPES "${filetypes_no_spv}" - BUILD_ARCHS ${common_build_archs} + BUILD_ARCHS ${full_build_archs} DEPENDENCIES ${bfloat16_obj_deps}) # Create dependency and source lists for Intel math function libraries. @@ -671,7 +671,7 @@ endforeach() # Create one large bitcode file for the NVPTX and AMD targets. # Use all the files collected in the respective global properties. -foreach(arch IN LISTS common_build_archs) +foreach(arch IN LISTS full_build_archs) get_property(BC_DEVICE_LIBS_${arch} GLOBAL PROPERTY BC_DEVICE_LIBS_${arch}) # Link the bitcode files together. link_bc(TARGET device_lib_device_${arch} From fcafc3fa495a3e2dd426053b58bf3b6d37f338b6 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Tue, 8 Jul 2025 13:00:02 +0800 Subject: [PATCH 6/7] address review comments Signed-off-by: jinge90 --- libdevice/cmake/modules/SYCLLibdevice.cmake | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 705ed20d8da2..dc8b3c26bc52 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -202,7 +202,6 @@ function(add_devicelibs filename) else() set(devicelib_filetypes "${filetypes}") endif() - set(devicelib_buildarchs "${ARG_BUILD_ARCHS}") foreach(filetype IN LISTS devicelib_filetypes) compile_lib(${filename} FILETYPE ${filetype} @@ -211,7 +210,7 @@ function(add_devicelibs filename) EXTRA_OPTS ${ARG_EXTRA_OPTS} ${${filetype}_device_compile_opts}) endforeach() - foreach(arch IN LISTS devicelib_buildarchs) + foreach(arch IN LISTS ARG_BUILD_ARCHS) compile_lib(${filename}-${arch} FILETYPE bc SRC ${ARG_SRC} @@ -405,7 +404,7 @@ if(MSVC) DEPENDENCIES ${cmath_obj_deps}) else() if(UR_SANITIZER_INCLUDE_DIR) - set(sanitizer_build_archs "") + set(sanitizer_build_archs) # asan jit add_devicelibs(libsycl-asan SRC sanitizer/asan_rtl.cpp From 94cdb51e0141478db43aea72e03d168106f35eb5 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Tue, 8 Jul 2025 16:57:57 +0800 Subject: [PATCH 7/7] retain imf build for other targets Signed-off-by: jinge90 --- libdevice/cmake/modules/SYCLLibdevice.cmake | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index dc8b3c26bc52..9e95fa4ed8f3 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -668,6 +668,26 @@ foreach(dtype IN ITEMS bf16 fp32 fp64) endforeach() endforeach() +# Add device fallback imf libraries for single bitcode targets. +# The output files are bitcode. +foreach(arch IN LISTS imf_build_archs) + foreach(dtype IN ITEMS bf16 fp32 fp64) + set(tgt_name imf_fallback_${dtype}_bc_${arch}) + + add_lib_imf(libsycl-fallback-imf-${arch}-${dtype} + ARCH ${arch} + DIR ${bc_binary_dir} + FTYPE bc + DTYPE ${dtype} + EXTRA_OPTS ${bc_device_compile_opts} ${compile_opts_${arch}} + TGT_NAME ${tgt_name}) + + append_to_property( + ${bc_binary_dir}/libsycl-fallback-imf-${arch}-${dtype}.${bc-suffix} + PROPERTY_NAME BC_DEVICE_LIBS_${arch}) + endforeach() +endforeach() + # Create one large bitcode file for the NVPTX and AMD targets. # Use all the files collected in the respective global properties. foreach(arch IN LISTS full_build_archs)