Run the packaged test suite once per SYCL device and retype parfor kernel
arguments so that generated kernels carry an address space qualifier.

* conda-recipe/run_test.bat, conda-recipe/run_test.sh: ask dpctl for every
  device whose device_type is "cpu" or "gpu", set ONEAPI_DEVICE_SELECTOR to
  "<backend>:<device_type>" for each one and run pytest under that selector.
* numba_dpex/core/parfors/kernel_builder.py: replace every DpnpNdArray
  argument type with an equivalent USMNdArray before kernel.compile().
* numba_dpex/core/types: the default USMNdArray type name becomes
  "USMNdArray"; USMNdArray gains __repr__, DpnpNdArray gains __str__/__repr__.
* numba_dpex/tests/core/passes/test_parfor_legalize_cfd_pass.py: add the
  skip_no_opencl_cpu marker to the two tests that allocate on device="cpu".

NOTE(review): this patch was restored from a whitespace-collapsed copy. The
indentation of context lines is reconstructed and should be confirmed against
the target tree. The "index" hashes are kept as found and do not reflect the
three added lines that were corrected (bash unset operand, batch exit-code
check, "DpnpNdArray" spelling).

diff --git a/conda-recipe/run_test.bat b/conda-recipe/run_test.bat
index 7b9ed8e820..dbcdc5d6e9 100644
--- a/conda-recipe/run_test.bat
+++ b/conda-recipe/run_test.bat
@@ -1,4 +1,11 @@
-pytest -q -ra --disable-warnings --pyargs numba_dpex -vv
-IF %ERRORLEVEL% NEQ 0 exit /B 1
+set "ONEAPI_DEVICE_SELECTOR="
+
+for /F "USEBACKQ tokens=* delims=" %%F in (
+`python -c "import dpctl; print(\"\n\".join([dev.backend.name+\":\"+dev.device_type.name for dev in dpctl.get_devices() if dev.device_type.name in [\"cpu\",\"gpu\"]]))"`
+) do (
+    set "ONEAPI_DEVICE_SELECTOR=%%F"
+
+    pytest -q -ra --disable-warnings --pyargs numba_dpex -vv || exit /B 1
+)
 
 exit /B 0
diff --git a/conda-recipe/run_test.sh b/conda-recipe/run_test.sh
old mode 100644
new mode 100755
index dbcd713d64..4454e3abae
--- a/conda-recipe/run_test.sh
+++ b/conda-recipe/run_test.sh
@@ -1,12 +1,19 @@
 #!/bin/bash
 
 set -euxo pipefail
+unset ONEAPI_DEVICE_SELECTOR
 
-pytest -q -ra --disable-warnings --pyargs numba_dpex -vv
+for selector in $(python -c "import dpctl; print(\" \".join([dev.backend.name+\":\"+dev.device_type.name for dev in dpctl.get_devices() if dev.device_type.name in [\"cpu\",\"gpu\"]]))")
+do
+    export "ONEAPI_DEVICE_SELECTOR=$selector"
+    unset NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE
 
-export NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE=1
+    pytest -q -ra --disable-warnings --pyargs numba_dpex -vv
 
-pytest -q -ra --disable-warnings -vv \
-    --pyargs numba_dpex.tests.kernel_tests.test_atomic_op::test_atomic_fp_native
+    export NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE=1
+
+    pytest -q -ra --disable-warnings -vv \
+        --pyargs numba_dpex.tests.kernel_tests.test_atomic_op::test_atomic_fp_native
+done
 
 exit 0
diff --git a/numba_dpex/core/parfors/kernel_builder.py b/numba_dpex/core/parfors/kernel_builder.py
index 7200a6e62a..a941e03c10 100644
--- a/numba_dpex/core/parfors/kernel_builder.py
+++ b/numba_dpex/core/parfors/kernel_builder.py
@@ -28,7 +28,7 @@
 from numba_dpex import config
 
 from ..descriptor import dpex_kernel_target
-from ..types.dpnp_ndarray_type import DpnpNdArray
+from ..types import DpnpNdArray, USMNdArray
 from ..utils.kernel_templates import RangeKernelTemplate
 
 
@@ -70,6 +70,30 @@ def _compile_kernel_parfor(
         func_ir, kernel_name
     )
 
+    # A cast from DpnpNdArray type to USMNdArray is needed for all arguments of
+    # DpnpNdArray type. Although DpnpNdArray derives from USMNdArray, the two
+    # types use different data models. USMNdArray uses the
+    # numba_dpex.core.datamodel.models.ArrayModel data model that defines all
+    # CPointer type members in the GLOBAL address space. The DpnpNdArray uses
+    # Numba's default ArrayModel that does not define pointers in any specific
+    # address space. For OpenCL HD Graphics devices, defining a kernel function
+    # (spir_kernel calling convention) with pointer arguments that have no
+    # address space qualifier causes a runtime crash. By casting the argument
+    # type for parfor arguments from DpnpNdArray type to the USMNdArray type the
+    # generated kernel always has an address space qualifier, avoiding the issue
+    # on OpenCL HD Graphics devices.
+
+    for i, argty in enumerate(argtypes):
+        if isinstance(argty, DpnpNdArray):
+            new_argty = USMNdArray(
+                ndim=argty.ndim,
+                layout=argty.layout,
+                dtype=argty.dtype,
+                usm_type=argty.usm_type,
+                queue=argty.queue,
+            )
+            argtypes[i] = new_argty
+
     # compile the kernel
     kernel.compile(
         args=argtypes,
diff --git a/numba_dpex/core/types/dpnp_ndarray_type.py b/numba_dpex/core/types/dpnp_ndarray_type.py
index 75d77141c4..04edec02b1 100644
--- a/numba_dpex/core/types/dpnp_ndarray_type.py
+++ b/numba_dpex/core/types/dpnp_ndarray_type.py
@@ -58,6 +58,12 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
         else:
             return
 
+    def __str__(self):
+        return self.name.replace("USMNdArray", "DpnpNdArray")
+
+    def __repr__(self):
+        return self.__str__()
+
     def __allocate__(
         self,
         typingctx,
diff --git a/numba_dpex/core/types/usm_ndarray_type.py b/numba_dpex/core/types/usm_ndarray_type.py
index f6eb08564f..f5d83783b1 100644
--- a/numba_dpex/core/types/usm_ndarray_type.py
+++ b/numba_dpex/core/types/usm_ndarray_type.py
@@ -87,7 +87,7 @@ def __init__(
         self.dtype = dtype
 
         if name is None:
-            type_name = "usm_ndarray"
+            type_name = "USMNdArray"
             if readonly:
                 type_name = "readonly " + type_name
             if not aligned:
@@ -116,6 +116,9 @@ def __init__(
             aligned=aligned,
         )
 
+    def __repr__(self):
+        return self.name
+
     def copy(
         self,
         dtype=None,
diff --git a/numba_dpex/tests/core/passes/test_parfor_legalize_cfd_pass.py b/numba_dpex/tests/core/passes/test_parfor_legalize_cfd_pass.py
index 6c23bd6147..81ebfe32d6 100644
--- a/numba_dpex/tests/core/passes/test_parfor_legalize_cfd_pass.py
+++ b/numba_dpex/tests/core/passes/test_parfor_legalize_cfd_pass.py
@@ -14,7 +14,7 @@
 
 from numba_dpex import dpjit
 from numba_dpex.core.exceptions import ExecutionQueueInferenceError
-from numba_dpex.tests._helper import skip_no_opencl_gpu
+from numba_dpex.tests._helper import skip_no_opencl_cpu, skip_no_opencl_gpu
 
 shapes = [10, (2, 5)]
 dtypes = [dpnp.int32, dpnp.int64, dpnp.float32, dpnp.float64]
@@ -58,6 +58,7 @@ def test_parfor_legalize_cfd_pass(shape, dtype, usm_type, device):
 
 
 @skip_no_opencl_gpu
+@skip_no_opencl_cpu
 def test_parfor_legalize_cfd_pass_raise():
     a = dpnp.zeros(shape=10, device="cpu")
     b = dpnp.ones(shape=10, device="gpu")
@@ -67,6 +68,7 @@ def test_parfor_legalize_cfd_pass_raise():
 
 
 @skip_no_opencl_gpu
+@skip_no_opencl_cpu
 def test_cfd_error_due_to_lhs():
     a = dpnp.zeros(shape=10, device="cpu")
     b = dpnp.ones(shape=10, device="cpu")