diff --git a/conda-recipe/run_test.bat b/conda-recipe/run_test.bat
index 7b9ed8e820..dbcdc5d6e9 100644
--- a/conda-recipe/run_test.bat
+++ b/conda-recipe/run_test.bat
@@ -1,4 +1,16 @@
-pytest -q -ra --disable-warnings --pyargs numba_dpex -vv
-IF %ERRORLEVEL% NEQ 0 exit /B 1
+REM Clear any inherited selector before querying dpctl for devices.
+set "ONEAPI_DEVICE_SELECTOR="
+
+REM Run the full test suite once per backend:device_type pair printed by the
+REM python one-liner. The exit code is tested with a conditional operator
+REM because a percent-style ERRORLEVEL inside the parenthesised loop body is
+REM expanded once, when the block is parsed, and never sees the pytest result.
+for /F "USEBACKQ tokens=* delims=" %%F in (
+`python -c "import dpctl; print(\"\n\".join([dev.backend.name+\":\"+dev.device_type.name for dev in dpctl.get_devices() if dev.device_type.name in [\"cpu\",\"gpu\"]]))"`
+) do (
+    set "ONEAPI_DEVICE_SELECTOR=%%F"
+
+    pytest -q -ra --disable-warnings --pyargs numba_dpex -vv || exit /B 1
+)
 
 exit /B 0
diff --git a/conda-recipe/run_test.sh b/conda-recipe/run_test.sh
old mode 100644
new mode 100755
index dbcd713d64..4454e3abae
--- a/conda-recipe/run_test.sh
+++ b/conda-recipe/run_test.sh
@@ -1,12 +1,27 @@
 #!/bin/bash
 
 set -euxo pipefail
+unset ONEAPI_DEVICE_SELECTOR
 
-pytest -q -ra --disable-warnings --pyargs numba_dpex -vv
+# Collect one "backend:device_type" selector per cpu/gpu device reported by
+# dpctl. The query is assigned to a variable first so that `set -e` aborts the
+# script if it fails; a failure inside a `for` word list would be ignored.
+selectors=$(python -c "import dpctl; print(\" \".join([dev.backend.name+\":\"+dev.device_type.name for dev in dpctl.get_devices() if dev.device_type.name in [\"cpu\",\"gpu\"]]))")
+
+for selector in $selectors
+do
+    export "ONEAPI_DEVICE_SELECTOR=$selector"
+    # `unset` takes a bare variable name; passing NAME=1 is an error that
+    # terminates the script under `set -e`.
+    unset NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE
 
-export NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE=1
+    pytest -q -ra --disable-warnings --pyargs numba_dpex -vv
 
-pytest -q -ra --disable-warnings -vv \
-    --pyargs numba_dpex.tests.kernel_tests.test_atomic_op::test_atomic_fp_native
+    # Re-run test_atomic_fp_native with the variable exported as 1.
+    export NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE=1
+
+    pytest -q -ra --disable-warnings -vv \
+        --pyargs numba_dpex.tests.kernel_tests.test_atomic_op::test_atomic_fp_native
+done
 
 exit 0
diff --git a/numba_dpex/core/parfors/kernel_builder.py b/numba_dpex/core/parfors/kernel_builder.py
index 7200a6e62a..a941e03c10 100644
--- a/numba_dpex/core/parfors/kernel_builder.py
+++ b/numba_dpex/core/parfors/kernel_builder.py
@@ -28,7 +28,7 @@
 from numba_dpex import config
 
 from ..descriptor import dpex_kernel_target
-from ..types.dpnp_ndarray_type import DpnpNdArray
+from ..types import DpnpNdArray, USMNdArray
 from ..utils.kernel_templates import RangeKernelTemplate
 
 
@@ -70,6 +70,30 @@ def _compile_kernel_parfor(
         func_ir, kernel_name
     )
 
+    # A cast from DpnpNdArray type to USMNdArray is needed for all arguments of
+    # DpnpNdArray type. Although DpnpNdArray derives from USMNdArray, the two
+    # types use different data models. USMNdArray uses the
+    # numba_dpex.core.datamodel.models.ArrayModel data model that defines all
+    # CPointer type members in the GLOBAL address space. The DpnpNdArray uses
+    # Numba's default ArrayModel that does not define pointers in any specific
+    # address space. For OpenCL HD Graphics devices, defining a kernel function
+    # (spir_kernel calling convention) with pointer arguments that have no
+    # address space qualifier causes a run time crash. By casting the argument
+    # type for parfor arguments from DpnpNdArray type to the USMNdArray type the
+    # generated kernel always has an address space qualifier, avoiding the issue
+    # on OpenCL HD graphics devices.
+
+    for i, argty in enumerate(argtypes):
+        if isinstance(argty, DpnpNdArray):
+            new_argty = USMNdArray(
+                ndim=argty.ndim,
+                layout=argty.layout,
+                dtype=argty.dtype,
+                usm_type=argty.usm_type,
+                queue=argty.queue,
+            )
+            argtypes[i] = new_argty
+
     # compile the kernel
     kernel.compile(
         args=argtypes,
diff --git a/numba_dpex/core/types/dpnp_ndarray_type.py b/numba_dpex/core/types/dpnp_ndarray_type.py
index 75d77141c4..04edec02b1 100644
--- a/numba_dpex/core/types/dpnp_ndarray_type.py
+++ b/numba_dpex/core/types/dpnp_ndarray_type.py
@@ -58,6 +58,14 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
         else:
             return
 
+    def __str__(self):
+        """Return ``self.name`` with "USMNdArray" replaced by "DpnpNdArray"."""
+        return self.name.replace("USMNdArray", "DpnpNdArray")
+
+    def __repr__(self):
+        """Return the same text as ``__str__``."""
+        return str(self)
+
     def __allocate__(
         self,
         typingctx,
diff --git a/numba_dpex/core/types/usm_ndarray_type.py b/numba_dpex/core/types/usm_ndarray_type.py
index f6eb08564f..f5d83783b1 100644
--- a/numba_dpex/core/types/usm_ndarray_type.py
+++ b/numba_dpex/core/types/usm_ndarray_type.py
@@ -87,7 +87,7 @@ def __init__(
             self.dtype = dtype
 
         if name is None:
-            type_name = "usm_ndarray"
+            type_name = "USMNdArray"
             if readonly:
                 type_name = "readonly " + type_name
             if not aligned:
@@ -116,6 +116,10 @@ def __init__(
             aligned=aligned,
         )
 
+    def __repr__(self):
+        """Return the type's ``name`` attribute as its representation."""
+        return self.name
+
     def copy(
         self,
         dtype=None,
diff --git a/numba_dpex/tests/core/passes/test_parfor_legalize_cfd_pass.py b/numba_dpex/tests/core/passes/test_parfor_legalize_cfd_pass.py
index 6c23bd6147..81ebfe32d6 100644
--- a/numba_dpex/tests/core/passes/test_parfor_legalize_cfd_pass.py
+++ b/numba_dpex/tests/core/passes/test_parfor_legalize_cfd_pass.py
@@ -14,7 +14,7 @@
 
 from numba_dpex import dpjit
 from numba_dpex.core.exceptions import ExecutionQueueInferenceError
-from numba_dpex.tests._helper import skip_no_opencl_gpu
+from numba_dpex.tests._helper import skip_no_opencl_cpu, skip_no_opencl_gpu
 
 shapes = [10, (2, 5)]
 dtypes = [dpnp.int32, dpnp.int64, dpnp.float32, dpnp.float64]
@@ -58,6 +58,7 @@ def test_parfor_legalize_cfd_pass(shape, dtype, usm_type, device):
 
 
 @skip_no_opencl_gpu
+@skip_no_opencl_cpu
 def test_parfor_legalize_cfd_pass_raise():
     a = dpnp.zeros(shape=10, device="cpu")
     b = dpnp.ones(shape=10, device="gpu")
@@ -67,6 +68,7 @@ def test_parfor_legalize_cfd_pass_raise():
 
 
 @skip_no_opencl_gpu
+@skip_no_opencl_cpu
 def test_cfd_error_due_to_lhs():
     a = dpnp.zeros(shape=10, device="cpu")
     b = dpnp.ones(shape=10, device="cpu")