From c30e1bcd61455e43f5c4c7394f29c61ddd38e576 Mon Sep 17 00:00:00 2001 From: Mingjie Wang Date: Sun, 12 Feb 2023 19:42:34 -0600 Subject: [PATCH 01/15] Changes to usm_ndarray_type. - Made all args to the constructor except ndim as optional. - If no queue or device is provided then select a default queue using dpctl. - Select a default dtype using the same logic as dpctl.tensor. - Bugfix: make sure the derived dtype is passed to the parent Array type's constuctor. - Fix tests and examples impacted by UsmNdArray type changes. - Skip all dpnp.empty tests for now. These tests will be changed once the new implementation for dpnp.empty is merged. --- numba_dpex/core/types/usm_ndarray_type.py | 67 +++++++++++++++---- .../examples/kernel/kernel_specialization.py | 4 +- numba_dpex/tests/kernel_tests/test_barrier.py | 2 +- .../test_kernel_has_return_value_error.py | 2 +- .../test_kernel_specialization.py | 4 +- .../dpnp_ndarray/test_dpnp_empty.py | 1 + .../njit_tests/dpnp_ndarray/test_models.py | 4 +- numba_dpex/tests/test_debuginfo.py | 2 +- 8 files changed, 65 insertions(+), 21 deletions(-) diff --git a/numba_dpex/core/types/usm_ndarray_type.py b/numba_dpex/core/types/usm_ndarray_type.py index b2b8cdd24b..8c6a493f2c 100644 --- a/numba_dpex/core/types/usm_ndarray_type.py +++ b/numba_dpex/core/types/usm_ndarray_type.py @@ -8,7 +8,9 @@ import dpctl import dpctl.tensor from numba.core.typeconv import Conversion +from numba.core.typeinfer import CallConstraint from numba.core.types.npytypes import Array +from numba.np.numpy_support import from_dtype from numba_dpex.utils import address_space @@ -18,10 +20,10 @@ class USMNdArray(Array): def __init__( self, - dtype, ndim, - layout, - usm_type="unknown", + layout="C", + dtype=None, + usm_type="device", device="unknown", queue=None, readonly=False, @@ -32,15 +34,53 @@ def __init__( self.usm_type = usm_type self.addrspace = addrspace - # Normalize the device filter string and get the fully qualified three - # tuple (backend:device_type:device_num) filter string from dpctl. - if device != "unknown": - _d = dpctl.SyclDevice(device) - self.device = _d.filter_string + if queue is not None and device != "unknown": + if not isinstance(device, str): + raise TypeError( + "The device keyword arg should be a str object specifying " + "a SYCL filter selector" + ) + if not isinstance(queue, dpctl.SyclQueue): + raise TypeError( + "The queue keyword arg should be a dpctl.SyclQueue object" + ) + d1 = queue.sycl_device + d2 = dpctl.SyclDevice(device) + if d1 != d2: + raise TypeError( + "The queue keyword arg and the device keyword arg specify " + "different SYCL devices" + ) + self.queue = queue + self.device = device + elif queue is None and device != "unknown": + if not isinstance(device, str): + raise TypeError( + "The device keyword arg should be a str object specifying " + "a SYCL filter selector" + ) + self.queue = dpctl.SyclQueue(device) + self.device = device + elif queue is not None and device == "unknown": + if not isinstance(queue, dpctl.SyclQueue): + raise TypeError( + "The queue keyword arg should be a dpctl.SyclQueue object" + ) + self.device = self.queue.sycl_device.filter_string + self.queue = queue else: - self.device = "unknown" + self.queue = dpctl.SyclQueue() + self.device = self.queue.sycl_device.filter_string - self.queue = queue + if not dtype: + dummy_tensor = dpctl.tensor.empty( + sh=1, order=layout, usm_type=usm_type, sycl_queue=self.queue + ) + # convert dpnp type to numba/numpy type + _dtype = dummy_tensor.dtype + self.dtype = from_dtype(_dtype) + else: + self.dtype = dtype if name is None: type_name = "usm_ndarray" @@ -50,20 +90,21 @@ def __init__( type_name = "unaligned " + type_name name_parts = ( type_name, - dtype, + self.dtype, ndim, layout, self.addrspace, usm_type, self.device, + self.queue, ) name = ( "%s(dtype=%s, ndim=%s, layout=%s, address_space=%s, " - "usm_type=%s, sycl_device=%s)" % name_parts + "usm_type=%s, device=%s, sycl_device=%s)" % name_parts ) super().__init__( - dtype, + self.dtype, ndim, layout, readonly=readonly, diff --git a/numba_dpex/examples/kernel/kernel_specialization.py b/numba_dpex/examples/kernel/kernel_specialization.py index 1651835a77..2c78bd76a8 100644 --- a/numba_dpex/examples/kernel/kernel_specialization.py +++ b/numba_dpex/examples/kernel/kernel_specialization.py @@ -19,8 +19,8 @@ # ------------ Example 1. ------------ # # Define type specializations using the numba_ndpx usm_ndarray data type. -i64arrty = usm_ndarray(int64, 1, "C") -f32arrty = usm_ndarray(float32, 1, "C") +i64arrty = usm_ndarray(1, "C", int64) +f32arrty = usm_ndarray(1, "C", float32) # specialize a kernel for the i64arrty diff --git a/numba_dpex/tests/kernel_tests/test_barrier.py b/numba_dpex/tests/kernel_tests/test_barrier.py index 9223caa641..fe3ff86cb7 100644 --- a/numba_dpex/tests/kernel_tests/test_barrier.py +++ b/numba_dpex/tests/kernel_tests/test_barrier.py @@ -11,7 +11,7 @@ from numba_dpex import float32, usm_ndarray, void from numba_dpex.tests._helper import filter_strings -f32arrty = usm_ndarray(float32, 1, "C") +f32arrty = usm_ndarray(ndim=1, dtype=float32, layout="C") @pytest.mark.parametrize("filter_str", filter_strings) diff --git a/numba_dpex/tests/kernel_tests/test_kernel_has_return_value_error.py b/numba_dpex/tests/kernel_tests/test_kernel_has_return_value_error.py index d417924513..872d702b33 100644 --- a/numba_dpex/tests/kernel_tests/test_kernel_has_return_value_error.py +++ b/numba_dpex/tests/kernel_tests/test_kernel_has_return_value_error.py @@ -9,7 +9,7 @@ import numba_dpex as dpex from numba_dpex import int32, usm_ndarray -i32arrty = usm_ndarray(int32, 1, "C") +i32arrty = usm_ndarray(ndim=1, dtype=int32, layout="C") def f(a): diff --git a/numba_dpex/tests/kernel_tests/test_kernel_specialization.py b/numba_dpex/tests/kernel_tests/test_kernel_specialization.py index e156ea27ac..12a07701c1 100644 --- a/numba_dpex/tests/kernel_tests/test_kernel_specialization.py +++ b/numba_dpex/tests/kernel_tests/test_kernel_specialization.py @@ -13,8 +13,8 @@ ) from numba_dpex.core.kernel_interface.utils import Range -i64arrty = usm_ndarray(int64, 1, "C") -f32arrty = usm_ndarray(float32, 1, "C") +i64arrty = usm_ndarray(ndim=1, dtype=int64, layout="C") +f32arrty = usm_ndarray(ndim=1, dtype=float32, layout="C") specialized_kernel1 = dpex.kernel((i64arrty, i64arrty, i64arrty)) specialized_kernel2 = dpex.kernel( diff --git a/numba_dpex/tests/njit_tests/dpnp_ndarray/test_dpnp_empty.py b/numba_dpex/tests/njit_tests/dpnp_ndarray/test_dpnp_empty.py index 435a59d10b..a5f918e535 100644 --- a/numba_dpex/tests/njit_tests/dpnp_ndarray/test_dpnp_empty.py +++ b/numba_dpex/tests/njit_tests/dpnp_ndarray/test_dpnp_empty.py @@ -13,6 +13,7 @@ usm_types = ["device", "shared", "host"] +@pytest.mark.skip(reason="Disabling old dpnp.empty tests") @pytest.mark.parametrize("shape", shapes) @pytest.mark.parametrize("dtype", dtypes) @pytest.mark.parametrize("usm_type", usm_types) diff --git a/numba_dpex/tests/njit_tests/dpnp_ndarray/test_models.py b/numba_dpex/tests/njit_tests/dpnp_ndarray/test_models.py index 9dc7a5cf84..fad076294c 100644 --- a/numba_dpex/tests/njit_tests/dpnp_ndarray/test_models.py +++ b/numba_dpex/tests/njit_tests/dpnp_ndarray/test_models.py @@ -16,7 +16,9 @@ def test_model_for_DpnpNdArray(): """ - model = default_manager.lookup(DpnpNdArray(types.float64, 1, "C")) + model = default_manager.lookup( + DpnpNdArray(ndim=1, dtype=types.float64, layout="C") + ) assert isinstance(model, ArrayModel) diff --git a/numba_dpex/tests/test_debuginfo.py b/numba_dpex/tests/test_debuginfo.py index 4df788e6ac..2b02b6edca 100644 --- a/numba_dpex/tests/test_debuginfo.py +++ b/numba_dpex/tests/test_debuginfo.py @@ -15,7 +15,7 @@ debug_options = [True, False] -f32arrty = usm_ndarray(float32, 1, "C") +f32arrty = usm_ndarray(ndim=1, dtype=float32, layout="C") @pytest.fixture(params=debug_options) From 7fe8d9b85edf0ddf587b35e6a0f696801944ef04 Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Sun, 12 Feb 2023 20:57:25 -0600 Subject: [PATCH 02/15] Add basic support for dpctl.SyclQueue as a Numba type. - Adds a Numba type to represent dpctl.SyclQueue and infer it as an opaque pointer inside the compiler. --- numba_dpex/core/datamodel/models.py | 9 ++++- numba_dpex/core/types/__init__.py | 2 + numba_dpex/core/types/dpctl_types.py | 40 +++++++++++++++++++ numba_dpex/core/typing/typeof.py | 22 +++++++++- .../tests/dpjit_tests/test_box_unbox.py | 27 +++++++++++++ 5 files changed, 96 insertions(+), 4 deletions(-) create mode 100644 numba_dpex/core/types/dpctl_types.py create mode 100644 numba_dpex/tests/dpjit_tests/test_box_unbox.py diff --git a/numba_dpex/core/datamodel/models.py b/numba_dpex/core/datamodel/models.py index 4abb2182c2..4018527b5f 100644 --- a/numba_dpex/core/datamodel/models.py +++ b/numba_dpex/core/datamodel/models.py @@ -4,12 +4,13 @@ from numba.core import datamodel, types from numba.core.datamodel.models import ArrayModel as DpnpNdArrayModel -from numba.core.datamodel.models import PrimitiveModel, StructModel +from numba.core.datamodel.models import OpaqueModel, PrimitiveModel, StructModel from numba.core.extending import register_model -from numba_dpex.core.types import Array, DpnpNdArray, USMNdArray from numba_dpex.utils import address_space +from ..types import Array, DpctlSyclQueue, DpnpNdArray, USMNdArray + class GenericPointerModel(PrimitiveModel): def __init__(self, dmm, fe_type): @@ -81,3 +82,7 @@ def _init_data_model_manager(): # Register the DpnpNdArray type with the Numba ArrayModel register_model(DpnpNdArray)(DpnpNdArrayModel) dpex_data_model_manager.register(DpnpNdArray, DpnpNdArrayModel) + +# Register the DpctlSyclQueue type with Numba's OpaqueModel +register_model(DpctlSyclQueue)(OpaqueModel) +dpex_data_model_manager.register(DpctlSyclQueue, OpaqueModel) diff --git a/numba_dpex/core/types/__init__.py b/numba_dpex/core/types/__init__.py index e4a255041c..1ca210cd4b 100644 --- a/numba_dpex/core/types/__init__.py +++ b/numba_dpex/core/types/__init__.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from .array_type import Array +from .dpctl_types import DpctlSyclQueue from .dpnp_ndarray_type import DpnpNdArray from .numba_types_short_names import ( b1, @@ -31,6 +32,7 @@ __all__ = [ "Array", + "DpctlSyclQueue", "DpnpNdArray", "USMNdArray", "none", diff --git a/numba_dpex/core/types/dpctl_types.py b/numba_dpex/core/types/dpctl_types.py new file mode 100644 index 0000000000..c839864fbe --- /dev/null +++ b/numba_dpex/core/types/dpctl_types.py @@ -0,0 +1,40 @@ +# SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +from dpctl import SyclQueue +from numba import types +from numba.extending import NativeValue, box, type_callable, unbox + + +class DpctlSyclQueue(types.Type): + """A Numba type to represent a dpctl.SyclQueue PyObject. + + For now, a dpctl.SyclQueue is represented as a Numba opaque type that allows + passing in and using a SyclQueue object as an opaque pointer type inside + Numba. + """ + + def __init__(self): + super().__init__(name="DpctlSyclQueueType") + + +sycl_queue_ty = DpctlSyclQueue() + + +@type_callable(SyclQueue) +def type_interval(context): + def typer(): + return sycl_queue_ty + + return typer + + +@unbox(DpctlSyclQueue) +def unbox_sycl_queue(typ, obj, c): + return NativeValue(obj) + + +@box(DpctlSyclQueue) +def box_pyobject(typ, val, c): + return val diff --git a/numba_dpex/core/typing/typeof.py b/numba_dpex/core/typing/typeof.py index 334b3a1a5d..024180399a 100644 --- a/numba_dpex/core/typing/typeof.py +++ b/numba_dpex/core/typing/typeof.py @@ -2,15 +2,19 @@ # # SPDX-License-Identifier: Apache-2.0 +from dpctl import SyclQueue from dpctl.tensor import usm_ndarray from dpnp import ndarray +from numba.core import types from numba.extending import typeof_impl from numba.np import numpy_support -from numba_dpex.core.types.dpnp_ndarray_type import DpnpNdArray -from numba_dpex.core.types.usm_ndarray_type import USMNdArray from numba_dpex.utils import address_space +from ..types.dpctl_types import sycl_queue_ty +from ..types.dpnp_ndarray_type import DpnpNdArray +from ..types.usm_ndarray_type import USMNdArray + def _typeof_helper(val, array_class_type): """Creates a Numba type of the specified ``array_class_type`` for ``val``.""" @@ -90,3 +94,17 @@ def typeof_dpnp_ndarray(val, c): Returns: The Numba type corresponding to dpnp.ndarray """ return _typeof_helper(val, DpnpNdArray) + + +@typeof_impl.register(SyclQueue) +def typeof_dpctl_sycl_queue(val, c): + """Registers the type inference implementation function for a + dpctl.SyclQueue PyObject. + + Args: + val : An instance of dpctl.SyclQueue. + c : Unused argument used to be consistent with Numba API. + + Returns: A numba_dpex.core.types.dpctl_types.DpctlSyclQueue instance. + """ + return sycl_queue_ty diff --git a/numba_dpex/tests/dpjit_tests/test_box_unbox.py b/numba_dpex/tests/dpjit_tests/test_box_unbox.py new file mode 100644 index 0000000000..39f7fd0893 --- /dev/null +++ b/numba_dpex/tests/dpjit_tests/test_box_unbox.py @@ -0,0 +1,27 @@ +# SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +""" +Tests for boxing and unboxing of types supported inside dpjit +""" + +import dpctl +import pytest + +from numba_dpex import dpjit + + +@pytest.mark.parametrize( + "obj", + [ + pytest.param(dpctl.SyclQueue()), + ], +) +def test_boxing_unboxing(obj): + @dpjit + def func(a): + return a + + o = func(obj) + assert id(o) == id(obj) From 894fc1db5cff7754a1cdba90264338bd93ac274e Mon Sep 17 00:00:00 2001 From: Mingjie Wang Date: Sun, 12 Feb 2023 22:12:25 -0600 Subject: [PATCH 03/15] Implement a Python extension module to box and unbox dpnp.ndarray. - Copy over needed headers and private functions from Numba's nrt_python extesion. - Adds a boxing function for dpnp.ndarrays. - takes a Numba native arystruct object and creates a dpctl.tensor.usm_ndarray object and then a dpnp.ndarray PyObject and returns it. - If the arystruct object had a Numba allocated meminfo pointer, the dpctl.tensor.usm_ndarray uses the meminfo object as its base pointer. - If the arystruct object had a parent pointer, i.e., it was created from an externally passed dpnp.ndarray PyObject, returns the parent back instead of creating a new dpnp.ndarray. --- numba_dpex/core/runtime/_dpexrt_python.c | 372 +++++++++++++++++++ numba_dpex/core/runtime/_meminfo_helper.h | 51 +++ numba_dpex/core/runtime/_nrt_helper.c | 120 ++++++ numba_dpex/core/runtime/_nrt_helper.h | 23 ++ numba_dpex/core/runtime/_nrt_python_helper.c | 192 ++++++++++ numba_dpex/core/runtime/_nrt_python_helper.h | 47 +++ setup.py | 30 +- 7 files changed, 830 insertions(+), 5 deletions(-) create mode 100644 numba_dpex/core/runtime/_dpexrt_python.c create mode 100644 numba_dpex/core/runtime/_meminfo_helper.h create mode 100644 numba_dpex/core/runtime/_nrt_helper.c create mode 100644 numba_dpex/core/runtime/_nrt_helper.h create mode 100644 numba_dpex/core/runtime/_nrt_python_helper.c create mode 100644 numba_dpex/core/runtime/_nrt_python_helper.h diff --git a/numba_dpex/core/runtime/_dpexrt_python.c b/numba_dpex/core/runtime/_dpexrt_python.c new file mode 100644 index 0000000000..e006a3c5c1 --- /dev/null +++ b/numba_dpex/core/runtime/_dpexrt_python.c @@ -0,0 +1,372 @@ +// SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +//===----------------------------------------------------------------------===// +/// +/// \file +/// A Python module that provides constructors to create a Numba MemInfo +/// PyObject using a sycl USM allocator as the external memory allocator. +/// The Module also provides the Numba box and unbox implementations for a +/// dpnp.ndarray object. +/// +//===----------------------------------------------------------------------===// + +#include "dpctl_capi.h" +#include "dpctl_sycl_interface.h" + +#include "_meminfo_helper.h" +#include "_nrt_helper.h" +#include "_nrt_python_helper.h" + +#include "numba/_arraystruct.h" + +// forward declarations +static struct PyUSMArrayObject *PyUSMNdArray_ARRAYOBJ(PyObject *obj); +static PyObject *box_from_arystruct_parent(arystruct_t *arystruct, + int ndim, + PyArray_Descr *descr); +static PyObject * +DPEXRT_sycl_usm_ndarray_to_python_acqref(arystruct_t *arystruct, + PyTypeObject *retty, + int ndim, + int writeable, + PyArray_Descr *descr); + +/* + * Debugging printf function used internally + */ +void nrt_debug_print(char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); +} + +/*----------------------------------------------------------------------------*/ +/*--------- Helpers to get attributes out of a dpnp.ndarray PyObject ---------*/ +/*----------------------------------------------------------------------------*/ + +/*! + * @brief Returns the ``_array_obj`` attribute of the PyObject cast to + * PyUSMArrayObject, if no such attribute exists returns NULL. + * + * @param obj A PyObject that will be checked for an + * ``_array_obj`` attribute. + * @return {return} A PyUSMArrayObject object if the input has the + * ``_array_obj`` attribute, otherwise NULL. + */ +static struct PyUSMArrayObject *PyUSMNdArray_ARRAYOBJ(PyObject *obj) +{ + PyObject *arrayobj = NULL; + + arrayobj = PyObject_GetAttrString(obj, "_array_obj"); + + if (!arrayobj) + return NULL; + if (!PyObject_TypeCheck(arrayobj, &PyUSMArrayType)) + return NULL; + + struct PyUSMArrayObject *pyusmarrayobj = + (struct PyUSMArrayObject *)(arrayobj); + + return pyusmarrayobj; +} + +/*----- Boxing and Unboxing implementations for a dpnp.ndarray PyObject ------*/ + +/*! + * @brief A helper function that boxes a Numba arystruct_t object into a + * dpnp.ndarray PyObject using the arystruct_t's parent attribute. + * + * @param arystruct A Numba arystruct_t object. + * @param ndim Number of dimensions of the boxed array. + * @param descr A PyArray_Desc object for the dtype of the array. + * @return {return} A PyObject created from the arystruct_t->parent, if + * the PyObject could not be created return NULL. + */ +static PyObject *box_from_arystruct_parent(arystruct_t *arystruct, + int ndim, + PyArray_Descr *descr) +{ + int i; + npy_intp *p; + npy_intp *shape = NULL, *strides = NULL; + PyObject *array = arystruct->parent; + struct PyUSMArrayObject *arrayobj = NULL; + + nrt_debug_print("DPEXRT-DEBUG: In try_to_return_parent.\n"); + + if (!(arrayobj = PyUSMNdArray_ARRAYOBJ(arystruct->parent))) + return NULL; + + if ((void *)UsmNDArray_GetData(arrayobj) != arystruct->data) + return NULL; + + if (UsmNDArray_GetNDim(arrayobj) != ndim) + return NULL; + + p = arystruct->shape_and_strides; + shape = UsmNDArray_GetShape(arrayobj); + strides = UsmNDArray_GetStrides(arrayobj); + + for (i = 0; i < ndim; i++, p++) { + if (shape[i] != *p) + return NULL; + } + + if (strides) { + if (strides[i] != *p) + return NULL; + } + else { + for (i = 1; i < ndim; ++i, ++p) { + if (shape[i] != *p) + return NULL; + } + if (*p != 1) + return NULL; + } + + // At the end of boxing our Meminfo destructor gets called and that will + // decref any PyObject that was stored inside arraystruct->parent. Since, + // we are stealing the reference and returning the original PyObject, i.e., + // parent, we need to increment the reference count of the parent here. + Py_IncRef(array); + + nrt_debug_print( + "DPEXRT-DEBUG: try_to_return_parent found a valid parent.\n"); + + /* Yes, it is the same array return a new reference */ + return array; +} + +/*! + * @brief Used to implement the boxing, i.e., conversion from Numba + * representation of a dpnp.ndarray object to a dpnp.ndarray PyObject. + * + * It used to steal the reference of the arystruct. + * + * @param arystruct The Numba internal representation of a dpnp.ndarray object. + * @param retty Unused to be removed. + * @param ndim is the number of dimension of the array. + * @param writeable corresponds to the "writable" flag in the dpnp.ndarray. + * @param descr is the data type description. + * + */ +static PyObject * +DPEXRT_sycl_usm_ndarray_to_python_acqref(arystruct_t *arystruct, + PyTypeObject *retty, + int ndim, + int writeable, + PyArray_Descr *descr) +{ + PyObject *dpnp_ary = NULL; + PyObject *dpnp_array_mod = NULL; + PyObject *dpnp_array_type = NULL; + PyObject *usm_ndarr_obj = NULL; + PyObject *args = NULL; + PyTypeObject *dpnp_array_type_obj = NULL; + MemInfoObject *miobj = NULL; + npy_intp *shape = NULL, *strides = NULL; + int typenum = 0; + + nrt_debug_print( + "DPEXRT-DEBUG: In DPEXRT_sycl_usm_ndarray_to_python_acqref.\n"); + + if (descr == NULL) { + PyErr_Format( + PyExc_RuntimeError, + "In 'DPEXRT_sycl_usm_ndarray_to_python_acqref', 'descr' is NULL"); + return NULL; + } + + if (!NUMBA_PyArray_DescrCheck(descr)) { + PyErr_Format(PyExc_TypeError, "expected dtype object, got '%.200s'", + Py_TYPE(descr)->tp_name); + return NULL; + } + + // If the arystruct has a parent attribute, try to box the parent and + // return it. + if (arystruct->parent) { + nrt_debug_print("DPEXRT-DEBUG: arystruct has a parent, therefore " + "trying to box and return the parent at %s, line %d\n", + __FILE__, __LINE__); + + PyObject *obj = box_from_arystruct_parent(arystruct, ndim, descr); + if (obj) { + return obj; + } + } + + // If the arystruct has a meminfo pointer, then use the meminfo to create + // a MemInfoType PyTypeObject (_nrt_python_helper.h|c). The MemInfoType + // object will then be used to create a dpctl.tensor.usm_ndarray object and + // set as the `base` pointer of that object. The dpctl.tensor.usm_ndarray + // object will then be used to create the final boxed dpnp.ndarray object. + // + // The rationale for boxing the dpnp.ndarray from the meminfo pointer is to + // return back to Python memory that was allocated inside Numba and let + // Python manage the lifetime of the memory. + if (arystruct->meminfo) { + // wrap into MemInfoObject + miobj = PyObject_New(MemInfoObject, &MemInfoType); + args = PyTuple_New(1); + // PyTuple_SET_ITEM steals reference + PyTuple_SET_ITEM(args, 0, PyLong_FromVoidPtr(arystruct->meminfo)); + + NRT_Debug(nrt_debug_print( + "NRT_adapt_ndarray_to_python arystruct->meminfo=%p\n", + arystruct->meminfo)); + + NRT_Debug(nrt_debug_print( + "NRT_adapt_ndarray_to_python_acqref created MemInfo=%p\n", miobj)); + + // Note: MemInfo_init() does not incref. The function steals the + // NRT reference, which we need to acquire. + // Increase the refcount of the NRT_MemInfo object, i.e., mi->refct++ + NRT_MemInfo_acquire(arystruct->meminfo); + + if (MemInfo_init(miobj, args, NULL)) { + NRT_Debug(nrt_debug_print("MemInfo_init failed.\n")); + return NULL; + } + Py_DECREF(args); + } + + shape = arystruct->shape_and_strides; + strides = shape + ndim; + + typenum = descr->type_num; + usm_ndarr_obj = UsmNDArray_MakeFromPtr( + ndim, shape, typenum, strides, (DPCTLSyclUSMRef)arystruct->data, + (DPCTLSyclQueueRef)miobj->meminfo->external_allocator->opaque_data, 0, + (PyObject *)miobj); + + if (usm_ndarr_obj == NULL || + !PyObject_TypeCheck(usm_ndarr_obj, &PyUSMArrayType)) + { + return NULL; + } + + // call new on dpnp_array + dpnp_array_mod = PyImport_ImportModule("dpnp.dpnp_array"); + if (!dpnp_array_mod) { + return MOD_ERROR_VAL; + } + dpnp_array_type = PyObject_GetAttrString(dpnp_array_mod, "dpnp_array"); + + if (!PyType_Check(dpnp_array_type)) { + Py_DECREF(dpnp_array_mod); + Py_XDECREF(dpnp_array_type); + return MOD_ERROR_VAL; + } + + Py_DECREF(dpnp_array_mod); + + dpnp_array_type_obj = (PyTypeObject *)(dpnp_array_type); + + dpnp_ary = (PyObject *)dpnp_array_type_obj->tp_new( + dpnp_array_type_obj, PyTuple_New(0), PyDict_New()); + + if (dpnp_ary == NULL) { + nrt_debug_print("dpnp_ary==NULL \n"); + } + else { + nrt_debug_print("dpnp_ary=%p \n", dpnp_ary); + } + int status = PyObject_SetAttrString((PyObject *)dpnp_ary, "_array_obj", + usm_ndarr_obj); + nrt_debug_print("returning from status \n"); + if (status == -1) { + nrt_debug_print("returning from status ==NULL \n"); + Py_DECREF(dpnp_array_type_obj); + PyErr_SetString(PyExc_TypeError, "Oh no!"); + return (PyObject *)NULL; + } + nrt_debug_print( + "returning from DPEXRT_sycl_usm_ndarray_to_python_acqref 1 \n"); + + if (dpnp_ary == NULL) { + nrt_debug_print( + "returning from DPEXRT_sycl_usm_ndarray_to_python_acqref 2\n"); + return NULL; + } + + nrt_debug_print( + "returning from DPEXRT_sycl_usm_ndarray_to_python_acqref\n"); + return (PyObject *)dpnp_ary; +} + +/*----------------------------------------------------------------------------*/ +/*--------------------- The _dpexrt_python Python extension module -- -------*/ +/*----------------------------------------------------------------------------*/ + +static PyObject *build_c_helpers_dict(void) +{ + PyObject *dct = PyDict_New(); + if (dct == NULL) + goto error; + +#define _declpointer(name, value) \ + do { \ + PyObject *o = PyLong_FromVoidPtr(value); \ + if (o == NULL) \ + goto error; \ + if (PyDict_SetItemString(dct, name, o)) { \ + Py_DECREF(o); \ + goto error; \ + } \ + Py_DECREF(o); \ + } while (0) + + _declpointer("DPEXRT_sycl_usm_ndarray_to_python_acqref", + &DPEXRT_sycl_usm_ndarray_to_python_acqref); + +#undef _declpointer + return dct; +error: + Py_XDECREF(dct); + return NULL; +} + +/*--------- Builder for the _dpexrt_python Python extension module -- -------*/ + +MOD_INIT(_dpexrt_python) +{ + PyObject *m; + MOD_DEF(m, "_dpexrt_python", "No docs", NULL) + if (m == NULL) + return MOD_ERROR_VAL; + + import_array(); + import_dpctl(); + + PyObject *dpnp_array_mod = PyImport_ImportModule("dpnp.dpnp_array"); + + if (!dpnp_array_mod) { + Py_DECREF(m); + return MOD_ERROR_VAL; + } + PyObject *dpnp_array_type = + PyObject_GetAttrString(dpnp_array_mod, "dpnp_array"); + if (!PyType_Check(dpnp_array_type)) { + Py_DECREF(m); + Py_DECREF(dpnp_array_mod); + Py_XDECREF(dpnp_array_type); + return MOD_ERROR_VAL; + } + PyModule_AddObject(m, "dpnp_array_type", dpnp_array_type); + + Py_DECREF(dpnp_array_mod); + + PyModule_AddObject( + m, "DPEXRT_sycl_usm_ndarray_to_python_acqref", + PyLong_FromVoidPtr(&DPEXRT_sycl_usm_ndarray_to_python_acqref)); + + PyModule_AddObject(m, "c_helpers", build_c_helpers_dict()); + return MOD_SUCCESS_VAL(m); +} diff --git a/numba_dpex/core/runtime/_meminfo_helper.h b/numba_dpex/core/runtime/_meminfo_helper.h new file mode 100644 index 0000000000..c3b306808a --- /dev/null +++ b/numba_dpex/core/runtime/_meminfo_helper.h @@ -0,0 +1,51 @@ +// SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef _NRT_ARRAY_STRUCT_H_ +#define _NRT_ARRAY_STRUCT_H_ +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#include + +#include +#include +#include + +#include "numba/_numba_common.h" +#include "numba/_pymodule.h" +#include "numba/core/runtime/nrt.h" + +/* + * The MemInfo structure. + * NOTE: copy from numba/core/runtime/nrt.c + */ +struct MemInfo +{ + size_t refct; + NRT_dtor_function dtor; + void *dtor_info; + void *data; + size_t size; /* only used for NRT allocated memory */ + NRT_ExternalAllocator *external_allocator; +}; + +/*! + * @brief A wrapper struct to store a MemInfo pointer along with the PyObject + * that is associated with the MeMinfo. + * + * The struct is stored in the dtor_info attribute of a MemInfo object and + * used by the destructor to free the MemInfo and DecRef the Pyobject. + * + */ +typedef struct +{ + PyObject *owner; + NRT_MemInfo *mi; +} MemInfoDtorInfo; + +typedef struct +{ + PyObject_HEAD NRT_MemInfo *meminfo; +} MemInfoObject; + +#endif /* _NRT_ARRAY_STRUCT_H_ */ diff --git a/numba_dpex/core/runtime/_nrt_helper.c b/numba_dpex/core/runtime/_nrt_helper.c new file mode 100644 index 0000000000..576d704a4f --- /dev/null +++ b/numba_dpex/core/runtime/_nrt_helper.c @@ -0,0 +1,120 @@ +// SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +#include "_nrt_helper.h" +#include +#include +#include +/* + * Global resources. + */ +struct NRT_MemSys +{ + /* Shutdown flag */ + int shutting; + /* Stats */ + struct + { + bool enabled; + atomic_size_t alloc; + atomic_size_t free; + atomic_size_t mi_alloc; + atomic_size_t mi_free; + } stats; + /* System allocation functions */ + struct + { + NRT_malloc_func malloc; + NRT_realloc_func realloc; + NRT_free_func free; + } allocator; +}; + +/* The Memory System object */ +static struct NRT_MemSys TheMSys; + +// following funcs are copied from numba/core/runtime/nrt.cpp +void *NRT_MemInfo_external_allocator(NRT_MemInfo *mi) +{ + NRT_Debug(nrt_debug_print( + "NRT_MemInfo_external_allocator meminfo: %p external_allocator: %p\n", + mi, mi->external_allocator)); + return mi->external_allocator; +} + +void *NRT_MemInfo_data(NRT_MemInfo *mi) { return mi->data; } + +void NRT_MemInfo_release(NRT_MemInfo *mi) +{ + assert(mi->refct > 0 && "RefCt cannot be 0"); + /* RefCt drop to zero */ + if ((--(mi->refct)) == 0) { + NRT_MemInfo_call_dtor(mi); + } +} + +void NRT_MemInfo_call_dtor(NRT_MemInfo *mi) +{ + NRT_Debug(nrt_debug_print("NRT_MemInfo_call_dtor %p\n", mi)); + if (mi->dtor && !TheMSys.shutting) + /* We have a destructor and the system is not shutting down */ + mi->dtor(mi->data, mi->size, mi->dtor_info); + /* Clear and release MemInfo */ + NRT_MemInfo_destroy(mi); +} + +void NRT_MemInfo_acquire(NRT_MemInfo *mi) +{ + // NRT_Debug(nrt_debug_print("NRT_MemInfo_acquire %p refct=%zu\n", mi, + // mi->refct.load())); + assert(mi->refct > 0 && "RefCt cannot be zero"); + mi->refct++; +} + +size_t NRT_MemInfo_size(NRT_MemInfo *mi) { return mi->size; } + +void *NRT_MemInfo_parent(NRT_MemInfo *mi) { return mi->dtor_info; } + +size_t NRT_MemInfo_refcount(NRT_MemInfo *mi) +{ + /* Should never returns 0 for a valid MemInfo */ + if (mi && mi->data) + return mi->refct; + else { + return (size_t)-1; + } +} + +void NRT_Free(void *ptr) +{ + NRT_Debug(nrt_debug_print("NRT_Free %p\n", ptr)); + TheMSys.allocator.free(ptr); + if (TheMSys.stats.enabled) { + TheMSys.stats.free++; + } +} + +void NRT_dealloc(NRT_MemInfo *mi) +{ + NRT_Debug( + nrt_debug_print("NRT_dealloc meminfo: %p external_allocator: %p\n", mi, + mi->external_allocator)); + if (mi->external_allocator) { + mi->external_allocator->free(mi, mi->external_allocator->opaque_data); + if (TheMSys.stats.enabled) { + TheMSys.stats.free++; + } + } + else { + NRT_Free(mi); + } +} + +void NRT_MemInfo_destroy(NRT_MemInfo *mi) +{ + NRT_dealloc(mi); + if (TheMSys.stats.enabled) { + TheMSys.stats.mi_free++; + } +} diff --git a/numba_dpex/core/runtime/_nrt_helper.h b/numba_dpex/core/runtime/_nrt_helper.h new file mode 100644 index 0000000000..4a2a7455ec --- /dev/null +++ b/numba_dpex/core/runtime/_nrt_helper.h @@ -0,0 +1,23 @@ +// SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef _NRT_HELPER_H_ +#define _NRT_HELPER_H_ + +#define NO_IMPORT_ARRAY +#include "_meminfo_helper.h" + +void *NRT_MemInfo_external_allocator(NRT_MemInfo *mi); +void *NRT_MemInfo_data(NRT_MemInfo *mi); +void NRT_MemInfo_release(NRT_MemInfo *mi); +void NRT_MemInfo_call_dtor(NRT_MemInfo *mi); +void NRT_MemInfo_acquire(NRT_MemInfo *mi); +size_t NRT_MemInfo_size(NRT_MemInfo *mi); +void *NRT_MemInfo_parent(NRT_MemInfo *mi); +size_t NRT_MemInfo_refcount(NRT_MemInfo *mi); +void NRT_Free(void *ptr); +void NRT_dealloc(NRT_MemInfo *mi); +void NRT_MemInfo_destroy(NRT_MemInfo *mi); + +#endif /* _NRT_HELPER_H_ */ diff --git a/numba_dpex/core/runtime/_nrt_python_helper.c b/numba_dpex/core/runtime/_nrt_python_helper.c new file mode 100644 index 0000000000..fb356e2ef3 --- /dev/null +++ b/numba_dpex/core/runtime/_nrt_python_helper.c @@ -0,0 +1,192 @@ +// SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +/* + * Definition of NRT functions for marshalling from / to Python objects. + * This module is included by _nrt_pythonmod.c and by pycc-compiled modules. + */ + +#include "_nrt_python_helper.h" + +static PyBufferProcs MemInfo_bufferProcs = {MemInfo_getbuffer, NULL}; + +// following funcs are copied from numba/core/runtime/_nrt_python.c +void MemInfo_dealloc(MemInfoObject *self) +{ + NRT_MemInfo_release(self->meminfo); + Py_TYPE(self)->tp_free((PyObject *)self); +} + +int MemInfo_init(MemInfoObject *self, PyObject *args, PyObject *kwds) +{ + static char *keywords[] = {"ptr", NULL}; + PyObject *raw_ptr_obj; + void *raw_ptr; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O", keywords, &raw_ptr_obj)) { + return -1; + } + raw_ptr = PyLong_AsVoidPtr(raw_ptr_obj); + NRT_Debug( + nrt_debug_print("MemInfo_init self=%p raw_ptr=%p\n", self, raw_ptr)); + + if (PyErr_Occurred()) + return -1; + self->meminfo = (NRT_MemInfo *)raw_ptr; + assert(NRT_MemInfo_refcount(self->meminfo) > 0 && "0 refcount"); + return 0; +} + +int MemInfo_getbuffer(PyObject *exporter, Py_buffer *view, int flags) +{ + Py_ssize_t len; + void *buf; + int readonly = 0; + + MemInfoObject *miobj = (MemInfoObject *)exporter; + NRT_MemInfo *mi = miobj->meminfo; + + buf = NRT_MemInfo_data(mi); + len = NRT_MemInfo_size(mi); + return PyBuffer_FillInfo(view, exporter, buf, len, readonly, flags); +} + +PyObject *MemInfo_acquire(MemInfoObject *self) +{ + NRT_MemInfo_acquire(self->meminfo); + Py_RETURN_NONE; +} + +PyObject *MemInfo_release(MemInfoObject *self) +{ + NRT_MemInfo_release(self->meminfo); + Py_RETURN_NONE; +} + +PyObject *MemInfo_get_data(MemInfoObject *self, void *closure) +{ + return PyLong_FromVoidPtr(NRT_MemInfo_data(self->meminfo)); +} + +PyObject *MemInfo_get_refcount(MemInfoObject *self, void *closure) +{ + size_t refct = NRT_MemInfo_refcount(self->meminfo); + if (refct == (size_t)-1) { + PyErr_SetString(PyExc_ValueError, "invalid MemInfo"); + return NULL; + } + return PyLong_FromSize_t(refct); +} + +PyObject *MemInfo_get_external_allocator(MemInfoObject *self, void *closure) +{ + void *p = NRT_MemInfo_external_allocator(self->meminfo); + return PyLong_FromVoidPtr(p); +} + +PyObject *MemInfo_get_parent(MemInfoObject *self, void *closure) +{ + void *p = NRT_MemInfo_parent(self->meminfo); + if (p) { + Py_INCREF(p); + return (PyObject *)p; + } + else { + Py_INCREF(Py_None); + return Py_None; + } +} + +PyMethodDef MemInfo_methods[] = { + {"acquire", (PyCFunction)MemInfo_acquire, METH_NOARGS, + "Increment the reference count"}, + {"release", (PyCFunction)MemInfo_release, METH_NOARGS, + "Decrement the reference count"}, + {NULL} /* Sentinel */ +}; + +PyGetSetDef MemInfo_getsets[] = { + {"data", (getter)MemInfo_get_data, NULL, + "Get the data pointer as an integer", NULL}, + {"refcount", (getter)MemInfo_get_refcount, NULL, "Get the refcount", NULL}, + {"external_allocator", (getter)MemInfo_get_external_allocator, NULL, + "Get the external allocator", NULL}, + {"parent", (getter)MemInfo_get_parent, NULL, NULL}, + {NULL} /* Sentinel */ +}; + +PyTypeObject MemInfoType = { + PyVarObject_HEAD_INIT(NULL, 0) "_dpexrt_python._MemInfo", /* tp_name */ + sizeof(MemInfoObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)MemInfo_dealloc, /* tp_dealloc */ + 0, /* tp_vectorcall_offset */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_as_async */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + &MemInfo_bufferProcs, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + MemInfo_methods, /* tp_methods */ + 0, /* tp_members */ + MemInfo_getsets, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)MemInfo_init, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ + 0, /* tp_finalize */ + /* The docs suggest Python 3.8 has no tp_vectorcall + * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Doc/c-api/typeobj.rst?plain=1#L146 + * but the header has it: + * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Include/cpython/object.h#L257 + */ + 0, /* tp_vectorcall */ +#if (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION == 8) + /* This is Python 3.8 only. + * See: https://github.com/python/cpython/blob/3.8/Include/cpython/object.h + * there's a tp_print preserved for backwards compatibility. xref: + * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Include/cpython/object.h#L260 + */ + 0, /* tp_print */ +#endif + +/* WARNING: Do not remove this, only modify it! It is a version guard to + * act as a reminder to update this struct on Python version update! */ +#if (PY_MAJOR_VERSION == 3) +#if !((PY_MINOR_VERSION == 8) || (PY_MINOR_VERSION == 9) || \ + (PY_MINOR_VERSION == 10)) +#error "Python minor version is not supported." +#endif +#else +#error "Python major version is not supported." +#endif + /* END WARNING*/ +}; diff --git a/numba_dpex/core/runtime/_nrt_python_helper.h b/numba_dpex/core/runtime/_nrt_python_helper.h new file mode 100644 index 0000000000..444e23152d --- /dev/null +++ b/numba_dpex/core/runtime/_nrt_python_helper.h @@ -0,0 +1,47 @@ +// SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +//===----------------------------------------------------------------------===// +/// +/// \file +/// Re-definition of NRT functions for marshalling from / to Python objects +/// defined in numba/core/runtime/_nrt_python.c. +/// +//===----------------------------------------------------------------------===// + +#ifndef _NRT_PYTHON_HELPER_H_ +#define _NRT_PYTHON_HELPER_H_ + +#define NO_IMPORT_ARRAY +#include "_meminfo_helper.h" + +/*! + * @brief A pyTypeObject to describe a Python object to wrap Numba's MemInfo + * + */ +extern PyTypeObject MemInfoType; + +void MemInfo_dealloc(MemInfoObject *self); +int MemInfo_init(MemInfoObject *self, PyObject *args, PyObject *kwds); +int MemInfo_getbuffer(PyObject *exporter, Py_buffer *view, int flags); +PyObject *MemInfo_acquire(MemInfoObject *self); +PyObject *MemInfo_release(MemInfoObject *self); +PyObject *MemInfo_get_data(MemInfoObject *self, void *closure); +PyObject *MemInfo_get_refcount(MemInfoObject *self, void *closure); +PyObject *MemInfo_get_external_allocator(MemInfoObject *self, void *closure); +PyObject *MemInfo_get_parent(MemInfoObject *self, void *closure); + +/* WARNING: Do not remove this, only modify it! It is a version guard to + * act as a reminder to update this struct on Python version update! */ +#if (PY_MAJOR_VERSION == 3) +#if !((PY_MINOR_VERSION == 8) || (PY_MINOR_VERSION == 9) || \ + (PY_MINOR_VERSION == 10)) +#error "Python minor version is not supported." +#endif +#else +#error "Python major version is not supported." +#endif +/* END WARNING*/ + +#endif /* _NRT_PYTHON_HELPER_H_ */ diff --git a/setup.py b/setup.py index 00179ec224..a1ea4a4924 100644 --- a/setup.py +++ b/setup.py @@ -3,9 +3,14 @@ # SPDX-License-Identifier: Apache-2.0 import os +import shutil import subprocess import sys +import sysconfig +import dpctl +import numba +import numpy import setuptools.command.develop as orig_develop import setuptools.command.install as orig_install from Cython.Build import cythonize @@ -35,9 +40,6 @@ def get_ext_modules(): else: raise ImportError("DPNP is not available") - import dpctl - import numba - dpctl_runtime_library_dirs = [] if IS_LIN: @@ -67,6 +69,26 @@ def get_ext_modules(): ) ext_modules += [ext_dpnp_iface] + ext_dpexrt_python = Extension( + name="numba_dpex.core.runtime._dpexrt_python", + sources=[ + "numba_dpex/core/runtime/_dpexrt_python.c", + "numba_dpex/core/runtime/_nrt_helper.c", + "numba_dpex/core/runtime/_nrt_python_helper.c", + ], + libraries=["DPCTLSyclInterface"], + library_dirs=[os.path.dirname(dpctl.__file__)], + runtime_library_dirs=dpctl_runtime_library_dirs, + include_dirs=[ + sysconfig.get_paths()["include"], + numba.extending.include_path(), + numpy.get_include(), + dpctl.get_include(), + ], + ) + + ext_modules += [ext_dpexrt_python] + if dpnp_present: return cythonize(ext_modules) else: @@ -138,8 +160,6 @@ def spirv_compile(): def _llvm_spirv(): """Return path to llvm-spirv executable.""" - import shutil - result = None # use llvm-spirv from dpcpp package. From a57b5a0974be215f8001b743d41433006b5554d7 Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Sun, 12 Feb 2023 22:25:45 -0600 Subject: [PATCH 04/15] Unboxing helper function for dpnp.ndarray to _dpexrt_python.c - Adds an unboxing helper function to the _dpexrt_python.c for dpnp.ndarray. - Implements helper functions to create and destroy an NRT_ExternalAllocator object that uses usm allocators. --- numba_dpex/core/runtime/_dpexrt_python.c | 439 +++++++++++++++++++++++ 1 file changed, 439 insertions(+) diff --git a/numba_dpex/core/runtime/_dpexrt_python.c b/numba_dpex/core/runtime/_dpexrt_python.c index e006a3c5c1..ec6d10af99 100644 --- a/numba_dpex/core/runtime/_dpexrt_python.c +++ b/numba_dpex/core/runtime/_dpexrt_python.c @@ -23,9 +23,26 @@ // forward declarations static struct PyUSMArrayObject *PyUSMNdArray_ARRAYOBJ(PyObject *obj); +static npy_intp product_of_shape(npy_intp *shape, npy_intp ndim); +static void *usm_device_malloc(size_t size, void *opaque_data); +static void *usm_shared_malloc(size_t size, void *opaque_data); +static void *usm_host_malloc(size_t size, void *opaque_data); +static void usm_free(void *data, void *opaque_data); +static NRT_ExternalAllocator * +NRT_ExternalAllocator_new_for_usm(DPCTLSyclQueueRef qref, size_t usm_type); +static MemInfoDtorInfo *MemInfoDtorInfo_new(NRT_MemInfo *mi, PyObject *owner); +static NRT_MemInfo *NRT_MemInfo_new_from_usmndarray(PyObject *ndarrobj, + void *data, + npy_intp nitems, + npy_intp itemsize, + DPCTLSyclQueueRef qref); +static void usmndarray_meminfo_dtor(void *ptr, size_t size, void *info); static PyObject *box_from_arystruct_parent(arystruct_t *arystruct, int ndim, PyArray_Descr *descr); + +static int DPEXRT_sycl_usm_ndarray_from_python(PyObject *obj, + arystruct_t *arystruct); static PyObject * DPEXRT_sycl_usm_ndarray_to_python_acqref(arystruct_t *arystruct, PyTypeObject *retty, @@ -45,6 +62,283 @@ void nrt_debug_print(char *fmt, ...) va_end(args); } +/** An NRT_external_malloc_func implementation using DPCTLmalloc_device. + * + */ +static void *usm_device_malloc(size_t size, void *opaque_data) +{ + DPCTLSyclQueueRef qref = NULL; + + qref = (DPCTLSyclQueueRef)opaque_data; + return DPCTLmalloc_device(size, qref); +} + +/** An NRT_external_malloc_func implementation using DPCTLmalloc_shared. + * + */ +static void *usm_shared_malloc(size_t size, void *opaque_data) +{ + DPCTLSyclQueueRef qref = NULL; + + qref = (DPCTLSyclQueueRef)opaque_data; + return DPCTLmalloc_shared(size, qref); +} + +/** An NRT_external_malloc_func implementation using DPCTLmalloc_host. + * + */ +static void *usm_host_malloc(size_t size, void *opaque_data) +{ + DPCTLSyclQueueRef qref = NULL; + + qref = (DPCTLSyclQueueRef)opaque_data; + return DPCTLmalloc_host(size, qref); +} + +/** An NRT_external_free_func implementation based on DPCTLfree_with_queue + * + */ +static void usm_free(void *data, void *opaque_data) +{ + DPCTLSyclQueueRef qref = NULL; + qref = (DPCTLSyclQueueRef)opaque_data; + + DPCTLfree_with_queue(data, qref); +} + +/*! + * @brief Creates a new NRT_ExternalAllocator object tied to a SYCL USM + * allocator. + * + * @param qref A DPCTLSyclQueueRef opaque pointer for a sycl queue. + * @param usm_type Indicates the type of usm allocator to use. + * - 1: device + * - 2: shared + * - 3: host + * The values are as defined in the DPCTLSyclUSMType + * enum in dpctl's libsyclinterface library. + * @return {return} A new NRT_ExternalAllocator object or NULL if + * object creation failed. + */ +static NRT_ExternalAllocator * +NRT_ExternalAllocator_new_for_usm(DPCTLSyclQueueRef qref, size_t usm_type) +{ + + NRT_ExternalAllocator *allocator = NULL; + + allocator = (NRT_ExternalAllocator *)malloc(sizeof(NRT_ExternalAllocator)); + if (allocator == NULL) { + nrt_debug_print("DPEXRT-ERROR: failed to allocate memory for " + "NRT_ExternalAllocator at %s, line %d.\n", + __FILE__, __LINE__); + goto error; + } + nrt_debug_print("DPEXRT-DEBUG: usm type = %d at %s, line %d.\n", usm_type, + __FILE__, __LINE__); + + switch (usm_type) { + case 1: + allocator->malloc = usm_device_malloc; + break; + case 2: + allocator->malloc = usm_shared_malloc; + break; + case 3: + allocator->malloc = usm_host_malloc; + break; + default: + nrt_debug_print("DPEXRT-ERROR: Encountered an unknown usm " + "allocation type (%d) at %s, line %d\n", + usm_type, __FILE__, __LINE__); + goto error; + } + + allocator->realloc = NULL; + allocator->free = usm_free; + allocator->opaque_data = (void *)qref; + + return allocator; + +error: + free(allocator); + return NULL; +} + +/*! + * @brief Destructor function for a MemInfo object allocated inside DPEXRT. The + * destructor is called by Numba using the NRT_MemInfo_release function. + * + * The destructor does the following clean up: + * - Frees the data associated with the MemInfo object if there was no + * parent PyObject that owns the data. + * - Frees the DpctlSyclQueueRef pointer stored in the opaque data of the + * MemInfo's external_allocator member. + * - Frees the external_allocator object associated with the MemInfo object. + * - If there was a PyObject associated with the MemInfo, then + * the reference count on that object. + * - Frees the MemInfoDtorInfo wrapper object that was stored as the + * dtor_info member of the MemInfo. + * + * @param ptr *Unused*, the argument is required to match the + * type of the NRT_dtor_function pointer type. + * @param size *Unused*, the argument is required to match the + * type of the NRT_dtor_function pointer type. + * @param info A MemInfoDtorInfo object that stores a reference to + * the parent meminfo and any original PyObject from + * which the meminfo was created. + */ +static void usmndarray_meminfo_dtor(void *ptr, size_t size, void *info) +{ + MemInfoDtorInfo *mi_dtor_info = NULL; + + // Sanity-check to make sure the mi_dtor_info is an actual pointer. + if (!(mi_dtor_info = (MemInfoDtorInfo *)info)) { + nrt_debug_print( + "DPEXRT-ERROR: MemInfoDtorInfo object might be corrupted. Aborting " + "MemInfo destruction at %s, line %d\n", + __FILE__, __LINE__); + return; + } + + // If there is no owner PyObject, free the data by calling the + // external_allocator->free + if (!(mi_dtor_info->owner)) + mi_dtor_info->mi->external_allocator->free( + mi_dtor_info->mi->data, + mi_dtor_info->mi->external_allocator->opaque_data); + + // free the DpctlSyclQueueRef object stored inside the external_allocator + DPCTLQueue_Delete( + (DPCTLSyclQueueRef)mi_dtor_info->mi->external_allocator->opaque_data); + + // free the external_allocator object + free(mi_dtor_info->mi->external_allocator); + + // Set the pointer to NULL to prevent NRT_dealloc trying to use it free + // the meminfo object + mi_dtor_info->mi->external_allocator = NULL; + + if (mi_dtor_info->owner) { + // Decref the Pyobject from which the MemInfo was created + PyGILState_STATE gstate; + PyObject *ownerobj = mi_dtor_info->owner; + // ensure the GIL + gstate = PyGILState_Ensure(); + // decref the python object + Py_DECREF(ownerobj); + // release the GIL + PyGILState_Release(gstate); + } + + // Free the MemInfoDtorInfo object + free(mi_dtor_info); +} + +/*! + * @brief Allocates and returns a new MemInfoDtorInfo object. + * + * @param mi The parent NRT_MemInfo object for which the + * dtor_info attribute is being created. + * @param owner A PyObject from which the NRT_MemInfo object was + * created, maybe NULL if no such object exists. + * @return {return} A new MemInfoDtorInfo object. + */ +static MemInfoDtorInfo *MemInfoDtorInfo_new(NRT_MemInfo *mi, PyObject *owner) +{ + MemInfoDtorInfo *mi_dtor_info = NULL; + + if (!(mi_dtor_info = (MemInfoDtorInfo *)malloc(sizeof(MemInfoDtorInfo)))) { + nrt_debug_print("DPEXRT-ERROR: Could not allocate a new " + "MemInfoDtorInfo object at %s, line %d\n", + __FILE__, __LINE__); + return NULL; + } + mi_dtor_info->mi = mi; + mi_dtor_info->owner = owner; + + return mi_dtor_info; +} + +/*! + * @brief Creates a NRT_MemInfo object for a dpnp.ndarray + * + * @param ndarrobj An dpnp.ndarray PyObject + * @param data The data pointer of the dpnp.ndarray + * @param nitems The number of elements in the dpnp.ndarray. + * @param itemsize The size of each element of the dpnp.ndarray. + * @param qref A SYCL queue pointer wrapper on which the memory + * of the dpnp.ndarray was allocated. + * @return {return} A new NRT_MemInfo object + */ +static NRT_MemInfo *NRT_MemInfo_new_from_usmndarray(PyObject *ndarrobj, + void *data, + npy_intp nitems, + npy_intp itemsize, + DPCTLSyclQueueRef qref) +{ + NRT_MemInfo *mi = NULL; + NRT_ExternalAllocator *ext_alloca = NULL; + MemInfoDtorInfo *midtor_info = NULL; + DPCTLSyclContextRef cref = NULL; + + // Allocate a new NRT_MemInfo object + if (!(mi = (NRT_MemInfo *)malloc(sizeof(NRT_MemInfo)))) { + nrt_debug_print("DPEXRT-ERROR: Could not allocate a new NRT_MemInfo " + "object at %s, line %d\n", + __FILE__, __LINE__); + goto error; + } + + if (!(cref = DPCTLQueue_GetContext(qref))) { + nrt_debug_print("DPEXRT-ERROR: Could not get the DPCTLSyclContext from " + "the queue object at %s, line %d\n", + __FILE__, __LINE__); + goto error; + } + + size_t usm_type = (size_t)DPCTLUSM_GetPointerType(data, cref); + DPCTLContext_Delete(cref); + + // Allocate a new NRT_ExternalAllocator + if (!(ext_alloca = NRT_ExternalAllocator_new_for_usm(qref, usm_type))) { + nrt_debug_print("DPEXRT-ERROR: Could not allocate a new " + "NRT_ExternalAllocator object at %s, line %d\n", + __FILE__, __LINE__); + goto error; + } + + // Allocate a new MemInfoDtorInfo + if (!(midtor_info = MemInfoDtorInfo_new(mi, ndarrobj))) { + nrt_debug_print("DPEXRT-ERROR: Could not allocate a new " + "MemInfoDtorInfo object at %s, line %d\n", + __FILE__, __LINE__); + goto error; + } + + // Initialize the NRT_MemInfo object + mi->refct = 1; /* starts with 1 refct */ + mi->dtor = usmndarray_meminfo_dtor; + mi->dtor_info = midtor_info; + mi->data = data; + mi->size = nitems * itemsize; + mi->external_allocator = ext_alloca; + + nrt_debug_print( + "DPEXRT-DEBUG: NRT_MemInfo_init mi=%p external_allocator=%p\n", mi, + ext_alloca); + + return mi; + +error: + nrt_debug_print( + "DPEXRT-ERROR: Failed inside NRT_MemInfo_new_from_usmndarray clean up " + "and return NULL at %s, line %d\n", + __FILE__, __LINE__); + free(mi); + free(ext_alloca); + return NULL; +} + /*----------------------------------------------------------------------------*/ /*--------- Helpers to get attributes out of a dpnp.ndarray PyObject ---------*/ /*----------------------------------------------------------------------------*/ @@ -75,8 +369,144 @@ static struct PyUSMArrayObject *PyUSMNdArray_ARRAYOBJ(PyObject *obj) return pyusmarrayobj; } +/*! + * @brief Returns the product of the elements in an array of a given + * length. + * + * @param shape An array of integers + * @param ndim The length of the ``shape`` array. + * @return {return} The product of the elements in the ``shape`` array. + */ +static npy_intp product_of_shape(npy_intp *shape, npy_intp ndim) +{ + npy_intp nelems = 1; + + for (int i = 0; i < ndim; ++i) + nelems *= shape[i]; + + return nelems; +} + /*----- Boxing and Unboxing implementations for a dpnp.ndarray PyObject ------*/ +/*! + * @brief Unboxes a PyObject that may represent a dpnp.ndarray into a Numba + * native represetation. + * + * @param obj A Python object that may be a dpnp.ndarray + * @param arystruct Numba's internal native represnetation for a given + * instance of a dpnp.ndarray + * @return {return} Error code representing success (0) or failure (-1). + */ +static int DPEXRT_sycl_usm_ndarray_from_python(PyObject *obj, + arystruct_t *arystruct) +{ + struct PyUSMArrayObject *arrayobj = NULL; + int i, ndim; + npy_intp *shape = NULL, *strides = NULL; + npy_intp *p = NULL, nitems, itemsize; + void *data = NULL; + DPCTLSyclQueueRef qref = NULL; + PyGILState_STATE gstate; + + // Increment the ref count on obj to prevent CPython from garbage + // collecting the array. + Py_IncRef(obj); + + nrt_debug_print("DPEXRT-DEBUG: In DPEXRT_sycl_usm_ndarray_from_python.\n"); + + // Check if the PyObject obj has an _array_obj attribute that is of + // dpctl.tensor.usm_ndarray type. + if (!(arrayobj = PyUSMNdArray_ARRAYOBJ(obj))) { + nrt_debug_print("DPEXRT-ERROR: PyUSMNdArray_ARRAYOBJ check failed %d\n", + __FILE__, __LINE__); + goto error; + } + + if (!(ndim = UsmNDArray_GetNDim(arrayobj))) { + nrt_debug_print( + "DPEXRT-ERROR: UsmNDArray_GetNDim returned 0 at %s, line %d\n", + __FILE__, __LINE__); + goto error; + } + shape = UsmNDArray_GetShape(arrayobj); + strides = UsmNDArray_GetStrides(arrayobj); + data = (void *)UsmNDArray_GetData(arrayobj); + nitems = product_of_shape(shape, ndim); + itemsize = (npy_intp)UsmNDArray_GetElementSize(arrayobj); + if (!(qref = UsmNDArray_GetQueueRef(arrayobj))) { + nrt_debug_print("DPEXRT-ERROR: UsmNDArray_GetQueueRef returned NULL at " + "%s, line %d.\n", + __FILE__, __LINE__); + goto error; + } + else { + nrt_debug_print("qref addr : %p\n", qref); + } + + if (!(arystruct->meminfo = NRT_MemInfo_new_from_usmndarray( + obj, data, nitems, itemsize, qref))) + { + nrt_debug_print("DPEXRT-ERROR: NRT_MemInfo_new_from_usmndarray failed " + "at %s, line %d.\n", + __FILE__, __LINE__); + goto error; + } + + arystruct->data = data; + arystruct->nitems = nitems; + arystruct->itemsize = itemsize; + arystruct->parent = obj; + + p = arystruct->shape_and_strides; + + for (i = 0; i < ndim; ++i, ++p) + *p = shape[i]; + + // DPCTL returns a NULL pointer if the array is contiguous + // FIXME: Stride computation should check order and adjust how strides are + // calculated. Right now strides are assuming that order is C contigous. + if (strides) { + for (i = 0; i < ndim; ++i, ++p) { + *p = strides[i]; + } + } + else { + for (i = 1; i < ndim; ++i, ++p) { + *p = shape[i]; + } + *p = 1; + } + + // --- DEBUG + nrt_debug_print("DPEXRT-DEBUG: Assigned shape_and_strides at %s, line %d\n", + __FILE__, __LINE__); + p = arystruct->shape_and_strides; + for (i = 0; i < ndim * 2; ++i, ++p) { + nrt_debug_print("DPEXRT-DEBUG: arraystruct->p[%d] = %d, ", i, *p); + } + nrt_debug_print("\n"); + // -- DEBUG + + return 0; + +error: + // If the check failed then decrement the refcount and return an error + // code of -1. + // Decref the Pyobject of the array + // ensure the GIL + nrt_debug_print("DPEXRT-ERROR: Failed to unbox dpnp ndarray into a Numba " + "arraystruct at %s, line %d\n", + __FILE__, __LINE__); + gstate = PyGILState_Ensure(); + // decref the python object + Py_DECREF(obj); + // release the GIL + PyGILState_Release(gstate); + + return -1; +} + /*! * @brief A helper function that boxes a Numba arystruct_t object into a * dpnp.ndarray PyObject using the arystruct_t's parent attribute. @@ -323,8 +753,12 @@ static PyObject *build_c_helpers_dict(void) Py_DECREF(o); \ } while (0) + _declpointer("DPEXRT_sycl_usm_ndarray_from_python", + &DPEXRT_sycl_usm_ndarray_from_python); _declpointer("DPEXRT_sycl_usm_ndarray_to_python_acqref", &DPEXRT_sycl_usm_ndarray_to_python_acqref); + _declpointer("NRT_ExternalAllocator_new_for_usm", + &NRT_ExternalAllocator_new_for_usm); #undef _declpointer return dct; @@ -363,6 +797,11 @@ MOD_INIT(_dpexrt_python) Py_DECREF(dpnp_array_mod); + PyModule_AddObject(m, "NRT_ExternalAllocator_new_for_usm", + PyLong_FromVoidPtr(&NRT_ExternalAllocator_new_for_usm)); + PyModule_AddObject( + m, "DPEXRT_sycl_usm_ndarray_from_python", + PyLong_FromVoidPtr(&DPEXRT_sycl_usm_ndarray_from_python)); PyModule_AddObject( m, "DPEXRT_sycl_usm_ndarray_to_python_acqref", PyLong_FromVoidPtr(&DPEXRT_sycl_usm_ndarray_to_python_acqref)); From 356ddb6cf6f96ca4d1bdfb541939618b720b93f6 Mon Sep 17 00:00:00 2001 From: Mingjie Wang Date: Sun, 12 Feb 2023 22:34:28 -0600 Subject: [PATCH 05/15] Add a USM-based NRT_MemInfo allocator to dpexrt_python module. - Adds a function to allocate a NRT_MemInfo object's data pointer using USM allocators. --- numba_dpex/core/runtime/_dpexrt_python.c | 88 +++++++++++++++++++++++- 1 file changed, 87 insertions(+), 1 deletion(-) diff --git a/numba_dpex/core/runtime/_dpexrt_python.c b/numba_dpex/core/runtime/_dpexrt_python.c index ec6d10af99..0f20cb031d 100644 --- a/numba_dpex/core/runtime/_dpexrt_python.c +++ b/numba_dpex/core/runtime/_dpexrt_python.c @@ -5,7 +5,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// A Python module that provides constructors to create a Numba MemInfo +/// A Python module that pprovides constructors to create a Numba MemInfo /// PyObject using a sycl USM allocator as the external memory allocator. /// The Module also provides the Numba box and unbox implementations for a /// dpnp.ndarray object. @@ -339,6 +339,87 @@ static NRT_MemInfo *NRT_MemInfo_new_from_usmndarray(PyObject *ndarrobj, return NULL; } +/*! + * @brief Creates a NRT_MemInfo object whose data is allocated using a USM + * allocator. + * + * @param size The size of memory (data) owned by the NRT_MemInfo + * object. + * @param usm_type The usm type of the memory. + * @param device The device on which the memory was allocated. + * @return {return} A new NRT_MemInfo object, NULL if no NRT_MemInfo + * object could be created. + */ +static NRT_MemInfo * +DPEXRT_MemInfo_alloc(npy_intp size, size_t usm_type, const char *device) +{ + NRT_MemInfo *mi = NULL; + NRT_ExternalAllocator *ext_alloca = NULL; + MemInfoDtorInfo *midtor_info = NULL; + DPCTLSyclDeviceSelectorRef dselector = NULL; + DPCTLSyclDeviceRef dref = NULL; + DPCTLSyclQueueRef qref = NULL; + + nrt_debug_print("DPEXRT-DEBUG: Inside DPEXRT_MemInfo_alloc %s, line %d\n", + __FILE__, __LINE__); + // Allocate a new NRT_MemInfo object + if (!(mi = (NRT_MemInfo *)malloc(sizeof(NRT_MemInfo)))) { + nrt_debug_print( + "DPEXRT-ERROR: Could not allocate a new NRT_MemInfo object.\n"); + goto error; + } + + if (!(dselector = DPCTLFilterSelector_Create(device))) { + nrt_debug_print( + "DPEXRT-ERROR: Could not create a sycl::device_selector from " + "filter string: %s at %s %d.\n", + device, __FILE__, __LINE__); + goto error; + } + + if (!(dref = DPCTLDevice_CreateFromSelector(dselector))) + goto error; + + if (!(qref = DPCTLQueue_CreateForDevice(dref, NULL, 0))) + goto error; + + DPCTLDeviceSelector_Delete(dselector); + DPCTLDevice_Delete(dref); + + // Allocate a new NRT_ExternalAllocator + if (!(ext_alloca = NRT_ExternalAllocator_new_for_usm(qref, usm_type))) + goto error; + + if (!(midtor_info = MemInfoDtorInfo_new(mi, NULL))) + goto error; + + mi->refct = 1; /* starts with 1 refct */ + mi->dtor = usmndarray_meminfo_dtor; + mi->dtor_info = midtor_info; + mi->data = ext_alloca->malloc(size, qref); + + if (mi->data == NULL) + goto error; + + mi->size = size; + mi->external_allocator = ext_alloca; + nrt_debug_print( + "DPEXRT-DEBUG: DPEXRT_MemInfo_alloc mi=%p " + "external_allocator=%p for usm_type %zu on device %s, %s at %d\n", + mi, ext_alloca, usm_type, device, __FILE__, __LINE__); + + return mi; + +error: + free(mi); + free(ext_alloca); + free(midtor_info); + DPCTLDeviceSelector_Delete(dselector); + DPCTLDevice_Delete(dref); + + return NULL; +} + /*----------------------------------------------------------------------------*/ /*--------- Helpers to get attributes out of a dpnp.ndarray PyObject ---------*/ /*----------------------------------------------------------------------------*/ @@ -757,6 +838,7 @@ static PyObject *build_c_helpers_dict(void) &DPEXRT_sycl_usm_ndarray_from_python); _declpointer("DPEXRT_sycl_usm_ndarray_to_python_acqref", &DPEXRT_sycl_usm_ndarray_to_python_acqref); + _declpointer("DPEXRT_MemInfo_alloc", &DPEXRT_MemInfo_alloc); _declpointer("NRT_ExternalAllocator_new_for_usm", &NRT_ExternalAllocator_new_for_usm); @@ -796,6 +878,8 @@ MOD_INIT(_dpexrt_python) PyModule_AddObject(m, "dpnp_array_type", dpnp_array_type); Py_DECREF(dpnp_array_mod); + static PyTypeObject *dpnp_array_type_obj; + dpnp_array_type_obj = (PyTypeObject *)(dpnp_array_type); PyModule_AddObject(m, "NRT_ExternalAllocator_new_for_usm", PyLong_FromVoidPtr(&NRT_ExternalAllocator_new_for_usm)); @@ -805,6 +889,8 @@ MOD_INIT(_dpexrt_python) PyModule_AddObject( m, "DPEXRT_sycl_usm_ndarray_to_python_acqref", PyLong_FromVoidPtr(&DPEXRT_sycl_usm_ndarray_to_python_acqref)); + PyModule_AddObject(m, "DPEXRT_MemInfo_alloc", + PyLong_FromVoidPtr(&DPEXRT_MemInfo_alloc)); PyModule_AddObject(m, "c_helpers", build_c_helpers_dict()); return MOD_SUCCESS_VAL(m); From b829e54f7b2ee99f79445077733c24ab9bff151b Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Sun, 12 Feb 2023 22:37:58 -0600 Subject: [PATCH 06/15] Add wrapper functions to generate LLVM IR CallInst for dpexrt_python. - Adds helpers to generate C calls to dpexrt_python native functions directly inside LLVM IR. - Registers the helper function in _dpexrt_python so that we can insert calls to them via llvmlite. --- numba_dpex/core/runtime/__init__.py | 25 ++++++ numba_dpex/core/runtime/context.py | 132 ++++++++++++++++++++++++++++ 2 files changed, 157 insertions(+) create mode 100644 numba_dpex/core/runtime/__init__.py create mode 100644 numba_dpex/core/runtime/context.py diff --git a/numba_dpex/core/runtime/__init__.py b/numba_dpex/core/runtime/__init__.py new file mode 100644 index 0000000000..d11909bc9c --- /dev/null +++ b/numba_dpex/core/runtime/__init__.py @@ -0,0 +1,25 @@ +# Copyright 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import llvmlite.binding as ll + +from ._dpexrt_python import c_helpers + +# Register the helper function in _dpexrt_python so that we can insert +# calls to them via llvmlite. +for ( + py_name, + c_address, +) in c_helpers.items(): + ll.add_symbol(py_name, c_address) diff --git a/numba_dpex/core/runtime/context.py b/numba_dpex/core/runtime/context.py new file mode 100644 index 0000000000..cf6c29ed4f --- /dev/null +++ b/numba_dpex/core/runtime/context.py @@ -0,0 +1,132 @@ +# SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +import functools + +import llvmlite.llvmpy.core as lc +from llvmlite import ir +from llvmlite.llvmpy.core import ATTR_NO_CAPTURE, Type +from numba.core import cgutils, types + + +class DpexRTContext(object): + """ + An object providing access to DPEXRT API in the lowering pass. + """ + + def __init__(self, context): + self._context = context + + def _check_null_result(func): + @functools.wraps(func) + def wrap(self, builder, *args, **kwargs): + memptr = func(self, builder, *args, **kwargs) + msg = "USM allocation failed. Check the usm_type and filter " + "string values." + cgutils.guard_memory_error(self._context, builder, memptr, msg=msg) + return memptr + + return wrap + + @_check_null_result + def meminfo_alloc(self, builder, size, usm_type, device): + """Allocate a new MemInfo with a data payload of `size` bytes. + + The result of the call is checked and if it is NULL, i.e. allocation + failed, then a MemoryError is raised. If the allocation succeeded then + a pointer to the MemInfo is returned. + + Args: + builder (_type_): LLVM IR builder + size (_type_): LLVM uint64 Value specifying the size in bytes for + the data payload. + usm_type (_type_): An LLVM Constant Value specifying the type of the + usm allocator. The constant value should match the values in + ``dpctl's`` ``libsyclinterface::DPCTLSyclUSMType`` enum. + device (_type_): An LLVM ArrayType storing a const string for a + DPC++ filter selector string. + + Returns: A pointer to the MemInfo is returned. + """ + + return self.meminfo_alloc_unchecked(builder, size, usm_type, device) + + def meminfo_alloc_unchecked(self, builder, size, usm_type, device): + """ + Allocate a new MemInfo with a data payload of `size` bytes. + + A pointer to the MemInfo is returned. + + Returns NULL to indicate error/failure to allocate. + """ + mod = builder.module + u64 = ir.IntType(64) + fnty = ir.FunctionType( + cgutils.voidptr_t, [cgutils.intp_t, u64, cgutils.voidptr_t] + ) + fn = cgutils.get_or_insert_function(mod, fnty, "DPEXRT_MemInfo_alloc") + fn.return_value.add_attribute("noalias") + + ret = builder.call(fn, [size, usm_type, device]) + + return ret + + def arraystruct_from_python(self, pyapi, obj, ptr): + """Generates a call to DPEXRT_sycl_usm_ndarray_from_python C function + defined in the _DPREXRT_python Python extension. + + Args: + pyapi (_type_): _description_ + obj (_type_): _description_ + ptr (_type_): _description_ + + Returns: + _type_: _description_ + """ + fnty = Type.function(Type.int(), [pyapi.pyobj, pyapi.voidptr]) + fn = pyapi._get_function(fnty, "DPEXRT_sycl_usm_ndarray_from_python") + fn.args[0].add_attribute(lc.ATTR_NO_CAPTURE) + fn.args[1].add_attribute(lc.ATTR_NO_CAPTURE) + + self.error = pyapi.builder.call(fn, (obj, ptr)) + + return self.error + + def usm_ndarray_to_python_acqref(self, pyapi, aryty, ary, dtypeptr): + """_summary_ + + Args: + pyapi (_type_): _description_ + aryty (_type_): _description_ + ary (_type_): _description_ + dtypeptr (_type_): _description_ + + Returns: + _type_: _description_ + """ + args = [ + pyapi.voidptr, + pyapi.pyobj, + ir.IntType(32), + ir.IntType(32), + pyapi.pyobj, + ] + fnty = Type.function(pyapi.pyobj, args) + fn = pyapi._get_function( + fnty, "DPEXRT_sycl_usm_ndarray_to_python_acqref" + ) + fn.args[0].add_attribute(ATTR_NO_CAPTURE) + + aryptr = cgutils.alloca_once_value(pyapi.builder, ary) + ptr = pyapi.builder.bitcast(aryptr, pyapi.voidptr) + + # Embed the Python type of the array (maybe subclass) in the LLVM IR. + serialized = pyapi.serialize_object(aryty.box_type) + serial_aryty_pytype = pyapi.unserialize(serialized) + + ndim = pyapi.context.get_constant(types.int32, aryty.ndim) + writable = pyapi.context.get_constant(types.int32, int(aryty.mutable)) + + args = [ptr, serial_aryty_pytype, ndim, writable, dtypeptr] + return pyapi.builder.call(fn, args) From ecaf9683171e9bfa1ca402ab3e77f085bb31e002 Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Sun, 12 Feb 2023 22:57:53 -0600 Subject: [PATCH 07/15] Overload implementation for dpnp.empty. - Removes the numba_dpex/core/dpnp_ndarray modules - Adds an arrayobj submodule to numba_dpex/dpnp_iface - Implements boxing and unboxing for dpnp.ndarrays using _dpexrt_python extension. - Adds an overload for dpnp.empty to dono_iface/arrayobj. --- numba_dpex/core/__init__.py | 1 - numba_dpex/core/dpnp_ndarray/__init__.py | 3 - numba_dpex/core/dpnp_ndarray/dpnp_empty.py | 214 --------------- numba_dpex/core/types/dpnp_ndarray_type.py | 114 +++++++- numba_dpex/dpnp_iface/__init__.py | 3 + numba_dpex/dpnp_iface/arrayobj.py | 294 +++++++++++++++++++++ 6 files changed, 409 insertions(+), 220 deletions(-) delete mode 100644 numba_dpex/core/dpnp_ndarray/__init__.py delete mode 100644 numba_dpex/core/dpnp_ndarray/dpnp_empty.py create mode 100644 numba_dpex/dpnp_iface/arrayobj.py diff --git a/numba_dpex/core/__init__.py b/numba_dpex/core/__init__.py index 3d8ff994b3..d394eb33e0 100644 --- a/numba_dpex/core/__init__.py +++ b/numba_dpex/core/__init__.py @@ -4,6 +4,5 @@ from .datamodel import * -from .dpnp_ndarray import dpnp_empty from .types import * from .typing import * diff --git a/numba_dpex/core/dpnp_ndarray/__init__.py b/numba_dpex/core/dpnp_ndarray/__init__.py deleted file mode 100644 index b2ed211648..0000000000 --- a/numba_dpex/core/dpnp_ndarray/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 diff --git a/numba_dpex/core/dpnp_ndarray/dpnp_empty.py b/numba_dpex/core/dpnp_ndarray/dpnp_empty.py deleted file mode 100644 index 82f9d1e09d..0000000000 --- a/numba_dpex/core/dpnp_ndarray/dpnp_empty.py +++ /dev/null @@ -1,214 +0,0 @@ -# SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -import dpnp -from llvmlite import ir -from numba import types -from numba.core import cgutils -from numba.extending import ( - intrinsic, - lower_builtin, - overload_classmethod, - type_callable, -) - -from numba_dpex.core.types import DpnpNdArray - - -@type_callable(dpnp.empty) -def type_dpnp_empty(context): - def typer(shape, dtype=None, usm_type=None, sycl_queue=None): - from numba.core.typing.npydecl import parse_dtype, parse_shape - - if dtype is None: - nb_dtype = types.double - else: - nb_dtype = parse_dtype(dtype) - - ndim = parse_shape(shape) - - if usm_type is None: - usm_type = "device" - else: - usm_type = parse_usm_type(usm_type) - - if nb_dtype is not None and ndim is not None and usm_type is not None: - return DpnpNdArray( - dtype=nb_dtype, ndim=ndim, layout="C", usm_type=usm_type - ) - - return typer - - -def parse_usm_type(usm_type): - """ - Return the usm_type, if it is a string literal. - """ - from numba.core.errors import TypingError - - if isinstance(usm_type, types.StringLiteral): - usm_type_str = usm_type.literal_value - if usm_type_str not in ["shared", "device", "host"]: - msg = f"Invalid usm_type specified: '{usm_type_str}'" - raise TypingError(msg) - return usm_type_str - - -@lower_builtin(dpnp.empty, types.Any, types.Any, types.Any, types.Any) -def impl_dpnp_empty(context, builder, sig, args): - """ - Inputs: shape, dtype, usm_type, queue - """ - from numba.core.imputils import impl_ret_new_ref - - empty_args = _parse_empty_args(context, builder, sig, args) - ary = _empty_nd_impl(context, builder, *empty_args) - return impl_ret_new_ref(context, builder, sig.return_type, ary._getvalue()) - - -def _parse_empty_args(context, builder, sig, args): - """ - Parse the arguments of a dpnp.empty(), .zeros() or .ones() call. - """ - from numba.np.arrayobj import _parse_shape - - arrtype = sig.return_type - - arrshapetype = sig.args[0] - arrshape = args[0] - shape = _parse_shape(context, builder, arrshapetype, arrshape) - - queue = args[-1] - return (arrtype, shape, queue) - - -def _empty_nd_impl(context, builder, arrtype, shapes, queue): - """See numba.np.arrayobj._empty_nd_impl(). - This implementation uses different MemInfo allocator. - """ - from numba.np.arrayobj import ( - get_itemsize, - make_array, - populate_array, - signature, - ) - - arycls = make_array(arrtype) - ary = arycls(context, builder) - - datatype = context.get_data_type(arrtype.dtype) - itemsize = context.get_constant(types.intp, get_itemsize(context, arrtype)) - - # compute array length - arrlen = context.get_constant(types.intp, 1) - overflow = ir.Constant(ir.IntType(1), 0) - for s in shapes: - arrlen_mult = builder.smul_with_overflow(arrlen, s) - arrlen = builder.extract_value(arrlen_mult, 0) - overflow = builder.or_(overflow, builder.extract_value(arrlen_mult, 1)) - - if arrtype.ndim == 0: - strides = () - elif arrtype.layout == "C": - strides = [itemsize] - for dimension_size in reversed(shapes[1:]): - strides.append(builder.mul(strides[-1], dimension_size)) - strides = tuple(reversed(strides)) - elif arrtype.layout == "F": - strides = [itemsize] - for dimension_size in shapes[:-1]: - strides.append(builder.mul(strides[-1], dimension_size)) - strides = tuple(strides) - else: - raise NotImplementedError( - "Don't know how to allocate array with layout '{0}'.".format( - arrtype.layout - ) - ) - - # Check overflow, numpy also does this after checking order - allocsize_mult = builder.smul_with_overflow(arrlen, itemsize) - allocsize = builder.extract_value(allocsize_mult, 0) - overflow = builder.or_(overflow, builder.extract_value(allocsize_mult, 1)) - - with builder.if_then(overflow, likely=False): - # Raise same error as numpy, see: - # https://github.com/numpy/numpy/blob/2a488fe76a0f732dc418d03b452caace161673da/numpy/core/src/multiarray/ctors.c#L1095-L1101 # noqa: E501 - context.call_conv.return_user_exc( - builder, - ValueError, - ( - "array is too big; `arr.size * arr.dtype.itemsize` is larger than" - " the maximum possible size.", - ), - ) - - usm_type_num = {"shared": 0, "device": 1, "host": 2}[arrtype.usm_type] - usm_type = context.get_constant(types.int64, usm_type_num) - - args = (context.get_dummy_value(), allocsize, usm_type, queue) - - mip = types.MemInfoPointer(types.voidptr) - arytypeclass = types.TypeRef(type(arrtype)) - sig = signature(mip, arytypeclass, types.intp, types.intp, types.voidptr) - - meminfo = context.compile_internal(builder, _call_allocator, sig, args) - data = context.nrt.meminfo_data(builder, meminfo) - - intp_t = context.get_value_type(types.intp) - shape_array = cgutils.pack_array(builder, shapes, ty=intp_t) - strides_array = cgutils.pack_array(builder, strides, ty=intp_t) - - populate_array( - ary, - data=builder.bitcast(data, datatype.as_pointer()), - shape=shape_array, - strides=strides_array, - itemsize=itemsize, - meminfo=meminfo, - ) - - return ary - - -def _call_allocator(arrtype, size, usm_type, sycl_queue): - """Trampoline to call the intrinsic used for allocation""" - return arrtype._allocate(size, usm_type, sycl_queue) - - -@overload_classmethod(DpnpNdArray, "_allocate") -def _ol_dpnp_array_allocate(cls, size, usm_type, sycl_queue): - def impl(cls, size, usm_type, sycl_queue): - return intrin_alloc(size, usm_type, sycl_queue) - - return impl - - -@intrinsic -def intrin_alloc(typingctx, size, usm_type, sycl_queue): - """Intrinsic to call into the allocator for Array""" - from numba.core.base import BaseContext - from numba.core.runtime.context import NRTContext - from numba.core.typing.templates import Signature - - def MemInfo_new(context: NRTContext, builder, size, usm_type, queue): - context._require_nrt() - - mod = builder.module - fnargs = [cgutils.intp_t, cgutils.intp_t, cgutils.voidptr_t] - fnty = ir.FunctionType(cgutils.voidptr_t, fnargs) - fn = cgutils.get_or_insert_function(mod, fnty, "DPRT_MemInfo_new") - fn.return_value.add_attribute("noalias") - return builder.call(fn, [size, usm_type, queue]) - - def codegen(context: BaseContext, builder, signature: Signature, args): - meminfo = MemInfo_new(context.nrt, builder, *args) - meminfo.name = "meminfo" - return meminfo - - from numba.core.typing import signature - - mip = types.MemInfoPointer(types.voidptr) # return untyped pointer - sig = signature(mip, size, usm_type, sycl_queue) - return sig, codegen diff --git a/numba_dpex/core/types/dpnp_ndarray_type.py b/numba_dpex/core/types/dpnp_ndarray_type.py index 29ed445fb9..0712854b46 100644 --- a/numba_dpex/core/types/dpnp_ndarray_type.py +++ b/numba_dpex/core/types/dpnp_ndarray_type.py @@ -3,13 +3,123 @@ # SPDX-License-Identifier: Apache-2.0 +from numba.core import cgutils +from numba.core.errors import NumbaNotImplementedError +from numba.core.pythonapi import NativeValue, PythonAPI, box, unbox +from numba.np import numpy_support + +from numba_dpex.core.exceptions import UnreachableError +from numba_dpex.core.runtime import context as dpexrt + from .usm_ndarray_type import USMNdArray class DpnpNdArray(USMNdArray): """ The Numba type to represent an dpnp.ndarray. The type has the same - structure as USMNdArray used to represnet dpctl.tensor.usm_ndarray. + structure as USMNdArray used to represent dpctl.tensor.usm_ndarray. + """ + + @property + def is_internal(self): + return True + + +# --------------- Boxing/Unboxing logic for dpnp.ndarray ----------------------# + + +@unbox(DpnpNdArray) +def unbox_dpnp_nd_array(typ, obj, c): + """Converts a dpnp.ndarray object to a Numba internal array structure. + + Args: + typ : The Numba type of the PyObject + obj : The actual PyObject to be unboxed + c : + + Returns: + _type_: _description_ """ + # Reusing the numba.core.base.BaseContext's make_array function to get a + # struct allocated. The same struct is used for numpy.ndarray + # and dpnp.ndarray. It is possible to do so, as the extra information + # specific to dpnp.ndarray such as sycl_queue is inferred statically and + # stored as part of the DpnpNdArray type. + + # --------------- Original Numba comment from @ubox(types.Array) + # + # This is necessary because unbox_buffer() does not work on some + # dtypes, e.g. datetime64 and timedelta64. + # TODO check matching dtype. + # currently, mismatching dtype will still work and causes + # potential memory corruption + # + # --------------- End of Numba comment from @ubox(types.Array) + nativearycls = c.context.make_array(typ) + nativeary = nativearycls(c.context, c.builder) + aryptr = nativeary._getpointer() + + ptr = c.builder.bitcast(aryptr, c.pyapi.voidptr) + # FIXME : We need to check if Numba_RT as well as DPEX RT are enabled. + if c.context.enable_nrt: + dpexrtCtx = dpexrt.DpexRTContext(c.context) + errcode = dpexrtCtx.arraystruct_from_python(c.pyapi, obj, ptr) + else: + raise UnreachableError + + # TODO: here we have minimal typechecking by the itemsize. + # need to do better + try: + expected_itemsize = numpy_support.as_dtype(typ.dtype).itemsize + except NumbaNotImplementedError: + # Don't check types that can't be `as_dtype()`-ed + itemsize_mismatch = cgutils.false_bit + else: + expected_itemsize = nativeary.itemsize.type(expected_itemsize) + itemsize_mismatch = c.builder.icmp_unsigned( + "!=", + nativeary.itemsize, + expected_itemsize, + ) + + failed = c.builder.or_( + cgutils.is_not_null(c.builder, errcode), + itemsize_mismatch, + ) + # Handle error + with c.builder.if_then(failed, likely=False): + c.pyapi.err_set_string( + "PyExc_TypeError", + "can't unbox array from PyObject into " + "native value. The object maybe of a " + "different type", + ) + return NativeValue(c.builder.load(aryptr), is_error=failed) + + +@box(DpnpNdArray) +def box_array(typ, val, c): + if c.context.enable_nrt: + np_dtype = numpy_support.as_dtype(typ.dtype) + dtypeptr = c.env_manager.read_const(c.env_manager.add_const(np_dtype)) + dpexrtCtx = dpexrt.DpexRTContext(c.context) + newary = dpexrtCtx.usm_ndarray_to_python_acqref( + c.pyapi, typ, val, dtypeptr + ) + + if not newary: + c.pyapi.err_set_string( + "PyExc_TypeError", + "could not box native array into a dpnp.ndarray PyObject.", + ) + + # Steals NRT ref + # Refer: + # numba.core.base.nrt -> numba.core.runtime.context -> decref + # The `NRT_decref` function is generated directly as LLVM IR inside + # numba.core.runtime.nrtdynmod.py + c.context.nrt.decref(c.builder, typ, val) - pass + return newary + else: + raise UnreachableError diff --git a/numba_dpex/dpnp_iface/__init__.py b/numba_dpex/dpnp_iface/__init__.py index b6277ad3e2..0df52fadd0 100644 --- a/numba_dpex/dpnp_iface/__init__.py +++ b/numba_dpex/dpnp_iface/__init__.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: Apache-2.0 +from . import arrayobj + def ensure_dpnp(name): try: @@ -24,4 +26,5 @@ def _init_dpnp(): _init_dpnp() + DEBUG = None diff --git a/numba_dpex/dpnp_iface/arrayobj.py b/numba_dpex/dpnp_iface/arrayobj.py new file mode 100644 index 0000000000..25835c6cad --- /dev/null +++ b/numba_dpex/dpnp_iface/arrayobj.py @@ -0,0 +1,294 @@ +# SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +import dpnp +from llvmlite import ir +from llvmlite.ir import Constant +from numba import errors, types +from numba.core import cgutils +from numba.core.typing import signature +from numba.core.typing.npydecl import parse_shape +from numba.extending import intrinsic, overload, overload_classmethod +from numba.np.arrayobj import ( + _parse_empty_args, + get_itemsize, + make_array, + populate_array, +) + +from numba_dpex.core.runtime import context as dpexrt +from numba_dpex.core.types import DpnpNdArray + +from ..decorators import dpjit + +# ------------------------------------------------------------------------------ +# Helps to parse dpnp constructor arguments + + +def _parse_usm_type(usm_type): + """ + Returns the usm_type, if it is a string literal. + """ + from numba.core.errors import TypingError + + if isinstance(usm_type, types.StringLiteral): + usm_type_str = usm_type.literal_value + if usm_type_str not in ["shared", "device", "host"]: + msg = f"Invalid usm_type specified: '{usm_type_str}'" + raise TypingError(msg) + return usm_type_str + else: + raise TypeError + + +def _parse_device_filter_string(device): + """ + Returns the device filter string, if it is a string literal. + """ + from numba.core.errors import TypingError + + if isinstance(device, types.StringLiteral): + device_filter_str = device.literal_value + return device_filter_str + else: + raise TypeError + + +# ------------------------------------------------------------------------------ +# Helper functions to support dpnp array constructors + +# FIXME: The _empty_nd_impl was copied over *as it is* from numba.np.arrayobj. +# However, we cannot use it yet as the `_call_allocator` function needs to be +# tailored to our needs. Specifically, we need to pass the device string so that +# a correct type of external allocator may be created for the NRT_MemInfo +# object. + + +def _empty_nd_impl(context, builder, arrtype, shapes): + """Utility function used for allocating a new array during LLVM code + generation (lowering). Given a target context, builder, array + type, and a tuple or list of lowered dimension sizes, returns a + LLVM value pointing at a Numba runtime allocated array. + """ + arycls = make_array(arrtype) + ary = arycls(context, builder) + + datatype = context.get_data_type(arrtype.dtype) + itemsize = context.get_constant(types.intp, get_itemsize(context, arrtype)) + + # compute array length + arrlen = context.get_constant(types.intp, 1) + overflow = Constant(ir.IntType(1), 0) + for s in shapes: + arrlen_mult = builder.smul_with_overflow(arrlen, s) + arrlen = builder.extract_value(arrlen_mult, 0) + overflow = builder.or_(overflow, builder.extract_value(arrlen_mult, 1)) + + if arrtype.ndim == 0: + strides = () + elif arrtype.layout == "C": + strides = [itemsize] + for dimension_size in reversed(shapes[1:]): + strides.append(builder.mul(strides[-1], dimension_size)) + strides = tuple(reversed(strides)) + elif arrtype.layout == "F": + strides = [itemsize] + for dimension_size in shapes[:-1]: + strides.append(builder.mul(strides[-1], dimension_size)) + strides = tuple(strides) + else: + raise NotImplementedError( + "Don't know how to allocate array with layout '{0}'.".format( + arrtype.layout + ) + ) + + # Check overflow, numpy also does this after checking order + allocsize_mult = builder.smul_with_overflow(arrlen, itemsize) + allocsize = builder.extract_value(allocsize_mult, 0) + overflow = builder.or_(overflow, builder.extract_value(allocsize_mult, 1)) + + with builder.if_then(overflow, likely=False): + # Raise same error as numpy, see: + # https://github.com/numpy/numpy/blob/2a488fe76a0f732dc418d03b452caace161673da/numpy/core/src/multiarray/ctors.c#L1095-L1101 # noqa: E501 + context.call_conv.return_user_exc( + builder, + ValueError, + ( + "array is too big; `arr.size * arr.dtype.itemsize` is larger " + "than the maximum possible size.", + ), + ) + + usm_ty = arrtype.usm_type + usm_ty_val = 0 + if usm_ty == "device": + usm_ty_val = 1 + elif usm_ty == "shared": + usm_ty_val = 2 + elif usm_ty == "host": + usm_ty_val = 3 + usm_type = context.get_constant(types.uint64, usm_ty_val) + device = context.insert_const_string(builder.module, arrtype.device) + + args = ( + context.get_dummy_value(), + allocsize, + usm_type, + device, + ) + mip = types.MemInfoPointer(types.voidptr) + arytypeclass = types.TypeRef(type(arrtype)) + sig = signature( + mip, + arytypeclass, + types.intp, + types.uint64, + types.voidptr, + ) + + op = _call_usm_allocator + fnop = context.typing_context.resolve_value_type(op) + # The _call_usm_allocator function will be compiled and added to registry + # when the get_call_type function is invoked. + fnop.get_call_type(context.typing_context, sig.args, {}) + eqfn = context.get_function(fnop, sig) + meminfo = eqfn(builder, args) + data = context.nrt.meminfo_data(builder, meminfo) + intp_t = context.get_value_type(types.intp) + shape_array = cgutils.pack_array(builder, shapes, ty=intp_t) + strides_array = cgutils.pack_array(builder, strides, ty=intp_t) + + populate_array( + ary, + data=builder.bitcast(data, datatype.as_pointer()), + shape=shape_array, + strides=strides_array, + itemsize=itemsize, + meminfo=meminfo, + ) + + return ary + + +@overload_classmethod(DpnpNdArray, "_usm_allocate") +def _ol_array_allocate(cls, allocsize, usm_type, device): + """Implements an allocator for dpnp.ndarrays.""" + + def impl(cls, allocsize, usm_type, device): + return intrin_usm_alloc(allocsize, usm_type, device) + + return impl + + +@dpjit +def _call_usm_allocator(arrtype, size, usm_type, device): + """Trampoline to call the intrinsic used for allocation""" + return arrtype._usm_allocate(size, usm_type, device) + + +@intrinsic +def intrin_usm_alloc(typingctx, allocsize, usm_type, device): + """Intrinsic to call into the allocator for Array""" + + def codegen(context, builder, signature, args): + [allocsize, usm_type, device] = args + dpexrtCtx = dpexrt.DpexRTContext(context) + meminfo = dpexrtCtx.meminfo_alloc(builder, allocsize, usm_type, device) + return meminfo + + mip = types.MemInfoPointer(types.voidptr) # return untyped pointer + sig = signature(mip, allocsize, usm_type, device) + return sig, codegen + + +@intrinsic +def impl_dpnp_empty( + tyctx, + ty_shape, + ty_dtype, + ty_usm_type, + ty_device, + ty_retty_ref, +): + ty_retty = ty_retty_ref.instance_type + + sig = ty_retty(ty_shape, ty_dtype, ty_usm_type, ty_device, ty_retty_ref) + + def codegen(cgctx, builder, sig, llargs): + arrtype = _parse_empty_args(cgctx, builder, sig, llargs) + ary = _empty_nd_impl(cgctx, builder, *arrtype) + return ary._getvalue() + + return sig, codegen + + +# ------------------------------------------------------------------------------ +# Dpnp array constructor overloads + + +@overload(dpnp.empty, prefer_literal=True) +def ol_dpnp_empty( + shape, dtype=None, usm_type=None, device=None, sycl_queue=None +): + """Implementation of an overload to support dpnp.empty inside a jit + function. + + Args: + shape (tuple): Dimensions of the array to be created. + dtype optional): Data type of the array. Can be typestring, + a `numpy.dtype` object, `numpy` char string, or a numpy + scalar type. Default: None + usm_type ("device"|"shared"|"host", optional): The type of SYCL USM + allocation for the output array. Default: `"device"`. + device (optional): array API concept of device where the output array + is created. `device` can be `None`, a oneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a + non-partitioned SYCL device, an instance of + :class:`dpctl.SyclQueue`, or a `Device` object returnedby + `dpctl.tensor.usm_array.device`. Default: `None`. + sycl_queue (:class:`dpctl.SyclQueue`, optional): Not supported. + + Returns: Numba implementation of the dpnp.empty + """ + + if sycl_queue: + raise errors.TypingError( + "The sycl_queue keyword is not yet supported by dpnp.empty inside " + "a dpjit decorated function." + ) + + ndim = parse_shape(shape) + if not ndim: + raise errors.TypingError("Could not infer the rank of the ndarray") + + if usm_type is not None: + usm_type = _parse_usm_type(usm_type) + else: + usm_type = "device" + + if device is not None: + device = _parse_device_filter_string(device) + else: + device = "unknown" + + if ndim is not None: + retty = DpnpNdArray( + dtype=dtype, + ndim=ndim, + usm_type=usm_type, + device=device, + ) + + def impl( + shape, dtype=None, usm_type=None, device=None, sycl_queue=None + ): + return impl_dpnp_empty(shape, dtype, usm_type, device, retty) + + return impl + else: + msg = ( + f"Cannot parse input types to function dpnp.empty({shape}, {dtype})" + ) + raise errors.TypingError(msg) From 253be2975b4a1597a2e96c9b9e5f4b903019bc85 Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Sun, 12 Feb 2023 23:01:22 -0600 Subject: [PATCH 08/15] Various improvements to the DpexTarget context. - Implement refresh and a dpex target registry. - Initialize the dpjit dispacther and runtime sub-modules when dpex loads. - doxstrings etc. --- numba_dpex/__init__.py | 6 ++++- numba_dpex/core/dpjit_dispatcher.py | 8 ++++++ numba_dpex/core/targets/dpjit_target.py | 35 +++++++++++++++++++++++++ numba_dpex/decorators.py | 6 +++++ 4 files changed, 54 insertions(+), 1 deletion(-) diff --git a/numba_dpex/__init__.py b/numba_dpex/__init__.py index b855fb3aef..92ebc03779 100644 --- a/numba_dpex/__init__.py +++ b/numba_dpex/__init__.py @@ -5,10 +5,14 @@ """ The numba-dpex extension module adds data-parallel offload support to Numba. """ -import numba.testing +import numba_dpex.core.dpjit_dispatcher import numba_dpex.core.offload_dispatcher +# Initialize the _dpexrt_python extension +import numba_dpex.core.runtime +import numba_dpex.core.targets.dpjit_target + # Re-export types itself import numba_dpex.core.types as types from numba_dpex.core.kernel_interface.utils import * diff --git a/numba_dpex/core/dpjit_dispatcher.py b/numba_dpex/core/dpjit_dispatcher.py index 211e8752cb..f2a5164845 100644 --- a/numba_dpex/core/dpjit_dispatcher.py +++ b/numba_dpex/core/dpjit_dispatcher.py @@ -12,6 +12,14 @@ class DpjitDispatcher(dispatcher.Dispatcher): + """A dpex.djit-specific dispatcher. + + The DpjitDispatcher sets the targetdescr string to "dpex" so that Numba's + Dispatcher can lookup the global target_registry with that string and + correctly use the DpexTarget context. + + """ + targetdescr = dpex_target def __init__( diff --git a/numba_dpex/core/targets/dpjit_target.py b/numba_dpex/core/targets/dpjit_target.py index d254fd097d..03a460fa54 100644 --- a/numba_dpex/core/targets/dpjit_target.py +++ b/numba_dpex/core/targets/dpjit_target.py @@ -5,7 +5,11 @@ """Defines the target and typing contexts for numba_dpex's dpjit decorator. """ +from numba.core import utils +from numba.core.codegen import JITCPUCodegen +from numba.core.compiler_lock import global_compiler_lock from numba.core.cpu import CPUContext +from numba.core.imputils import Registry, RegistryLoader from numba.core.target_extension import CPU, target_registry @@ -19,7 +23,38 @@ class Dpex(CPU): # permits lookup and reference in user space by the string "dpex" target_registry[DPEX_TARGET_NAME] = Dpex +# This is the function registry for the dpu, it just has one registry, this one! +dpex_function_registry = Registry() + class DpexTargetContext(CPUContext): def __init__(self, typingctx, target=DPEX_TARGET_NAME): super().__init__(typingctx, target) + + @global_compiler_lock + def init(self): + self.is32bit = utils.MACHINE_BITS == 32 + self._internal_codegen = JITCPUCodegen("numba.exec") + self.lower_extensions = {} + # Initialize NRT runtime + # rtsys.initialize(self) + self.refresh() + + @utils.cached_property + def dpexrt(self): + from numba_dpex.core.runtime.context import DpexRTContext + + return DpexRTContext(self) + + def refresh(self): + registry = dpex_function_registry + try: + loader = self._registries[registry] + except KeyError: + loader = RegistryLoader(registry) + self._registries[registry] = loader + self.install_registry(registry) + # Also refresh typing context, since @overload declarations can + # affect it. + self.typing_context.refresh() + super().refresh() diff --git a/numba_dpex/decorators.py b/numba_dpex/decorators.py index 674859ef73..748934acce 100644 --- a/numba_dpex/decorators.py +++ b/numba_dpex/decorators.py @@ -166,6 +166,12 @@ def dpjit(*args, **kws): del kws["forceobj"] kws.update({"nopython": True}) kws.update({"pipeline_class": OffloadCompiler}) + + # FIXME: When trying to use dpex's target context, overloads do not work + # properly. We will turn on dpex target once the issue is fixed. + + # kws.update({"_target": "dpex"}) + return decorators.jit(*args, **kws) From b554084a89b9c8173e6467588d68fd998695eaf1 Mon Sep 17 00:00:00 2001 From: "Wang, Mingjie1" Date: Tue, 14 Feb 2023 12:16:12 -0600 Subject: [PATCH 09/15] Added dpnp.empty() tests. Added runtime tests. --- numba_dpex/tests/core/__init__.py | 6 ++++ .../core/runtime/test_llvm_registration.py | 32 +++++++++++++++++ .../runtime/test_nrt_python.py | 0 numba_dpex/tests/core/runtime/test_runtime.py | 19 ++++++++++ .../runtime/test_usm_allocators_ext.py | 0 numba_dpex/tests/core/test_dpjit_target.py | 32 +++++++++++++++++ numba_dpex/tests/dpjit_tests/__init__.py | 6 ++++ .../dpjit_tests/dpnp/test_dpnp_empty_dpjit.py | 36 +++++++++++++++++++ .../dpnp_ndarray/test_dpnp_empty.py | 28 --------------- .../tests/test_usm_ndarray_type_exceptions.py | 31 ++++++++++++++++ 10 files changed, 162 insertions(+), 28 deletions(-) create mode 100644 numba_dpex/tests/core/__init__.py create mode 100644 numba_dpex/tests/core/runtime/test_llvm_registration.py rename numba_dpex/tests/{njit_tests/dpnp_ndarray => core}/runtime/test_nrt_python.py (100%) create mode 100644 numba_dpex/tests/core/runtime/test_runtime.py rename numba_dpex/tests/{njit_tests/dpnp_ndarray => core}/runtime/test_usm_allocators_ext.py (100%) create mode 100644 numba_dpex/tests/core/test_dpjit_target.py create mode 100644 numba_dpex/tests/dpjit_tests/__init__.py create mode 100644 numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_empty_dpjit.py delete mode 100644 numba_dpex/tests/njit_tests/dpnp_ndarray/test_dpnp_empty.py create mode 100644 numba_dpex/tests/test_usm_ndarray_type_exceptions.py diff --git a/numba_dpex/tests/core/__init__.py b/numba_dpex/tests/core/__init__.py new file mode 100644 index 0000000000..af3ef6a951 --- /dev/null +++ b/numba_dpex/tests/core/__init__.py @@ -0,0 +1,6 @@ +# SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +from .._helper import * +from . import * diff --git a/numba_dpex/tests/core/runtime/test_llvm_registration.py b/numba_dpex/tests/core/runtime/test_llvm_registration.py new file mode 100644 index 0000000000..e5cd8769e9 --- /dev/null +++ b/numba_dpex/tests/core/runtime/test_llvm_registration.py @@ -0,0 +1,32 @@ +# SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + + +import llvmlite.binding as llb + +from numba_dpex.core import runtime + + +def test_llvm_symbol_registered(): + """ "Register the helper function in _dpexrt_python so that we can insert calls to them via llvmlite. + + 1. DPEXRT_sycl_usm_ndarray_from_python + + 2.DPEXRT_sycl_usm_ndarray_to_python_acqref + + """ + assert ( + llb.address_of_symbol("DPEXRT_sycl_usm_ndarray_from_python") + == runtime._dpexrt_python.DPEXRT_sycl_usm_ndarray_from_python + ) + + assert ( + llb.address_of_symbol("DPEXRT_sycl_usm_ndarray_to_python_acqref") + == runtime._dpexrt_python.DPEXRT_sycl_usm_ndarray_to_python_acqref + ) + + assert ( + llb.address_of_symbol("NRT_ExternalAllocator_new_for_usm") + == runtime._dpexrt_python.NRT_ExternalAllocator_new_for_usm + ) diff --git a/numba_dpex/tests/njit_tests/dpnp_ndarray/runtime/test_nrt_python.py b/numba_dpex/tests/core/runtime/test_nrt_python.py similarity index 100% rename from numba_dpex/tests/njit_tests/dpnp_ndarray/runtime/test_nrt_python.py rename to numba_dpex/tests/core/runtime/test_nrt_python.py diff --git a/numba_dpex/tests/core/runtime/test_runtime.py b/numba_dpex/tests/core/runtime/test_runtime.py new file mode 100644 index 0000000000..276355f75b --- /dev/null +++ b/numba_dpex/tests/core/runtime/test_runtime.py @@ -0,0 +1,19 @@ +# SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + + +from numba_dpex.core.runtime import _dpexrt_python + + +def test_pointers_exposed(): + """This test is to check attributts in _dpexrt_python.""" + + def exposed(function_name): + assert hasattr(_dpexrt_python, function_name) + assert isinstance(getattr(_dpexrt_python, function_name), int) + + exposed("DPEXRT_sycl_usm_ndarray_from_python") + exposed("DPEXRT_sycl_usm_ndarray_to_python_acqref") + exposed("DPEXRT_MemInfo_alloc") + exposed("NRT_ExternalAllocator_new_for_usm") diff --git a/numba_dpex/tests/njit_tests/dpnp_ndarray/runtime/test_usm_allocators_ext.py b/numba_dpex/tests/core/runtime/test_usm_allocators_ext.py similarity index 100% rename from numba_dpex/tests/njit_tests/dpnp_ndarray/runtime/test_usm_allocators_ext.py rename to numba_dpex/tests/core/runtime/test_usm_allocators_ext.py diff --git a/numba_dpex/tests/core/test_dpjit_target.py b/numba_dpex/tests/core/test_dpjit_target.py new file mode 100644 index 0000000000..fdb030d8ad --- /dev/null +++ b/numba_dpex/tests/core/test_dpjit_target.py @@ -0,0 +1,32 @@ +# Copyright 2020 - 2022 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for class DpexTargetContext.""" + + +import pytest +from numba.core import typing +from numba.core.codegen import JITCPUCodegen + +from numba_dpex.core.targets.dpjit_target import DpexTargetContext + +ctx = typing.Context() +dpexctx = DpexTargetContext(ctx) + + +def test_dpjit_target(): + assert dpexctx.lower_extensions == {} + assert dpexctx.is32bit is False + assert dpexctx.dpexrt is not None + assert ( + isinstance(dpexctx._internal_codegen, type(JITCPUCodegen("numba.exec"))) + == 1 + ) + + +def test_dpjit_target_refresh(): + try: + dpexctx.refresh + except KeyError: + pytest.fail("Unexpected KeyError in dpjit_target.") diff --git a/numba_dpex/tests/dpjit_tests/__init__.py b/numba_dpex/tests/dpjit_tests/__init__.py new file mode 100644 index 0000000000..af3ef6a951 --- /dev/null +++ b/numba_dpex/tests/dpjit_tests/__init__.py @@ -0,0 +1,6 @@ +# SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +from .._helper import * +from . import * diff --git a/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_empty_dpjit.py b/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_empty_dpjit.py new file mode 100644 index 0000000000..ab83fe8b16 --- /dev/null +++ b/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_empty_dpjit.py @@ -0,0 +1,36 @@ +# SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for dpnp ndarray constructors.""" + +import dpnp +import pytest + +from numba_dpex import dpjit + +shapes = [10, (2, 5)] +dtypes = [dpnp.int32, dpnp.int64, dpnp.float32, dpnp.float64] +usm_types = ["device", "shared", "host"] +devices = ["cpu", "unknown"] + + +@pytest.mark.parametrize("shape", shapes) +@pytest.mark.parametrize("dtype", dtypes) +@pytest.mark.parametrize("usm_type", usm_types) +@pytest.mark.parametrize("device", devices) +def test_dpnp_empty(shape, dtype, usm_type, device): + @dpjit + def func(shape): + dpnp.empty(shape=shape, dtype=dtype, usm_type=usm_type, device=device) + + @dpjit + def func1(shape): + c = dpnp.empty( + shape=shape, dtype=dtype, usm_type=usm_type, device=device + ) + return c + + func(shape) + + func1(shape) diff --git a/numba_dpex/tests/njit_tests/dpnp_ndarray/test_dpnp_empty.py b/numba_dpex/tests/njit_tests/dpnp_ndarray/test_dpnp_empty.py deleted file mode 100644 index a5f918e535..0000000000 --- a/numba_dpex/tests/njit_tests/dpnp_ndarray/test_dpnp_empty.py +++ /dev/null @@ -1,28 +0,0 @@ -# SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for dpnp ndarray constructors.""" - -import dpnp -import pytest -from numba import njit - -shapes = [10, (2, 5)] -dtypes = ["f8", dpnp.float32] -usm_types = ["device", "shared", "host"] - - -@pytest.mark.skip(reason="Disabling old dpnp.empty tests") -@pytest.mark.parametrize("shape", shapes) -@pytest.mark.parametrize("dtype", dtypes) -@pytest.mark.parametrize("usm_type", usm_types) -def test_dpnp_empty(shape, dtype, usm_type): - from numba_dpex.dpctl_iface import get_current_queue - - @njit - def func(shape): - queue = get_current_queue() - dpnp.empty(shape, dtype, usm_type, queue) - - func(shape) diff --git a/numba_dpex/tests/test_usm_ndarray_type_exceptions.py b/numba_dpex/tests/test_usm_ndarray_type_exceptions.py new file mode 100644 index 0000000000..7c1a446199 --- /dev/null +++ b/numba_dpex/tests/test_usm_ndarray_type_exceptions.py @@ -0,0 +1,31 @@ +# SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +"""This is to test USMNdArray raising correct exceptions.""" + +import dpnp +import pytest +from numba import njit +from numba.core.errors import TypingError + +from numba_dpex.dpctl_iface import get_current_queue + +arguments = [ + ("shape=10", 'device="cpu"', "queue=a.sycl_queue"), + ("shape=10", "device=10", "queue=a.sycl_queue"), + ("shape=10", 'device="cpu"', "queue=test"), + ("shape=10", 'device="dpu"'), +] + + +@pytest.mark.parametrize("argument", arguments) +def test_usm_ndarray_type_exceptions(argument): + a = dpnp.ndarray(10) + + @njit + def func(a): + dpnp.empty(argument) + + with pytest.raises(TypingError): + func(a) From f98ed68aa238f2615a23b825c224c7875617ee2a Mon Sep 17 00:00:00 2001 From: "Wang, Mingjie1" Date: Wed, 15 Feb 2023 13:39:50 -0600 Subject: [PATCH 10/15] Added smoke test in workflow for Windows test. --- .github/workflows/conda-package.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index bd55a54146..455e27fada 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -250,6 +250,8 @@ jobs: - name: Add dpnp skip variable run: echo "NUMBA_DPEX_TESTING_SKIP_NO_DPNP=1" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append + - name: Smoke test + run: python -c "import numba_dpex.core.runtime._dpexrt_python" - name: Run tests run: | python -m pytest -q -ra --disable-warnings --pyargs ${{ env.MODULE_NAME }} -vv From 87f86ad814291993f4cfaa7a1efa9cacc3955886 Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Wed, 15 Feb 2023 10:35:20 -0600 Subject: [PATCH 11/15] Add _dpexrt_python to setuptool packages. --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a1ea4a4924..44560eb640 100644 --- a/setup.py +++ b/setup.py @@ -177,7 +177,9 @@ def _llvm_spirv(): return result -packages = find_packages(include=["numba_dpex", "numba_dpex.*"]) +packages = find_packages( + include=["numba_dpex", "numba_dpex.*", "_dpexrt_python"] +) build_requires = ["cython"] install_requires = [ "numba >={}".format("0.56"), From 6162d362468a92bf930a7714bc344aab100747fd Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Wed, 15 Feb 2023 11:58:43 -0600 Subject: [PATCH 12/15] Polish up _dpexrt_python.c - Set PyErr_SetString before returning NULL from DPEXRT_sycl_usm_ndarray_to_python_acqref. - Add few extra NULL checks. - Remove debug prints. - Add NRT_debug wrappers to debug prints. --- numba_dpex/core/runtime/_dpexrt_python.c | 240 ++++++++++++----------- 1 file changed, 122 insertions(+), 118 deletions(-) diff --git a/numba_dpex/core/runtime/_dpexrt_python.c b/numba_dpex/core/runtime/_dpexrt_python.c index 0f20cb031d..813e3f4425 100644 --- a/numba_dpex/core/runtime/_dpexrt_python.c +++ b/numba_dpex/core/runtime/_dpexrt_python.c @@ -128,13 +128,13 @@ NRT_ExternalAllocator_new_for_usm(DPCTLSyclQueueRef qref, size_t usm_type) allocator = (NRT_ExternalAllocator *)malloc(sizeof(NRT_ExternalAllocator)); if (allocator == NULL) { - nrt_debug_print("DPEXRT-ERROR: failed to allocate memory for " - "NRT_ExternalAllocator at %s, line %d.\n", - __FILE__, __LINE__); + NRT_Debug(nrt_debug_print("DPEXRT-ERROR: failed to allocate memory for " + "NRT_ExternalAllocator at %s, line %d.\n", + __FILE__, __LINE__)); goto error; } - nrt_debug_print("DPEXRT-DEBUG: usm type = %d at %s, line %d.\n", usm_type, - __FILE__, __LINE__); + NRT_Debug(nrt_debug_print("DPEXRT-DEBUG: usm type = %d at %s, line %d.\n", + usm_type, __FILE__, __LINE__)); switch (usm_type) { case 1: @@ -147,9 +147,9 @@ NRT_ExternalAllocator_new_for_usm(DPCTLSyclQueueRef qref, size_t usm_type) allocator->malloc = usm_host_malloc; break; default: - nrt_debug_print("DPEXRT-ERROR: Encountered an unknown usm " - "allocation type (%d) at %s, line %d\n", - usm_type, __FILE__, __LINE__); + NRT_Debug(nrt_debug_print("DPEXRT-ERROR: Encountered an unknown usm " + "allocation type (%d) at %s, line %d\n", + usm_type, __FILE__, __LINE__)); goto error; } @@ -193,10 +193,10 @@ static void usmndarray_meminfo_dtor(void *ptr, size_t size, void *info) // Sanity-check to make sure the mi_dtor_info is an actual pointer. if (!(mi_dtor_info = (MemInfoDtorInfo *)info)) { - nrt_debug_print( + NRT_Debug(nrt_debug_print( "DPEXRT-ERROR: MemInfoDtorInfo object might be corrupted. Aborting " "MemInfo destruction at %s, line %d\n", - __FILE__, __LINE__); + __FILE__, __LINE__)); return; } @@ -248,9 +248,9 @@ static MemInfoDtorInfo *MemInfoDtorInfo_new(NRT_MemInfo *mi, PyObject *owner) MemInfoDtorInfo *mi_dtor_info = NULL; if (!(mi_dtor_info = (MemInfoDtorInfo *)malloc(sizeof(MemInfoDtorInfo)))) { - nrt_debug_print("DPEXRT-ERROR: Could not allocate a new " - "MemInfoDtorInfo object at %s, line %d\n", - __FILE__, __LINE__); + NRT_Debug(nrt_debug_print("DPEXRT-ERROR: Could not allocate a new " + "MemInfoDtorInfo object at %s, line %d\n", + __FILE__, __LINE__)); return NULL; } mi_dtor_info->mi = mi; @@ -283,16 +283,18 @@ static NRT_MemInfo *NRT_MemInfo_new_from_usmndarray(PyObject *ndarrobj, // Allocate a new NRT_MemInfo object if (!(mi = (NRT_MemInfo *)malloc(sizeof(NRT_MemInfo)))) { - nrt_debug_print("DPEXRT-ERROR: Could not allocate a new NRT_MemInfo " - "object at %s, line %d\n", - __FILE__, __LINE__); + NRT_Debug(nrt_debug_print( + "DPEXRT-ERROR: Could not allocate a new NRT_MemInfo " + "object at %s, line %d\n", + __FILE__, __LINE__)); goto error; } if (!(cref = DPCTLQueue_GetContext(qref))) { - nrt_debug_print("DPEXRT-ERROR: Could not get the DPCTLSyclContext from " - "the queue object at %s, line %d\n", - __FILE__, __LINE__); + NRT_Debug(nrt_debug_print( + "DPEXRT-ERROR: Could not get the DPCTLSyclContext from " + "the queue object at %s, line %d\n", + __FILE__, __LINE__)); goto error; } @@ -301,17 +303,18 @@ static NRT_MemInfo *NRT_MemInfo_new_from_usmndarray(PyObject *ndarrobj, // Allocate a new NRT_ExternalAllocator if (!(ext_alloca = NRT_ExternalAllocator_new_for_usm(qref, usm_type))) { - nrt_debug_print("DPEXRT-ERROR: Could not allocate a new " - "NRT_ExternalAllocator object at %s, line %d\n", - __FILE__, __LINE__); + NRT_Debug( + nrt_debug_print("DPEXRT-ERROR: Could not allocate a new " + "NRT_ExternalAllocator object at %s, line %d\n", + __FILE__, __LINE__)); goto error; } // Allocate a new MemInfoDtorInfo if (!(midtor_info = MemInfoDtorInfo_new(mi, ndarrobj))) { - nrt_debug_print("DPEXRT-ERROR: Could not allocate a new " - "MemInfoDtorInfo object at %s, line %d\n", - __FILE__, __LINE__); + NRT_Debug(nrt_debug_print("DPEXRT-ERROR: Could not allocate a new " + "MemInfoDtorInfo object at %s, line %d\n", + __FILE__, __LINE__)); goto error; } @@ -323,17 +326,17 @@ static NRT_MemInfo *NRT_MemInfo_new_from_usmndarray(PyObject *ndarrobj, mi->size = nitems * itemsize; mi->external_allocator = ext_alloca; - nrt_debug_print( + NRT_Debug(nrt_debug_print( "DPEXRT-DEBUG: NRT_MemInfo_init mi=%p external_allocator=%p\n", mi, - ext_alloca); + ext_alloca)); return mi; error: - nrt_debug_print( + NRT_Debug(nrt_debug_print( "DPEXRT-ERROR: Failed inside NRT_MemInfo_new_from_usmndarray clean up " "and return NULL at %s, line %d\n", - __FILE__, __LINE__); + __FILE__, __LINE__)); free(mi); free(ext_alloca); return NULL; @@ -360,20 +363,21 @@ DPEXRT_MemInfo_alloc(npy_intp size, size_t usm_type, const char *device) DPCTLSyclDeviceRef dref = NULL; DPCTLSyclQueueRef qref = NULL; - nrt_debug_print("DPEXRT-DEBUG: Inside DPEXRT_MemInfo_alloc %s, line %d\n", - __FILE__, __LINE__); + NRT_Debug(nrt_debug_print( + "DPEXRT-DEBUG: Inside DPEXRT_MemInfo_alloc %s, line %d\n", __FILE__, + __LINE__)); // Allocate a new NRT_MemInfo object if (!(mi = (NRT_MemInfo *)malloc(sizeof(NRT_MemInfo)))) { - nrt_debug_print( - "DPEXRT-ERROR: Could not allocate a new NRT_MemInfo object.\n"); + NRT_Debug(nrt_debug_print( + "DPEXRT-ERROR: Could not allocate a new NRT_MemInfo object.\n")); goto error; } if (!(dselector = DPCTLFilterSelector_Create(device))) { - nrt_debug_print( + NRT_Debug(nrt_debug_print( "DPEXRT-ERROR: Could not create a sycl::device_selector from " "filter string: %s at %s %d.\n", - device, __FILE__, __LINE__); + device, __FILE__, __LINE__)); goto error; } @@ -403,10 +407,10 @@ DPEXRT_MemInfo_alloc(npy_intp size, size_t usm_type, const char *device) mi->size = size; mi->external_allocator = ext_alloca; - nrt_debug_print( + NRT_Debug(nrt_debug_print( "DPEXRT-DEBUG: DPEXRT_MemInfo_alloc mi=%p " "external_allocator=%p for usm_type %zu on device %s, %s at %d\n", - mi, ext_alloca, usm_type, device, __FILE__, __LINE__); + mi, ext_alloca, usm_type, device, __FILE__, __LINE__)); return mi; @@ -494,20 +498,22 @@ static int DPEXRT_sycl_usm_ndarray_from_python(PyObject *obj, // collecting the array. Py_IncRef(obj); - nrt_debug_print("DPEXRT-DEBUG: In DPEXRT_sycl_usm_ndarray_from_python.\n"); + NRT_Debug(nrt_debug_print( + "DPEXRT-DEBUG: In DPEXRT_sycl_usm_ndarray_from_python.\n")); // Check if the PyObject obj has an _array_obj attribute that is of // dpctl.tensor.usm_ndarray type. if (!(arrayobj = PyUSMNdArray_ARRAYOBJ(obj))) { - nrt_debug_print("DPEXRT-ERROR: PyUSMNdArray_ARRAYOBJ check failed %d\n", - __FILE__, __LINE__); + NRT_Debug(nrt_debug_print( + "DPEXRT-ERROR: PyUSMNdArray_ARRAYOBJ check failed %d\n", __FILE__, + __LINE__)); goto error; } if (!(ndim = UsmNDArray_GetNDim(arrayobj))) { - nrt_debug_print( + NRT_Debug(nrt_debug_print( "DPEXRT-ERROR: UsmNDArray_GetNDim returned 0 at %s, line %d\n", - __FILE__, __LINE__); + __FILE__, __LINE__)); goto error; } shape = UsmNDArray_GetShape(arrayobj); @@ -516,21 +522,20 @@ static int DPEXRT_sycl_usm_ndarray_from_python(PyObject *obj, nitems = product_of_shape(shape, ndim); itemsize = (npy_intp)UsmNDArray_GetElementSize(arrayobj); if (!(qref = UsmNDArray_GetQueueRef(arrayobj))) { - nrt_debug_print("DPEXRT-ERROR: UsmNDArray_GetQueueRef returned NULL at " - "%s, line %d.\n", - __FILE__, __LINE__); + NRT_Debug(nrt_debug_print( + "DPEXRT-ERROR: UsmNDArray_GetQueueRef returned NULL at " + "%s, line %d.\n", + __FILE__, __LINE__)); goto error; } - else { - nrt_debug_print("qref addr : %p\n", qref); - } if (!(arystruct->meminfo = NRT_MemInfo_new_from_usmndarray( obj, data, nitems, itemsize, qref))) { - nrt_debug_print("DPEXRT-ERROR: NRT_MemInfo_new_from_usmndarray failed " - "at %s, line %d.\n", - __FILE__, __LINE__); + NRT_Debug(nrt_debug_print( + "DPEXRT-ERROR: NRT_MemInfo_new_from_usmndarray failed " + "at %s, line %d.\n", + __FILE__, __LINE__)); goto error; } @@ -559,16 +564,6 @@ static int DPEXRT_sycl_usm_ndarray_from_python(PyObject *obj, *p = 1; } - // --- DEBUG - nrt_debug_print("DPEXRT-DEBUG: Assigned shape_and_strides at %s, line %d\n", - __FILE__, __LINE__); - p = arystruct->shape_and_strides; - for (i = 0; i < ndim * 2; ++i, ++p) { - nrt_debug_print("DPEXRT-DEBUG: arraystruct->p[%d] = %d, ", i, *p); - } - nrt_debug_print("\n"); - // -- DEBUG - return 0; error: @@ -576,9 +571,10 @@ static int DPEXRT_sycl_usm_ndarray_from_python(PyObject *obj, // code of -1. // Decref the Pyobject of the array // ensure the GIL - nrt_debug_print("DPEXRT-ERROR: Failed to unbox dpnp ndarray into a Numba " - "arraystruct at %s, line %d\n", - __FILE__, __LINE__); + NRT_Debug(nrt_debug_print( + "DPEXRT-ERROR: Failed to unbox dpnp ndarray into a Numba " + "arraystruct at %s, line %d\n", + __FILE__, __LINE__)); gstate = PyGILState_Ensure(); // decref the python object Py_DECREF(obj); @@ -608,7 +604,7 @@ static PyObject *box_from_arystruct_parent(arystruct_t *arystruct, PyObject *array = arystruct->parent; struct PyUSMArrayObject *arrayobj = NULL; - nrt_debug_print("DPEXRT-DEBUG: In try_to_return_parent.\n"); + NRT_Debug(nrt_debug_print("DPEXRT-DEBUG: In try_to_return_parent.\n")); if (!(arrayobj = PyUSMNdArray_ARRAYOBJ(arystruct->parent))) return NULL; @@ -647,8 +643,8 @@ static PyObject *box_from_arystruct_parent(arystruct_t *arystruct, // parent, we need to increment the reference count of the parent here. Py_IncRef(array); - nrt_debug_print( - "DPEXRT-DEBUG: try_to_return_parent found a valid parent.\n"); + NRT_Debug(nrt_debug_print( + "DPEXRT-DEBUG: try_to_return_parent found a valid parent.\n")); /* Yes, it is the same array return a new reference */ return array; @@ -683,29 +679,31 @@ DPEXRT_sycl_usm_ndarray_to_python_acqref(arystruct_t *arystruct, MemInfoObject *miobj = NULL; npy_intp *shape = NULL, *strides = NULL; int typenum = 0; + int status = 0; - nrt_debug_print( - "DPEXRT-DEBUG: In DPEXRT_sycl_usm_ndarray_to_python_acqref.\n"); + NRT_Debug(nrt_debug_print( + "DPEXRT-DEBUG: In DPEXRT_sycl_usm_ndarray_to_python_acqref.\n")); if (descr == NULL) { PyErr_Format( PyExc_RuntimeError, "In 'DPEXRT_sycl_usm_ndarray_to_python_acqref', 'descr' is NULL"); - return NULL; + return MOD_ERROR_VAL; } if (!NUMBA_PyArray_DescrCheck(descr)) { PyErr_Format(PyExc_TypeError, "expected dtype object, got '%.200s'", Py_TYPE(descr)->tp_name); - return NULL; + return MOD_ERROR_VAL; } // If the arystruct has a parent attribute, try to box the parent and // return it. if (arystruct->parent) { - nrt_debug_print("DPEXRT-DEBUG: arystruct has a parent, therefore " - "trying to box and return the parent at %s, line %d\n", - __FILE__, __LINE__); + NRT_Debug(nrt_debug_print( + "DPEXRT-DEBUG: arystruct has a parent, therefore " + "trying to box and return the parent at %s, line %d\n", + __FILE__, __LINE__)); PyObject *obj = box_from_arystruct_parent(arystruct, ndim, descr); if (obj) { @@ -724,33 +722,35 @@ DPEXRT_sycl_usm_ndarray_to_python_acqref(arystruct_t *arystruct, // Python manage the lifetime of the memory. if (arystruct->meminfo) { // wrap into MemInfoObject - miobj = PyObject_New(MemInfoObject, &MemInfoType); + if (!(miobj = PyObject_New(MemInfoObject, &MemInfoType))) { + PyErr_Format(PyExc_ValueError, + "In 'DPEXRT_sycl_usm_ndarray_to_python_acqref', " + "failed to create a new MemInfoObject object."); + return MOD_ERROR_VAL; + }; args = PyTuple_New(1); // PyTuple_SET_ITEM steals reference PyTuple_SET_ITEM(args, 0, PyLong_FromVoidPtr(arystruct->meminfo)); - NRT_Debug(nrt_debug_print( - "NRT_adapt_ndarray_to_python arystruct->meminfo=%p\n", - arystruct->meminfo)); - - NRT_Debug(nrt_debug_print( - "NRT_adapt_ndarray_to_python_acqref created MemInfo=%p\n", miobj)); - // Note: MemInfo_init() does not incref. The function steals the // NRT reference, which we need to acquire. // Increase the refcount of the NRT_MemInfo object, i.e., mi->refct++ NRT_MemInfo_acquire(arystruct->meminfo); - - if (MemInfo_init(miobj, args, NULL)) { - NRT_Debug(nrt_debug_print("MemInfo_init failed.\n")); - return NULL; + status = MemInfo_init(miobj, args, NULL); + if (status != 0) { + NRT_Debug(nrt_debug_print("MemInfo_init failed at %s, line %d\n", + __FILE__, __LINE__)); + Py_DECREF(args); + PyErr_Format(PyExc_ValueError, + "In 'DPEXRT_sycl_usm_ndarray_to_python_acqref', " + "failed to init MemInfoObject object."); + return MOD_ERROR_VAL; } Py_DECREF(args); } shape = arystruct->shape_and_strides; strides = shape + ndim; - typenum = descr->type_num; usm_ndarr_obj = UsmNDArray_MakeFromPtr( ndim, shape, typenum, strides, (DPCTLSyclUSMRef)arystruct->data, @@ -760,12 +760,18 @@ DPEXRT_sycl_usm_ndarray_to_python_acqref(arystruct_t *arystruct, if (usm_ndarr_obj == NULL || !PyObject_TypeCheck(usm_ndarr_obj, &PyUSMArrayType)) { - return NULL; + PyErr_Format(PyExc_ValueError, + "In 'DPEXRT_sycl_usm_ndarray_to_python_acqref', " + "failed to create a new dpctl.tensor.usm_ndarray object."); + return MOD_ERROR_VAL; } // call new on dpnp_array dpnp_array_mod = PyImport_ImportModule("dpnp.dpnp_array"); if (!dpnp_array_mod) { + PyErr_Format(PyExc_ValueError, + "In 'DPEXRT_sycl_usm_ndarray_to_python_acqref', " + "failed to load the dpnp.dpnp_array module."); return MOD_ERROR_VAL; } dpnp_array_type = PyObject_GetAttrString(dpnp_array_mod, "dpnp_array"); @@ -773,6 +779,9 @@ DPEXRT_sycl_usm_ndarray_to_python_acqref(arystruct_t *arystruct, if (!PyType_Check(dpnp_array_type)) { Py_DECREF(dpnp_array_mod); Py_XDECREF(dpnp_array_type); + PyErr_Format(PyExc_ValueError, + "In 'DPEXRT_sycl_usm_ndarray_to_python_acqref', " + "failed to crate dpnp.dpnp_array PyTypeObject."); return MOD_ERROR_VAL; } @@ -780,35 +789,32 @@ DPEXRT_sycl_usm_ndarray_to_python_acqref(arystruct_t *arystruct, dpnp_array_type_obj = (PyTypeObject *)(dpnp_array_type); - dpnp_ary = (PyObject *)dpnp_array_type_obj->tp_new( - dpnp_array_type_obj, PyTuple_New(0), PyDict_New()); + if (!(dpnp_ary = (PyObject *)dpnp_array_type_obj->tp_new( + dpnp_array_type_obj, PyTuple_New(0), PyDict_New()))) + { + PyErr_SetString(PyExc_ValueError, + "In 'DPEXRT_sycl_usm_ndarray_to_python_acqref', " + "creating a dpnp.ndarray object from " + "a dpctl.tensor.usm_ndarray failed."); + return MOD_ERROR_VAL; + }; - if (dpnp_ary == NULL) { - nrt_debug_print("dpnp_ary==NULL \n"); - } - else { - nrt_debug_print("dpnp_ary=%p \n", dpnp_ary); - } - int status = PyObject_SetAttrString((PyObject *)dpnp_ary, "_array_obj", - usm_ndarr_obj); - nrt_debug_print("returning from status \n"); + status = PyObject_SetAttrString((PyObject *)dpnp_ary, "_array_obj", + usm_ndarr_obj); if (status == -1) { - nrt_debug_print("returning from status ==NULL \n"); Py_DECREF(dpnp_array_type_obj); - PyErr_SetString(PyExc_TypeError, "Oh no!"); + PyErr_SetString(PyExc_TypeError, + "In 'DPEXRT_sycl_usm_ndarray_to_python_acqref', " + "could not extract '_array_obj' attribute from " + "dpnp.ndarray object."); return (PyObject *)NULL; } - nrt_debug_print( - "returning from DPEXRT_sycl_usm_ndarray_to_python_acqref 1 \n"); - if (dpnp_ary == NULL) { - nrt_debug_print( - "returning from DPEXRT_sycl_usm_ndarray_to_python_acqref 2\n"); - return NULL; - } + NRT_Debug(nrt_debug_print( + "Returning from DPEXRT_sycl_usm_ndarray_to_python_acqref " + "at %s, line %d\n", + __FILE__, __LINE__)); - nrt_debug_print( - "returning from DPEXRT_sycl_usm_ndarray_to_python_acqref\n"); return (PyObject *)dpnp_ary; } @@ -853,7 +859,10 @@ static PyObject *build_c_helpers_dict(void) MOD_INIT(_dpexrt_python) { - PyObject *m; + PyObject *m = NULL; + PyObject *dpnp_array_type = NULL; + PyObject *dpnp_array_mod = NULL; + MOD_DEF(m, "_dpexrt_python", "No docs", NULL) if (m == NULL) return MOD_ERROR_VAL; @@ -861,14 +870,12 @@ MOD_INIT(_dpexrt_python) import_array(); import_dpctl(); - PyObject *dpnp_array_mod = PyImport_ImportModule("dpnp.dpnp_array"); - + dpnp_array_mod = PyImport_ImportModule("dpnp.dpnp_array"); if (!dpnp_array_mod) { Py_DECREF(m); return MOD_ERROR_VAL; } - PyObject *dpnp_array_type = - PyObject_GetAttrString(dpnp_array_mod, "dpnp_array"); + dpnp_array_type = PyObject_GetAttrString(dpnp_array_mod, "dpnp_array"); if (!PyType_Check(dpnp_array_type)) { Py_DECREF(m); Py_DECREF(dpnp_array_mod); @@ -876,10 +883,7 @@ MOD_INIT(_dpexrt_python) return MOD_ERROR_VAL; } PyModule_AddObject(m, "dpnp_array_type", dpnp_array_type); - Py_DECREF(dpnp_array_mod); - static PyTypeObject *dpnp_array_type_obj; - dpnp_array_type_obj = (PyTypeObject *)(dpnp_array_type); PyModule_AddObject(m, "NRT_ExternalAllocator_new_for_usm", PyLong_FromVoidPtr(&NRT_ExternalAllocator_new_for_usm)); From fbd769d33cedcddc65e971a0c5c2873355a6728b Mon Sep 17 00:00:00 2001 From: "Wang, Mingjie1" Date: Wed, 15 Feb 2023 16:47:17 -0600 Subject: [PATCH 13/15] Added _stdatomic.h for Windows build. --- numba_dpex/core/runtime/_nrt_helper.c | 9 ++++++- numba_dpex/core/runtime/_stdatomic.h | 34 +++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 numba_dpex/core/runtime/_stdatomic.h diff --git a/numba_dpex/core/runtime/_nrt_helper.c b/numba_dpex/core/runtime/_nrt_helper.c index 576d704a4f..4a29eb2fbe 100644 --- a/numba_dpex/core/runtime/_nrt_helper.c +++ b/numba_dpex/core/runtime/_nrt_helper.c @@ -4,8 +4,15 @@ #include "_nrt_helper.h" #include -#include + #include + +#ifdef _MSC_VER +#include "_stdatomic.h" +#else +#include +#endif + /* * Global resources. */ diff --git a/numba_dpex/core/runtime/_stdatomic.h b/numba_dpex/core/runtime/_stdatomic.h new file mode 100644 index 0000000000..87c752e713 --- /dev/null +++ b/numba_dpex/core/runtime/_stdatomic.h @@ -0,0 +1,34 @@ +// SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +//===----------------------------------------------------------------------===// +/// +/// \file +/// Define a small subset of stdatomic.h to compile on Windows. +/// +//===----------------------------------------------------------------------===// + +#ifndef COMPAT_ATOMICS_WIN32_STDATOMIC_H +#define COMPAT_ATOMICS_WIN32_STDATOMIC_H + +#define WIN32_LEAN_AND_MEAN +#include +#include +#include + +#define ATOMIC_FLAG_INIT 0 + +#define ATOMIC_VAR_INIT(value) (value) + +typedef intptr_t atomic_size_t; + +#ifdef _WIN64 +#define atomic_fetch_add(object, operand) \ + InterlockedExchangeAdd64(object, operand) +#endif /* _WIN64 */ + +#define atomic_fetch_add_explicit(object, operand, order) \ + atomic_fetch_add(object, operand) + +#endif /* COMPAT_ATOMICS_WIN32_STDATOMIC_H */ From 0c07bf71f9b9e96f7cc9f2866eedc5aa4f3ebffa Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Thu, 16 Feb 2023 00:44:06 -0600 Subject: [PATCH 14/15] Update actions used in conda-package workflow. --- .github/workflows/conda-package.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 455e27fada..0091ab7dac 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -36,7 +36,7 @@ jobs: run: ./scripts/build_conda_package.sh ${{ matrix.python }} - name: Upload artifact - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} path: /usr/share/miniconda/conda-bld/linux-64/${{ env.PACKAGE_NAME }}-*.tar.bz2 @@ -64,7 +64,7 @@ jobs: activate-environment: "" - name: Cache conda packages - uses: actions/cache@v2 + uses: actions/cache@v3 env: CACHE_NUMBER: 0 # Increase to reset cache with: @@ -79,7 +79,7 @@ jobs: - name: Build conda package run: conda build --no-test --python ${{ matrix.python }} ${{ env.CHANNELS }} conda-recipe - name: Upload artifact - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} ${{ matrix.artifact_name }} path: ${{ env.conda-bld }}${{ env.PACKAGE_NAME }}-*.tar.bz2 @@ -96,7 +96,7 @@ jobs: steps: - name: Download artifact - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} @@ -195,7 +195,7 @@ jobs: - name: Collect dependencies run: conda install ${{ env.PACKAGE_NAME }} python=${{ matrix.python }} -c $env:GITHUB_WORKSPACE/channel ${{ env.CHANNELS }} --only-deps --dry-run > lockfile - name: Cache conda packages - uses: actions/cache@v2 + uses: actions/cache@v3 env: CACHE_NUMBER: 1 # Increase to reset cache with: @@ -265,7 +265,7 @@ jobs: python: ["3.8", "3.9", "3.10"] steps: - name: Download artifact - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} @@ -293,7 +293,7 @@ jobs: python: ["3.8", "3.9", "3.10"] steps: - name: Download artifact - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} From 70e93bc5967abbacc009cd6611e63d595c14c114 Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Thu, 16 Feb 2023 01:06:54 -0600 Subject: [PATCH 15/15] Convert non-None dtypes to Numba dtype in side dpnp.empty. --- numba_dpex/dpnp_iface/arrayobj.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/numba_dpex/dpnp_iface/arrayobj.py b/numba_dpex/dpnp_iface/arrayobj.py index 25835c6cad..7d583b61ed 100644 --- a/numba_dpex/dpnp_iface/arrayobj.py +++ b/numba_dpex/dpnp_iface/arrayobj.py @@ -8,6 +8,7 @@ from numba import errors, types from numba.core import cgutils from numba.core.typing import signature +from numba.core.typing.npydecl import parse_dtype as ty_parse_dtype from numba.core.typing.npydecl import parse_shape from numba.extending import intrinsic, overload, overload_classmethod from numba.np.arrayobj import ( @@ -16,6 +17,7 @@ make_array, populate_array, ) +from numba.np.numpy_support import is_nonelike from numba_dpex.core.runtime import context as dpexrt from numba_dpex.core.types import DpnpNdArray @@ -263,6 +265,15 @@ def ol_dpnp_empty( if not ndim: raise errors.TypingError("Could not infer the rank of the ndarray") + # If a dtype value was passed in, then try to convert it to the + # coresponding Numba type. If None was passed, the default, then pass None + # to the DpnpNdArray constructor. The default dtype will be derived based + # on the behavior defined in dpctl.tensor.usm_ndarray. + if not is_nonelike(dtype): + nb_dtype = ty_parse_dtype(dtype) + else: + nb_dtype = None + if usm_type is not None: usm_type = _parse_usm_type(usm_type) else: @@ -275,7 +286,7 @@ def ol_dpnp_empty( if ndim is not None: retty = DpnpNdArray( - dtype=dtype, + dtype=nb_dtype, ndim=ndim, usm_type=usm_type, device=device,