diff --git a/docs/source/conf.py b/docs/source/conf.py index 90ebaa6f5f..f0ef3a41e8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -5,10 +5,14 @@ # coding: utf-8 # Configuration file for the Sphinx documentation builder. -import numba_dpex - # -- Project information ----------------------------------------------------- +import sys + +sys.path.append(".") + +from sycl_spec_links import sycl_ext_links # noqa E402 + project = "numba-dpex" copyright = "2020-2024, Intel Corporation" author = "Intel Corporation" @@ -45,6 +49,8 @@ # This pattern also affects html_static_path and html_extra_path. exclude_patterns = [] +extlinks = {} +extlinks.update(sycl_ext_links) # -- Options for HTML output ------------------------------------------------- @@ -72,14 +78,7 @@ # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = [] -html_sidebars = { - # "**": [ - # "globaltoc.html", - # "sourcelink.html", - # "searchbox.html", - # "relations.html", - # ], -} +html_sidebars = {} html_show_sourcelink = False @@ -88,28 +87,8 @@ todo_link_only = True # -- InterSphinx configuration: looks for objects in external projects ----- -# Add here external classes you want to link from Intel SDC documentation -# Each entry of the dictionary has the following format: -# 'class name': ('link to object.inv file for that class', None) -# intersphinx_mapping = { -# 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), -# 'python': ('http://docs.python.org/2', None), -# 'numpy': ('http://docs.scipy.org/doc/numpy', None) -# } intersphinx_mapping = {} -# -- Napoleon extension configuration (Numpy and Google docstring options) ------- -# napoleon_google_docstring = True -# napoleon_numpy_docstring = True -# napoleon_include_init_with_doc = True -# napoleon_include_private_with_doc = True -# napoleon_include_special_with_doc = True -# napoleon_use_admonition_for_examples = False -# napoleon_use_admonition_for_notes = False -# napoleon_use_admonition_for_references = False -# napoleon_use_ivar = False -# napoleon_use_param = True -# napoleon_use_rtype = True # -- Prepend module name to an object name or not ----------------------------------- add_module_names = False diff --git a/docs/source/sycl_spec_links.py b/docs/source/sycl_spec_links.py new file mode 100644 index 0000000000..770d6d50b7 --- /dev/null +++ b/docs/source/sycl_spec_links.py @@ -0,0 +1,56 @@ +# SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +"""Links to the SYCL 2020 specification that are used in docstring. + +The module provides a dictionary in the format needed by the sphinx.ext.extlinks +extension. +""" + +sycl_ext_links = { + "sycl_item": ( + "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#subsec:item.class%s", + None, + ), + "sycl_group": ( + "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#group-class%s", + None, + ), + "sycl_nditem": ( + "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#subsec:nditem.class%s", + None, + ), + "sycl_ndrange": ( + "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#subsubsec:nd-range-class%s", + None, + ), + "sycl_range": ( + "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#range-class%s", + None, + ), + "sycl_atomic_ref": ( + "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:atomic-references%s", + None, + ), + "sycl_local_accessor": ( + "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:accessor.local%s", + None, + ), + "sycl_private_memory": ( + "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#_parallel_for_hierarchical_invoke%s", + None, + ), + "sycl_memory_scope": ( + "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:memory-scope%s", + None, + ), + "sycl_memory_order": ( + "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:memory-ordering%s", + None, + ), + "sycl_addr_space": ( + "https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#_address_space_classes%s", + None, + ), +} diff --git a/numba_dpex/kernel_api/atomic_fence.py b/numba_dpex/kernel_api/atomic_fence.py index 1c8ed3dabc..855181a0e6 100644 --- a/numba_dpex/kernel_api/atomic_fence.py +++ b/numba_dpex/kernel_api/atomic_fence.py @@ -4,20 +4,25 @@ """Python functions that simulate SYCL's atomic_fence primitives. """ +from .memory_enums import MemoryOrder, MemoryScope -def atomic_fence(memory_order, memory_scope): # pylint: disable=unused-argument - """The function for performing memory fence across all work-items. - Modeled after ``sycl::atomic_fence`` function. - It provides control over re-ordering of memory load - and store operations. The ``atomic_fence`` function acts as a - fence across all work-items and devices specified by a - memory_scope argument. +def atomic_fence( + memory_order: MemoryOrder, memory_scope: MemoryScope +): # pylint: disable=unused-argument + """Performs a memory fence operations across all work-items. - Args: - memory_order: The memory synchronization order. + The function is equivalent to the ``sycl::atomic_fence`` function and + controls the order of memory accesses (loads and stores) by individual + work-items. + + .. important:: + The function is a no-op during CPython execution and only available in + JIT compiled mode of execution. - memory_scope: The set of work-items and devices to which - the memory ordering constraints apply. + Args: + memory_order (MemoryOrder): The memory synchronization order. + memory_scope (MemoryScope): The set of work-items and devices to which + the memory ordering constraints apply. """ diff --git a/numba_dpex/kernel_api/atomic_ref.py b/numba_dpex/kernel_api/atomic_ref.py index ab316382f5..c1e2022f0a 100644 --- a/numba_dpex/kernel_api/atomic_ref.py +++ b/numba_dpex/kernel_api/atomic_ref.py @@ -10,7 +10,7 @@ class AtomicRef: - """Analogue to the ``sycl::atomic_ref`` class. + """Analogue to the :sycl_atomic_ref:`sycl::atomic_ref <>` class. An atomic reference is a view into a data container that can be then updated atomically using any of the ``fetch_*`` member functions of the class. diff --git a/numba_dpex/kernel_api/barrier.py b/numba_dpex/kernel_api/barrier.py index 3fd78fe80e..716c7b2d9e 100644 --- a/numba_dpex/kernel_api/barrier.py +++ b/numba_dpex/kernel_api/barrier.py @@ -9,24 +9,34 @@ from .memory_enums import MemoryScope -def group_barrier(group: Group, fence_scope=MemoryScope.WORK_GROUP): - """Performs a barrier operation across all work-items in a work group. +def group_barrier( + group: Group, fence_scope: MemoryScope = MemoryScope.WORK_GROUP +): + """Performs a barrier operation across all work-items in a work-group. - The function is modeled after the ``sycl::group_barrier`` function. It - synchronizes work within a group of work items. All the work-items + The function is equivalent to the ``sycl::group_barrier`` function. It + synchronizes work within a group of work-items. All the work-items of the group must execute the barrier call before any work-item continues execution beyond the barrier. - The ``group_barrier`` performs mem-fence operations ensuring that memory + The ``group_barrier`` performs a memory fence operation ensuring that memory accesses issued before the barrier are not re-ordered with those issued - after the barrier: all work-items in group G execute a release fence prior + after the barrier. All work-items in group G execute a release fence prior to synchronizing at the barrier, all work-items in group G execute an acquire fence afterwards, and there is an implicit synchronization of these fences as if provided by an explicit atomic operation on an atomic object. + .. important:: + The function is not implemented yet for pure CPython execution and is + only supported in JIT compiled mode of execution. + Args: - fence_scope (optional): scope of any memory consistency - operations that are performed by the barrier. + group (Group): Indicates the work-group inside which the barrier is to + be executed. + fence_scope (MemoryScope) (optional): scope of any memory + consistency operations that are performed by the barrier. + Raises: + NotImplementedError: When the function is called directly from Python. """ # TODO: A pure Python simulation of a group_barrier will be added later. diff --git a/numba_dpex/kernel_api/index_space_ids.py b/numba_dpex/kernel_api/index_space_ids.py index e53b81f58f..bc4dc7d927 100644 --- a/numba_dpex/kernel_api/index_space_ids.py +++ b/numba_dpex/kernel_api/index_space_ids.py @@ -11,7 +11,15 @@ class Group: - """Analogue to the ``sycl::group`` type.""" + # pylint: disable=line-too-long + """Analogue to the :sycl_group:`sycl::group <>` class. + + Represents a particular work-group within a parallel execution and + provides API to extract various properties of the work-group. An instance + of the class is not user-constructible. Users should use + :func:`numba_dpex.kernel_api.NdItem.get_group` to access the Group to which + a work-item belongs. + """ def __init__( self, @@ -27,12 +35,20 @@ def __init__( self._leader = False def get_group_id(self, dim): - """Returns the index of the work-group within the global nd-range for - specified dimension. + """Returns a specific coordinate of the multi-dimensional index of a group. Since the work-items in a work-group have a defined position within the global nd-range, the returned group id can be used along with the local id to uniquely identify the work-item in the global nd-range. + + Args: + dim (int): An integral value between (1..3) for which the group + index is returned. + Returns: + int: The coordinate for the ``dim`` dimension for the group's + multi-dimensional index within an nd-range. + Raises: + ValueError: If the ``dim`` argument is not in the (1..3) interval. """ if dim > len(self._index) - 1: raise ValueError( @@ -41,7 +57,12 @@ def get_group_id(self, dim): return self._index[dim] def get_group_linear_id(self): - """Returns a linearized version of the work-group index.""" + """Returns a linearized version of the work-group index. + + Returns: + int: The linearized index for the group's position within an + nd-range. + """ if self.dimensions == 1: return self.get_group_id(0) if self.dimensions == 2: @@ -59,13 +80,23 @@ def get_group_linear_id(self): ) def get_group_range(self, dim): - """Returns a the extent of the range representing the number of groups - in the nd-range for a specified dimension. + """Returns the extent of the range of groups in an nd-range for given dimension. + + Args: + dim (int): An integral value between (1..3) for which the group + index is returned. + Returns: + int: The extent of group range for the specified dimension. """ return self._group_range[dim] def get_group_linear_range(self): - """Return the total number of work-groups in the nd_range.""" + """Returns the total number of work-groups in the nd_range. + + Returns: + int: Returns the number of groups in a parallel execution of an + nd-range kernel. + """ num_wg = 1 for i in range(self.dimensions): num_wg *= self.get_group_range(i) @@ -73,14 +104,24 @@ def get_group_linear_range(self): return num_wg def get_local_range(self, dim): - """Returns the extent of the SYCL range representing all dimensions - of the local range for a specified dimension. This local range may - have been provided by the programmer, or chosen by the SYCL runtime. + """Returns the extent of the range of work-items in a work-group for given dimension. + + Args: + dim (int): An integral value between (1..3) for which the group + index is returned. + Returns: + int: The extent of the local work-item range for the specified + dimension. """ return self._local_range[dim] def get_local_linear_range(self): - """Return the total number of work-items in the work-group.""" + """Return the total number of work-items in the work-group. + + Returns: + int: Returns the linearized size of the local range inside an + nd-range. + """ num_wi = 1 for i in range(self.dimensions): num_wi *= self.get_local_range(i) @@ -89,16 +130,13 @@ def get_local_linear_range(self): @property def leader(self): - """Return true for exactly one work-item in the work-group, if the - calling work-item is the leader of the work-group, and false for all - other work-items in the work-group. + """Return true if the caller work-item is the leader of the work-group. The leader of the work-group is determined during construction of the work-group, and is invariant for the lifetime of the work-group. The leader of the work-group is guaranteed to be the work-item with a local id of 0. - Returns: bool: If the work item is the designated leader of the """ @@ -106,7 +144,8 @@ def leader(self): @property def dimensions(self) -> int: - """Returns the rank of a Group object. + """Returns the dimensionality of the range to which the work-group belongs. + Returns: int: Number of dimensions in the Group object """ @@ -119,10 +158,10 @@ def leader(self, work_item_id): class Item: - """Analogue to the ``sycl::item`` class. + """Analogue to the :sycl_item:`sycl::item <>` class. - Identifies an instance of the function object executing at each point in an - :class:`.Range`. + Identifies the work-item in a parallel execution of a kernel launched with + the :class:`.Range` index-space class. """ def __init__(self, extent: Range, index: list): @@ -130,11 +169,10 @@ def __init__(self, extent: Range, index: list): self._index = index def get_linear_id(self): - """Get the linear id associated with this item for all dimensions. - Original implementation could be found at ``sycl::item_base`` class. + """Returns the linear id associated with this item for all dimensions. Returns: - int: The linear id. + int: The linear id of the work item in the global range. """ if self.dimensions == 1: return self.get_id(0) @@ -181,7 +219,7 @@ def dimensions(self) -> int: class NdItem: - """Analogue to the ``sycl::nd_item`` class. + """Analogue to the :sycl_nditem:`sycl::nd_item <>` class. Identifies an instance of the function object executing at each point in an :class:`.NdRange`. diff --git a/numba_dpex/kernel_api/launcher.py b/numba_dpex/kernel_api/launcher.py index 0bcb3e24df..41c158594c 100644 --- a/numba_dpex/kernel_api/launcher.py +++ b/numba_dpex/kernel_api/launcher.py @@ -7,6 +7,7 @@ from inspect import signature from itertools import product +from typing import Union from .index_space_ids import Group, Item, NdItem from .local_accessor import LocalAccessor, _LocalAccessorMock @@ -112,17 +113,26 @@ def _ndrange_kernel_launcher(kernel_fn, index_range, *kernel_args): ) -def call_kernel(kernel_fn, index_range, *kernel_args): +def call_kernel(kernel_fn, index_range: Union[Range, NdRange], *kernel_args): """Mocks the launching of a kernel function over either a Range or NdRange. + .. important:: + The function is meant to be used only during prototyping a kernel_api + function in Python. To launch a JIT compiled kernel, the + :func:`numba_dpex.core.kernel_launcher.call_kernel` function should be + used. + Args: - kernel_fn : A callable function object - index_range (numba_dpex.Range): An instance of a Range object + kernel_fn : A callable function object written using + :py:mod:`numba_dpex.kernel_api`. + index_range (Range|NdRange): An instance of a Range or an NdRange object + kernel_args (List): The expanded list of actual arguments with which to + launch the kernel execution. Raises: - ValueError: If the first positional argument is not callable + ValueError: If the first positional argument is not callable. ValueError: If the second positional argument is not a Range or an - Ndrange object + Ndrange object """ if not callable(kernel_fn): raise ValueError( diff --git a/numba_dpex/kernel_api/local_accessor.py b/numba_dpex/kernel_api/local_accessor.py index 220ef884d7..3ba46a6785 100644 --- a/numba_dpex/kernel_api/local_accessor.py +++ b/numba_dpex/kernel_api/local_accessor.py @@ -10,9 +10,9 @@ class LocalAccessor: - """ - The ``LocalAccessor`` class is analogous to SYCL's ``local_accessor`` - class. The class acts a s proxy to allocating device local memory and + """Analogue to the :sycl_local_accessor:`sycl::local_accessor <>` class. + + The class acts as a proxy to allocating device local memory and accessing that memory from within a :func:`numba_dpex.kernel` decorated function. """ diff --git a/numba_dpex/kernel_api/memory_enums.py b/numba_dpex/kernel_api/memory_enums.py index 5631e88191..723fd62c2e 100644 --- a/numba_dpex/kernel_api/memory_enums.py +++ b/numba_dpex/kernel_api/memory_enums.py @@ -11,21 +11,11 @@ class MemoryOrder(FlagEnum): """ - An enumeration of the supported ``sycl::memory_order`` values. + Analogue of :sycl_memory_order:`sycl::memory_order <>` enumeration. The integer values of the enums is kept consistent with the corresponding implementation in dpcpp. - ===================== ============ - Order Enum value - ===================== ============ - RELAXED 0 - ACQUIRE 1 - CONSUME_UNSUPPORTED 2 - RELEASE 3 - ACQ_REL 4 - SEQ_CST 5 - ===================== ============ """ RELAXED = 0 @@ -38,19 +28,11 @@ class MemoryOrder(FlagEnum): class MemoryScope(FlagEnum): """ - An enumeration of the supported ``sycl::memory_scope`` values. - - For more details please refer to SYCL 2020 specification, section 3.8.3.2 - - =============== ============ - Memory Scope Enum value - =============== ============ - WORK_ITEM 0 - SUB_GROUP 1 - WORK_GROUP 2 - DEVICE 3 - SYSTEM 4 - =============== ============ + Analogue of :sycl_memory_scope:`sycl::memory_scope <>` enumeration. + + The integer values of the enums is kept consistent with the corresponding + implementation in dpcpp. + """ WORK_ITEM = 0 @@ -61,17 +43,10 @@ class MemoryScope(FlagEnum): class AddressSpace(FlagEnum): - """An enumeration of the supported address space values. - - ================== ============ - Address space Value - ================== ============ - PRIVATE 0 - GLOBAL 1 - CONSTANT 2 - LOCAL 3 - GENERIC 4 - ================== ============ + """Analogue of :sycl_addr_space:`SYCL address space classes <>`. + + The integer values of the enums is kept consistent with the corresponding + implementation in dpcpp. """ PRIVATE = 0 diff --git a/numba_dpex/kernel_api/private_array.py b/numba_dpex/kernel_api/private_array.py index 95b9a7ae2a..b83a529055 100644 --- a/numba_dpex/kernel_api/private_array.py +++ b/numba_dpex/kernel_api/private_array.py @@ -11,9 +11,12 @@ class PrivateArray: - """ - The ``PrivateArray`` class is an simple version of array intended to be used - inside kernel work item. + """An array that gets allocated on the private memory of a work-item. + + The class should be used to allocate small arrays on the private + per-work-item memory for fast accesses inside a kernel. It is similar in + intent to the :sycl_private_memory:`sycl::private_memory <>` class but is + not a direct analogue. """ def __init__(self, shape, dtype, fill_zeros=False) -> None: diff --git a/numba_dpex/kernel_api/ranges.py b/numba_dpex/kernel_api/ranges.py index e4a131544a..cf11e12f89 100644 --- a/numba_dpex/kernel_api/ranges.py +++ b/numba_dpex/kernel_api/ranges.py @@ -15,7 +15,7 @@ class Range(tuple): - """A data structure to encapsulate a single kernel launch parameter. + """Analogue to the :sycl_range:`sycl::range <>` class. The range is an abstraction that describes the number of elements in each dimension of buffers and index spaces. It can contain @@ -131,7 +131,7 @@ def dim2(self) -> int: class NdRange: - """A class to encapsulate all kernel launch parameters. + """Analogue to the :sycl_ndrange:`sycl::nd_range <>` class. The NdRange defines the index space for a work group as well as the global index space. It is passed to parallel_for to execute