@@ -19,6 +19,61 @@ namespace py = pybind11;
using namespace amrex;


+namespace
+{
+    /** CPU: __array_interface__ v3
+     *
+     * https://numpy.org/doc/stable/reference/arrays.interface.html
+     */
+    template< typename T >
+    py::dict
+    array_interface(Array4<T> const & a4)
+    {
+        auto d = py::dict();
+        auto const len = length(a4);
+        // F->C index conversion here
+        //   p[(i-begin.x)+(j-begin.y)*jstride+(k-begin.z)*kstride+n*nstride];
+        // Buffer dimensions: zero-size shall not skip dimension
+        auto shape = py::make_tuple(
+            a4.ncomp,
+            len.z <= 0 ? 1 : len.z,
+            len.y <= 0 ? 1 : len.y,
+            len.x <= 0 ? 1 : len.x  // fastest varying index
+        );
+        // buffer protocol strides are in bytes, AMReX strides are in elements
+        auto const strides = py::make_tuple(
+            sizeof(T) * a4.nstride,
+            sizeof(T) * a4.kstride,
+            sizeof(T) * a4.jstride,
+            sizeof(T)  // fastest varying index
+        );
+        bool const read_only = false;
+        d["data"] = py::make_tuple(std::intptr_t(a4.dataPtr()), read_only);
+        // note: if we want to keep the same global indexing with non-zero
+        //       box small_end as in AMReX, then we can explore playing with
+        //       this offset as well
+        //d["offset"] = 0;         // default
+        //d["mask"] = py::none();  // default
+
+        d["shape"] = shape;
+        // we could also set this after checking that the strides are C-style contiguous:
+        //if (is_contiguous<T>(shape, strides))
+        //    d["strides"] = py::none();  // C-style contiguous
+        //else
+        d["strides"] = strides;
+
+        // type description
+        // for more complicated types, e.g., tuples/structs
+        //d["descr"] = ...;
+        // we currently only need this
+        d["typestr"] = py::format_descriptor<T>::format();
+
+        d["version"] = 3;
+        return d;
+    }
+}
+
+
template< typename T >
void make_Array4(py::module &m, std::string typestr)
{
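
The dict built by array_interface() above follows the __array_interface__ v3
spec, so NumPy can wrap the described memory without copying. A minimal,
self-contained consumer-side sketch of that protocol (pure NumPy; FakeArray4
is an illustrative stand-in for the bound Array4, not pyAMReX API):

    import numpy as np

    class FakeArray4:
        """Mimics the dict built by array_interface() above for a
        C-contiguous float64 block of shape (ncomp, z, y, x)."""
        def __init__(self, ncomp, nz, ny, nx):
            self._buf = np.arange(ncomp * nz * ny * nx, dtype=np.float64)
            nbytes = self._buf.itemsize
            self.__array_interface__ = {
                "data": (self._buf.ctypes.data, False),  # (pointer, read_only)
                "shape": (ncomp, nz, ny, nx),
                # strides are in bytes, x fastest varying:
                "strides": (nz * ny * nx * nbytes,
                            ny * nx * nbytes,
                            nx * nbytes,
                            nbytes),
                "typestr": "<f8",  # little-endian float64
                "version": 3,
            }

    src = FakeArray4(2, 3, 4, 5)
    view = np.asarray(src)       # zero-copy view over src's buffer
    view[0, 0, 0, 0] = 42.0
    assert src._buf[0] == 42.0   # writes pass through to the source memory
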
@@ -85,56 +140,44 @@ void make_Array4(py::module &m, std::string typestr)
            return a4;
        }))

+
+        // CPU: __array_interface__ v3
+        // https://numpy.org/doc/stable/reference/arrays.interface.html
        .def_property_readonly("__array_interface__", [](Array4<T> const & a4) {
-            auto d = py::dict();
-            auto const len = length(a4);
-            // F->C index conversion here
-            //   p[(i-begin.x)+(j-begin.y)*jstride+(k-begin.z)*kstride+n*nstride];
-            // Buffer dimensions: zero-size shall not skip dimension
-            auto shape = py::make_tuple(
-                a4.ncomp,
-                len.z <= 0 ? 1 : len.z,
-                len.y <= 0 ? 1 : len.y,
-                len.x <= 0 ? 1 : len.x  // fastest varying index
-            );
-            // buffer protocol strides are in bytes, AMReX strides are in elements
-            auto const strides = py::make_tuple(
-                sizeof(T) * a4.nstride,
-                sizeof(T) * a4.kstride,
-                sizeof(T) * a4.jstride,
-                sizeof(T)  // fastest varying index
-            );
-            bool const read_only = false;
-            d["data"] = py::make_tuple(std::intptr_t(a4.dataPtr()), read_only);
-            // note: if we want to keep the same global indexing with non-zero
-            //       box small_end as in AMReX, then we can explore playing with
-            //       this offset as well
-            //d["offset"] = 0;         // default
-            //d["mask"] = py::none();  // default
-
-            d["shape"] = shape;
-            // we could also set this after checking that the strides are C-style contiguous:
-            //if (is_contiguous<T>(shape, strides))
-            //    d["strides"] = py::none();  // C-style contiguous
-            //else
-            d["strides"] = strides;
-
-            d["typestr"] = py::format_descriptor<T>::format();
-            d["version"] = 3;
-            return d;
+            return array_interface(a4);
        })

+        // CPU: __array_function__ interface (TODO)
+        //
+        // NEP 18 — A dispatch mechanism for NumPy's high level array functions.
+        //   https://numpy.org/neps/nep-0018-array-function-protocol.html
+        // This enables code written for NumPy to operate directly on Array4 arrays.
+        // The __array_function__ feature requires NumPy 1.16 or later.
+

-        // TODO: __cuda_array_interface__
+        // Nvidia GPUs: __cuda_array_interface__ v2
        // https://numba.readthedocs.io/en/latest/cuda/cuda_array_interface.html
+        .def_property_readonly("__cuda_array_interface__", [](Array4<T> const & a4) {
+            auto d = array_interface(a4);
+
+            // data:
+            // Because the user of the interface may or may not be in the same
+            // context, the most common case is to use cuPointerGetAttribute with
+            // CU_POINTER_ATTRIBUTE_DEVICE_POINTER in the CUDA driver API (or the
+            // equivalent CUDA Runtime API) to retrieve a device pointer that is
+            // usable in the currently active context.
+            // TODO: For zero-size arrays, use 0 here.
+
+            // ... TODO: wasn't there some stream or device info?
+
+            d["version"] = 2;
+            return d;
+        })


-        // TODO: __dlpack__
+        // TODO: __dlpack__ and __dlpack_device__
        // DLPack protocol (CPU, NVIDIA GPU, AMD GPU, Intel GPU, etc.)
        // https://dmlc.github.io/dlpack/latest/
        // https://data-apis.org/array-api/latest/design_topics/data_interchange.html
        // https://github.com/data-apis/consortium-feedback/issues/1
        // https://github.com/dmlc/dlpack/blob/master/include/dlpack/dlpack.h
+        // https://docs.cupy.dev/en/stable/user_guide/interoperability.html#dlpack-data-exchange-protocol


        .def("contains", &Array4<T>::contains)
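
Consumer-side sketch for the new __cuda_array_interface__ binding (hypothetical
usage, requires CUDA and CuPy; acquiring a device-resident Array4 is elided):

    import cupy as cp

    # a4 = ...  # an Array4 bound above, with data in CUDA device memory

    d = a4.__cuda_array_interface__   # dict produced by the lambda above
    gpu_view = cp.asarray(a4)         # CuPy consumes the dict, zero-copy
    gpu_view *= 2.0                   # operates in place on AMReX device memory

Once the __dlpack__ TODO is implemented, the analogous consumer calls would be
numpy.from_dlpack() and cupy.from_dlpack().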