11from copy import copy
22
33from cython import Py_ssize_t
4- from cpython.ref cimport Py_INCREF
54
65from libc.stdlib cimport malloc, free
76
87import numpy as np
98cimport numpy as cnp
10- from numpy cimport (ndarray,
11- int64_t,
12- PyArray_SETITEM,
13- PyArray_ITER_NEXT, PyArray_ITER_DATA, PyArray_IterNew,
14- flatiter)
9+ from numpy cimport ndarray, int64_t
1510cnp.import_array()
1611
1712from pandas._libs cimport util
@@ -26,146 +21,6 @@ cdef _check_result_array(object obj, Py_ssize_t cnt):
2621 raise ValueError (' Function does not reduce' )
2722
2823
cdef class Reducer:
    """
    Performs a generic reduction operation on a C- or Fortran-contiguous
    ndarray while avoiding ndarray construction overhead.

    The same ``dummy`` chunk ndarray is re-pointed at successive slices of
    the underlying buffer (by bumping ``chunk.data`` by ``increment``), so
    only one array object is ever allocated for the whole reduction.
    """
    cdef:
        Py_ssize_t increment, chunksize, nresults
        object dummy, f, labels, typ, ityp, index
        ndarray arr

    def __init__(
        self, ndarray arr, object f, int axis=1, object dummy=None, object labels=None
    ):
        cdef:
            Py_ssize_t n, k

        n, k = (<object>arr).shape

        if axis == 0:
            # reduce along columns: need column-major layout so each chunk
            # of ``n`` items is contiguous in memory
            if not arr.flags.f_contiguous:
                arr = arr.copy('F')

            self.nresults = k
            self.chunksize = n
            self.increment = n * arr.dtype.itemsize
        else:
            # reduce along rows: need row-major layout
            if not arr.flags.c_contiguous:
                arr = arr.copy('C')

            self.nresults = n
            self.chunksize = k
            self.increment = k * arr.dtype.itemsize

        self.f = f
        self.arr = arr
        self.labels = labels
        self.dummy, self.typ, self.index, self.ityp = self._check_dummy(
            dummy=dummy)

    cdef _check_dummy(self, object dummy=None):
        """
        Validate (or create) the reusable chunk array.

        Returns (dummy_values, series_type_or_None, index_or_None, ityp).
        """
        cdef:
            object index = None, typ = None, ityp = None

        if dummy is None:
            dummy = np.empty(self.chunksize, dtype=self.arr.dtype)

            # our ref is stolen later since we are creating this array
            # in cython, so increment first
            Py_INCREF(dummy)

        else:

            # we passed a Series
            typ = type(dummy)
            index = dummy.index
            dummy = dummy.values

            if dummy.dtype != self.arr.dtype:
                raise ValueError('Dummy array must be same dtype')
            if len(dummy) != self.chunksize:
                raise ValueError(f'Dummy array must be length {self.chunksize}')

        return dummy, typ, index, ityp

    def get_result(self):
        """
        Apply ``self.f`` to each chunk of ``self.arr``.

        Returns
        -------
        result : ndarray
            Object-dtype array of per-chunk results (possibly converted to a
            more specific dtype by ``maybe_convert_objects``).
        reduction_success : bool
            False when the first result did not look like a reduction, in
            which case the caller falls back to the pure-python path.
        """
        cdef:
            char* dummy_buf
            ndarray arr, result, chunk
            Py_ssize_t i
            flatiter it
            object res, name, labels
            object cached_typ = None

        arr = self.arr
        chunk = self.dummy
        dummy_buf = chunk.data
        # re-point the dummy at the start of the real buffer; restored in
        # the ``finally`` below so the dummy's own memory is freed correctly
        chunk.data = arr.data
        labels = self.labels

        result = np.empty(self.nresults, dtype='O')
        it = <flatiter>PyArray_IterNew(result)
        reduction_success = True

        try:
            for i in range(self.nresults):

                # create the cached type
                # each time just reassign the data
                if i == 0:

                    if self.typ is not None:
                        # In this case, we also have self.index
                        name = labels[i]
                        cached_typ = self.typ(
                            chunk, index=self.index, name=name, dtype=arr.dtype)

                # use the cached_typ if possible
                if cached_typ is not None:
                    # In this case, we also have non-None labels
                    name = labels[i]

                    # swap the cached Series' values/name in place rather
                    # than constructing a new Series per chunk
                    object.__setattr__(
                        cached_typ._mgr._block, 'values', chunk)
                    object.__setattr__(cached_typ, 'name', name)
                    res = self.f(cached_typ)
                else:
                    res = self.f(chunk)

                # TODO: reason for not squeezing here?
                extracted_res = _extract_result(res, squeeze=False)
                if i == 0:
                    # On the first pass, we check the output shape to see
                    # if this looks like a reduction.
                    # If it does not, return the computed value to be used by the
                    # pure python implementation,
                    # so the function won't be called twice on the same object,
                    # and side effects would occur twice
                    try:
                        _check_result_array(extracted_res, len(self.dummy))
                    except ValueError as err:
                        if "Function does not reduce" not in str(err):
                            # catch only the specific exception
                            raise

                        reduction_success = False
                        PyArray_SETITEM(result, PyArray_ITER_DATA(it), copy(res))
                        break

                PyArray_SETITEM(result, PyArray_ITER_DATA(it), extracted_res)
                chunk.data = chunk.data + self.increment
                PyArray_ITER_NEXT(it)

        finally:
            # so we don't free the wrong memory
            chunk.data = dummy_buf

        result = maybe_convert_objects(result)
        return result, reduction_success
167-
168-
16924cdef class _BaseGrouper:
17025 cdef _check_dummy(self , object dummy):
17126 # both values and index must be an ndarray!
@@ -610,30 +465,3 @@ cdef class BlockSlider:
610465 # axis=1 is the frame's axis=0
611466 arr.data = self .base_ptrs[i]
612467 arr.shape[1 ] = 0
613-
614-
def compute_reduction(arr: ndarray, f, axis: int = 0, dummy=None, labels=None):
    """
    Apply ``f`` along ``axis`` of ``arr`` via the fast ``Reducer`` path.

    Parameters
    ----------
    arr : np.ndarray
    f : function
    axis : integer axis
    dummy : type of reduced output (series)
    labels : Index or None

    Returns
    -------
    result : ndarray
    reduction_success : bool
    """
    # We either have both dummy and labels, or neither of them
    if (labels is None) ^ (dummy is None):
        raise ValueError("Must pass either dummy and labels, or neither")

    if labels is not None:
        # Caller is responsible for ensuring we don't have MultiIndex
        assert labels.nlevels == 1

        # pass as an ndarray/ExtensionArray
        labels = labels._values

    reducer = Reducer(arr, f, axis=axis, dummy=dummy, labels=labels)
    return reducer.get_result()
0 commit comments