diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index e7cd7cd898d5b..48ee01c809efd 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -106,7 +106,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float64_t, ndim=2] values, ndarray[intp_t] labels, - Py_ssize_t min_count=-1): + Py_ssize_t min_count=-1) -> None: """ Only aggregates on axis=0 """ @@ -148,7 +148,7 @@ def group_cumprod_float64(float64_t[:, ::1] out, const intp_t[:] labels, int ngroups, bint is_datetimelike, - bint skipna=True): + bint skipna=True) -> None: """ Cumulative product of columns of `values`, in row groups `labels`. @@ -205,7 +205,7 @@ def group_cumsum(numeric[:, ::1] out, const intp_t[:] labels, int ngroups, is_datetimelike, - bint skipna=True): + bint skipna=True) -> None: """ Cumulative sum of columns of `values`, in row groups `labels`. @@ -270,7 +270,7 @@ def group_cumsum(numeric[:, ::1] out, @cython.boundscheck(False) @cython.wraparound(False) def group_shift_indexer(int64_t[::1] out, const intp_t[:] labels, - int ngroups, int periods): + int ngroups, int periods) -> None: cdef: Py_ssize_t N, i, j, ii, lab int offset = 0, sign @@ -322,14 +322,14 @@ def group_shift_indexer(int64_t[::1] out, const intp_t[:] labels, @cython.wraparound(False) @cython.boundscheck(False) def group_fillna_indexer(ndarray[int64_t] out, ndarray[intp_t] labels, - ndarray[uint8_t] mask, object direction, - int64_t limit, bint dropna): + ndarray[uint8_t] mask, str direction, + int64_t limit, bint dropna) -> None: """ Indexes how to fill values forwards or backwards within a group. Parameters ---------- - out : np.ndarray[np.uint8] + out : np.ndarray[np.int64] Values into which this method will write its results. labels : np.ndarray[np.intp] Array containing unique label for each group, with its ordering @@ -392,8 +392,8 @@ def group_any_all(uint8_t[::1] out, const uint8_t[::1] values, const intp_t[:] labels, const uint8_t[::1] mask, - object val_test, - bint skipna): + str val_test, + bint skipna) -> None: """ Aggregated boolean values to show truthfulness of group elements. @@ -465,7 +465,7 @@ def group_add(complexfloating_t[:, ::1] out, int64_t[::1] counts, ndarray[complexfloating_t, ndim=2] values, const intp_t[:] labels, - Py_ssize_t min_count=0): + Py_ssize_t min_count=0) -> None: """ Only aggregates on axis=0 using Kahan summation """ @@ -518,7 +518,7 @@ def group_prod(floating[:, ::1] out, int64_t[::1] counts, ndarray[floating, ndim=2] values, const intp_t[:] labels, - Py_ssize_t min_count=0): + Py_ssize_t min_count=0) -> None: """ Only aggregates on axis=0 """ @@ -568,7 +568,7 @@ def group_var(floating[:, ::1] out, ndarray[floating, ndim=2] values, const intp_t[:] labels, Py_ssize_t min_count=-1, - int64_t ddof=1): + int64_t ddof=1) -> None: cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) floating val, ct, oldmean @@ -621,7 +621,7 @@ def group_mean(floating[:, ::1] out, int64_t[::1] counts, ndarray[floating, ndim=2] values, const intp_t[::1] labels, - Py_ssize_t min_count=-1): + Py_ssize_t min_count=-1) -> None: cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) floating val, count, y, t @@ -673,7 +673,7 @@ def group_ohlc(floating[:, ::1] out, int64_t[::1] counts, ndarray[floating, ndim=2] values, const intp_t[:] labels, - Py_ssize_t min_count=-1): + Py_ssize_t min_count=-1) -> None: """ Only aggregates on axis=0 """ @@ -721,7 +721,7 @@ def group_quantile(ndarray[float64_t] out, ndarray[intp_t] labels, ndarray[uint8_t] mask, float64_t q, - object interpolation): + str interpolation) -> None: """ Calculate the quantile per group. @@ -733,8 +733,6 @@ def group_quantile(ndarray[float64_t] out, Array containing the values to apply the function against. labels : ndarray[np.intp] Array containing the unique group labels. - values : ndarray - Array containing the values to apply the function against. q : float The quantile value to search for. interpolation : {'linear', 'lower', 'highest', 'nearest', 'midpoint'} @@ -865,7 +863,7 @@ def group_last(rank_t[:, ::1] out, int64_t[::1] counts, ndarray[rank_t, ndim=2] values, const intp_t[:] labels, - Py_ssize_t min_count=-1): + Py_ssize_t min_count=-1) -> None: """ Only aggregates on axis=0 """ @@ -957,8 +955,9 @@ def group_nth(rank_t[:, ::1] out, int64_t[::1] counts, ndarray[rank_t, ndim=2] values, const intp_t[:] labels, - int64_t min_count=-1, int64_t rank=1 - ): + int64_t min_count=-1, + int64_t rank=1, + ) -> None: """ Only aggregates on axis=0 """ @@ -1050,8 +1049,8 @@ def group_rank(float64_t[:, ::1] out, ndarray[rank_t, ndim=2] values, const intp_t[:] labels, int ngroups, - bint is_datetimelike, object ties_method="average", - bint ascending=True, bint pct=False, object na_option="keep"): + bint is_datetimelike, str ties_method="average", + bint ascending=True, bint pct=False, str na_option="keep") -> None: """ Provides the rank of values within each group. @@ -1221,7 +1220,7 @@ def group_max(groupby_t[:, ::1] out, int64_t[::1] counts, ndarray[groupby_t, ndim=2] values, const intp_t[:] labels, - Py_ssize_t min_count=-1): + Py_ssize_t min_count=-1) -> None: """See group_min_max.__doc__""" group_min_max(out, counts, values, labels, min_count=min_count, compute_max=True) @@ -1232,7 +1231,7 @@ def group_min(groupby_t[:, ::1] out, int64_t[::1] counts, ndarray[groupby_t, ndim=2] values, const intp_t[:] labels, - Py_ssize_t min_count=-1): + Py_ssize_t min_count=-1) -> None: """See group_min_max.__doc__""" group_min_max(out, counts, values, labels, min_count=min_count, compute_max=False) @@ -1311,7 +1310,7 @@ def group_cummin(groupby_t[:, ::1] out, ndarray[groupby_t, ndim=2] values, const intp_t[:] labels, int ngroups, - bint is_datetimelike): + bint is_datetimelike) -> None: """See group_cummin_max.__doc__""" group_cummin_max(out, values, labels, ngroups, is_datetimelike, compute_max=False) @@ -1322,6 +1321,6 @@ def group_cummax(groupby_t[:, ::1] out, ndarray[groupby_t, ndim=2] values, const intp_t[:] labels, int ngroups, - bint is_datetimelike): + bint is_datetimelike) -> None: """See group_cummin_max.__doc__""" group_cummin_max(out, values, labels, ngroups, is_datetimelike, compute_max=True) diff --git a/pandas/_libs/hashtable.pxd b/pandas/_libs/hashtable.pxd index 735d8c07f4774..a5679af44ac06 100644 --- a/pandas/_libs/hashtable.pxd +++ b/pandas/_libs/hashtable.pxd @@ -134,6 +134,6 @@ cdef class Int64Vector: cdef bint external_view_exists cdef resize(self) - cpdef to_array(self) + cpdef ndarray to_array(self) cdef inline void append(self, int64_t x) cdef extend(self, int64_t[:] x) diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index e402a4b7c0ccc..1e2a336f12444 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -61,7 +61,7 @@ cdef class Factorizer: ObjectVector uniques Py_ssize_t count - def __init__(self, size_hint): + def __init__(self, size_hint: int): self.table = PyObjectHashTable(size_hint) self.uniques = ObjectVector() self.count = 0 @@ -116,12 +116,12 @@ cdef class Int64Factorizer: Int64Vector uniques Py_ssize_t count - def __init__(self, size_hint): + def __init__(self, size_hint: int): self.table = Int64HashTable(size_hint) self.uniques = Int64Vector() self.count = 0 - def get_count(self): + def get_count(self) -> int: return self.count def factorize(self, const int64_t[:] values, sort=False, diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 301644274111b..b80a127be970d 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -220,7 +220,7 @@ cdef class {{name}}Vector: def __len__(self) -> int: return self.data.n - cpdef to_array(self): + cpdef ndarray to_array(self): if self.data.m != self.data.n: if self.external_view_exists: # should never happen @@ -288,7 +288,7 @@ cdef class StringVector: def __len__(self) -> int: return self.data.n - def to_array(self): + cpdef ndarray[object, ndim=1] to_array(self): cdef: ndarray ao Py_ssize_t n @@ -345,7 +345,7 @@ cdef class ObjectVector: self.data[self.n] = obj self.n += 1 - def to_array(self): + cpdef ndarray[object, ndim=1] to_array(self): if self.m != self.n: if self.external_view_exists: raise ValueError("should have raised on append()") @@ -403,7 +403,7 @@ cdef class {{name}}HashTable(HashTable): kh_destroy_{{dtype}}(self.table) self.table = NULL - def __contains__(self, object key): + def __contains__(self, object key) -> bool: cdef: khiter_t k {{c_type}} ckey @@ -452,7 +452,7 @@ cdef class {{name}}HashTable(HashTable): raise KeyError(key) @cython.boundscheck(False) - def map(self, const {{dtype}}_t[:] keys, const int64_t[:] values): + def map(self, const {{dtype}}_t[:] keys, const int64_t[:] values) -> None: cdef: Py_ssize_t i, n = len(values) int ret = 0 @@ -466,7 +466,7 @@ cdef class {{name}}HashTable(HashTable): self.table.vals[k] = values[i] @cython.boundscheck(False) - def map_locations(self, const {{dtype}}_t[:] values): + def map_locations(self, const {{dtype}}_t[:] values) -> None: cdef: Py_ssize_t i, n = len(values) int ret = 0 @@ -480,7 +480,8 @@ cdef class {{name}}HashTable(HashTable): self.table.vals[k] = i @cython.boundscheck(False) - def lookup(self, const {{dtype}}_t[:] values): + def lookup(self, const {{dtype}}_t[:] values) -> ndarray: + # -> np.ndarray[np.intp] cdef: Py_ssize_t i, n = len(values) int ret = 0 @@ -818,7 +819,8 @@ cdef class StringHashTable(HashTable): return labels @cython.boundscheck(False) - def lookup(self, ndarray[object] values): + def lookup(self, ndarray[object] values) -> ndarray: + # -> np.ndarray[np.intp] cdef: Py_ssize_t i, n = len(values) int ret = 0 @@ -853,7 +855,7 @@ cdef class StringHashTable(HashTable): return np.asarray(locs) @cython.boundscheck(False) - def map_locations(self, ndarray[object] values): + def map_locations(self, ndarray[object] values) -> None: cdef: Py_ssize_t i, n = len(values) int ret = 0 @@ -1071,7 +1073,7 @@ cdef class PyObjectHashTable(HashTable): def __len__(self) -> int: return self.table.size - def __contains__(self, object key): + def __contains__(self, object key) -> bool: cdef: khiter_t k hash(key) @@ -1123,7 +1125,7 @@ cdef class PyObjectHashTable(HashTable): else: raise KeyError(key) - def map_locations(self, ndarray[object] values): + def map_locations(self, ndarray[object] values) -> None: cdef: Py_ssize_t i, n = len(values) int ret = 0 @@ -1137,7 +1139,8 @@ cdef class PyObjectHashTable(HashTable): k = kh_put_pymap(self.table, val, &ret) self.table.vals[k] = i - def lookup(self, ndarray[object] values): + def lookup(self, ndarray[object] values) -> ndarray: + # -> np.ndarray[np.intp] cdef: Py_ssize_t i, n = len(values) int ret = 0 diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 47e6d417bb925..f1f56c6c0c855 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -132,6 +132,7 @@ cdef class IndexEngine: return self._maybe_get_bool_indexer(val) cdef _maybe_get_bool_indexer(self, object val): + # Returns ndarray[bool] or int cdef: ndarray[uint8_t, ndim=1, cast=True] indexer @@ -247,7 +248,7 @@ cdef class IndexEngine: self.need_unique_check = 0 - cdef void _call_map_locations(self, values): + cdef void _call_map_locations(self, ndarray values): self.mapping.map_locations(values) def clear_mapping(self): diff --git a/pandas/_libs/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in index e5026ce2fa292..8638c2c689c3f 100644 --- a/pandas/_libs/index_class_helper.pxi.in +++ b/pandas/_libs/index_class_helper.pxi.in @@ -44,10 +44,11 @@ cdef class {{name}}Engine(IndexEngine): raise KeyError(val) {{endif}} - cdef void _call_map_locations(self, values): + cdef void _call_map_locations(self, ndarray values): self.mapping.map_locations(algos.ensure_{{name.lower()}}(values)) cdef _maybe_get_bool_indexer(self, object val): + # Returns ndarray[bool] or int cdef: ndarray[uint8_t, ndim=1, cast=True] indexer ndarray[intp_t, ndim=1] found