@@ -36,7 +36,6 @@ class providing the base-class of operations.
3636from pandas ._libs import Timestamp
3737import pandas ._libs .groupby as libgroupby
3838from pandas ._typing import FrameOrSeries , Scalar
39- from pandas .compat import set_function_name
4039from pandas .compat .numpy import function as nv
4140from pandas .errors import AbstractMethodError
4241from pandas .util ._decorators import Appender , Substitution , cache_readonly , doc
@@ -192,6 +191,24 @@ class providing the base-class of operations.
192191 """ ,
193192)
194193
194+ _groupby_agg_method_template = """
195+ Compute {fname} of group values.
196+
197+ Parameters
198+ ----------
199+ numeric_only : bool, default {no}
200+ Include only float, int, boolean columns. If None, will attempt to use
201+ everything, then use only numeric data.
202+ min_count : int, default {mc}
203+ The required number of valid values to perform the operation. If fewer
204+ than ``min_count`` non-NA values are present the result will be NA.
205+
206+ Returns
207+ -------
208+ Series or DataFrame
209+ Computed {fname} of values within each group.
210+ """
211+
195212_pipe_template = """
196213Apply a function `func` with arguments to this %(klass)s object and return
197214the function's result.
@@ -945,6 +962,37 @@ def _wrap_transformed_output(self, output: Mapping[base.OutputKey, np.ndarray]):
945962 def _wrap_applied_output (self , keys , values , not_indexed_same : bool = False ):
946963 raise AbstractMethodError (self )
947964
def _agg_general(
    self,
    numeric_only: bool = True,
    min_count: int = -1,
    *,
    alias: str,
    npfunc: Callable,
):
    """
    Run a reduction through the cython path, with a Python-level fallback.

    Parameters
    ----------
    numeric_only : bool, default True
        Forwarded to ``_cython_agg_general``.
    min_count : int, default -1
        Forwarded to ``_cython_agg_general``.
    alias : str
        Passed to ``_cython_agg_general`` as ``how``.
    npfunc : callable
        Passed to ``_cython_agg_general`` as ``alt``. It is also the function
        applied via ``self.aggregate`` (called with ``axis=self.axis``) when
        the cython attempt is abandoned.

    Returns
    -------
    object
        Whatever ``_cython_agg_general`` returns when it succeeds, otherwise
        the result of ``self.aggregate``.

    Raises
    ------
    NotImplementedError
        Re-raised when its message contains neither of the two fragments
        recognised below.

    Notes
    -----
    ``numeric_only`` and ``min_count`` are not forwarded to the fallback
    ``self.aggregate`` call.
    """
    self._set_group_selection()

    cython_kwargs = {
        "how": alias,
        "alt": npfunc,
        "numeric_only": numeric_only,
        "min_count": min_count,
    }

    # try a cython aggregation if we can
    try:
        return self._cython_agg_general(**cython_kwargs)
    except DataError:
        # fall through to the non-cython aggregation below
        pass
    except NotImplementedError as err:
        message = str(err)
        # raised in _get_cython_function, in some cases can
        # be trimmed by implementing cython funcs for more dtypes
        dtype_unsupported = "function is not implemented for this dtype" in message
        category_unsupported = "category dtype not supported" in message
        if not (dtype_unsupported or category_unsupported):
            raise

    # apply a non-cython aggregation
    return self.aggregate(lambda x: npfunc(x, axis=self.axis))
995+
948996 def _cython_agg_general (
949997 self , how : str , alt = None , numeric_only : bool = True , min_count : int = - 1
950998 ):
@@ -1438,74 +1486,36 @@ def size(self):
14381486 result = self ._obj_1d_constructor (result )
14391487 return self ._reindex_output (result , fill_value = 0 )
14401488
1441- @classmethod
1442- def _add_numeric_operations ( cls ):
1443- """
1444- Add numeric operations to the GroupBy generically.
1445- """
@doc(_groupby_agg_method_template, fname="sum", no=True, mc=0)
def sum(self, numeric_only: bool = True, min_count: int = 0):
    # The docstring is generated by the ``doc`` decorator from the shared
    # template, so none is written here.
    # The reduction is requested from ``_agg_general`` under the alias "add",
    # with ``np.sum`` as the accompanying callable.
    return self._agg_general(
        numeric_only,
        min_count,
        alias="add",
        npfunc=np.sum,
    )
14461494
1447- def groupby_function (
1448- name : str ,
1449- alias : str ,
1450- npfunc ,
1451- numeric_only : bool = True ,
1452- min_count : int = - 1 ,
1453- ):
@doc(_groupby_agg_method_template, fname="prod", no=True, mc=0)
def prod(self, numeric_only: bool = True, min_count: int = 0):
    # The docstring is generated by the ``doc`` decorator from the shared
    # template, so none is written here.
    # The reduction is requested from ``_agg_general`` under the alias "prod",
    # with ``np.prod`` as the accompanying callable.
    return self._agg_general(
        numeric_only,
        min_count,
        alias="prod",
        npfunc=np.prod,
    )
14541500
1455- _local_template = """
1456- Compute %(f)s of group values.
1457-
1458- Parameters
1459- ----------
1460- numeric_only : bool, default %(no)s
1461- Include only float, int, boolean columns. If None, will attempt to use
1462- everything, then use only numeric data.
1463- min_count : int, default %(mc)s
1464- The required number of valid values to perform the operation. If fewer
1465- than ``min_count`` non-NA values are present the result will be NA.
1466-
1467- Returns
1468- -------
1469- Series or DataFrame
1470- Computed %(f)s of values within each group.
1471- """
1472-
1473- @Substitution (name = "groupby" , f = name , no = numeric_only , mc = min_count )
1474- @Appender (_common_see_also )
1475- @Appender (_local_template )
1476- def func (self , numeric_only = numeric_only , min_count = min_count ):
1477- self ._set_group_selection ()
1478-
1479- # try a cython aggregation if we can
1480- try :
1481- return self ._cython_agg_general (
1482- how = alias ,
1483- alt = npfunc ,
1484- numeric_only = numeric_only ,
1485- min_count = min_count ,
1486- )
1487- except DataError :
1488- pass
1489- except NotImplementedError as err :
1490- if "function is not implemented for this dtype" in str (
1491- err
1492- ) or "category dtype not supported" in str (err ):
1493- # raised in _get_cython_function, in some cases can
1494- # be trimmed by implementing cython funcs for more dtypes
1495- pass
1496- else :
1497- raise
1498-
1499- # apply a non-cython aggregation
1500- result = self .aggregate (lambda x : npfunc (x , axis = self .axis ))
1501- return result
1502-
1503- set_function_name (func , name , cls )
1504-
1505- return func
@doc(_groupby_agg_method_template, fname="min", no=False, mc=-1)
def min(self, numeric_only: bool = False, min_count: int = -1):
    # The docstring is generated by the ``doc`` decorator from the shared
    # template, so none is written here.
    # The reduction is requested from ``_agg_general`` under the alias "min",
    # with ``np.min`` as the accompanying callable.
    return self._agg_general(
        numeric_only,
        min_count,
        alias="min",
        npfunc=np.min,
    )
15061506
@doc(_groupby_agg_method_template, fname="max", no=False, mc=-1)
def max(self, numeric_only: bool = False, min_count: int = -1):
    # The docstring is generated by the ``doc`` decorator from the shared
    # template, so none is written here.
    # The reduction is requested from ``_agg_general`` under the alias "max",
    # with ``np.max`` as the accompanying callable.
    return self._agg_general(
        numeric_only,
        min_count,
        alias="max",
        npfunc=np.max,
    )
1512+
1513+ @doc (_groupby_agg_method_template , fname = "first" , no = False , mc = - 1 )
1514+ def first (self , numeric_only : bool = False , min_count : int = - 1 ):
15071515 def first_compat (obj : FrameOrSeries , axis : int = 0 ):
15081516 def first (x : Series ):
1517+ """Helper function for first item that isn't NA.
1518+ """
15091519 x = x .array [notna (x .array )]
15101520 if len (x ) == 0 :
15111521 return np .nan
@@ -1518,8 +1528,19 @@ def first(x: Series):
15181528 else :
15191529 raise TypeError (type (obj ))
15201530
1531+ return self ._agg_general (
1532+ numeric_only = numeric_only ,
1533+ min_count = min_count ,
1534+ alias = "first" ,
1535+ npfunc = first_compat ,
1536+ )
1537+
1538+ @doc (_groupby_agg_method_template , fname = "last" , no = False , mc = - 1 )
1539+ def last (self , numeric_only : bool = False , min_count : int = - 1 ):
15211540 def last_compat (obj : FrameOrSeries , axis : int = 0 ):
15221541 def last (x : Series ):
1542+ """Helper function for last item that isn't NA.
1543+ """
15231544 x = x .array [notna (x .array )]
15241545 if len (x ) == 0 :
15251546 return np .nan
@@ -1532,12 +1553,12 @@ def last(x: Series):
15321553 else :
15331554 raise TypeError (type (obj ))
15341555
1535- cls . sum = groupby_function ( "sum" , "add" , np . sum , min_count = 0 )
1536- cls . prod = groupby_function ( "prod" , "prod" , np . prod , min_count = 0 )
1537- cls . min = groupby_function ( "min" , "min" , np . min , numeric_only = False )
1538- cls . max = groupby_function ( "max" , "max" , np . max , numeric_only = False )
1539- cls . first = groupby_function ( "first" , "first" , first_compat , numeric_only = False )
1540- cls . last = groupby_function ( "last" , "last" , last_compat , numeric_only = False )
1556+ return self . _agg_general (
1557+ numeric_only = numeric_only ,
1558+ min_count = min_count ,
1559+ alias = "last" ,
1560+ npfunc = last_compat ,
1561+ )
15411562
15421563 @Substitution (name = "groupby" )
15431564 @Appender (_common_see_also )
@@ -2637,9 +2658,6 @@ def _reindex_output(
26372658 return output .reset_index (drop = True )
26382659
26392660
2640- GroupBy ._add_numeric_operations ()
2641-
2642-
26432661@doc (GroupBy )
26442662def get_groupby (
26452663 obj : NDFrame ,
0 commit comments