@@ -34,7 +34,6 @@ class providing the base-class of operations.
3434from pandas ._libs import Timestamp
3535import pandas ._libs .groupby as libgroupby
3636from pandas ._typing import FrameOrSeries , Scalar
37- from pandas .compat import set_function_name
3837from pandas .compat .numpy import function as nv
3938from pandas .errors import AbstractMethodError
4039from pandas .util ._decorators import Appender , Substitution , cache_readonly
@@ -871,6 +870,32 @@ def _wrap_transformed_output(self, output: Mapping[base.OutputKey, np.ndarray]):
def _wrap_applied_output(self, keys, values, not_indexed_same: bool = False):
    """
    Abstract hook; this base implementation always raises.

    Raises
    ------
    AbstractMethodError
        Unconditionally, constructed with this instance.
    """
    raise AbstractMethodError(self)
873872
def _agg_general(
    self, numeric_only=True, min_count=-1, *, alias: str, npfunc: Callable
):
    """
    Run a named aggregation, preferring the cython path.

    Parameters
    ----------
    numeric_only : bool, default True
        Forwarded to ``_cython_agg_general``.
    min_count : int, default -1
        Forwarded to ``_cython_agg_general``.
    alias : str
        Passed to ``_cython_agg_general`` as ``how``.
    npfunc : callable
        Passed to ``_cython_agg_general`` as ``alt``; also used for the
        fallback, where it is called as ``npfunc(x, axis=self.axis)``.

    Returns
    -------
    The result of ``_cython_agg_general`` when it succeeds, otherwise the
    result of ``self.aggregate`` applied with ``npfunc``.

    Raises
    ------
    NotImplementedError
        Re-raised when its message is not one of the two recognised
        "unsupported dtype" messages.
    """
    self._set_group_selection()

    # Fast path first; only specific failures fall through to the
    # generic aggregation below.
    try:
        return self._cython_agg_general(
            how=alias, alt=npfunc, numeric_only=numeric_only, min_count=min_count
        )
    except DataError:
        pass
    except NotImplementedError as err:
        message = str(err)
        # raised in _get_cython_function, in some cases can
        # be trimmed by implementing cython funcs for more dtypes
        unsupported_dtype = (
            "function is not implemented for this dtype" in message
            or "category dtype not supported" in message
        )
        if not unsupported_dtype:
            raise

    # Non-cython fallback.
    return self.aggregate(lambda x: npfunc(x, axis=self.axis))
898+
874899 def _cython_agg_general (
875900 self , how : str , alt = None , numeric_only : bool = True , min_count : int = - 1
876901 ):
@@ -1336,105 +1361,62 @@ def size(self):
13361361 result .name = self .obj .name
13371362 return self ._reindex_output (result , fill_value = 0 )
13381363
1339- @classmethod
1340- def _add_numeric_operations (cls ):
1341- """
1342- Add numeric operations to the GroupBy generically.
def sum(self, numeric_only: bool = True, min_count: int = 0):
    """
    Compute sum of group values.

    Parameters
    ----------
    numeric_only : bool, default True
        Include only float, int, boolean columns. If None, will attempt to use
        everything, then use only numeric data.
    min_count : int, default 0
        The required number of valid values to perform the operation. If fewer
        than ``min_count`` non-NA values are present the result will be NA.

    Returns
    -------
    Series or DataFrame
        Computed sum of values within each group.
    """
    # Delegates to _agg_general with alias "add" and np.sum.
    return self._agg_general(
        numeric_only=numeric_only, min_count=min_count, alias="add", npfunc=np.sum
    )
1368+
def prod(self, numeric_only: bool = True, min_count: int = 0):
    """
    Compute prod of group values.

    Parameters
    ----------
    numeric_only : bool, default True
        Include only float, int, boolean columns. If None, will attempt to use
        everything, then use only numeric data.
    min_count : int, default 0
        The required number of valid values to perform the operation. If fewer
        than ``min_count`` non-NA values are present the result will be NA.

    Returns
    -------
    Series or DataFrame
        Computed prod of values within each group.
    """
    # Delegates to _agg_general with alias "prod" and np.prod.
    return self._agg_general(
        numeric_only=numeric_only, min_count=min_count, alias="prod", npfunc=np.prod
    )
1373+
def min(self, numeric_only: bool = False, min_count: int = -1):
    """
    Compute min of group values.

    Parameters
    ----------
    numeric_only : bool, default False
        Include only float, int, boolean columns. If None, will attempt to use
        everything, then use only numeric data.
    min_count : int, default -1
        The required number of valid values to perform the operation. If fewer
        than ``min_count`` non-NA values are present the result will be NA.

    Returns
    -------
    Series or DataFrame
        Computed min of values within each group.
    """
    # Delegates to _agg_general with alias "min" and np.min.
    return self._agg_general(
        numeric_only=numeric_only, min_count=min_count, alias="min", npfunc=np.min
    )
1378+
def max(self, numeric_only: bool = False, min_count: int = -1):
    """
    Compute max of group values.

    Parameters
    ----------
    numeric_only : bool, default False
        Include only float, int, boolean columns. If None, will attempt to use
        everything, then use only numeric data.
    min_count : int, default -1
        The required number of valid values to perform the operation. If fewer
        than ``min_count`` non-NA values are present the result will be NA.

    Returns
    -------
    Series or DataFrame
        Computed max of values within each group.
    """
    # Delegates to _agg_general with alias "max" and np.max.
    return self._agg_general(
        numeric_only=numeric_only, min_count=min_count, alias="max", npfunc=np.max
    )
1383+
@staticmethod
def _get_loc(x, axis: int = 0, *, loc: int):
    """
    Pick the item at position ``loc`` among the non-NA values of ``x``.

    Parameters
    ----------
    x : object
        When ``x`` is a DataFrame the selection is applied through
        ``x.apply`` along ``axis``; otherwise it is applied to ``x``
        directly, which must then provide ``to_numpy()``.
    axis : int, default 0
        Only used for the DataFrame case, forwarded to ``apply``.
    loc : int
        Position to take after NA values are removed (``first`` uses 0,
        ``last`` uses -1).

    Returns
    -------
    The selected value, or ``np.nan`` when no non-NA value remains; for a
    DataFrame, whatever ``x.apply`` returns.
    """

    def pick_valid(values, loc: int):
        # ``loc`` must keep this name: DataFrame.apply forwards it by keyword.
        arr = values.to_numpy()
        valid = arr[notna(arr)]
        return valid[loc] if len(valid) != 0 else np.nan

    if not isinstance(x, DataFrame):
        return pick_valid(x, loc=loc)
    return x.apply(pick_valid, axis=axis, loc=loc)
1400+
def first(self, numeric_only: bool = False, min_count: int = -1):
    """
    Compute first of group values.

    Parameters
    ----------
    numeric_only : bool, default False
        Include only float, int, boolean columns. If None, will attempt to use
        everything, then use only numeric data.
    min_count : int, default -1
        The required number of valid values to perform the operation. If fewer
        than ``min_count`` non-NA values are present the result will be NA.

    Returns
    -------
    Series or DataFrame
        Computed first of values within each group.
    """
    # Fallback callable handed to _agg_general: the first non-NA entry.
    first_compat = partial(self._get_loc, loc=0)

    return self._agg_general(
        numeric_only=numeric_only,
        min_count=min_count,
        alias="first",
        npfunc=first_compat,
    )
1410+
def last(self, numeric_only: bool = False, min_count: int = -1):
    """
    Compute last of group values.

    Parameters
    ----------
    numeric_only : bool, default False
        Include only float, int, boolean columns. If None, will attempt to use
        everything, then use only numeric data.
    min_count : int, default -1
        The required number of valid values to perform the operation. If fewer
        than ``min_count`` non-NA values are present the result will be NA.

    Returns
    -------
    Series or DataFrame
        Computed last of values within each group.
    """
    # Fallback callable handed to _agg_general: the last non-NA entry.
    last_compat = partial(self._get_loc, loc=-1)

    return self._agg_general(
        numeric_only=numeric_only,
        min_count=min_count,
        alias="last",
        npfunc=last_compat,
    )
14381420
14391421 @Substitution (name = "groupby" )
14401422 @Appender (_common_see_also )
@@ -2528,9 +2510,6 @@ def _reindex_output(
25282510 return output .reset_index (drop = True )
25292511
25302512
2531- GroupBy ._add_numeric_operations ()
2532-
2533-
25342513@Appender (GroupBy .__doc__ )
25352514def get_groupby (
25362515 obj : NDFrame ,
0 commit comments