From 71fbb11d7338deea1cb8fa5dfec7e1cbdd4c98cf Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 10 Jan 2021 11:37:13 -0500 Subject: [PATCH] ENH: DataFrame.apply to accept numpy ops as strings --- doc/source/whatsnew/v1.3.0.rst | 2 + pandas/core/apply.py | 47 +++++++++++----- pandas/tests/frame/apply/test_frame_apply.py | 53 +++++++++++++++---- .../tests/series/apply/test_series_apply.py | 10 ++-- 4 files changed, 85 insertions(+), 27 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index e978bf102dedd..db2e2ba3a2e1e 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -54,6 +54,8 @@ Other enhancements - Add support for dict-like names in :class:`MultiIndex.set_names` and :class:`MultiIndex.rename` (:issue:`20421`) - :func:`pandas.read_excel` can now auto detect .xlsb files (:issue:`35416`) - :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:`38895`) +- :meth:`DataFrame.apply` can now accept NumPy unary operators as strings, e.g. ``df.apply("sqrt")``, which was already the case for :meth:`Series.apply` (:issue:`39116`) +- :meth:`DataFrame.apply` can now accept non-callable DataFrame properties as strings, e.g. ``df.apply("size")``, which was already the case for :meth:`Series.apply` (:issue:`39116`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/apply.py b/pandas/core/apply.py index a618e2a92551d..f3e759610b784 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -151,9 +151,11 @@ def agg(self) -> Tuple[Optional[FrameOrSeriesUnion], Optional[bool]]: if _axis is None: _axis = getattr(obj, "axis", 0) - if isinstance(arg, str): - return obj._try_aggregate_string_function(arg, *args, **kwargs), None - elif is_dict_like(arg): + result = self.maybe_apply_str() + if result is not None: + return result, None + + if is_dict_like(arg): arg = cast(AggFuncTypeDict, arg) return agg_dict_like(obj, arg, _axis), True elif is_list_like(arg): @@ -171,6 +173,28 @@ def agg(self) -> Tuple[Optional[FrameOrSeriesUnion], Optional[bool]]: # caller can react return result, True + def maybe_apply_str(self) -> Optional[FrameOrSeriesUnion]: + """ + Compute apply in case of a string. + + Returns + ------- + result: Series, DataFrame, or None + Result when self.f is a string, None otherwise. + """ + f = self.f + if not isinstance(f, str): + return None + # Support for `frame.transform('method')` + # Some methods (shift, etc.) require the axis argument, others + # don't, so inspect and insert if necessary. + func = getattr(self.obj, f, None) + if callable(func): + sig = inspect.getfullargspec(func) + if "axis" in sig.args: + self.kwds["axis"] = self.axis + return self.obj._try_aggregate_string_function(f, *self.args, **self.kwds) + class FrameApply(Apply): obj: DataFrame @@ -236,15 +260,9 @@ def apply(self) -> FrameOrSeriesUnion: return self.apply_empty_result() # string dispatch - if isinstance(self.f, str): - # Support for `frame.transform('method')` - # Some methods (shift, etc.) require the axis argument, others - # don't, so inspect and insert if necessary. - func = getattr(self.obj, self.f) - sig = inspect.getfullargspec(func) - if "axis" in sig.args: - self.kwds["axis"] = self.axis - return func(*self.args, **self.kwds) + result = self.maybe_apply_str() + if result is not None: + return result # ufunc elif isinstance(self.f, np.ufunc): @@ -581,8 +599,9 @@ def apply(self) -> FrameOrSeriesUnion: return obj.aggregate(func, *args, **kwds) # if we are a string, try to dispatch - if isinstance(func, str): - return obj._try_aggregate_string_function(func, *args, **kwds) + result = self.maybe_apply_str() + if result is not None: + return result return self.apply_standard() diff --git a/pandas/tests/frame/apply/test_frame_apply.py b/pandas/tests/frame/apply/test_frame_apply.py index 9e5d1dcdea85c..2b72ba3cf2773 100644 --- a/pandas/tests/frame/apply/test_frame_apply.py +++ b/pandas/tests/frame/apply/test_frame_apply.py @@ -166,8 +166,16 @@ def test_apply_standard_nonunique(self): pytest.param([1, None], {"numeric_only": True}, id="args_and_kwds"), ], ) - def test_apply_with_string_funcs(self, float_frame, func, args, kwds): - result = float_frame.apply(func, *args, **kwds) + @pytest.mark.parametrize("how", ["agg", "apply"]) + def test_apply_with_string_funcs(self, request, float_frame, func, args, kwds, how): + if len(args) > 1 and how == "agg": + request.node.add_marker( + pytest.mark.xfail( + reason="agg/apply signature mismatch - agg passes 2nd " + "argument to func" + ) + ) + result = getattr(float_frame, how)(func, *args, **kwds) expected = getattr(float_frame, func)(*args, **kwds) tm.assert_series_equal(result, expected) @@ -1314,30 +1322,32 @@ def test_nuiscance_columns(self): ) tm.assert_frame_equal(result, expected) - def test_non_callable_aggregates(self): + @pytest.mark.parametrize("how", ["agg", "apply"]) + def test_non_callable_aggregates(self, how): # GH 16405 # 'size' is a property of frame/series # validate that this is working + # GH 39116 - expand to apply df = DataFrame( {"A": [None, 2, 3], "B": [1.0, np.nan, 3.0], "C": ["foo", None, "bar"]} ) # Function aggregate - result = df.agg({"A": "count"}) + result = getattr(df, how)({"A": "count"}) expected = Series({"A": 2}) tm.assert_series_equal(result, expected) # Non-function aggregate - result = df.agg({"A": "size"}) + result = getattr(df, how)({"A": "size"}) expected = Series({"A": 3}) tm.assert_series_equal(result, expected) # Mix function and non-function aggs - result1 = df.agg(["count", "size"]) - result2 = df.agg( + result1 = getattr(df, how)(["count", "size"]) + result2 = getattr(df, how)( {"A": ["count", "size"], "B": ["count", "size"], "C": ["count", "size"]} ) expected = DataFrame( @@ -1352,13 +1362,13 @@ def test_non_callable_aggregates(self): tm.assert_frame_equal(result2, expected, check_like=True) # Just functional string arg is same as calling df.arg() - result = df.agg("count") + result = getattr(df, how)("count") expected = df.count() tm.assert_series_equal(result, expected) # Just a string attribute arg same as calling df.arg - result = df.agg("size") + result = getattr(df, how)("size") expected = df.size assert result == expected @@ -1577,3 +1587,28 @@ def test_apply_raw_returns_string(): result = df.apply(lambda x: x[0], axis=1, raw=True) expected = Series(["aa", "bbb"]) tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "op", ["abs", "ceil", "cos", "cumsum", "exp", "log", "sqrt", "square"] +) +@pytest.mark.parametrize("how", ["transform", "apply"]) +def test_apply_np_transformer(float_frame, op, how): + # GH 39116 + result = getattr(float_frame, how)(op) + expected = getattr(np, op)(float_frame) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("op", ["mean", "median", "std", "var"]) +@pytest.mark.parametrize("how", ["agg", "apply"]) +def test_apply_np_reducer(float_frame, op, how): + # GH 39116 + float_frame = DataFrame({"a": [1, 2], "b": [3, 4]}) + result = getattr(float_frame, how)(op) + # pandas ddof defaults to 1, numpy to 0 + kwargs = {"ddof": 1} if op in ("std", "var") else {} + expected = Series( + getattr(np, op)(float_frame, axis=0, **kwargs), index=float_frame.columns + ) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/apply/test_series_apply.py b/pandas/tests/series/apply/test_series_apply.py index 1242126c19527..b8c6291415ef7 100644 --- a/pandas/tests/series/apply/test_series_apply.py +++ b/pandas/tests/series/apply/test_series_apply.py @@ -338,19 +338,21 @@ def test_reduce(self, string_series): ) tm.assert_series_equal(result, expected) - def test_non_callable_aggregates(self): + @pytest.mark.parametrize("how", ["agg", "apply"]) + def test_non_callable_aggregates(self, how): # test agg using non-callable series attributes + # GH 39116 - expand to apply s = Series([1, 2, None]) # Calling agg w/ just a string arg same as calling s.arg - result = s.agg("size") + result = getattr(s, how)("size") expected = s.size assert result == expected # test when mixed w/ callable reducers - result = s.agg(["size", "count", "mean"]) + result = getattr(s, how)(["size", "count", "mean"]) expected = Series({"size": 3.0, "count": 2.0, "mean": 1.5}) - tm.assert_series_equal(result[expected.index], expected) + tm.assert_series_equal(result, expected) @pytest.mark.parametrize( "series, func, expected",