44
55import numpy as np
66
7+ from pandas ._config import option_context
8+
79from pandas ._libs import reduction as libreduction
810from pandas ._typing import Axis
911from pandas .util ._decorators import cache_readonly
1012
11- from pandas .core .dtypes .common import (
12- is_dict_like ,
13- is_extension_array_dtype ,
14- is_list_like ,
15- is_sequence ,
16- )
13+ from pandas .core .dtypes .common import is_dict_like , is_list_like , is_sequence
1714from pandas .core .dtypes .generic import ABCSeries
1815
1916from pandas .core .construction import create_series_with_explicit_dtype
@@ -260,53 +257,6 @@ def apply_standard(self):
260257 # partial result that may be returned from reduction
261258 partial_result = None
262259
263- # try to reduce first (by default)
264- # this only matters if the reduction in values is of different dtype
265- # e.g. if we want to apply to a SparseFrame, then can't directly reduce
266-
267- # we cannot reduce using non-numpy dtypes,
268- # as demonstrated in gh-12244
269- if (
270- self .result_type in ["reduce" , None ]
271- and not self .dtypes .apply (is_extension_array_dtype ).any ()
272- # Disallow dtypes where setting _index_data will break
273- # ExtensionArray values, see GH#31182
274- and not self .dtypes .apply (lambda x : x .kind in ["m" , "M" ]).any ()
275- # Disallow complex_internals since libreduction shortcut raises a TypeError
276- and not self .agg_axis ._has_complex_internals
277- ):
278-
279- values = self .values
280- index = self .obj ._get_axis (self .axis )
281- labels = self .agg_axis
282- empty_arr = np .empty (len (index ), dtype = values .dtype )
283-
284- # Preserve subclass for e.g. test_subclassed_apply
285- dummy = self .obj ._constructor_sliced (
286- empty_arr , index = index , dtype = values .dtype
287- )
288-
289- try :
290- result , reduction_success = libreduction .compute_reduction (
291- values , self .f , axis = self .axis , dummy = dummy , labels = labels
292- )
293- except TypeError :
294- # e.g. test_apply_ignore_failures we just ignore
295- if not self .ignore_failures :
296- raise
297- except ZeroDivisionError :
298- # reached via numexpr; fall back to python implementation
299- pass
300- else :
301- if reduction_success :
302- return self .obj ._constructor_sliced (result , index = labels )
303-
304- # no exceptions - however reduction was unsuccessful,
305- # use the computed function result for first element
306- partial_result = result [0 ]
307- if isinstance (partial_result , ABCSeries ):
308- partial_result = partial_result .infer_objects ()
309-
310260 # compute the result using the series generator,
311261 # use the result computed while trying to reduce if available.
312262 results , res_index = self .apply_series_generator (partial_result )
@@ -344,7 +294,14 @@ def apply_series_generator(self, partial_result=None) -> Tuple[ResType, "Index"]
344294 else :
345295 for i , v in series_gen_enumeration :
346296
347- results [i ] = self .f (v )
297+ with option_context ("mode.chained_assignment" , None ):
298+ # ignore SettingWithCopy here in case the user mutates
299+ results [i ] = self .f (v )
300+
301+ if isinstance (results [i ], ABCSeries ):
302+ # If we have a view on v, we need to make a copy because
303+ # series_generator will swap out the underlying data
304+ results [i ] = results [i ].copy (deep = False )
348305
349306 return results , res_index
350307
@@ -355,7 +312,6 @@ def wrap_results(
355312
356313 # see if we can infer the results
357314 if len (results ) > 0 and 0 in results and is_sequence (results [0 ]):
358-
359315 return self .wrap_results_for_axis (results , res_index )
360316
361317 # dict of scalars
@@ -395,9 +351,30 @@ def result_columns(self) -> "Index":
395351
396352 def wrap_results_for_axis (
397353 self , results : ResType , res_index : "Index"
398- ) -> " DataFrame" :
354+ ) -> Union [ "Series" , " DataFrame"] :
399355 """ return the results for the rows """
400- result = self .obj ._constructor (data = results )
356+
357+ if self .result_type == "reduce" :
358+ # e.g. test_apply_dict GH#8735
359+ return self .obj ._constructor_sliced (results )
360+ elif self .result_type is None and all (
361+ isinstance (x , dict ) for x in results .values ()
362+ ):
363+ # Our operation was a to_dict op e.g.
364+ # test_apply_dict GH#8735, test_apply_reduce_rows_to_dict GH#25196
365+ return self .obj ._constructor_sliced (results )
366+
367+ try :
368+ result = self .obj ._constructor (data = results )
369+ except ValueError as err :
370+ if "arrays must all be same length" in str (err ):
371+ # e.g. result = [[2, 3], [1.5], ['foo', 'bar']]
372+ # see test_agg_listlike_result GH#29587
373+ res = self .obj ._constructor_sliced (results )
374+ res .index = res_index
375+ return res
376+ else :
377+ raise
401378
402379 if not isinstance (results [0 ], ABCSeries ):
403380 if len (result .index ) == len (self .res_columns ):
@@ -418,11 +395,19 @@ def apply_broadcast(self, target: "DataFrame") -> "DataFrame":
418395
419396 @property
420397 def series_generator (self ):
421- constructor = self .obj ._constructor_sliced
422- return (
423- constructor (arr , index = self .columns , name = name )
424- for i , (arr , name ) in enumerate (zip (self .values , self .index ))
425- )
398+ values = self .values
399+ assert len (values ) > 0
400+
401+ # We create one Series object, and will swap out the data inside
402+ # of it. Kids: don't do this at home.
403+ ser = self .obj ._ixs (0 , axis = 0 )
404+ mgr = ser ._mgr
405+ blk = mgr .blocks [0 ]
406+
407+ for (arr , name ) in zip (values , self .index ):
408+ blk .values = arr
409+ ser .name = name
410+ yield ser
426411
427412 @property
428413 def result_index (self ) -> "Index" :
@@ -444,9 +429,7 @@ def wrap_results_for_axis(
444429
445430 # we have a non-series and don't want inference
446431 elif not isinstance (results [0 ], ABCSeries ):
447- from pandas import Series
448-
449- result = Series (results )
432+ result = self .obj ._constructor_sliced (results )
450433 result .index = res_index
451434
452435 # we may want to infer results
0 commit comments