11from __future__ import annotations
22
3+ from functools import wraps
34import re
45from typing import (
56 TYPE_CHECKING ,
3031 ArrayLike ,
3132 Dtype ,
3233 DtypeObj ,
34+ F ,
3335 Shape ,
3436 final ,
3537)
121123_dtype_obj = np .dtype ("object" )
122124
123125
def maybe_split(meth: F) -> F:
    """
    Decorate a Block method so that multi-column blocks are handled per column.

    When the block is 1-dimensional, or its first axis has length 1, the
    wrapped method is called directly. Otherwise the call is forwarded to
    ``self.split_and_operate`` together with ``meth`` and the call's
    positional and keyword arguments.
    """

    @wraps(meth)
    def wrapper(self, *args, **kwargs) -> List[Block]:
        # Equivalent to "not (ndim == 1 or shape[0] == 1)"; shape is only
        # inspected when the block is not 1-dimensional.
        needs_split = self.ndim != 1 and self.shape[0] != 1
        if needs_split:
            # Split and operate column-by-column
            return self.split_and_operate(meth, *args, **kwargs)
        return meth(self, *args, **kwargs)

    return cast(F, wrapper)
142+
143+
124144class Block (PandasObject ):
125145 """
126146 Canonical n-dimensional unit of homogeneous dtype contained in a pandas
@@ -464,17 +484,16 @@ def fillna(
464484 # we can't process the value, but nothing to do
465485 return [self ] if inplace else [self .copy ()]
466486
467- # operate column-by-column
468- def f (mask , val , idx ):
469- block = self .coerce_to_target_dtype (value )
470-
471- # slice out our block
472- if idx is not None :
473- # i.e. self.ndim == 2
474- block = block .getitem_block (slice (idx , idx + 1 ))
475- return block .fillna (value , limit = limit , inplace = inplace , downcast = None )
487+ elif self .ndim == 1 or self .shape [0 ] == 1 :
488+ blk = self .coerce_to_target_dtype (value )
489+ # bc we have already cast, inplace=True may avoid an extra copy
490+ return blk .fillna (value , limit = limit , inplace = True , downcast = None )
476491
477- return self .split_and_operate (None , f , inplace )
492+ else :
493+ # operate column-by-column
494+ return self .split_and_operate (
495+ type (self ).fillna , value , limit = limit , inplace = inplace , downcast = None
496+ )
478497
479498 @final
480499 def _split (self ) -> List [Block ]:
@@ -492,75 +511,27 @@ def _split(self) -> List[Block]:
492511 return new_blocks
493512
@final
def split_and_operate(self, func, *args, **kwargs) -> List[Block]:
    """
    Apply func to each piece returned by ``self._split()`` and pool the results.

    Parameters
    ----------
    func : Block method
        Invoked as ``func(piece, *args, **kwargs)`` for every piece; each
        call must return an iterable of blocks.
    *args
    **kwargs

    Returns
    -------
    List[Block]
        The blocks produced by every call, in split order.
    """
    # Only meaningful for 2D blocks with more than one entry on the first axis.
    assert self.ndim == 2 and self.shape[0] != 1

    return [
        res_blk
        for sub_blk in self._split()
        for res_blk in func(sub_blk, *args, **kwargs)
    ]
564535
565536 def _maybe_downcast (self , blocks : List [Block ], downcast = None ) -> List [Block ]:
566537
@@ -600,13 +571,17 @@ def downcast(self, dtypes=None) -> List[Block]:
600571 elif dtypes != "infer" :
601572 raise AssertionError ("dtypes as dict is not supported yet" )
602573
603- # operate column-by-column
604- # this is expensive as it splits the blocks items-by-item
605- def f (mask , val , idx ):
606- val = maybe_downcast_to_dtype (val , dtype = "infer" )
607- return val
574+ return self ._downcast_2d ()
608575
609- return self .split_and_operate (None , f , False )
576+ @maybe_split
577+ def _downcast_2d (self ) -> List [Block ]:
578+ """
579+ downcast specialized to 2D case post-validation.
580+
581+ Refactored to allow use of maybe_split.
582+ """
583+ new_values = maybe_downcast_to_dtype (self .values , dtype = "infer" )
584+ return [self .make_block (new_values )]
610585
611586 @final
612587 def astype (self , dtype , copy : bool = False , errors : str = "raise" ):
@@ -735,18 +710,13 @@ def replace(
735710 # bc _can_hold_element is incorrect.
736711 return [self ] if inplace else [self .copy ()]
737712
738- if not self ._can_hold_element (value ):
739- if self .ndim == 2 and self .shape [0 ] > 1 :
740- # split so that we only upcast where necessary
741- nbs = self ._split ()
742- res_blocks = extend_blocks (
743- [
744- blk .replace (to_replace , value , inplace = inplace , regex = regex )
745- for blk in nbs
746- ]
747- )
748- return res_blocks
713+ elif self ._can_hold_element (value ):
714+ blk = self if inplace else self .copy ()
715+ putmask_inplace (blk .values , mask , value )
716+ blocks = blk .convert (numeric = False , copy = False )
717+ return blocks
749718
719+ elif self .ndim == 1 or self .shape [0 ] == 1 :
750720 blk = self .coerce_to_target_dtype (value )
751721 return blk .replace (
752722 to_replace = to_replace ,
@@ -755,10 +725,11 @@ def replace(
755725 regex = regex ,
756726 )
757727
758- blk = self if inplace else self .copy ()
759- putmask_inplace (blk .values , mask , value )
760- blocks = blk .convert (numeric = False , copy = False )
761- return blocks
728+ else :
729+ # split so that we only upcast where necessary
730+ return self .split_and_operate (
731+ type (self ).replace , to_replace , value , inplace = inplace , regex = regex
732+ )
762733
763734 @final
764735 def _replace_regex (
@@ -2048,6 +2019,8 @@ class ObjectBlock(Block):
20482019 is_object = True
20492020 _can_hold_na = True
20502021
2022+ values : np .ndarray
2023+
20512024 @property
20522025 def is_bool (self ):
20532026 """
@@ -2056,26 +2029,15 @@ def is_bool(self):
20562029 """
20572030 return lib .is_bool_array (self .values .ravel ("K" ))
20582031
2032+ @maybe_split
20592033 def reduce (self , func , ignore_failures : bool = False ) -> List [Block ]:
20602034 """
20612035 For object-dtype, we operate column-wise.
20622036 """
20632037 assert self .ndim == 2
20642038
2065- values = self .values
2066- if len (values ) > 1 :
2067- # split_and_operate expects func with signature (mask, values, inplace)
2068- def mask_func (mask , values , inplace ):
2069- if values .ndim == 1 :
2070- values = values .reshape (1 , - 1 )
2071- return func (values )
2072-
2073- return self .split_and_operate (
2074- None , mask_func , False , ignore_failures = ignore_failures
2075- )
2076-
20772039 try :
2078- res = func (values )
2040+ res = func (self . values )
20792041 except TypeError :
20802042 if not ignore_failures :
20812043 raise
@@ -2086,6 +2048,7 @@ def mask_func(mask, values, inplace):
20862048 res = res .reshape (1 , - 1 )
20872049 return [self .make_block_same_class (res )]
20882050
2051+ @maybe_split
20892052 def convert (
20902053 self ,
20912054 copy : bool = True ,
@@ -2097,30 +2060,15 @@ def convert(
20972060 attempt to cast any object types to better types return a copy of
20982061 the block (if copy = True) by definition we ARE an ObjectBlock!!!!!
20992062 """
2100-
2101- # operate column-by-column
2102- def f (mask , val , idx ):
2103- shape = val .shape
2104- values = soft_convert_objects (
2105- val .ravel (),
2106- datetime = datetime ,
2107- numeric = numeric ,
2108- timedelta = timedelta ,
2109- copy = copy ,
2110- )
2111- if isinstance (values , np .ndarray ):
2112- # TODO(EA2D): allow EA once reshape is supported
2113- values = values .reshape (shape )
2114-
2115- return values
2116-
2117- if self .ndim == 2 :
2118- blocks = self .split_and_operate (None , f , False )
2119- else :
2120- values = f (None , self .values .ravel (), None )
2121- blocks = [self .make_block (values )]
2122-
2123- return blocks
2063+ res_values = soft_convert_objects (
2064+ self .values .ravel (),
2065+ datetime = datetime ,
2066+ numeric = numeric ,
2067+ timedelta = timedelta ,
2068+ copy = copy ,
2069+ )
2070+ res_values = ensure_block_shape (res_values , self .ndim )
2071+ return [self .make_block (res_values )]
21242072
21252073 def _maybe_downcast (self , blocks : List [Block ], downcast = None ) -> List [Block ]:
21262074
0 commit comments