327327 values. The options are `None` for the ordinary converter,
328328 `high` for the high-precision converter, and `round_trip` for the
329329 round-trip converter.
330+ cache_dates : boolean, default False
331+ If True, use a cache of unique, converted dates to apply the datetime
332+ conversion. May produce significant speed-up when parsing duplicate
333+ date strings, especially ones with timezone offsets.
334+
335+ .. versionadded:: 0.25.0
330336
331337Returns
332338-------
@@ -476,6 +482,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds):
476482 'false_values' : None ,
477483 'converters' : None ,
478484 'dtype' : None ,
485+ 'cache_dates' : False ,
479486
480487 'thousands' : None ,
481488 'comment' : None ,
@@ -577,6 +584,7 @@ def parser_f(filepath_or_buffer: FilePathOrBuffer,
577584 keep_date_col = False ,
578585 date_parser = None ,
579586 dayfirst = False ,
587+ cache_dates = False ,
580588
581589 # Iteration
582590 iterator = False ,
@@ -683,6 +691,7 @@ def parser_f(filepath_or_buffer: FilePathOrBuffer,
683691 keep_date_col = keep_date_col ,
684692 dayfirst = dayfirst ,
685693 date_parser = date_parser ,
694+ cache_dates = cache_dates ,
686695
687696 nrows = nrows ,
688697 iterator = iterator ,
@@ -1379,11 +1388,13 @@ def __init__(self, kwds):
13791388 self .tupleize_cols = kwds .get ('tupleize_cols' , False )
13801389 self .mangle_dupe_cols = kwds .get ('mangle_dupe_cols' , True )
13811390 self .infer_datetime_format = kwds .pop ('infer_datetime_format' , False )
1391+ self .cache_dates = kwds .pop ('cache_dates' , False )
13821392
13831393 self ._date_conv = _make_date_converter (
13841394 date_parser = self .date_parser ,
13851395 dayfirst = self .dayfirst ,
1386- infer_datetime_format = self .infer_datetime_format
1396+ infer_datetime_format = self .infer_datetime_format ,
1397+ cache_dates = self .cache_dates
13871398 )
13881399
13891400 # validate header options for mi
@@ -3173,7 +3184,7 @@ def _get_lines(self, rows=None):
31733184
31743185
31753186def _make_date_converter (date_parser = None , dayfirst = False ,
3176- infer_datetime_format = False ):
3187+ infer_datetime_format = False , cache_dates = False ):
31773188 def converter (* date_cols ):
31783189 if date_parser is None :
31793190 strs = _concat_date_cols (date_cols )
@@ -3184,16 +3195,22 @@ def converter(*date_cols):
31843195 utc = None ,
31853196 dayfirst = dayfirst ,
31863197 errors = 'ignore' ,
3187- infer_datetime_format = infer_datetime_format
3198+ infer_datetime_format = infer_datetime_format ,
3199+ cache = cache_dates
31883200 ).to_numpy ()
31893201
31903202 except ValueError :
31913203 return tools .to_datetime (
3192- parsing .try_parse_dates (strs , dayfirst = dayfirst ))
3204+ parsing .try_parse_dates (strs , dayfirst = dayfirst ),
3205+ cache = cache_dates
3206+ )
31933207 else :
31943208 try :
31953209 result = tools .to_datetime (
3196- date_parser (* date_cols ), errors = 'ignore' )
3210+ date_parser (* date_cols ),
3211+ errors = 'ignore' ,
3212+ cache = cache_dates
3213+ )
31973214 if isinstance (result , datetime .datetime ):
31983215 raise Exception ('scalar parser' )
31993216 return result
@@ -3203,6 +3220,7 @@ def converter(*date_cols):
32033220 parsing .try_parse_dates (_concat_date_cols (date_cols ),
32043221 parser = date_parser ,
32053222 dayfirst = dayfirst ),
3223+ cache = cache_dates ,
32063224 errors = 'ignore' )
32073225 except Exception :
32083226 return generic_parser (date_parser , * date_cols )
0 commit comments