@@ -36,9 +36,77 @@ def _guess_datetime_format_for_array(arr, **kwargs):
3636 return _guess_datetime_format (arr [non_nan_elements [0 ]], ** kwargs )
3737
3838
39+ def _maybe_cache (arg , format , cache , tz , convert_listlike ):
40+ """
41+ Create a cache of unique dates from an array of dates
42+
43+ Parameters
44+ ----------
45+ arg : integer, float, string, datetime, list, tuple, 1-d array, Series
46+ format : string
47+ Strftime format to parse time
48+ cache : boolean
49+ True attempts to create a cache of converted values
50+ tz : string
51+ Timezone of the dates
52+ convert_listlike : function
53+ Conversion function to apply on dates
54+
55+ Returns
56+ -------
57+ cache_array : Series
58+ Cache of converted, unique dates. Can be empty
59+ """
60+ from pandas import Series
61+ cache_array = Series ()
62+ if cache :
63+ # Perform a quicker unique check
64+ from pandas import Index
65+ if not Index (arg ).is_unique :
66+ unique_dates = algorithms .unique (arg )
67+ cache_dates = convert_listlike (unique_dates , True , format , tz = tz )
68+ cache_array = Series (cache_dates , index = unique_dates )
69+ return cache_array
70+
71+
72+ def _convert_and_box_cache (arg , cache_array , box , errors , name = None ):
73+ """
74+ Convert array of dates with a cache and box the result
75+
76+ Parameters
77+ ----------
78+ arg : integer, float, string, datetime, list, tuple, 1-d array, Series
79+ cache_array : Series
80+ Cache of converted, unique dates
81+ box : boolean
82+ True boxes result as an Index-like, False returns an ndarray
83+ errors : string
84+ 'ignore' plus box=True will convert result to Index
85+ name : string, default None
86+ Name for a DatetimeIndex
87+
88+ Returns
89+ -------
90+ result : datetime of converted dates
91+ Returns:
92+
93+ - Index-like if box=True
94+ - ndarray if box=False
95+ """
96+ from pandas import Series , DatetimeIndex , Index
97+ result = Series (arg ).map (cache_array )
98+ if box :
99+ if errors == 'ignore' :
100+ return Index (result )
101+ else :
102+ return DatetimeIndex (result , name = name )
103+ return result .values
104+
105+
39106def to_datetime (arg , errors = 'raise' , dayfirst = False , yearfirst = False ,
40107 utc = None , box = True , format = None , exact = True ,
41- unit = None , infer_datetime_format = False , origin = 'unix' ):
108+ unit = None , infer_datetime_format = False , origin = 'unix' ,
109+ cache = False ):
42110 """
43111 Convert argument to datetime.
44112
@@ -111,7 +179,12 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
111179 origin.
112180
113181 .. versionadded:: 0.20.0
182+ cache : boolean, default False
183+ If True, use a cache of unique, converted dates to apply the datetime
184+ conversion. May produce significant speed-up when parsing duplicate date
185+ strings, especially ones with timezone offsets.
114186
187+ .. versionadded:: 0.22.0
115188 Returns
116189 -------
117190 ret : datetime if parsing succeeded.
@@ -369,15 +442,28 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
369442 if isinstance (arg , tslib .Timestamp ):
370443 result = arg
371444 elif isinstance (arg , ABCSeries ):
372- from pandas import Series
373- values = _convert_listlike (arg ._values , True , format )
374- result = Series (values , index = arg .index , name = arg .name )
445+ cache_array = _maybe_cache (arg , format , cache , tz , _convert_listlike )
446+ if not cache_array .empty :
447+ result = arg .map (cache_array )
448+ else :
449+ from pandas import Series
450+ values = _convert_listlike (arg ._values , True , format )
451+ result = Series (values , index = arg .index , name = arg .name )
375452 elif isinstance (arg , (ABCDataFrame , MutableMapping )):
376453 result = _assemble_from_unit_mappings (arg , errors = errors )
377454 elif isinstance (arg , ABCIndexClass ):
378- result = _convert_listlike (arg , box , format , name = arg .name )
455+ cache_array = _maybe_cache (arg , format , cache , tz , _convert_listlike )
456+ if not cache_array .empty :
457+ result = _convert_and_box_cache (arg , cache_array , box , errors ,
458+ name = arg .name )
459+ else :
460+ result = _convert_listlike (arg , box , format , name = arg .name )
379461 elif is_list_like (arg ):
380- result = _convert_listlike (arg , box , format )
462+ cache_array = _maybe_cache (arg , format , cache , tz , _convert_listlike )
463+ if not cache_array .empty :
464+ result = _convert_and_box_cache (arg , cache_array , box , errors )
465+ else :
466+ result = _convert_listlike (arg , box , format )
381467 else :
382468 result = _convert_listlike (np .array ([arg ]), box , format )[0 ]
383469
0 commit comments