@@ -43,7 +43,7 @@ def _guess_datetime_format_for_array(arr, **kwargs):
4343 return _guess_datetime_format (arr [non_nan_elements [0 ]], ** kwargs )
4444
4545
46- def should_cache (arg , check_count : int , unique_share : float ):
46+ def should_cache (arg , unique_share = 0.7 , check_count = None ):
4747 """
4848 Decides whether to do caching.
4949
@@ -53,23 +53,34 @@ def should_cache(arg, check_count: int, unique_share: float):
5353 Parameters
5454 ----------
5555 arg: listlike, tuple, 1-d array, Series
56- check_count: int
57- 0 <= check_count <= len(arg)
58- unique_share: float
56+ unique_share: float, default 0.7
5957 0 < unique_share < 1
58+ check_count: int or None
59+ 0 <= check_count <= len(arg)
6060
6161 Returns
6262 -------
6363 do_caching: bool
6464 """
65- assert 0 <= check_count <= len (arg ), ('check_count must be in next bounds:'
66- ' [0; len(arg)]' )
67- assert 0 < unique_share < 1 , 'unique_share must be in next bounds: (0; 1)'
65+ do_caching = True
6866
69- if check_count == 0 :
70- return False
67+ # default behaviour: derive check_count from len(arg) when it is not given
68+ if check_count is None :
69+ # in this case, the gain from caching is negligible
70+ if len (arg ) <= 50 :
71+ return False
7172
72- do_caching = True
73+ if len (arg ) <= 5000 :
74+ check_count = int (len (arg ) * 0.1 )
75+ else :
76+ check_count = 500
77+ else :
78+ assert 0 <= check_count <= len (arg ), \
79+ 'check_count must be in next bounds: [0; len(arg)]'
80+ assert 0 < unique_share < 1 , \
81+ 'unique_share must be in next bounds: (0; 1)'
82+ if check_count == 0 :
83+ return False
7384
7485 unique_elements = unique (arg [:check_count ])
7586 if len (unique_elements ) > check_count * unique_share :
@@ -102,7 +113,7 @@ def _maybe_cache(arg, format, cache, convert_listlike):
102113 # Perform a quicker unique check
103114 from pandas import Index
104115
105- if not should_cache (arg , int ( len ( arg ) * 0.1 ), 0.7 ):
116+ if not should_cache (arg ):
106117 return cache_array
107118
108119 unique_dates = Index (arg ).unique ()
0 commit comments