@@ -20,10 +20,10 @@ def _compute_P_ABBA(
2020
2121 The MPdist distance measure considers two time series to be similar if they share
2222 many subsequences, regardless of the order of matching subsequences. MPdist
23- concatenates and sorts the output of an AB-join and a BA-join and returns the value
24- of the `k`th smallest number as the reported distance. Note that MPdist is a
25- measure and not a metric. Therefore, it does not obey the triangular inequality but
26- the method is highly scalable.
23+ concatenates the output of an AB-join and a BA-join and returns the `k`th smallest
24+ value as the reported distance. Note that MPdist is a measure and not a metric.
25+ Therefore, it does not obey the triangle inequality but the method is highly
26+ scalable.
2727
2828 Parameters
2929 ----------
@@ -81,15 +81,15 @@ def _select_P_ABBA_value(P_ABBA, k, custom_func=None):
8181
8282 The MPdist distance measure considers two time series to be similar if they share
8383 many subsequences, regardless of the order of matching subsequences. MPdist
84- concatenates and sorts the output of an AB-join and a BA-join and returns the value
85- of the `k`th smallest number as the reported distance. Note that MPdist is a
86- measure and not a metric. Therefore, it does not obey the triangular inequality but
87- the method is highly scalable.
84+ concatenates the output of an AB-join and a BA-join and returns the `k`th smallest
85+ value as the reported distance. Note that MPdist is a measure and not a metric.
86+ Therefore, it does not obey the triangle inequality but the method is highly
87+ scalable.
8888
8989 Parameters
9090 ----------
9191 P_ABBA : ndarray
92- A pre-sorted array resulting from the concatenation of the outputs from an
92+ An unsorted array resulting from the concatenation of the outputs from an
9393 AB-join and BA-join for two time series, `T_A` and `T_B`
9494
9595 k : int
@@ -98,7 +98,7 @@ def _select_P_ABBA_value(P_ABBA, k, custom_func=None):
9898
9999 custom_func : object, default None
100100 A custom user defined function for selecting the desired value from the
101- sorted `P_ABBA` array. This function may need to leverage `functools.partial`
101+ unsorted `P_ABBA` array. This function may need to leverage `functools.partial`
102102 and should take `P_ABBA` as its only input parameter and return a single
103103 `MPdist` value. The `percentage` and `k` parameters are ignored when
104104 `custom_func` is not None.
@@ -112,10 +112,12 @@ def _select_P_ABBA_value(P_ABBA, k, custom_func=None):
112112 if custom_func is not None :
113113 MPdist = custom_func (P_ABBA )
114114 else :
115- MPdist = P_ABBA [k ]
115+ partition = np .partition (P_ABBA , k )
116+ MPdist = partition [k ]
116117 if ~ np .isfinite (MPdist ):
117- k = max (0 , np .count_nonzero (np .isfinite (P_ABBA [:k ])) - 1 )
118- MPdist = P_ABBA [k ]
118+ partition [:k ].sort ()
119+ k = max (0 , np .count_nonzero (np .isfinite (partition [:k ])) - 1 )
120+ MPdist = partition [k ]
119121
120122 return MPdist
121123
@@ -137,10 +139,10 @@ def _mpdist(
137139
138140 The MPdist distance measure considers two time series to be similar if they share
139141 many subsequences, regardless of the order of matching subsequences. MPdist
140- concatenates and sorts the output of an AB-join and a BA-join and returns the value
141- of the `k`th smallest number as the reported distance. Note that MPdist is a
142- measure and not a metric. Therefore, it does not obey the triangular inequality but
143- the method is highly scalable.
142+ concatenates the output of an AB-join and a BA-join and returns the `k`th smallest
143+ value as the reported distance. Note that MPdist is a measure and not a metric.
144+ Therefore, it does not obey the triangle inequality but the method is highly
145+ scalable.
144146
145147 Parameters
146148 ----------
@@ -180,7 +182,7 @@ def _mpdist(
180182
181183 custom_func : object, default None
182184 A custom user defined function for selecting the desired value from the
183- sorted `P_ABBA` array. This function may need to leverage `functools.partial`
185+ unsorted `P_ABBA` array. This function may need to leverage `functools.partial`
184186 and should take `P_ABBA` as its only input parameter and return a single
185187 `MPdist` value. The `percentage` and `k` parameters are ignored when
186188 `custom_func` is not None.
@@ -202,13 +204,11 @@ def _mpdist(
202204 P_ABBA = np .empty (n_A - m + 1 + n_B - m + 1 , dtype = np .float64 )
203205
204206 _compute_P_ABBA (T_A , T_B , m , P_ABBA , dask_client , device_id , mp_func )
205- P_ABBA .sort ()
206207
207208 if k is not None :
208209 k = min (int (k ), P_ABBA .shape [0 ] - 1 )
209210 else :
210- percentage = min (percentage , 1.0 )
211- percentage = max (percentage , 0.0 )
211+ percentage = np .clip (percentage , 0.0 , 1.0 )
212212 k = min (math .ceil (percentage * (n_A + n_B )), n_A - m + 1 + n_B - m + 1 - 1 )
213213
214214 MPdist = _select_P_ABBA_value (P_ABBA , k , custom_func )
@@ -252,7 +252,7 @@ def _mpdist_vect(
252252
253253 custom_func : object, default None
254254 A custom user defined function for selecting the desired value from the
255- sorted `P_ABBA` array. This function may need to leverage `functools.partial`
255+ unsorted `P_ABBA` array. This function may need to leverage `functools.partial`
256256 and should take `P_ABBA` as its only input parameter and return a single
257257 `MPdist` value. The `percentage` and `k` parameters are ignored when
258258 `custom_func` is not None.
@@ -267,8 +267,7 @@ def _mpdist_vect(
267267 P_ABBA = np .empty (2 * j )
268268
269269 if k is None :
270- percentage = min (percentage , 1.0 )
271- percentage = max (percentage , 0.0 )
270+ percentage = np .clip (percentage , 0.0 , 1.0 )
272271 k = min (math .ceil (percentage * (2 * Q .shape [0 ])), 2 * j - 1 )
273272
274273 k = min (int (k ), P_ABBA .shape [0 ] - 1 )
@@ -281,7 +280,6 @@ def _mpdist_vect(
281280 for i in range (MPdist_vect .shape [0 ]):
282281 P_ABBA [:j ] = rolling_row_min [:, i ]
283282 P_ABBA [j :] = col_min [i : i + j ]
284- P_ABBA .sort ()
285283 MPdist_vect [i ] = _select_P_ABBA_value (P_ABBA , k , custom_func )
286284
287285 return MPdist_vect
@@ -295,10 +293,10 @@ def mpdist(T_A, T_B, m, percentage=0.05, k=None, normalize=True):
295293
296294 The MPdist distance measure considers two time series to be similar if they share
297295 many subsequences, regardless of the order of matching subsequences. MPdist
298- concatenates and sorts the output of an AB-join and a BA-join and returns the value
299- of the `k`th smallest number as the reported distance. Note that MPdist is a
300- measure and not a metric. Therefore, it does not obey the triangular inequality but
301- the method is highly scalable.
296+ concatenates the output of an AB-join and a BA-join and returns the `k`th smallest
297+ value as the reported distance. Note that MPdist is a measure and not a metric.
298+ Therefore, it does not obey the triangle inequality but the method is highly
299+ scalable.
302300
303301 Parameters
304302 ----------
@@ -349,10 +347,10 @@ def mpdisted(dask_client, T_A, T_B, m, percentage=0.05, k=None, normalize=True):
349347
350348 The MPdist distance measure considers two time series to be similar if they share
351349 many subsequences, regardless of the order of matching subsequences. MPdist
352- concatenates and sorts the output of an AB-join and a BA-join and returns the value
353- of the `k`th smallest number as the reported distance. Note that MPdist is a
354- measure and not a metric. Therefore, it does not obey the triangular inequality but
355- the method is highly scalable.
350+ concatenates the output of an AB-join and a BA-join and returns the `k`th smallest
351+ value as the reported distance. Note that MPdist is a measure and not a metric.
352+ Therefore, it does not obey the triangle inequality but the method is highly
353+ scalable.
356354
357355 Parameters
358356 ----------
0 commit comments