@@ -1437,7 +1437,7 @@ def mass(Q, T, M_T=None, Σ_T=None, normalize=True, p=2.0):
14371437 return distance_profile
14381438
14391439
def _mass_distance_matrix(Q, T, m, distance_matrix, μ_Q, σ_Q, M_T, Σ_T):
    """
    Compute the full distance matrix between all of the subsequences of `Q` and `T`
    using the MASS algorithm

    Parameters
    ----------
    Q : numpy.ndarray
        Query array

    T : numpy.ndarray
        Time series or sequence

    m : int
        Window size

    distance_matrix : numpy.ndarray
        The full output distance matrix. This is mandatory since it may be reused.
        One row is overwritten, in place, for each `i` in
        `range(distance_matrix.shape[0])`.

    μ_Q : numpy.ndarray
        Sliding mean of `Q`. It is indexed per subsequence, i.e., `μ_Q[i]` is the
        value used for `Q[i : i + m]`.

    σ_Q : numpy.ndarray
        Sliding standard deviation of `Q`. It is indexed per subsequence, i.e.,
        `σ_Q[i]` is the value used for `Q[i : i + m]`.

    M_T : numpy.ndarray
        Sliding mean of `T`

    Σ_T : numpy.ndarray
        Sliding standard deviation of `T`

    Returns
    -------
    None
    """
    for i in range(distance_matrix.shape[0]):
        # The i-th query subsequence; sliced once and reused below
        Q_i = Q[i : i + m]
        if np.any(~np.isfinite(Q_i)):  # pragma: no cover
            # A subsequence containing NaN/inf is assigned an infinite distance
            # to every position in the row
            distance_matrix[i, :] = np.inf
        else:
            QT = _sliding_dot_product(Q_i, T)
            distance_matrix[i, :] = _mass(Q_i, T, QT, μ_Q[i], σ_Q[i], M_T, Σ_T)
1482+
def mass_distance_matrix(Q, T, m, distance_matrix, M_T=None, Σ_T=None):
    """
    Compute the full distance matrix between all of the subsequences of `Q` and `T`
    using the MASS algorithm

    This is the validating entry point: it preprocesses the inputs, checks the
    window size, and then hands everything over to `_mass_distance_matrix`.

    Parameters
    ----------
    Q : numpy.ndarray
        Query array

    T : numpy.ndarray
        Time series or sequence

    m : int
        Window size

    distance_matrix : numpy.ndarray
        The full output distance matrix. This is mandatory since it may be reused.

    M_T : numpy.ndarray, default None
        Sliding mean of `T`. When either `M_T` or `Σ_T` is None, both are
        recomputed from `T` via `preprocess`.

    Σ_T : numpy.ndarray, default None
        Sliding standard deviation of `T`. When either `M_T` or `Σ_T` is None,
        both are recomputed from `T` via `preprocess`.

    Returns
    -------
    None
    """
    # The query statistics are always derived here
    Q, μ_Q, σ_Q = preprocess(Q, m)

    # `T` is only preprocessed when the caller did not supply both statistics
    stats_missing = M_T is None or Σ_T is None
    if stats_missing:
        T, M_T, Σ_T = preprocess(T, m)

    # The window may not exceed the shorter of the two sequences
    shortest_length = min(Q.shape[-1], T.shape[-1])
    check_window_size(m, max_size=shortest_length)

    return _mass_distance_matrix(Q, T, m, distance_matrix, μ_Q, σ_Q, M_T, Σ_T)
14681520
14691521
14701522def _get_QT (start , T_A , T_B , m ):
@@ -2394,3 +2446,51 @@ def _binarize_pan(pan, threshold, bfs_indices, n_processed):
23942446 """
23952447 idx = bfs_indices [:n_processed ]
23962448 pan [idx ] = np .where (pan [idx ] <= threshold , 0.0 , 1.0 )
2449+
2450+
def _select_P_ABBA_value(P_ABBA, k, custom_func=None):
    """
    A convenience function for returning the `k`th smallest value from the `P_ABBA`
    array or use a custom function to specify what `P_ABBA` value to return.

    The MPdist distance measure considers two time series to be similar if they share
    many subsequences, regardless of the order of matching subsequences. MPdist
    concatenates the output of an AB-join and a BA-join and returns the `k`th smallest
    value as the reported distance. Note that MPdist is a measure and not a metric.
    Therefore, it does not obey the triangular inequality but the method is highly
    scalable.

    Parameters
    ----------
    P_ABBA : numpy.ndarray
        An unsorted array resulting from the concatenation of the outputs from an
        AB-join and BA-join for two time series, `T_A` and `T_B`

    k : int
        Specify the `k`th value in the concatenated matrix profiles to return. It is
        converted with `int` and clamped to the last valid index of `P_ABBA`. This
        parameter is ignored (and never inspected) when `custom_func` is not None.

    custom_func : object, default None
        A custom user defined function for selecting the desired value from the
        unsorted `P_ABBA` array. This function may need to leverage `functools.partial`
        and should take `P_ABBA` as its only input parameter and return a single
        `MPdist` value. The `k` parameter is ignored when `custom_func` is not None.

    Returns
    -------
    MPdist : float
        The matrix profile distance
    """
    if custom_func is not None:
        # `k` is documented as ignored on this path, so it must not be converted
        # or validated here (e.g., `k=None` is acceptable)
        return custom_func(P_ABBA)

    # Never index past the end of `P_ABBA`
    k = min(int(k), P_ABBA.shape[0] - 1)
    partition = np.partition(P_ABBA, k)
    MPdist = partition[k]
    if not np.isfinite(MPdist):
        # The `k`th smallest value is not finite. Fall back to the largest finite
        # value among the `k` smallest elements. Those elements are only
        # partitioned (not ordered), so sort them in place first.
        partition[:k].sort()
        k = max(0, np.count_nonzero(np.isfinite(partition[:k])) - 1)
        MPdist = partition[k]

    return MPdist
0 commit comments