Skip to content

Commit b05213b

Browse files
committed
fix compatibility with older scikit-learn
1 parent 1142fcc commit b05213b

File tree

3 files changed

+51
-8
lines changed

3 files changed

+51
-8
lines changed

dask_ml/metrics/regression.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,14 +81,50 @@ def mean_absolute_error(
8181
return result
8282

8383

84-
@derived_from(sklearn.metrics)
8584
def mean_absolute_percentage_error(
8685
y_true: ArrayLike,
8786
y_pred: ArrayLike,
8887
sample_weight: Optional[ArrayLike] = None,
8988
multioutput: Optional[str] = "uniform_average",
9089
compute: bool = True,
9190
) -> ArrayLike:
91+
"""Mean absolute percentage error regression loss.
92+
93+
Note here that we do not represent the output as a percentage in range
94+
[0, 100]. Instead, we represent it in range [0, 1/eps]. Read more in
95+
https://scikit-learn.org/stable/modules/model_evaluation.html#mean-absolute-percentage-error
96+
97+
Parameters
98+
----------
99+
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
100+
Ground truth (correct) target values.
101+
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
102+
Estimated target values.
103+
sample_weight : array-like of shape (n_samples,), default=None
104+
Sample weights.
105+
multioutput : {'raw_values', 'uniform_average'} or array-like
106+
Defines how multiple output values are aggregated.
107+
Array-like value defines weights used to average errors.
108+
If input is list then the shape must be (n_outputs,).
109+
'raw_values' :
110+
Returns a full set of errors in case of multioutput input.
111+
'uniform_average' :
112+
Errors of all outputs are averaged with uniform weight.
113+
compute : bool
114+
Whether to compute this result (default ``True``)
115+
116+
Returns
117+
-------
118+
loss : float or array-like of floats in the range [0, 1/eps]
119+
If multioutput is 'raw_values', then mean absolute percentage error
120+
is returned for each output separately.
121+
If multioutput is 'uniform_average' or ``None``, then the
122+
equally-weighted average of all output errors is returned.
123+
MAPE output is non-negative floating point. The best value is 0.0.
124+
But note the fact that bad predictions can lead to arbitrarily large
125+
MAPE values, especially if some y_true values are very close to zero.
126+
Note that we return a large value instead of `inf` when y_true is zero.
127+
"""
92128
_check_sample_weight(sample_weight)
93129
epsilon = np.finfo(np.float64).eps
94130
mape = abs(y_pred - y_true) / da.maximum(y_true, epsilon)

docs/source/modules/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ Regression Metrics
245245
:toctree: generated/
246246

247247
metrics.mean_absolute_error
248+
metrics.mean_absolute_percentage_error
248249
metrics.mean_squared_error
249250
metrics.mean_squared_log_error
250251
metrics.r2_score

tests/metrics/test_regression.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,28 @@
55
import sklearn.metrics
66

77
import dask_ml.metrics
8+
from dask_ml._compat import SK_024
9+
10+
_METRICS_TO_TEST = [
11+
"mean_squared_error",
12+
"mean_absolute_error",
13+
"r2_score",
14+
]
15+
16+
# mean_absolute_percentage_error() was added in scikit-learn 0.24.0
17+
if SK_024:
18+
_METRICS_TO_TEST.append("mean_absolute_percentage_error")
819

920

1021
@pytest.fixture(
11-
params=[
12-
"mean_squared_error",
13-
"mean_absolute_error",
14-
"mean_absolute_percentage_error",
15-
"r2_score",
16-
]
22+
params=_METRICS_TO_TEST
1723
)
1824
def metric_pairs(request):
1925
"""Pairs of (dask-ml, sklearn) regression metrics.
2026
2127
* mean_squared_error
2228
* mean_absolute_error
23-
* mean_absolute_percentage_error
29+
* mean_absolute_percentage_error (if scikit-learn >= 0.24.0)
2430
* r2_score
2531
"""
2632
return (

0 commit comments

Comments
 (0)