diff --git a/doc/metrics.rst b/doc/metrics.rst
index f1f6a7514..8b9474ca3 100644
--- a/doc/metrics.rst
+++ b/doc/metrics.rst
@@ -44,3 +44,13 @@ of the classes while keeping these accuracies balanced.
 The :func:`make_index_balanced_accuracy` :cite:`garcia2012effectiveness` can
 wrap any metric and give more importance to a specific class using the
 parameter ``alpha``.
+
+.. _classification_report:
+
+Summary of important metrics
+----------------------------
+
+The :func:`classification_report_imbalanced` will compute a set of metrics
+per class and summarize them in a table. The parameter ``output_dict`` allows
+getting the report either as a string or as a Python dictionary. This
+dictionary can then be reused to create a pandas DataFrame, for instance.
diff --git a/doc/whats_new/v0.7.rst b/doc/whats_new/v0.7.rst
index ab9fa3943..ad41ce292 100644
--- a/doc/whats_new/v0.7.rst
+++ b/doc/whats_new/v0.7.rst
@@ -71,6 +71,11 @@ Enhancements
 - Added Random Over-Sampling Examples (ROSE) class.
   :pr:`754` by :user:`Andrea Lorenzon`.
 
+- Add option `output_dict` in
+  :func:`imblearn.metrics.classification_report_imbalanced` to return a
+  dictionary instead of a string.
+  :pr:`xx` by :user:`Guillaume Lemaitre`.
+
 Deprecation
 ...........
 
diff --git a/imblearn/metrics/_classification.py b/imblearn/metrics/_classification.py
index 3b8dc7256..870450330 100644
--- a/imblearn/metrics/_classification.py
+++ b/imblearn/metrics/_classification.py
@@ -806,6 +806,8 @@ def classification_report_imbalanced(
     sample_weight=None,
     digits=2,
     alpha=0.1,
+    output_dict=False,
+    zero_division="warn",
 ):
     """Build a classification report based on metrics used with imbalanced
     dataset
@@ -816,38 +818,59 @@ def classification_report_imbalanced(
     mean, and index balanced accuracy of the
     geometric mean.
 
+    Read more in the :ref:`User Guide <classification_report>`.
+
     Parameters
     ----------
-    y_true : ndarray, shape (n_samples, )
+    y_true : 1d array-like, or label indicator array / sparse matrix
         Ground truth (correct) target values.
 
-    y_pred : ndarray, shape (n_samples, )
+    y_pred : 1d array-like, or label indicator array / sparse matrix
         Estimated targets as returned by a classifier.
 
-    labels : list, optional
-        The set of labels to include when ``average != 'binary'``, and their
-        order if ``average is None``. Labels present in the data can be
-        excluded, for example to calculate a multiclass average ignoring a
-        majority negative class, while labels not present in the data will
-        result in 0 components in a macro average.
+    labels : array-like of shape (n_labels,), default=None
+        Optional list of label indices to include in the report.
 
-    target_names : list of strings, optional
+    target_names : list of str of shape (n_labels,), default=None
         Optional display names matching the labels (same order).
 
-    sample_weight : ndarray, shape (n_samples, )
+    sample_weight : array-like of shape (n_samples,), default=None
         Sample weights.
 
-    digits : int, optional (default=2)
-        Number of digits for formatting output floating point values
+    digits : int, default=2
+        Number of digits for formatting output floating point values.
+        When ``output_dict`` is ``True``, this will be ignored and the
+        returned values will not be rounded.
 
-    alpha : float, optional (default=0.1)
+    alpha : float, default=0.1
         Weighting factor.
 
+    output_dict : bool, default=False
+        If True, return output as dict.
+
+        .. versionadded:: 0.7
+
+    zero_division : "warn" or {0, 1}, default="warn"
+        Sets the value to return when there is a zero division. If set to
+        "warn", this acts as 0, but warnings are also raised.
+
+        .. versionadded:: 0.7
+
     Returns
     -------
-    report : string
+    report : string / dict
         Text summary of the precision, recall, specificity, geometric mean,
         and index balanced accuracy.
+        Dictionary returned if output_dict is True. Dictionary has the
+        following structure::
+
+            {'label 1': {'pre':0.5,
+                         'rec':1.0,
+                         ...
+                        },
+             'label 2': { ... },
+             ...
+            }
 
     Examples
     --------
@@ -883,7 +906,7 @@ class 2 1.00 0.67 1.00 0.80 0.82 0.64\
     last_line_heading = "avg / total"
 
     if target_names is None:
-        target_names = ["%s" % l for l in labels]
+        target_names = [f"{label}" for label in labels]
     name_width = max(len(cn) for cn in target_names)
     width = max(name_width, len(last_line_heading), digits)
 
@@ -905,6 +928,7 @@ class 2 1.00 0.67 1.00 0.80 0.82 0.64\
         labels=labels,
         average=None,
         sample_weight=sample_weight,
+        zero_division=zero_division,
     )
     # Specificity
     specificity = specificity_score(
@@ -934,33 +958,50 @@ class 2 1.00 0.67 1.00 0.80 0.82 0.64\
         sample_weight=sample_weight,
     )
 
+    report_dict = {}
     for i, label in enumerate(labels):
+        report_dict_label = {}
         values = [target_names[i]]
-        for v in (
-            precision[i],
-            recall[i],
-            specificity[i],
-            f1[i],
-            geo_mean[i],
-            iba[i],
+        for score_name, score_value in zip(
+            headers[1:-1],
+            [
+                precision[i],
+                recall[i],
+                specificity[i],
+                f1[i],
+                geo_mean[i],
+                iba[i],
+            ]
         ):
-            values += ["{0:0.{1}f}".format(v, digits)]
-        values += ["{}".format(support[i])]
+            values += ["{0:0.{1}f}".format(score_value, digits)]
+            report_dict_label[score_name] = score_value
+        values += [f"{support[i]}"]
+        report_dict_label[headers[-1]] = support[i]
         report += fmt % tuple(values)
+        report_dict[label] = report_dict_label
+
     report += "\n"
 
     # compute averages
     values = [last_line_heading]
-    for v in (
-        np.average(precision, weights=support),
-        np.average(recall, weights=support),
-        np.average(specificity, weights=support),
-        np.average(f1, weights=support),
-        np.average(geo_mean, weights=support),
-        np.average(iba, weights=support),
+    for score_name, score_value in zip(
+        headers[1:-1],
+        [
+            np.average(precision, weights=support),
+            np.average(recall, weights=support),
+            np.average(specificity, weights=support),
+            np.average(f1, weights=support),
+            np.average(geo_mean, weights=support),
+            np.average(iba, weights=support),
+        ]
     ):
-        values += ["{0:0.{1}f}".format(v, digits)]
-    values += ["{}".format(np.sum(support))]
+        values += ["{0:0.{1}f}".format(score_value, digits)]
+        report_dict[f"avg_{score_name}"] = score_value
+    values += [f"{np.sum(support)}"]
     report += fmt % tuple(values)
+    report_dict["total_support"] = np.sum(support)
+
+    if output_dict:
+        return report_dict
 
     return report
diff --git a/imblearn/metrics/tests/test_classification.py b/imblearn/metrics/tests/test_classification.py
index b4f34a272..b6db641ce 100644
--- a/imblearn/metrics/tests/test_classification.py
+++ b/imblearn/metrics/tests/test_classification.py
@@ -466,3 +466,35 @@ def test_iba_error_y_score_prob_error(score_loss):
     aps = make_index_balanced_accuracy(alpha=0.5, squared=True)(score_loss)
     with pytest.raises(AttributeError):
         aps(y_true, y_pred)
+
+
+def test_classification_report_imbalanced_dict():
+    iris = datasets.load_iris()
+    y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)
+
+    report = classification_report_imbalanced(
+        y_true,
+        y_pred,
+        labels=np.arange(len(iris.target_names)),
+        target_names=iris.target_names,
+        output_dict=True,
+    )
+    outer_keys = set(report.keys())
+    inner_keys = set(report[0].keys())
+
+    expected_outer_keys = {
+        0,
+        1,
+        2,
+        "avg_pre",
+        "avg_rec",
+        "avg_spe",
+        "avg_f1",
+        "avg_geo",
+        "avg_iba",
+        "total_support",
+    }
+    expected_inner_keys = {'spe', 'f1', 'sup', 'rec', 'geo', 'iba', 'pre'}
+
+    assert outer_keys == expected_outer_keys
+    assert inner_keys == expected_inner_keys