
Commit b89decd

precision recall
1 parent db1f304 commit b89decd

4 files changed: 55 additions, 727 deletions


pytorch_lightning/metrics/classification/precision_recall.py

Lines changed: 13 additions & 260 deletions
@@ -15,110 +15,15 @@
 
 import torch
 
-from pytorch_lightning.metrics.classification.stat_scores import StatScores
-from pytorch_lightning.metrics.functional.precision_recall import _precision_compute, _recall_compute
+from torchmetrics import Precision as _Precision
+from torchmetrics import Recall as _Recall
 
+from pytorch_lightning.utilities.deprecation import deprecated
 
-class Precision(StatScores):
-    r"""
-    Computes `Precision <https://en.wikipedia.org/wiki/Precision_and_recall>`_:
 
-    .. math:: \text{Precision} = \frac{\text{TP}}{\text{TP} + \text{FP}}
-
-    Where :math:`\text{TP}` and :math:`\text{FP}` represent the number of true positives and
-    false positives respecitively. With the use of ``top_k`` parameter, this metric can
-    generalize to Precision@K.
-
-    The reduction method (how the precision scores are aggregated) is controlled by the
-    ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the
-    multi-dimensional multi-class case.
-
-    Args:
-        num_classes:
-            Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods.
-        threshold:
-            Threshold probability value for transforming probability predictions to binary
-            (0,1) predictions, in the case of binary or multi-label inputs.
-        average:
-            Defines the reduction that is applied. Should be one of the following:
-
-            - ``'micro'`` [default]: Calculate the metric globally, accross all samples and classes.
-            - ``'macro'``: Calculate the metric for each class separately, and average the
-              metrics accross classes (with equal weights for each class).
-            - ``'weighted'``: Calculate the metric for each class separately, and average the
-              metrics accross classes, weighting each class by its support (``tp + fn``).
-            - ``'none'`` or ``None``: Calculate the metric for each class separately, and return
-              the metric for every class.
-            - ``'samples'``: Calculate the metric for each sample, and average the metrics
-              across samples (with equal weights for each sample).
-
-            Note that what is considered a sample in the multi-dimensional multi-class case
-            depends on the value of ``mdmc_average``.
-        multilabel:
-            .. warning :: This parameter is deprecated and has no effect. Will be removed in v1.4.0.
-
-        mdmc_average:
-            Defines how averaging is done for multi-dimensional multi-class inputs (on top of the
-            ``average`` parameter). Should be one of the following:
-
-            - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional
-              multi-class.
-
-            - ``'samplewise'``: In this case, the statistics are computed separately for each
-              sample on the ``N`` axis, and then averaged over samples.
-              The computation for each sample is done by treating the flattened extra axes ``...``
-              as the ``N`` dimension within the sample, and computing the metric for the sample based on that.
-
-            - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs
-              are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they
-              were ``(N_X, C)``. From here on the ``average`` parameter applies as usual.
-
-        ignore_index:
-            Integer specifying a target class to ignore. If given, this class index does not contribute
-            to the returned score, regardless of reduction method. If an index is ignored, and ``average=None``
-            or ``'none'``, the score for the ignored class will be returned as ``nan``.
-
-        top_k:
-            Number of highest probability entries for each sample to convert to 1s - relevant
-            only for inputs with probability predictions. If this parameter is set for multi-label
-            inputs, it will take precedence over ``threshold``. For (multi-dim) multi-class inputs,
-            this parameter defaults to 1.
-
-            Should be left unset (``None``) for inputs with label predictions.
-        is_multiclass:
-            Used only in certain special cases, where you want to treat inputs as a different type
-            than what they appear to be.
-
-        compute_on_step:
-            Forward only calls ``update()`` and return ``None`` if this is set to ``False``.
-        dist_sync_on_step:
-            Synchronize metric state across processes at each ``forward()``
-            before returning the value at the step
-        process_group:
-            Specify the process group on which synchronization is called.
-            default: ``None`` (which selects the entire world)
-        dist_sync_fn:
-            Callback that performs the allgather operation on the metric state. When ``None``, DDP
-            will be used to perform the allgather.
-
-    Raises:
-        ValueError:
-            If ``average`` is none of ``"micro"``, ``"macro"``, ``"weighted"``, ``"samples"``, ``"none"``, ``None``.
-
-    Example:
-
-        >>> from pytorch_lightning.metrics import Precision
-        >>> preds = torch.tensor([2, 0, 2, 1])
-        >>> target = torch.tensor([1, 1, 2, 0])
-        >>> precision = Precision(average='macro', num_classes=3)
-        >>> precision(preds, target)
-        tensor(0.1667)
-        >>> precision = Precision(average='micro')
-        >>> precision(preds, target)
-        tensor(0.2500)
-
-    """
+class Precision(_Precision):
 
+    @deprecated(target=_Precision, ver_deprecate="1.3.0", ver_remove="1.5.0")
     def __init__(
         self,
         num_classes: Optional[int] = None,
@@ -134,142 +39,17 @@ def __init__(
         process_group: Optional[Any] = None,
         dist_sync_fn: Callable = None,
     ):
-        allowed_average = ["micro", "macro", "weighted", "samples", "none", None]
-        if average not in allowed_average:
-            raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.")
-
-        super().__init__(
-            reduce="macro" if average in ["weighted", "none", None] else average,
-            mdmc_reduce=mdmc_average,
-            threshold=threshold,
-            top_k=top_k,
-            num_classes=num_classes,
-            is_multiclass=is_multiclass,
-            ignore_index=ignore_index,
-            compute_on_step=compute_on_step,
-            dist_sync_on_step=dist_sync_on_step,
-            process_group=process_group,
-            dist_sync_fn=dist_sync_fn,
-        )
-
-        self.average = average
-
-    def compute(self) -> torch.Tensor:
         """
-        Computes the precision score based on inputs passed in to ``update`` previously.
+        This implementation refers to :class:`~torchmetrics.Precision`.
 
-        Return:
-            The shape of the returned tensor depends on the ``average`` parameter
-
-            - If ``average in ['micro', 'macro', 'weighted', 'samples']``, a one-element tensor will be returned
-            - If ``average in ['none', None]``, the shape will be ``(C,)``, where ``C`` stands for the number
-              of classes
+        .. deprecated::
+            Use :class:`~torchmetrics.Precision`. Will be removed in v1.5.0.
         """
-        tp, fp, tn, fn = self._get_final_stats()
-        return _precision_compute(tp, fp, tn, fn, self.average, self.mdmc_reduce)
-
 
-class Recall(StatScores):
-    r"""
-    Computes `Recall <https://en.wikipedia.org/wiki/Precision_and_recall>`_:
 
-    .. math:: \text{Recall} = \frac{\text{TP}}{\text{TP} + \text{FN}}
-
-    Where :math:`\text{TP}` and :math:`\text{FN}` represent the number of true positives and
-    false negatives respecitively. With the use of ``top_k`` parameter, this metric can
-    generalize to Recall@K.
-
-    The reduction method (how the recall scores are aggregated) is controlled by the
-    ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the
-    multi-dimensional multi-class case.
-
-    Args:
-        num_classes:
-            Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods.
-        threshold:
-            Threshold probability value for transforming probability predictions to binary
-            (0,1) predictions, in the case of binary or multi-label inputs.
-        average:
-            Defines the reduction that is applied. Should be one of the following:
-
-            - ``'micro'`` [default]: Calculate the metric globally, accross all samples and classes.
-            - ``'macro'``: Calculate the metric for each class separately, and average the
-              metrics accross classes (with equal weights for each class).
-            - ``'weighted'``: Calculate the metric for each class separately, and average the
-              metrics accross classes, weighting each class by its support (``tp + fn``).
-            - ``'none'`` or ``None``: Calculate the metric for each class separately, and return
-              the metric for every class.
-            - ``'samples'``: Calculate the metric for each sample, and average the metrics
-              across samples (with equal weights for each sample).
-
-            Note that what is considered a sample in the multi-dimensional multi-class case
-            depends on the value of ``mdmc_average``.
-        multilabel:
-            .. warning :: This parameter is deprecated and has no effect. Will be removed in v1.4.0.
-
-        mdmc_average:
-            Defines how averaging is done for multi-dimensional multi-class inputs (on top of the
-            ``average`` parameter). Should be one of the following:
-
-            - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional
-              multi-class.
-
-            - ``'samplewise'``: In this case, the statistics are computed separately for each
-              sample on the ``N`` axis, and then averaged over samples.
-              The computation for each sample is done by treating the flattened extra axes ``...``
-              as the ``N`` dimension within the sample, and computing the metric for the sample based on that.
-
-            - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs
-              are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they
-              were ``(N_X, C)``. From here on the ``average`` parameter applies as usual.
-
-        ignore_index:
-            Integer specifying a target class to ignore. If given, this class index does not contribute
-            to the returned score, regardless of reduction method. If an index is ignored, and ``average=None``
-            or ``'none'``, the score for the ignored class will be returned as ``nan``.
-
-        top_k:
-            Number of highest probability entries for each sample to convert to 1s - relevant
-            only for inputs with probability predictions. If this parameter is set for multi-label
-            inputs, it will take precedence over ``threshold``. For (multi-dim) multi-class inputs,
-            this parameter defaults to 1.
-
-            Should be left unset (``None``) for inputs with label predictions.
-
-        is_multiclass:
-            Used only in certain special cases, where you want to treat inputs as a different type
-            than what they appear to be.
-
-        compute_on_step:
-            Forward only calls ``update()`` and return ``None`` if this is set to ``False``.
-        dist_sync_on_step:
-            Synchronize metric state across processes at each ``forward()``
-            before returning the value at the step
-        process_group:
-            Specify the process group on which synchronization is called.
-            default: ``None`` (which selects the entire world)
-        dist_sync_fn:
-            Callback that performs the allgather operation on the metric state. When ``None``, DDP
-            will be used to perform the allgather.
-
-    Raises:
-        ValueError:
-            If ``average`` is none of ``"micro"``, ``"macro"``, ``"weighted"``, ``"samples"``, ``"none"``, ``None``.
-
-    Example:
-
-        >>> from pytorch_lightning.metrics import Recall
-        >>> preds = torch.tensor([2, 0, 2, 1])
-        >>> target = torch.tensor([1, 1, 2, 0])
-        >>> recall = Recall(average='macro', num_classes=3)
-        >>> recall(preds, target)
-        tensor(0.3333)
-        >>> recall = Recall(average='micro')
-        >>> recall(preds, target)
-        tensor(0.2500)
-
-    """
+class Recall(_Recall):
 
+    @deprecated(target=_Recall, ver_deprecate="1.3.0", ver_remove="1.5.0")
     def __init__(
         self,
         num_classes: Optional[int] = None,
@@ -285,36 +65,9 @@ def __init__(
         process_group: Optional[Any] = None,
         dist_sync_fn: Callable = None,
     ):
-        allowed_average = ["micro", "macro", "weighted", "samples", "none", None]
-        if average not in allowed_average:
-            raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.")
-
-        super().__init__(
-            reduce="macro" if average in ["weighted", "none", None] else average,
-            mdmc_reduce=mdmc_average,
-            threshold=threshold,
-            top_k=top_k,
-            num_classes=num_classes,
-            is_multiclass=is_multiclass,
-            ignore_index=ignore_index,
-            compute_on_step=compute_on_step,
-            dist_sync_on_step=dist_sync_on_step,
-            process_group=process_group,
-            dist_sync_fn=dist_sync_fn,
-        )
-
-        self.average = average
-
-    def compute(self) -> torch.Tensor:
         """
-        Computes the recall score based on inputs passed in to ``update`` previously.
-
-        Return:
-            The shape of the returned tensor depends on the ``average`` parameter
+        This implementation refers to :class:`~torchmetrics.Recall`.
 
-            - If ``average in ['micro', 'macro', 'weighted', 'samples']``, a one-element tensor will be returned
-            - If ``average in ['none', None]``, the shape will be ``(C,)``, where ``C`` stands for the number
-              of classes
+        .. deprecated::
+            Use :class:`~torchmetrics.Recall`. Will be removed in v1.5.0.
         """
-        tp, fp, tn, fn = self._get_final_stats()
-        return _recall_compute(tp, fp, tn, fn, self.average, self.mdmc_reduce)
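
The wrapper classes keep the constructor arguments listed in the removed docstrings, so the old examples carry over once the import is switched to torchmetrics. A minimal migration sketch, assuming only that torchmetrics is installed alongside this Lightning version; the expected outputs are the ones quoted in the docstrings deleted above:

    >>> import torch
    >>> from torchmetrics import Precision, Recall
    >>> preds = torch.tensor([2, 0, 2, 1])
    >>> target = torch.tensor([1, 1, 2, 0])
    >>> Precision(average='macro', num_classes=3)(preds, target)
    tensor(0.1667)
    >>> Recall(average='macro', num_classes=3)(preds, target)
    tensor(0.3333)

Until v1.5.0 the deprecated ``pytorch_lightning.metrics.Precision`` and ``Recall`` remain importable; the ``@deprecated`` decorator forwards their ``__init__`` arguments to the torchmetrics classes and is expected to emit a deprecation warning.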

pytorch_lightning/metrics/functional/average_precision.py

Lines changed: 2 additions & 2 deletions
@@ -14,12 +14,12 @@
 from typing import List, Optional, Sequence, Union
 
 import torch
-from torchmetrics.functional import accuracy as _accuracy
+from torchmetrics.functional import average_precision as _average_precision
 
 from pytorch_lightning.utilities.deprecation import deprecated
 
 
-@deprecated(target=_accuracy, ver_deprecate="1.3.0", ver_remove="1.5.0")
+@deprecated(target=_average_precision, ver_deprecate="1.3.0", ver_remove="1.5.0")
 def average_precision(
     preds: torch.Tensor,
     target: torch.Tensor,
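
The hunk above corrects the forwarding target: the deprecated functional ``average_precision`` previously pointed at torchmetrics' ``accuracy`` and now forwards to ``average_precision``. A minimal sketch of calling the torchmetrics replacement directly; the ``pos_label`` keyword is an assumption taken from the torchmetrics functional API of this era, and with these scores the single negative sample ranks last, so the average precision is exactly 1:

    >>> import torch
    >>> from torchmetrics.functional import average_precision
    >>> pred = torch.tensor([0.0, 1.0, 2.0, 3.0])
    >>> target = torch.tensor([0, 1, 1, 1])
    >>> average_precision(pred, target, pos_label=1)
    tensor(1.)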
