Lightning-AI
diff --git a/‎CHANGELOG.md‎
Lines changed: 5 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎docs/source/metrics.rst‎
Lines changed: 12 additions & 2 deletions b/‎docs/source/metrics.rst‎
Lines changed: 12 additions & 2 deletions
diff --git a/‎pytorch_lightning/metrics/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎pytorch_lightning/metrics/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎pytorch_lightning/metrics/classification/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎pytorch_lightning/metrics/classification/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎pytorch_lightning/metrics/classification/accuracy.py‎
Lines changed: 66 additions & 24 deletions b/‎pytorch_lightning/metrics/classification/accuracy.py‎
Lines changed: 66 additions & 24 deletions
diff --git a/‎pytorch_lightning/metrics/classification/hamming_distance.py‎
Lines changed: 105 additions & 0 deletions b/‎pytorch_lightning/metrics/classification/hamming_distance.py‎
Lines changed: 105 additions & 0 deletions
diff --git a/‎pytorch_lightning/metrics/classification/helpers.py‎
Lines changed: 5 additions & 0 deletions b/‎pytorch_lightning/metrics/classification/helpers.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎pytorch_lightning/metrics/functional/__init__.py‎
Lines changed: 2 additions & 1 deletion b/‎pytorch_lightning/metrics/functional/__init__.py‎
Lines changed: 2 additions & 1 deletion
@@ -9,6 +9,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Added
 
+- `Accuracy` metric now generalizes to Top-k accuracy for (multi-dimensional) multi-class inputs using the `top_k` parameter ([#4838](https://github.com/PyTorchLightning/pytorch-lightning/pull/4838))
+
+- `Accuracy` metric now enables the computation of subset accuracy for multi-label or multi-dimensional multi-class inputs with the `subset_accuracy` parameter ([#4838](https://github.com/PyTorchLightning/pytorch-lightning/pull/4838))
+
+- `HammingDistance` metric to compute the Hamming distance (loss) ([#4838](https://github.com/PyTorchLightning/pytorch-lightning/pull/4838))
 
 ### Changed
 
 
@@ -292,6 +292,12 @@ FBeta
 .. autoclass:: pytorch_lightning.metrics.classification.FBeta
     :noindex:
 
+Hamming Distance
+~~~~~~~~~~~~~~~~
+
+.. autoclass:: pytorch_lightning.metrics.classification.HammingDistance
+    :noindex:
+
 Precision
 ~~~~~~~~~
 
@@ -323,10 +329,9 @@ Functional Metrics (Classification)
 accuracy [func]
 ~~~~~~~~~~~~~~~
 
-.. autofunction:: pytorch_lightning.metrics.functional.classification.accuracy
+.. autofunction:: pytorch_lightning.metrics.functional.accuracy
     :noindex:
 
-
 auc [func]
 ~~~~~~~~~~
 
@@ -382,6 +387,11 @@ fbeta [func]
 .. autofunction:: pytorch_lightning.metrics.functional.fbeta
     :noindex:
 
+hamming_distance [func]
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autofunction:: pytorch_lightning.metrics.functional.hamming_distance
+    :noindex:
 
 iou [func]
 ~~~~~~~~~~
 
@@ -15,6 +15,7 @@
 
 from pytorch_lightning.metrics.classification import (  # noqa: F401
     Accuracy,
+    HammingDistance,
     Precision,
     Recall,
     ConfusionMatrix,
 
@@ -15,6 +15,7 @@
 from pytorch_lightning.metrics.classification.average_precision import AveragePrecision  # noqa: F401
 from pytorch_lightning.metrics.classification.confusion_matrix import ConfusionMatrix  # noqa: F401
 from pytorch_lightning.metrics.classification.f_beta import FBeta, Fbeta, F1  # noqa: F401
+from pytorch_lightning.metrics.classification.hamming_distance import HammingDistance  # noqa: F401
 from pytorch_lightning.metrics.classification.precision_recall import Precision, Recall  # noqa: F401
 from pytorch_lightning.metrics.classification.precision_recall_curve import PrecisionRecallCurve  # noqa: F401
 from pytorch_lightning.metrics.classification.roc import ROC  # noqa: F401
@@ -16,35 +16,57 @@
 import torch
 
 from pytorch_lightning.metrics.metric import Metric
-from pytorch_lightning.metrics.utils import _input_format_classification
+from pytorch_lightning.metrics.functional.accuracy import _accuracy_update, _accuracy_compute
 
 
 class Accuracy(Metric):
     r"""
     Computes `Accuracy <https://en.wikipedia.org/wiki/Accuracy_and_precision>`_:
 
-    .. math:: \text{Accuracy} = \frac{1}{N}\sum_i^N 1(y_i = \hat{y_i})
+    .. math::
+        \text{Accuracy} = \frac{1}{N}\sum_i^N 1(y_i = \hat{y}_i)
 
     Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a
-    tensor of predictions.  Works with binary, multiclass, and multilabel
-    data.  Accepts logits from a model output or integer class values in
-    prediction.  Works with multi-dimensional preds and target.
+    tensor of predictions.
 
-    Forward accepts
+    For multi-class and multi-dimensional multi-class data with probability predictions, the
+    parameter ``top_k`` generalizes this metric to a Top-K accuracy metric: for each sample the
+    top-K highest probability items are considered to find the correct label.
 
-    - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes
-    - ``target`` (long tensor): ``(N, ...)``
+    For multi-label and multi-dimensional multi-class inputs, this metric computes the "global"
+    accuracy by default, which counts all labels or sub-samples separately. This can be
+    changed to subset accuracy (which requires all labels or sub-samples in the sample to
+    be correctly predicted) by setting ``subset_accuracy=True``.
 
-    If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument.
-    This is the case for binary and multi-label logits.
-
-    If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``.
+    Accepts all input types listed in :ref:`metrics:Input types`.
 
     Args:
         threshold:
-            Threshold value for binary or multi-label logits. default: 0.5
+            Threshold probability value for transforming probability predictions to binary
+            `(0,1)` predictions, in the case of binary or multi-label inputs.
+        top_k:
+            Number of highest probability predictions considered to find the correct label, relevant
+            only for (multi-dimensional) multi-class inputs with probability predictions. The
+            default value (``None``) will be interpreted as 1 for these inputs.
+
+            Should be left at default (``None``) for all other types of inputs.
+        subset_accuracy:
+            Whether to compute subset accuracy for multi-label and multi-dimensional
+            multi-class inputs (has no effect for other input types).
+
+            For multi-label inputs, if the parameter is set to `True`, then all labels for
+            each sample must be correctly predicted for the sample to count as correct. If it
+            is set to `False`, then all labels are counted separately - this is equivalent to
+            flattening inputs beforehand (i.e. ``preds = preds.flatten()`` and same for ``target``).
+
+            For multi-dimensional multi-class inputs, if the parameter is set to `True`, then all
+            sub-samples (on the extra axis) must be correct for the sample to be counted as correct.
+            If it is set to `False`, then all sub-samples are counted separately - this is equivalent,
+            in the case of label predictions, to flattening the inputs beforehand (i.e.
+            ``preds = preds.flatten()`` and same for ``target``). Note that the ``top_k`` parameter
+            still applies in both cases, if set.
         compute_on_step:
-            Forward only calls ``update()`` and return None if this is set to False. default: True
+            Forward only calls ``update()`` and returns ``None`` if this is set to ``False``.
         dist_sync_on_step:
             Synchronize metric state across processes at each ``forward()``
             before returning the value at the step. default: False
@@ -63,10 +85,19 @@ class Accuracy(Metric):
         >>> accuracy(preds, target)
         tensor(0.5000)
 
+        >>> target = torch.tensor([0, 1, 2])
+        >>> preds = torch.tensor([[0.1, 0.9, 0], [0.3, 0.1, 0.6], [0.2, 0.5, 0.3]])
+        >>> accuracy = Accuracy(top_k=2)
+        >>> accuracy(preds, target)
+        tensor(0.6667)
+
     """
+
     def __init__(
         self,
         threshold: float = 0.5,
+        top_k: Optional[int] = None,
+        subset_accuracy: bool = False,
         compute_on_step: bool = True,
         dist_sync_on_step: bool = False,
         process_group: Optional[Any] = None,
@@ -82,24 +113,35 @@ def __init__(
         self.add_state("correct", default=torch.tensor(0), dist_reduce_fx="sum")
         self.add_state("total", default=torch.tensor(0), dist_reduce_fx="sum")
 
+        if not 0 <= threshold <= 1:
+            raise ValueError("The `threshold` should lie in the [0,1] interval.")
+
+        if top_k is not None and top_k <= 0:
+            raise ValueError("The `top_k` should be an integer larger than 0.")
+
         self.threshold = threshold
+        self.top_k = top_k
+        self.subset_accuracy = subset_accuracy
 
     def update(self, preds: torch.Tensor, target: torch.Tensor):
         """
-        Update state with predictions and targets.
+        Update state with predictions and targets. See :ref:`metrics:Input types` for more information
+        on input types.
 
         Args:
-            preds: Predictions from model
-            target: Ground truth values
+            preds: Predictions from model (probabilities, or labels)
+            target: Ground truth labels
         """
-        preds, target = _input_format_classification(preds, target, self.threshold)
-        assert preds.shape == target.shape
 
-        self.correct += torch.sum(preds == target)
-        self.total += target.numel()
+        correct, total = _accuracy_update(
+            preds, target, threshold=self.threshold, top_k=self.top_k, subset_accuracy=self.subset_accuracy
+        )
+
+        self.correct += correct
+        self.total += total
 
-    def compute(self):
+    def compute(self) -> torch.Tensor:
         """
-        Computes accuracy over state.
+        Computes accuracy based on inputs passed in to ``update`` previously.
         """
-        return self.correct.float() / self.total
+        return _accuracy_compute(self.correct, self.total)
@@ -0,0 +1,105 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, Callable, Optional
+
+import torch
+from pytorch_lightning.metrics.metric import Metric
+from pytorch_lightning.metrics.functional.hamming_distance import _hamming_distance_update, _hamming_distance_compute
+
+
+class HammingDistance(Metric):
+    r"""
+    Computes the average `Hamming distance <https://en.wikipedia.org/wiki/Hamming_distance>`_ (also
+    known as Hamming loss) between targets and predictions:
+
+    .. math::
+        \text{Hamming distance} = \frac{1}{N \cdot L}\sum_i^N \sum_l^L 1(y_{il} \neq \hat{y}_{il})
+
+    Where :math:`y` is a tensor of target values, :math:`\hat{y}` is a tensor of predictions,
+    and :math:`\bullet_{il}` refers to the :math:`l`-th label of the :math:`i`-th sample of that
+    tensor.
+
+    This is the same as ``1-accuracy`` for binary data, while for all other types of inputs it
+    treats each possible label separately - meaning that, for example, multi-class data is
+    treated as if it were multi-label.
+
+    Accepts all input types listed in :ref:`metrics:Input types`.
+
+    Args:
+        threshold:
+            Threshold probability value for transforming probability predictions to binary
+            `(0,1)` predictions, in the case of binary or multi-label inputs.
+        compute_on_step:
+            Forward only calls ``update()`` and returns ``None`` if this is set to ``False``.
+        dist_sync_on_step:
+            Synchronize metric state across processes at each ``forward()``
+            before returning the value at the step.
+        process_group:
+            Specify the process group on which synchronization is called. default: None (which selects the entire world)
+        dist_sync_fn:
+            Callback that performs the allgather operation on the metric state. When ``None``, DDP
+            will be used to perform the all gather.
+
+    Example:
+
+        >>> from pytorch_lightning.metrics import HammingDistance
+        >>> target = torch.tensor([[0, 1], [1, 1]])
+        >>> preds = torch.tensor([[0, 1], [0, 1]])
+        >>> hamming_distance = HammingDistance()
+        >>> hamming_distance(preds, target)
+        tensor(0.2500)
+
+    """
+
+    def __init__(
+        self,
+        threshold: float = 0.5,
+        compute_on_step: bool = True,
+        dist_sync_on_step: bool = False,
+        process_group: Optional[Any] = None,
+        dist_sync_fn: Callable = None,
+    ):
+        super().__init__(
+            compute_on_step=compute_on_step,
+            dist_sync_on_step=dist_sync_on_step,
+            process_group=process_group,
+            dist_sync_fn=dist_sync_fn,
+        )
+
+        self.add_state("correct", default=torch.tensor(0), dist_reduce_fx="sum")
+        self.add_state("total", default=torch.tensor(0), dist_reduce_fx="sum")
+
+        if not 0 <= threshold <= 1:
+            raise ValueError("The `threshold` should lie in the [0,1] interval.")
+        self.threshold = threshold
+
+    def update(self, preds: torch.Tensor, target: torch.Tensor):
+        """
+        Update state with predictions and targets. See :ref:`metrics:Input types` for more information
+        on input types.
+
+        Args:
+            preds: Predictions from model (probabilities, or labels)
+            target: Ground truth labels
+        """
+        correct, total = _hamming_distance_update(preds, target, self.threshold)
+
+        self.correct += correct
+        self.total += total
+
+    def compute(self) -> torch.Tensor:
+        """
+        Computes the Hamming distance based on inputs passed in to ``update`` previously.
+        """
+        return _hamming_distance_compute(self.correct, self.total)
@@ -405,6 +405,11 @@ def _input_format_classification(
     else:
         preds, target = preds.squeeze(), target.squeeze()
 
+    # Convert half-precision tensors to full precision, as not all ops are supported
+    # for float16 (for example, min() is not)
+    if preds.dtype == torch.float16:
+        preds = preds.float()
+
     case = _check_classification_inputs(
         preds,
         target,
 
@@ -13,7 +13,6 @@
 # limitations under the License.
 from pytorch_lightning.metrics.functional.average_precision import average_precision  # noqa: F401
 from pytorch_lightning.metrics.functional.classification import (  # noqa: F401
-    accuracy,
     auc,
     auroc,
     dice_score,
@@ -32,8 +31,10 @@
 )
 from pytorch_lightning.metrics.functional.confusion_matrix import confusion_matrix  # noqa: F401
 # TODO: unify metrics between class and functional, add below
+from pytorch_lightning.metrics.functional.accuracy import accuracy  # noqa: F401
 from pytorch_lightning.metrics.functional.explained_variance import explained_variance  # noqa: F401
 from pytorch_lightning.metrics.functional.f_beta import fbeta, f1  # noqa: F401
+from pytorch_lightning.metrics.functional.hamming_distance import hamming_distance  # noqa: F401
 from pytorch_lightning.metrics.functional.mean_absolute_error import mean_absolute_error  # noqa: F401
 from pytorch_lightning.metrics.functional.mean_squared_error import mean_squared_error  # noqa: F401
 from pytorch_lightning.metrics.functional.mean_squared_log_error import mean_squared_log_error  # noqa: F401